From 254416aae70ab2e6b57fd79782c8a67196234d02 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Wed, 9 Dec 2009 16:43:52 -0500 Subject: wireless: report reasonable bitrate for MCS rates through wext Previously, cfg80211 had reported "0" for MCS (i.e. 802.11n) bitrates through the wireless extensions interface. However, nl80211 was converting MCS rates into a reasonable bitrate number. This patch moves the nl80211 code to cfg80211 where it is now shared between both the nl80211 interface and the wireless extensions interface. Signed-off-by: John W. Linville --- net/wireless/core.h | 2 ++ net/wireless/nl80211.c | 37 ++----------------------------------- net/wireless/util.c | 33 +++++++++++++++++++++++++++++++++ net/wireless/wext-compat.c | 5 +---- 4 files changed, 38 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/wireless/core.h b/net/wireless/core.h index 4ef3efc94106..35b712127143 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -378,6 +378,8 @@ int rdev_set_freq(struct cfg80211_registered_device *rdev, struct wireless_dev *for_wdev, int freq, enum nl80211_channel_type channel_type); +u16 cfg80211_calculate_bitrate(struct rate_info *rate); + #ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS #define CFG80211_DEV_WARN_ON(cond) WARN_ON(cond) #else diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a6028433e3a0..7cb0d647fc34 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1637,39 +1637,6 @@ static int parse_station_flags(struct genl_info *info, return 0; } -static u16 nl80211_calculate_bitrate(struct rate_info *rate) -{ - int modulation, streams, bitrate; - - if (!(rate->flags & RATE_INFO_FLAGS_MCS)) - return rate->legacy; - - /* the formula below does only work for MCS values smaller than 32 */ - if (rate->mcs >= 32) - return 0; - - modulation = rate->mcs & 7; - streams = (rate->mcs >> 3) + 1; - - bitrate = (rate->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH) ? - 13500000 : 6500000; - - if (modulation < 4) - bitrate *= (modulation + 1); - else if (modulation == 4) - bitrate *= (modulation + 2); - else - bitrate *= (modulation + 3); - - bitrate *= streams; - - if (rate->flags & RATE_INFO_FLAGS_SHORT_GI) - bitrate = (bitrate / 9) * 10; - - /* do NOT round down here */ - return (bitrate + 50000) / 100000; -} - static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct net_device *dev, u8 *mac_addr, struct station_info *sinfo) @@ -1716,8 +1683,8 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, if (!txrate) goto nla_put_failure; - /* nl80211_calculate_bitrate will return 0 for mcs >= 32 */ - bitrate = nl80211_calculate_bitrate(&sinfo->txrate); + /* cfg80211_calculate_bitrate will return 0 for mcs >= 32 */ + bitrate = cfg80211_calculate_bitrate(&sinfo->txrate); if (bitrate > 0) NLA_PUT_U16(msg, NL80211_RATE_INFO_BITRATE, bitrate); diff --git a/net/wireless/util.c b/net/wireless/util.c index 59361fdcb5d0..a3c841a255db 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -720,3 +720,36 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, return err; } + +u16 cfg80211_calculate_bitrate(struct rate_info *rate) +{ + int modulation, streams, bitrate; + + if (!(rate->flags & RATE_INFO_FLAGS_MCS)) + return rate->legacy; + + /* the formula below does only work for MCS values smaller than 32 */ + if (rate->mcs >= 32) + return 0; + + modulation = rate->mcs & 7; + streams = (rate->mcs >> 3) + 1; + + bitrate = (rate->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH) ? 
+ 13500000 : 6500000; + + if (modulation < 4) + bitrate *= (modulation + 1); + else if (modulation == 4) + bitrate *= (modulation + 2); + else + bitrate *= (modulation + 3); + + bitrate *= streams; + + if (rate->flags & RATE_INFO_FLAGS_SHORT_GI) + bitrate = (bitrate / 9) * 10; + + /* do NOT round down here */ + return (bitrate + 50000) / 100000; +} diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 584eb4826e02..2fa8de1140e9 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1256,10 +1256,7 @@ int cfg80211_wext_giwrate(struct net_device *dev, if (!(sinfo.filled & STATION_INFO_TX_BITRATE)) return -EOPNOTSUPP; - rate->value = 0; - - if (!(sinfo.txrate.flags & RATE_INFO_FLAGS_MCS)) - rate->value = 100000 * sinfo.txrate.legacy; + rate->value = 100000 * cfg80211_calculate_bitrate(&sinfo.txrate); return 0; } -- cgit v1.2.2 From abe60632f311d515b082b450504ee24006023951 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 25 Nov 2009 17:46:18 +0100 Subject: mac80211: make station management completely depend on vif The station management currently uses the virtual interface, but you cannot add the same station to multiple virtual interfaces if you're communicating with it in multiple ways. This restriction should be lifted so that in the future we can, for instance, support bluetooth 3 with an access point that mac80211 is already associated to. We can do that by requiring all sta_info_get users to provide the virtual interface and making the RX code aware that an address may match more than one station struct. Thanks to the previous patches this one isn't all that large and except for the RX and TX status paths changes has low complexity. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-rx.c | 3 +- net/mac80211/agg-tx.c | 4 +- net/mac80211/cfg.c | 25 ++++------ net/mac80211/ibss.c | 2 +- net/mac80211/key.c | 2 +- net/mac80211/mesh_hwmp.c | 2 +- net/mac80211/mesh_plink.c | 4 +- net/mac80211/mlme.c | 10 ++-- net/mac80211/rx.c | 119 +++++++++++++++++++++++++--------------------- net/mac80211/sta_info.c | 18 ++++--- net/mac80211/sta_info.h | 29 ++++++++++- net/mac80211/status.c | 8 ++-- net/mac80211/tx.c | 9 ++-- 13 files changed, 134 insertions(+), 101 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 51c7dc3c4c3b..f16d49d474b4 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -83,12 +83,11 @@ void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 initiator, u16 reason) { - struct ieee80211_local *local = sdata->local; struct sta_info *sta; rcu_read_lock(); - sta = sta_info_get(local, ra); + sta = sta_info_get(sdata, ra); if (!sta) { rcu_read_unlock(); return; diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 5e3a7eccef5a..b05de532c379 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -441,7 +441,7 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) } rcu_read_lock(); - sta = sta_info_get(local, ra); + sta = sta_info_get(sdata, ra); if (!sta) { rcu_read_unlock(); #ifdef CONFIG_MAC80211_HT_DEBUG @@ -564,7 +564,7 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) #endif /* CONFIG_MAC80211_HT_DEBUG */ rcu_read_lock(); - sta = sta_info_get(local, ra); + sta = sta_info_get(sdata, ra); if (!sta) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Could not find station: 
%pM\n", ra); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 93ee1fd5c08d..14e1f4015a72 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -150,7 +150,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, rcu_read_lock(); if (mac_addr) { - sta = sta_info_get(sdata->local, mac_addr); + sta = sta_info_get(sdata, mac_addr); if (!sta) { ieee80211_key_free(key); err = -ENOENT; @@ -181,7 +181,7 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, if (mac_addr) { ret = -ENOENT; - sta = sta_info_get(sdata->local, mac_addr); + sta = sta_info_get(sdata, mac_addr); if (!sta) goto out_unlock; @@ -228,7 +228,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, rcu_read_lock(); if (mac_addr) { - sta = sta_info_get(sdata->local, mac_addr); + sta = sta_info_get(sdata, mac_addr); if (!sta) goto out; @@ -414,15 +414,13 @@ static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev, u8 *mac, struct station_info *sinfo) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sta_info *sta; int ret = -ENOENT; rcu_read_lock(); - /* XXX: verify sta->dev == dev */ - - sta = sta_info_get(local, mac); + sta = sta_info_get(sdata, mac); if (sta) { ret = 0; sta_set_sinfo(sta, sinfo); @@ -778,8 +776,7 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, if (mac) { rcu_read_lock(); - /* XXX: get sta belonging to dev */ - sta = sta_info_get(local, mac); + sta = sta_info_get(sdata, mac); if (!sta) { rcu_read_unlock(); return -ENOENT; @@ -800,14 +797,14 @@ static int ieee80211_change_station(struct wiphy *wiphy, u8 *mac, struct station_parameters *params) { + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; struct ieee80211_sub_if_data *vlansdata; rcu_read_lock(); - /* XXX: get sta belonging to dev */ - sta = sta_info_get(local, mac); + sta = sta_info_get(sdata, mac); if (!sta) { rcu_read_unlock(); return -ENOENT; @@ -846,7 +843,6 @@ static int ieee80211_change_station(struct wiphy *wiphy, static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, u8 *dst, u8 *next_hop) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; struct sta_info *sta; @@ -855,7 +851,7 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); - sta = sta_info_get(local, next_hop); + sta = sta_info_get(sdata, next_hop); if (!sta) { rcu_read_unlock(); return -ENOENT; @@ -894,7 +890,6 @@ static int ieee80211_change_mpath(struct wiphy *wiphy, struct net_device *dev, u8 *dst, u8 *next_hop) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; struct sta_info *sta; @@ -903,7 +898,7 @@ static int ieee80211_change_mpath(struct wiphy *wiphy, rcu_read_lock(); - sta = sta_info_get(local, next_hop); + sta = sta_info_get(sdata, next_hop); if (!sta) { rcu_read_unlock(); return -ENOENT; diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 10d13856f86c..1925be9b82fb 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -252,7 +252,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, rcu_read_lock(); - sta = 
sta_info_get(local, mgmt->sa); + sta = sta_info_get(sdata, mgmt->sa); if (sta) { u32 prev_rates; diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 659a42d529e3..ac1a777674f6 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -421,7 +421,7 @@ void ieee80211_key_link(struct ieee80211_key *key, */ /* same here, the AP could be using QoS */ - ap = sta_info_get(key->local, key->sdata->u.mgd.bssid); + ap = sta_info_get(key->sdata, key->sdata->u.mgd.bssid); if (ap) { if (test_sta_flags(ap, WLAN_STA_WME)) key->conf.flags |= diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 833b2f3670c5..b836892f0ba9 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -335,7 +335,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, bool process = true; rcu_read_lock(); - sta = sta_info_get(local, mgmt->sa); + sta = sta_info_get(sdata, mgmt->sa); if (!sta) { rcu_read_unlock(); return 0; diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 0f7c6e6a4248..8fcf56ee7743 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -234,7 +234,7 @@ void mesh_neighbour_update(u8 *hw_addr, u32 rates, struct ieee80211_sub_if_data rcu_read_lock(); - sta = sta_info_get(local, hw_addr); + sta = sta_info_get(sdata, hw_addr); if (!sta) { sta = mesh_plink_alloc(sdata, hw_addr, rates); if (!sta) { @@ -455,7 +455,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m rcu_read_lock(); - sta = sta_info_get(local, mgmt->sa); + sta = sta_info_get(sdata, mgmt->sa); if (!sta && ftype != PLINK_OPEN) { mpl_dbg("Mesh plink: cls or cnf from unknown peer\n"); rcu_read_unlock(); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 6dc7b5ad9a41..c16667a7c8df 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -202,7 +202,7 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, ieee80211_hw_config(local, 0); rcu_read_lock(); - sta = sta_info_get(local, bssid); + sta = sta_info_get(sdata, bssid); if (sta) rate_control_rate_update(local, sband, sta, IEEE80211_RC_HT_CHANGED); @@ -1070,7 +1070,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, netif_carrier_off(sdata->dev); rcu_read_lock(); - sta = sta_info_get(local, bssid); + sta = sta_info_get(sdata, bssid); if (sta) ieee80211_sta_tear_down_BA_sessions(sta); rcu_read_unlock(); @@ -1109,7 +1109,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, rcu_read_lock(); - sta = sta_info_get(local, bssid); + sta = sta_info_get(sdata, bssid); if (!sta) { rcu_read_unlock(); return; @@ -1489,7 +1489,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, rcu_read_lock(); /* Add STA entry for the AP */ - sta = sta_info_get(local, wk->bss->cbss.bssid); + sta = sta_info_get(sdata, wk->bss->cbss.bssid); if (!sta) { newsta = true; @@ -1857,7 +1857,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, rcu_read_lock(); - sta = sta_info_get(local, bssid); + sta = sta_info_get(sdata, bssid); if (WARN_ON(!sta)) { rcu_read_unlock(); return; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index f237df408378..5bae28693da8 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1472,7 +1472,6 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) { struct ieee80211_sub_if_data *sdata = rx->sdata; struct net_device *dev = sdata->dev; - struct ieee80211_local *local = rx->local; struct sk_buff *skb, *xmit_skb; struct ethhdr *ehdr = (struct ethhdr *) 
rx->skb->data; struct sta_info *dsta; @@ -1495,8 +1494,8 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) printk(KERN_DEBUG "%s: failed to clone " "multicast frame\n", dev->name); } else { - dsta = sta_info_get(local, skb->data); - if (dsta && dsta->sdata->dev == dev) { + dsta = sta_info_get(sdata, skb->data); + if (dsta) { /* * The destination station is associated to * this AP (in this VLAN), so send the frame @@ -2363,6 +2362,8 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, int prepares; struct ieee80211_sub_if_data *prev = NULL; struct sk_buff *skb_new; + struct sta_info *sta, *tmp; + bool found_sta = false; hdr = (struct ieee80211_hdr *)skb->data; memset(&rx, 0, sizeof(rx)); @@ -2379,68 +2380,76 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, ieee80211_parse_qos(&rx); ieee80211_verify_alignment(&rx); - rx.sta = sta_info_get(local, hdr->addr2); - if (rx.sta) - rx.sdata = rx.sta->sdata; - - if (rx.sdata && ieee80211_is_data(hdr->frame_control)) { - rx.flags |= IEEE80211_RX_RA_MATCH; - prepares = prepare_for_handlers(rx.sdata, &rx, hdr); - if (prepares) { - if (status->flag & RX_FLAG_MMIC_ERROR) { - if (rx.flags & IEEE80211_RX_RA_MATCH) - ieee80211_rx_michael_mic_report(hdr, &rx); - } else - prev = rx.sdata; + if (ieee80211_is_data(hdr->frame_control)) { + for_each_sta_info(local, hdr->addr2, sta, tmp) { + rx.sta = sta; + found_sta = true; + rx.sdata = sta->sdata; + + rx.flags |= IEEE80211_RX_RA_MATCH; + prepares = prepare_for_handlers(rx.sdata, &rx, hdr); + if (prepares) { + if (status->flag & RX_FLAG_MMIC_ERROR) { + if (rx.flags & IEEE80211_RX_RA_MATCH) + ieee80211_rx_michael_mic_report(hdr, &rx); + } else + prev = rx.sdata; + } } - } else list_for_each_entry_rcu(sdata, &local->interfaces, list) { - if (!netif_running(sdata->dev)) - continue; + } + if (!found_sta) { + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (!netif_running(sdata->dev)) + continue; - if (sdata->vif.type == NL80211_IFTYPE_MONITOR || - sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - continue; + if (sdata->vif.type == NL80211_IFTYPE_MONITOR || + sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + continue; - rx.flags |= IEEE80211_RX_RA_MATCH; - prepares = prepare_for_handlers(sdata, &rx, hdr); + rx.sta = sta_info_get(sdata, hdr->addr2); - if (!prepares) - continue; + rx.flags |= IEEE80211_RX_RA_MATCH; + prepares = prepare_for_handlers(sdata, &rx, hdr); - if (status->flag & RX_FLAG_MMIC_ERROR) { - rx.sdata = sdata; - if (rx.flags & IEEE80211_RX_RA_MATCH) - ieee80211_rx_michael_mic_report(hdr, &rx); - continue; - } + if (!prepares) + continue; - /* - * frame is destined for this interface, but if it's not - * also for the previous one we handle that after the - * loop to avoid copying the SKB once too much - */ + if (status->flag & RX_FLAG_MMIC_ERROR) { + rx.sdata = sdata; + if (rx.flags & IEEE80211_RX_RA_MATCH) + ieee80211_rx_michael_mic_report(hdr, + &rx); + continue; + } - if (!prev) { - prev = sdata; - continue; - } + /* + * frame is destined for this interface, but if it's + * not also for the previous one we handle that after + * the loop to avoid copying the SKB once too much + */ - /* - * frame was destined for the previous interface - * so invoke RX handlers for it - */ + if (!prev) { + prev = sdata; + continue; + } - skb_new = skb_copy(skb, GFP_ATOMIC); - if (!skb_new) { - if (net_ratelimit()) - printk(KERN_DEBUG "%s: failed to copy " - "multicast frame for %s\n", - wiphy_name(local->hw.wiphy), - prev->dev->name); - continue; + /* + * frame was destined 
for the previous interface + * so invoke RX handlers for it + */ + + skb_new = skb_copy(skb, GFP_ATOMIC); + if (!skb_new) { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: failed to copy " + "multicast frame for %s\n", + wiphy_name(local->hw.wiphy), + prev->dev->name); + continue; + } + ieee80211_invoke_rx_handlers(prev, &rx, skb_new, rate); + prev = sdata; } - ieee80211_invoke_rx_handlers(prev, &rx, skb_new, rate); - prev = sdata; } if (prev) ieee80211_invoke_rx_handlers(prev, &rx, skb, rate); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 71f370dd24bc..c58a23eea58c 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -103,13 +103,16 @@ static int sta_info_hash_del(struct ieee80211_local *local, } /* protected by RCU */ -struct sta_info *sta_info_get(struct ieee80211_local *local, const u8 *addr) +struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, + const u8 *addr) { + struct ieee80211_local *local = sdata->local; struct sta_info *sta; sta = rcu_dereference(local->sta_hash[STA_HASH(addr)]); while (sta) { - if (memcmp(sta->sta.addr, addr, ETH_ALEN) == 0) + if (sta->sdata == sdata && + memcmp(sta->sta.addr, addr, ETH_ALEN) == 0) break; sta = rcu_dereference(sta->hnext); } @@ -377,7 +380,7 @@ int sta_info_insert(struct sta_info *sta) spin_lock_irqsave(&local->sta_lock, flags); /* check if STA exists already */ - if (sta_info_get(local, sta->sta.addr)) { + if (sta_info_get(sdata, sta->sta.addr)) { spin_unlock_irqrestore(&local->sta_lock, flags); err = -EEXIST; goto out_free; @@ -843,11 +846,12 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *ieee80211_find_sta_by_hw(struct ieee80211_hw *hw, const u8 *addr) { - struct sta_info *sta = sta_info_get(hw_to_local(hw), addr); + struct sta_info *sta, *nxt; - if (!sta) - return NULL; - return &sta->sta; + /* Just return a random station ... first in list ... */ + for_each_sta_info(hw_to_local(hw), addr, sta, nxt) + return &sta->sta; + return NULL; } EXPORT_SYMBOL_GPL(ieee80211_find_sta_by_hw); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index b4810f6aa94f..c8208236e896 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -403,9 +403,34 @@ static inline u32 get_sta_flags(struct sta_info *sta) #define STA_INFO_CLEANUP_INTERVAL (10 * HZ) /* - * Get a STA info, must have be under RCU read lock. + * Get a STA info, must be under RCU read lock. */ -struct sta_info *sta_info_get(struct ieee80211_local *local, const u8 *addr); +struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, + const u8 *addr); + +static inline +void for_each_sta_info_type_check(struct ieee80211_local *local, + const u8 *addr, + struct sta_info *sta, + struct sta_info *nxt) +{ +} + +#define for_each_sta_info(local, _addr, sta, nxt) \ + for ( /* initialise loop */ \ + sta = rcu_dereference(local->sta_hash[STA_HASH(_addr)]),\ + nxt = sta ? rcu_dereference(sta->hnext) : NULL; \ + /* typecheck */ \ + for_each_sta_info_type_check(local, (_addr), sta, nxt), \ + /* continue condition */ \ + sta; \ + /* advance loop */ \ + sta = nxt, \ + nxt = sta ? rcu_dereference(sta->hnext) : NULL \ + ) \ + /* compare address and run code only if it matches */ \ + if (memcmp(sta->sta.addr, (_addr), ETH_ALEN) == 0) + /* * Get STA info by index, BROKEN! 
*/ diff --git a/net/mac80211/status.c b/net/mac80211/status.c index d78f36c64c7b..32fe327acf4e 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -146,7 +146,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) struct ieee80211_tx_status_rtap_hdr *rthdr; struct ieee80211_sub_if_data *sdata; struct net_device *prev_dev = NULL; - struct sta_info *sta; + struct sta_info *sta, *tmp; int retry_count = -1, i; bool injected; @@ -166,9 +166,11 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) sband = local->hw.wiphy->bands[info->band]; - sta = sta_info_get(local, hdr->addr1); + for_each_sta_info(local, hdr->addr1, sta, tmp) { + /* skip wrong virtual interface */ + if (memcmp(hdr->addr2, sta->sdata->dev->dev_addr, ETH_ALEN)) + continue; - if (sta) { if (!(info->flags & IEEE80211_TX_STAT_ACK) && test_sta_flags(sta, WLAN_STA_PS_STA)) { /* diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 8834cc93c716..b913bfc34a9f 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1055,7 +1055,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) tx->sta = rcu_dereference(sdata->u.vlan.sta); if (!tx->sta) - tx->sta = sta_info_get(local, hdr->addr1); + tx->sta = sta_info_get(sdata, hdr->addr1); if (tx->sta && ieee80211_is_data_qos(hdr->frame_control) && (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION)) { @@ -1761,9 +1761,8 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, */ if (!is_multicast_ether_addr(hdr.addr1)) { rcu_read_lock(); - sta = sta_info_get(local, hdr.addr1); - /* XXX: in the future, use sdata to look up the sta */ - if (sta && sta->sdata == sdata) + sta = sta_info_get(sdata, hdr.addr1); + if (sta) sta_flags = get_sta_flags(sta); rcu_read_unlock(); } @@ -1922,7 +1921,7 @@ static bool ieee80211_tx_pending_skb(struct ieee80211_local *local, ieee80211_tx(sdata, skb, true); } else { hdr = (struct ieee80211_hdr *)skb->data; - sta = sta_info_get(local, hdr->addr1); + sta = sta_info_get(sdata, hdr->addr1); ret = __ieee80211_tx(local, &skb, sta, true); if (ret != IEEE80211_TX_OK) -- cgit v1.2.2 From 47846c9b0c10808d9337d2e7d09361f3e0a0a71a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 25 Nov 2009 17:46:19 +0100 Subject: mac80211: reduce reliance on netdev For bluetooth 3, we will most likely not have a netdev for a virtual interface (sdata), so prepare for that by reducing the reliance on having a netdev. This patch moves the name and address fields into the sdata struct and uses them from there all over. Some work is needed to keep them sync'ed, but that's not a lot of work and in slow paths anyway. In doing so, this also reduces the number of pointer dereferences in many places, because of things like sdata->dev->dev_addr becoming sdata->vif.addr. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/agg-rx.c | 6 ++--- net/mac80211/agg-tx.c | 14 +++++----- net/mac80211/cfg.c | 2 +- net/mac80211/debugfs_key.c | 2 +- net/mac80211/debugfs_netdev.c | 55 ++++---------------------------------- net/mac80211/debugfs_netdev.h | 9 +++---- net/mac80211/debugfs_sta.c | 2 +- net/mac80211/driver-ops.h | 4 +-- net/mac80211/driver-trace.h | 12 ++++----- net/mac80211/ht.c | 6 ++--- net/mac80211/ibss.c | 32 +++++++++++----------- net/mac80211/ieee80211_i.h | 5 +++- net/mac80211/iface.c | 62 ++++++++++++++++++++++++++++++++++++++++--- net/mac80211/main.c | 17 ++++++++---- net/mac80211/mesh.c | 4 +-- net/mac80211/mesh_hwmp.c | 18 ++++++------- net/mac80211/mesh_pathtbl.c | 6 ++--- net/mac80211/mesh_plink.c | 2 +- net/mac80211/mlme.c | 62 +++++++++++++++++++++---------------------- net/mac80211/pm.c | 2 +- net/mac80211/rx.c | 43 +++++++++++++++--------------- net/mac80211/scan.c | 2 +- net/mac80211/spectmgmt.c | 4 +-- net/mac80211/sta_info.c | 8 +++--- net/mac80211/status.c | 2 +- net/mac80211/tx.c | 33 +++++++++++------------ net/mac80211/util.c | 14 +++++----- 27 files changed, 221 insertions(+), 207 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index f16d49d474b4..37ecdeddd5ac 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -135,7 +135,7 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d if (!skb) { printk(KERN_DEBUG "%s: failed to allocate buffer " - "for addba resp frame\n", sdata->dev->name); + "for addba resp frame\n", sdata->name); return; } @@ -143,10 +143,10 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); memset(mgmt, 0, 24); memcpy(mgmt->da, da, ETH_ALEN); - memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); if (sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); + memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); else if (sdata->vif.type == NL80211_IFTYPE_STATION) memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index b05de532c379..209be60564ce 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -58,17 +58,17 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, if (!skb) { printk(KERN_ERR "%s: failed to allocate buffer " - "for addba request frame\n", sdata->dev->name); + "for addba request frame\n", sdata->name); return; } skb_reserve(skb, local->hw.extra_tx_headroom); mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); memset(mgmt, 0, 24); memcpy(mgmt->da, da, ETH_ALEN); - memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); if (sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); + memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); else if (sdata->vif.type == NL80211_IFTYPE_STATION) memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN); @@ -104,7 +104,7 @@ void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u1 skb = dev_alloc_skb(sizeof(*bar) + local->hw.extra_tx_headroom); if (!skb) { printk(KERN_ERR "%s: failed to allocate buffer for " - "bar frame\n", sdata->dev->name); + "bar frame\n", sdata->name); return; } skb_reserve(skb, local->hw.extra_tx_headroom); @@ -113,7 +113,7 @@ void ieee80211_send_bar(struct 
ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u1 bar->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_BACK_REQ); memcpy(bar->ra, ra, ETH_ALEN); - memcpy(bar->ta, sdata->dev->dev_addr, ETH_ALEN); + memcpy(bar->ta, sdata->vif.addr, ETH_ALEN); bar_control |= (u16)IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL; bar_control |= (u16)IEEE80211_BAR_CTRL_CBMTID_COMPRESSED_BA; bar_control |= (u16)(tid << 12); @@ -489,7 +489,7 @@ void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, #ifdef CONFIG_MAC80211_HT_DEBUG if (net_ratelimit()) printk(KERN_WARNING "%s: Not enough memory, " - "dropping start BA session", skb->dev->name); + "dropping start BA session", sdata->name); #endif return; } @@ -621,7 +621,7 @@ void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, #ifdef CONFIG_MAC80211_HT_DEBUG if (net_ratelimit()) printk(KERN_WARNING "%s: Not enough memory, " - "dropping stop BA session", skb->dev->name); + "dropping stop BA session", sdata->name); #endif return; } diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 14e1f4015a72..fcfa1bf776a7 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -729,7 +729,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, } else sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (compare_ether_addr(mac, dev->dev_addr) == 0) + if (compare_ether_addr(mac, sdata->vif.addr) == 0) return -EINVAL; if (is_multicast_ether_addr(mac)) diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index e0f5224630da..d12e743cb4e1 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -56,7 +56,7 @@ KEY_CONF_FILE(keyidx, D); KEY_CONF_FILE(hw_key_idx, D); KEY_FILE(flags, X); KEY_FILE(tx_rx_count, D); -KEY_READ(ifindex, sdata->dev->ifindex, 20, "%d\n"); +KEY_READ(ifindex, sdata->name, IFNAMSIZ + 2, "%s\n"); KEY_OPS(ifindex); static ssize_t key_algorithm_read(struct file *file, diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 472b2039906c..5d9c797635a9 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -280,16 +280,11 @@ static void add_files(struct ieee80211_sub_if_data *sdata) } } -static int notif_registered; - void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata) { char buf[10+IFNAMSIZ]; - if (!notif_registered) - return; - - sprintf(buf, "netdev:%s", sdata->dev->name); + sprintf(buf, "netdev:%s", sdata->name); sdata->debugfs.dir = debugfs_create_dir(buf, sdata->local->hw.wiphy->debugfsdir); add_files(sdata); @@ -304,58 +299,18 @@ void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata) sdata->debugfs.dir = NULL; } -static int netdev_notify(struct notifier_block *nb, - unsigned long state, - void *ndev) +void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata) { - struct net_device *dev = ndev; struct dentry *dir; - struct ieee80211_sub_if_data *sdata; - char buf[10+IFNAMSIZ]; - - if (state != NETDEV_CHANGENAME) - return 0; - - if (!dev->ieee80211_ptr || !dev->ieee80211_ptr->wiphy) - return 0; - - if (dev->ieee80211_ptr->wiphy->privid != mac80211_wiphy_privid) - return 0; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); + char buf[10 + IFNAMSIZ]; dir = sdata->debugfs.dir; if (!dir) - return 0; + return; - sprintf(buf, "netdev:%s", dev->name); + sprintf(buf, "netdev:%s", sdata->name); if (!debugfs_rename(dir->d_parent, dir, dir->d_parent, buf)) printk(KERN_ERR "mac80211: debugfs: failed to rename debugfs " "dir to %s\n", buf); - - return 0; -} - -static struct notifier_block 
mac80211_debugfs_netdev_notifier = { - .notifier_call = netdev_notify, -}; - -void ieee80211_debugfs_netdev_init(void) -{ - int err; - - err = register_netdevice_notifier(&mac80211_debugfs_netdev_notifier); - if (err) { - printk(KERN_ERR - "mac80211: failed to install netdev notifier," - " disabling per-netdev debugfs!\n"); - } else - notif_registered = 1; -} - -void ieee80211_debugfs_netdev_exit(void) -{ - unregister_netdevice_notifier(&mac80211_debugfs_netdev_notifier); - notif_registered = 0; } diff --git a/net/mac80211/debugfs_netdev.h b/net/mac80211/debugfs_netdev.h index 7af731f0b731..79025e79f4d6 100644 --- a/net/mac80211/debugfs_netdev.h +++ b/net/mac80211/debugfs_netdev.h @@ -6,8 +6,7 @@ #ifdef CONFIG_MAC80211_DEBUGFS void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata); void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata); -void ieee80211_debugfs_netdev_init(void); -void ieee80211_debugfs_netdev_exit(void); +void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata); #else static inline void ieee80211_debugfs_add_netdev( struct ieee80211_sub_if_data *sdata) @@ -15,10 +14,8 @@ static inline void ieee80211_debugfs_add_netdev( static inline void ieee80211_debugfs_remove_netdev( struct ieee80211_sub_if_data *sdata) {} -static inline void ieee80211_debugfs_netdev_init(void) -{} - -static inline void ieee80211_debugfs_netdev_exit(void) +static inline void ieee80211_debugfs_rename_netdev( + struct ieee80211_sub_if_data *sdata) {} #endif diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 3f41608c8081..374ff6f98a9c 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -44,7 +44,7 @@ static const struct file_operations sta_ ##name## _ops = { \ STA_OPS(name) STA_FILE(aid, sta.aid, D); -STA_FILE(dev, sdata->dev->name, S); +STA_FILE(dev, sdata->name, S); STA_FILE(rx_packets, rx_packets, LU); STA_FILE(tx_packets, tx_packets, LU); STA_FILE(rx_bytes, rx_bytes, LU); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 921dd9c9ff62..a4002657dac5 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -39,7 +39,7 @@ static inline int drv_add_interface(struct ieee80211_local *local, struct ieee80211_if_init_conf *conf) { int ret = local->ops->add_interface(&local->hw, conf); - trace_drv_add_interface(local, conf->mac_addr, conf->vif, ret); + trace_drv_add_interface(local, conf->vif, ret); return ret; } @@ -47,7 +47,7 @@ static inline void drv_remove_interface(struct ieee80211_local *local, struct ieee80211_if_init_conf *conf) { local->ops->remove_interface(&local->hw, conf); - trace_drv_remove_interface(local, conf->mac_addr, conf->vif); + trace_drv_remove_interface(local, conf->vif); } static inline int drv_config(struct ieee80211_local *local, u32 changed) diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index ee94ea0c67e9..7b155b947209 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -70,11 +70,10 @@ TRACE_EVENT(drv_stop, TRACE_EVENT(drv_add_interface, TP_PROTO(struct ieee80211_local *local, - const u8 *addr, struct ieee80211_vif *vif, int ret), - TP_ARGS(local, addr, vif, ret), + TP_ARGS(local, vif, ret), TP_STRUCT__entry( LOCAL_ENTRY @@ -86,7 +85,7 @@ TRACE_EVENT(drv_add_interface, TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; - memcpy(__entry->addr, addr, 6); + memcpy(__entry->addr, vif->addr, 6); __entry->ret = ret; ), @@ -97,10 +96,9 @@ TRACE_EVENT(drv_add_interface, ); TRACE_EVENT(drv_remove_interface, - 
TP_PROTO(struct ieee80211_local *local, - const u8 *addr, struct ieee80211_vif *vif), + TP_PROTO(struct ieee80211_local *local, struct ieee80211_vif *vif), - TP_ARGS(local, addr, vif), + TP_ARGS(local, vif), TP_STRUCT__entry( LOCAL_ENTRY @@ -111,7 +109,7 @@ TRACE_EVENT(drv_remove_interface, TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; - memcpy(__entry->addr, addr, 6); + memcpy(__entry->addr, vif->addr, 6); ), TP_printk( diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 3787455fb696..45ebd062a2fb 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -106,7 +106,7 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, if (!skb) { printk(KERN_ERR "%s: failed to allocate buffer " - "for delba frame\n", sdata->dev->name); + "for delba frame\n", sdata->name); return; } @@ -114,10 +114,10 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); memset(mgmt, 0, 24); memcpy(mgmt->da, da, ETH_ALEN); - memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); if (sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); + memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); else if (sdata->vif.type == NL80211_IFTYPE_STATION) memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN); diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 1925be9b82fb..ef6c6b2401d1 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -117,7 +117,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_RESP); memset(mgmt->da, 0xff, ETH_ALEN); - memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); memcpy(mgmt->bssid, ifibss->bssid, ETH_ALEN); mgmt->u.beacon.beacon_int = cpu_to_le16(beacon_int); mgmt->u.beacon.timestamp = cpu_to_le64(tsf); @@ -266,7 +266,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, printk(KERN_DEBUG "%s: updated supp_rates set " "for %pM based on beacon info (0x%llx | " "0x%llx -> 0x%llx)\n", - sdata->dev->name, + sdata->name, sta->sta.addr, (unsigned long long) prev_rates, (unsigned long long) supp_rates, @@ -364,7 +364,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG "%s: beacon TSF higher than " "local TSF - IBSS merge with BSSID %pM\n", - sdata->dev->name, mgmt->bssid); + sdata->name, mgmt->bssid); #endif ieee80211_sta_join_ibss(sdata, bss); ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates); @@ -393,7 +393,7 @@ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, if (local->num_sta >= IEEE80211_IBSS_MAX_STA_ENTRIES) { if (net_ratelimit()) printk(KERN_DEBUG "%s: No room for a new IBSS STA entry %pM\n", - sdata->dev->name, addr); + sdata->name, addr); return NULL; } @@ -402,7 +402,7 @@ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: Adding new IBSS station %pM (dev=%s)\n", - wiphy_name(local->hw.wiphy), addr, sdata->dev->name); + wiphy_name(local->hw.wiphy), addr, sdata->name); #endif sta = sta_info_alloc(sdata, addr, GFP_ATOMIC); @@ -466,7 +466,7 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata) return; printk(KERN_DEBUG "%s: No active IBSS STAs - trying to scan for other " - "IBSS networks with same 
SSID (merge)\n", sdata->dev->name); + "IBSS networks with same SSID (merge)\n", sdata->name); ieee80211_request_internal_scan(sdata, ifibss->ssid, ifibss->ssid_len); } @@ -488,13 +488,13 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) * random number generator get different BSSID. */ get_random_bytes(bssid, ETH_ALEN); for (i = 0; i < ETH_ALEN; i++) - bssid[i] ^= sdata->dev->dev_addr[i]; + bssid[i] ^= sdata->vif.addr[i]; bssid[0] &= ~0x01; bssid[0] |= 0x02; } printk(KERN_DEBUG "%s: Creating new IBSS network, BSSID %pM\n", - sdata->dev->name, bssid); + sdata->name, bssid); sband = local->hw.wiphy->bands[ifibss->channel->band]; @@ -523,7 +523,7 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) active_ibss = ieee80211_sta_active_ibss(sdata); #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG "%s: sta_find_ibss (active_ibss=%d)\n", - sdata->dev->name, active_ibss); + sdata->name, active_ibss); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ if (active_ibss) @@ -552,7 +552,7 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) printk(KERN_DEBUG "%s: Selected IBSS BSSID %pM" " based on configured SSID\n", - sdata->dev->name, bss->cbss.bssid); + sdata->name, bss->cbss.bssid); ieee80211_sta_join_ibss(sdata, bss); ieee80211_rx_bss_put(local, bss); @@ -571,7 +571,7 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) } else if (time_after(jiffies, ifibss->last_scan_completed + IEEE80211_SCAN_INTERVAL)) { printk(KERN_DEBUG "%s: Trigger new scan to find an IBSS to " - "join\n", sdata->dev->name); + "join\n", sdata->name); ieee80211_request_internal_scan(sdata, ifibss->ssid, ifibss->ssid_len); @@ -585,7 +585,7 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) return; } printk(KERN_DEBUG "%s: IBSS not allowed on" - " %d MHz\n", sdata->dev->name, + " %d MHz\n", sdata->name, local->hw.conf.channel->center_freq); /* No IBSS found - decrease scan interval and continue @@ -619,7 +619,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG "%s: RX ProbeReq SA=%pM DA=%pM BSSID=%pM" " (tx_last_beacon=%d)\n", - sdata->dev->name, mgmt->sa, mgmt->da, + sdata->name, mgmt->sa, mgmt->da, mgmt->bssid, tx_last_beacon); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ @@ -637,7 +637,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG "%s: Invalid SSID IE in ProbeReq " "from %pM\n", - sdata->dev->name, mgmt->sa); + sdata->name, mgmt->sa); #endif return; } @@ -657,7 +657,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, memcpy(resp->da, mgmt->sa, ETH_ALEN); #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG "%s: Sending ProbeResp to %pM\n", - sdata->dev->name, resp->da); + sdata->name, resp->da); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; ieee80211_tx_skb(sdata, skb); @@ -671,7 +671,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, size_t baselen; struct ieee802_11_elems elems; - if (memcmp(mgmt->da, sdata->dev->dev_addr, ETH_ALEN)) + if (memcmp(mgmt->da, sdata->vif.addr, ETH_ALEN)) return; /* ignore ProbeResp to foreign address */ baselen = (u8 *) mgmt->u.probe_resp.variable - (u8 *) mgmt; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 419f186cfcf0..178e329f9257 100644 --- a/net/mac80211/ieee80211_i.h 
+++ b/net/mac80211/ieee80211_i.h @@ -140,7 +140,6 @@ typedef unsigned __bitwise__ ieee80211_tx_result; struct ieee80211_tx_data { struct sk_buff *skb; - struct net_device *dev; struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; struct sta_info *sta; @@ -433,6 +432,8 @@ struct ieee80211_sub_if_data { int drop_unencrypted; + char name[IFNAMSIZ]; + /* * keep track of whether the HT opmode (stored in * vif.bss_info.ht_operation_mode) is valid. @@ -937,6 +938,8 @@ void ieee80211_rx_bss_put(struct ieee80211_local *local, struct ieee80211_bss *bss); /* interface handling */ +int ieee80211_iface_init(void); +void ieee80211_iface_exit(void); int ieee80211_if_add(struct ieee80211_local *local, const char *name, struct net_device **new_dev, enum nl80211_iftype type, struct vif_params *params); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 80c16f6e2af6..a6e6da3cab70 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -60,6 +60,22 @@ static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) return 0; } +static int ieee80211_change_mac(struct net_device *dev, void *addr) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + int ret; + + if (netif_running(dev)) + return -EBUSY; + + ret = eth_mac_addr(dev, addr); + + if (ret == 0) + memcpy(sdata->vif.addr, addr, ETH_ALEN); + + return ret; +} + static inline int identical_mac_addr_allowed(int type1, int type2) { return type1 == NL80211_IFTYPE_MONITOR || @@ -234,7 +250,7 @@ static int ieee80211_open(struct net_device *dev) default: conf.vif = &sdata->vif; conf.type = sdata->vif.type; - conf.mac_addr = dev->dev_addr; + conf.mac_addr = sdata->vif.addr; res = drv_add_interface(local, &conf); if (res) goto err_stop; @@ -514,7 +530,7 @@ static int ieee80211_stop(struct net_device *dev) conf.vif = &sdata->vif; conf.type = sdata->vif.type; - conf.mac_addr = dev->dev_addr; + conf.mac_addr = sdata->vif.addr; /* disable all keys for as long as this netdev is down */ ieee80211_disable_keys(sdata); drv_remove_interface(local, &conf); @@ -651,7 +667,7 @@ static const struct net_device_ops ieee80211_dataif_ops = { .ndo_start_xmit = ieee80211_subif_start_xmit, .ndo_set_multicast_list = ieee80211_set_multicast_list, .ndo_change_mtu = ieee80211_change_mtu, - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = ieee80211_change_mac, }; static const struct net_device_ops ieee80211_monitorif_ops = { @@ -794,6 +810,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, /* don't use IEEE80211_DEV_TO_SUB_IF because it checks too much */ sdata = netdev_priv(ndev); ndev->ieee80211_ptr = &sdata->wdev; + memcpy(sdata->vif.addr, ndev->dev_addr, ETH_ALEN); + memcpy(sdata->name, ndev->name, IFNAMSIZ); /* initialise type-independent data */ sdata->wdev.wiphy = local->hw.wiphy; @@ -945,3 +963,41 @@ void ieee80211_recalc_idle(struct ieee80211_local *local) if (chg) ieee80211_hw_config(local, chg); } + +static int netdev_notify(struct notifier_block *nb, + unsigned long state, + void *ndev) +{ + struct net_device *dev = ndev; + struct ieee80211_sub_if_data *sdata; + + if (state != NETDEV_CHANGENAME) + return 0; + + if (!dev->ieee80211_ptr || !dev->ieee80211_ptr->wiphy) + return 0; + + if (dev->ieee80211_ptr->wiphy->privid != mac80211_wiphy_privid) + return 0; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + memcpy(sdata->name, sdata->name, IFNAMSIZ); + + ieee80211_debugfs_rename_netdev(sdata); + return 0; +} + +static struct notifier_block mac80211_netdev_notifier = { + .notifier_call = 
netdev_notify, +}; + +int ieee80211_iface_init(void) +{ + return register_netdevice_notifier(&mac80211_netdev_notifier); +} + +void ieee80211_iface_exit(void) +{ + unregister_netdevice_notifier(&mac80211_netdev_notifier); +} diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 8116d1a96a4a..dbf05b4dd003 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -32,7 +32,6 @@ #include "led.h" #include "cfg.h" #include "debugfs.h" -#include "debugfs_netdev.h" void ieee80211_configure_filter(struct ieee80211_local *local) { @@ -173,7 +172,7 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) sdata->vif.bss_conf.bssid = sdata->u.ibss.bssid; else if (sdata->vif.type == NL80211_IFTYPE_AP) - sdata->vif.bss_conf.bssid = sdata->dev->dev_addr; + sdata->vif.bss_conf.bssid = sdata->vif.addr; else if (ieee80211_vif_is_mesh(&sdata->vif)) { sdata->vif.bss_conf.bssid = zero; } else { @@ -672,11 +671,19 @@ static int __init ieee80211_init(void) ret = rc80211_pid_init(); if (ret) - return ret; + goto err_pid; - ieee80211_debugfs_netdev_init(); + ret = ieee80211_iface_init(); + if (ret) + goto err_netdev; return 0; + err_netdev: + rc80211_pid_exit(); + err_pid: + rc80211_minstrel_exit(); + + return ret; } static void __exit ieee80211_exit(void) @@ -693,7 +700,7 @@ static void __exit ieee80211_exit(void) if (mesh_allocated) ieee80211s_stop(); - ieee80211_debugfs_netdev_exit(); + ieee80211_iface_exit(); } diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index c0fe46493f71..63299b72a7b0 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -457,7 +457,7 @@ static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata, #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: running mesh housekeeping\n", - sdata->dev->name); + sdata->name); #endif ieee80211_sta_expire(sdata, IEEE80211_MESH_PEER_INACTIVITY_LIMIT); @@ -565,7 +565,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, /* ignore ProbeResp to foreign address */ if (stype == IEEE80211_STYPE_PROBE_RESP && - compare_ether_addr(mgmt->da, sdata->dev->dev_addr)) + compare_ether_addr(mgmt->da, sdata->vif.addr)) return; baselen = (u8 *) mgmt->u.probe_resp.variable - (u8 *) mgmt; diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index b836892f0ba9..664f5cc2b273 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -128,9 +128,9 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, IEEE80211_STYPE_ACTION); memcpy(mgmt->da, da, ETH_ALEN); - memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); /* BSSID == SA */ - memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); + memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); mgmt->u.action.category = MESH_PATH_SEL_CATEGORY; mgmt->u.action.u.mesh_action.action_code = MESH_PATH_SEL_ACTION; @@ -222,7 +222,7 @@ int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, IEEE80211_STYPE_ACTION); memcpy(mgmt->da, ra, ETH_ALEN); - memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); /* BSSID is left zeroed, wildcard value */ mgmt->u.action.category = MESH_PATH_SEL_CATEGORY; mgmt->u.action.u.mesh_action.action_code = MESH_PATH_SEL_ACTION; @@ -374,7 +374,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, new_metric = MAX_METRIC; exp_time = TU_TO_EXP_TIME(orig_lifetime); - if (memcmp(orig_addr, sdata->dev->dev_addr, 
ETH_ALEN) == 0) { + if (memcmp(orig_addr, sdata->vif.addr, ETH_ALEN) == 0) { /* This MP is the originator, we are not interested in this * frame, except for updating transmitter's path info. */ @@ -486,7 +486,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, mhwmp_dbg("received PREQ from %pM\n", orig_addr); - if (memcmp(target_addr, sdata->dev->dev_addr, ETH_ALEN) == 0) { + if (memcmp(target_addr, sdata->vif.addr, ETH_ALEN) == 0) { mhwmp_dbg("PREQ is for us\n"); forward = false; reply = true; @@ -579,7 +579,7 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, * replies */ target_addr = PREP_IE_TARGET_ADDR(prep_elem); - if (memcmp(target_addr, sdata->dev->dev_addr, ETH_ALEN) == 0) + if (memcmp(target_addr, sdata->vif.addr, ETH_ALEN) == 0) /* destination, no forwarding required */ return; @@ -890,7 +890,7 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata) target_flags = MP_F_RF; spin_unlock_bh(&mpath->state_lock); - mesh_path_sel_frame_tx(MPATH_PREQ, 0, sdata->dev->dev_addr, + mesh_path_sel_frame_tx(MPATH_PREQ, 0, sdata->vif.addr, cpu_to_le32(ifmsh->sn), target_flags, mpath->dst, cpu_to_le32(mpath->sn), broadcast_addr, 0, ttl, cpu_to_le32(lifetime), 0, @@ -939,7 +939,7 @@ int mesh_nexthop_lookup(struct sk_buff *skb, if (time_after(jiffies, mpath->exp_time + msecs_to_jiffies(sdata->u.mesh.mshcfg.path_refresh_time)) && - !memcmp(sdata->dev->dev_addr, hdr->addr4, ETH_ALEN) && + !memcmp(sdata->vif.addr, hdr->addr4, ETH_ALEN) && !(mpath->flags & MESH_PATH_RESOLVING) && !(mpath->flags & MESH_PATH_FIXED)) { mesh_queue_preq(mpath, @@ -1010,7 +1010,7 @@ mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - mesh_path_sel_frame_tx(MPATH_RANN, 0, sdata->dev->dev_addr, + mesh_path_sel_frame_tx(MPATH_RANN, 0, sdata->vif.addr, cpu_to_le32(++ifmsh->sn), 0, NULL, 0, broadcast_addr, 0, MESH_TTL, 0, 0, 0, sdata); diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index a8da23905c70..fbef678f64c8 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -260,7 +260,7 @@ int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata) int err = 0; u32 hash_idx; - if (memcmp(dst, sdata->dev->dev_addr, ETH_ALEN) == 0) + if (memcmp(dst, sdata->vif.addr, ETH_ALEN) == 0) /* never add ourselves as neighbours */ return -ENOTSUPP; @@ -377,7 +377,7 @@ int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata) int err = 0; u32 hash_idx; - if (memcmp(dst, sdata->dev->dev_addr, ETH_ALEN) == 0) + if (memcmp(dst, sdata->vif.addr, ETH_ALEN) == 0) /* never add ourselves as neighbours */ return -ENOTSUPP; @@ -605,7 +605,7 @@ void mesh_path_discard_frame(struct sk_buff *skb, struct mesh_path *mpath; u32 sn = 0; - if (memcmp(hdr->addr4, sdata->dev->dev_addr, ETH_ALEN) != 0) { + if (memcmp(hdr->addr4, sdata->vif.addr, ETH_ALEN) != 0) { u8 *ra, *da; da = hdr->addr3; diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 8fcf56ee7743..7985e5150898 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -169,7 +169,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); memcpy(mgmt->da, da, ETH_ALEN); - memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); /* BSSID is left zeroed, wildcard value */ mgmt->u.action.category = MESH_PLINK_CATEGORY; 
mgmt->u.action.u.plink_action.action_code = action; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c16667a7c8df..cd5dcc3d8c2b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -248,7 +248,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, wk->ssid_len); if (!skb) { printk(KERN_DEBUG "%s: failed to allocate buffer for assoc " - "frame\n", sdata->dev->name); + "frame\n", sdata->name); return; } skb_reserve(skb, local->hw.extra_tx_headroom); @@ -282,7 +282,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); memset(mgmt, 0, 24); memcpy(mgmt->da, wk->bss->cbss.bssid, ETH_ALEN); - memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); memcpy(mgmt->bssid, wk->bss->cbss.bssid, ETH_ALEN); if (!is_zero_ether_addr(wk->prev_bssid)) { @@ -443,7 +443,7 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt)); if (!skb) { printk(KERN_DEBUG "%s: failed to allocate buffer for " - "deauth/disassoc frame\n", sdata->dev->name); + "deauth/disassoc frame\n", sdata->name); return; } skb_reserve(skb, local->hw.extra_tx_headroom); @@ -451,7 +451,7 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); memset(mgmt, 0, 24); memcpy(mgmt->da, bssid, ETH_ALEN); - memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); memcpy(mgmt->bssid, bssid, ETH_ALEN); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | stype); skb_put(skb, 2); @@ -484,7 +484,7 @@ void ieee80211_send_pspoll(struct ieee80211_local *local, skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*pspoll)); if (!skb) { printk(KERN_DEBUG "%s: failed to allocate buffer for " - "pspoll frame\n", sdata->dev->name); + "pspoll frame\n", sdata->name); return; } skb_reserve(skb, local->hw.extra_tx_headroom); @@ -499,7 +499,7 @@ void ieee80211_send_pspoll(struct ieee80211_local *local, pspoll->aid |= cpu_to_le16(1 << 15 | 1 << 14); memcpy(pspoll->bssid, ifmgd->bssid, ETH_ALEN); - memcpy(pspoll->ta, sdata->dev->dev_addr, ETH_ALEN); + memcpy(pspoll->ta, sdata->vif.addr, ETH_ALEN); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; ieee80211_tx_skb(sdata, skb); @@ -519,7 +519,7 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24); if (!skb) { printk(KERN_DEBUG "%s: failed to allocate buffer for nullfunc " - "frame\n", sdata->dev->name); + "frame\n", sdata->name); return; } skb_reserve(skb, local->hw.extra_tx_headroom); @@ -532,7 +532,7 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, fc |= cpu_to_le16(IEEE80211_FCTL_PM); nullfunc->frame_control = fc; memcpy(nullfunc->addr1, sdata->u.mgd.bssid, ETH_ALEN); - memcpy(nullfunc->addr2, sdata->dev->dev_addr, ETH_ALEN); + memcpy(nullfunc->addr2, sdata->vif.addr, ETH_ALEN); memcpy(nullfunc->addr3, sdata->u.mgd.bssid, ETH_ALEN); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; @@ -948,7 +948,7 @@ ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata, wk->tries++; if (wk->tries > IEEE80211_AUTH_MAX_TRIES) { printk(KERN_DEBUG "%s: direct probe to AP %pM timed out\n", - sdata->dev->name, wk->bss->cbss.bssid); + sdata->name, wk->bss->cbss.bssid); /* * Most likely AP is not in the range so remove the @@ -966,7 +966,7 @@ 
ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata, } printk(KERN_DEBUG "%s: direct probe to AP %pM (try %d)\n", - sdata->dev->name, wk->bss->cbss.bssid, + sdata->name, wk->bss->cbss.bssid, wk->tries); /* @@ -993,7 +993,7 @@ ieee80211_authenticate(struct ieee80211_sub_if_data *sdata, if (wk->tries > IEEE80211_AUTH_MAX_TRIES) { printk(KERN_DEBUG "%s: authentication with AP %pM" " timed out\n", - sdata->dev->name, wk->bss->cbss.bssid); + sdata->name, wk->bss->cbss.bssid); /* * Most likely AP is not in the range so remove the @@ -1011,7 +1011,7 @@ ieee80211_authenticate(struct ieee80211_sub_if_data *sdata, } printk(KERN_DEBUG "%s: authenticate with AP %pM (try %d)\n", - sdata->dev->name, wk->bss->cbss.bssid, wk->tries); + sdata->name, wk->bss->cbss.bssid, wk->tries); ieee80211_send_auth(sdata, 1, wk->auth_alg, wk->ie, wk->ie_len, wk->bss->cbss.bssid, NULL, 0, 0); @@ -1133,7 +1133,7 @@ ieee80211_associate(struct ieee80211_sub_if_data *sdata, if (wk->tries > IEEE80211_ASSOC_MAX_TRIES) { printk(KERN_DEBUG "%s: association with AP %pM" " timed out\n", - sdata->dev->name, wk->bss->cbss.bssid); + sdata->name, wk->bss->cbss.bssid); /* * Most likely AP is not in the range so remove the @@ -1151,7 +1151,7 @@ ieee80211_associate(struct ieee80211_sub_if_data *sdata, } printk(KERN_DEBUG "%s: associate with AP %pM (try %d)\n", - sdata->dev->name, wk->bss->cbss.bssid, wk->tries); + sdata->name, wk->bss->cbss.bssid, wk->tries); ieee80211_send_assoc(sdata, wk); wk->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT; @@ -1212,7 +1212,7 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, #ifdef CONFIG_MAC80211_VERBOSE_DEBUG if (beacon && net_ratelimit()) printk(KERN_DEBUG "%s: detected beacon loss from AP " - "- sending probe request\n", sdata->dev->name); + "- sending probe request\n", sdata->name); #endif /* @@ -1269,7 +1269,7 @@ static void ieee80211_auth_completed(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgd_work *wk) { wk->state = IEEE80211_MGD_STATE_IDLE; - printk(KERN_DEBUG "%s: authenticated\n", sdata->dev->name); + printk(KERN_DEBUG "%s: authenticated\n", sdata->name); } @@ -1366,7 +1366,7 @@ ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); printk(KERN_DEBUG "%s: deauthenticated from %pM (Reason: %u)\n", - sdata->dev->name, bssid, reason_code); + sdata->name, bssid, reason_code); if (!wk) { ieee80211_set_disassoc(sdata, true); @@ -1400,7 +1400,7 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); printk(KERN_DEBUG "%s: disassociated from %pM (Reason: %u)\n", - sdata->dev->name, mgmt->sa, reason_code); + sdata->name, mgmt->sa, reason_code); ieee80211_set_disassoc(sdata, false); return RX_MGMT_CFG80211_DISASSOC; @@ -1444,7 +1444,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, printk(KERN_DEBUG "%s: RX %sssocResp from %pM (capab=0x%x " "status=%d aid=%d)\n", - sdata->dev->name, reassoc ? "Rea" : "A", mgmt->sa, + sdata->name, reassoc ? 
"Rea" : "A", mgmt->sa, capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14)))); pos = mgmt->u.assoc_resp.variable; @@ -1458,7 +1458,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, ms = tu * 1024 / 1000; printk(KERN_DEBUG "%s: AP rejected association temporarily; " "comeback duration %u TU (%u ms)\n", - sdata->dev->name, tu, ms); + sdata->name, tu, ms); wk->timeout = jiffies + msecs_to_jiffies(ms); if (ms > IEEE80211_ASSOC_TIMEOUT) run_again(ifmgd, jiffies + msecs_to_jiffies(ms)); @@ -1467,23 +1467,23 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, if (status_code != WLAN_STATUS_SUCCESS) { printk(KERN_DEBUG "%s: AP denied association (code=%d)\n", - sdata->dev->name, status_code); + sdata->name, status_code); wk->state = IEEE80211_MGD_STATE_IDLE; return RX_MGMT_CFG80211_ASSOC; } if ((aid & (BIT(15) | BIT(14))) != (BIT(15) | BIT(14))) printk(KERN_DEBUG "%s: invalid aid value %d; bits 15:14 not " - "set\n", sdata->dev->name, aid); + "set\n", sdata->name, aid); aid &= ~(BIT(15) | BIT(14)); if (!elems.supp_rates) { printk(KERN_DEBUG "%s: no SuppRates element in AssocResp\n", - sdata->dev->name); + sdata->name); return RX_MGMT_NONE; } - printk(KERN_DEBUG "%s: associated\n", sdata->dev->name); + printk(KERN_DEBUG "%s: associated\n", sdata->name); ifmgd->aid = aid; rcu_read_lock(); @@ -1498,7 +1498,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, sta = sta_info_alloc(sdata, wk->bss->cbss.bssid, GFP_KERNEL); if (!sta) { printk(KERN_DEBUG "%s: failed to alloc STA entry for" - " the AP\n", sdata->dev->name); + " the AP\n", sdata->name); return RX_MGMT_NONE; } @@ -1576,7 +1576,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, int err = sta_info_insert(sta); if (err) { printk(KERN_DEBUG "%s: failed to insert STA entry for" - " the AP (error %d)\n", sdata->dev->name, err); + " the AP (error %d)\n", sdata->name, err); rcu_read_unlock(); return RX_MGMT_NONE; } @@ -1671,7 +1671,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, ASSERT_MGD_MTX(ifmgd); - if (memcmp(mgmt->da, sdata->dev->dev_addr, ETH_ALEN)) + if (memcmp(mgmt->da, sdata->vif.addr, ETH_ALEN)) return; /* ignore ProbeResp to foreign address */ baselen = (u8 *) mgmt->u.probe_resp.variable - (u8 *) mgmt; @@ -1686,7 +1686,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, /* direct probe may be part of the association flow */ if (wk && wk->state == IEEE80211_MGD_STATE_PROBE) { printk(KERN_DEBUG "%s: direct probe responded\n", - sdata->dev->name); + sdata->name); wk->tries = 0; wk->state = IEEE80211_MGD_STATE_AUTH; WARN_ON(ieee80211_authenticate(sdata, wk) != RX_MGMT_NONE); @@ -1779,7 +1779,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, #ifdef CONFIG_MAC80211_VERBOSE_DEBUG if (net_ratelimit()) { printk(KERN_DEBUG "%s: cancelling probereq poll due " - "to a received beacon\n", sdata->dev->name); + "to a received beacon\n", sdata->name); } #endif ifmgd->flags &= ~IEEE80211_STA_BEACON_POLL; @@ -2554,7 +2554,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, mutex_unlock(&ifmgd->mtx); printk(KERN_DEBUG "%s: deauthenticating from %pM by local choice (reason=%d)\n", - sdata->dev->name, bssid, req->reason_code); + sdata->name, bssid, req->reason_code); ieee80211_send_deauth_disassoc(sdata, bssid, IEEE80211_STYPE_DEAUTH, req->reason_code, @@ -2583,7 +2583,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, } printk(KERN_DEBUG "%s: 
disassociating from %pM by local choice (reason=%d)\n", - sdata->dev->name, req->bss->bssid, req->reason_code); + sdata->name, req->bss->bssid, req->reason_code); ieee80211_set_disassoc(sdata, false); diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index e535f1c988fe..05ccaadb6a0e 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -102,7 +102,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) conf.vif = &sdata->vif; conf.type = sdata->vif.type; - conf.mac_addr = sdata->dev->dev_addr; + conf.mac_addr = sdata->vif.addr; drv_remove_interface(local, &conf); } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 5bae28693da8..d0136e3da487 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -283,15 +283,15 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, skb->protocol = htons(ETH_P_802_2); list_for_each_entry_rcu(sdata, &local->interfaces, list) { - if (!netif_running(sdata->dev)) - continue; - if (sdata->vif.type != NL80211_IFTYPE_MONITOR) continue; if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) continue; + if (!netif_running(sdata->dev)) + continue; + if (prev_dev) { skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) { @@ -476,7 +476,7 @@ ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; unsigned int hdrlen = ieee80211_hdrlen(hdr->frame_control); - char *dev_addr = rx->sdata->dev->dev_addr; + char *dev_addr = rx->sdata->vif.addr; if (ieee80211_is_data(hdr->frame_control)) { if (is_multicast_ether_addr(hdr->addr1)) { @@ -1024,7 +1024,7 @@ static void ap_sta_ps_start(struct sta_info *sta) drv_sta_notify(local, &sdata->vif, STA_NOTIFY_SLEEP, &sta->sta); #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %pM aid %d enters power save mode\n", - sdata->dev->name, sta->sta.addr, sta->sta.aid); + sdata->name, sta->sta.addr, sta->sta.aid); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ } @@ -1038,13 +1038,13 @@ static void ap_sta_ps_end(struct sta_info *sta) #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %pM aid %d exits power save mode\n", - sdata->dev->name, sta->sta.addr, sta->sta.aid); + sdata->name, sta->sta.addr, sta->sta.aid); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ if (test_sta_flags(sta, WLAN_STA_PS_DRIVER)) { #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %pM aid %d driver-ps-blocked\n", - sdata->dev->name, sta->sta.addr, sta->sta.aid); + sdata->name, sta->sta.addr, sta->sta.aid); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ return; } @@ -1156,7 +1156,7 @@ ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata, printk(KERN_DEBUG "%s: RX reassembly removed oldest " "fragment entry (idx=%d age=%lu seq=%d last_frag=%d " "addr1=%pM addr2=%pM\n", - sdata->dev->name, idx, + sdata->name, idx, jiffies - entry->first_frag_time, entry->seq, entry->last_frag, hdr->addr1, hdr->addr2); #endif @@ -1424,7 +1424,6 @@ static int __ieee80211_data_to_8023(struct ieee80211_rx_data *rx) { struct ieee80211_sub_if_data *sdata = rx->sdata; - struct net_device *dev = sdata->dev; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; if (ieee80211_has_a4(hdr->frame_control) && @@ -1436,7 +1435,7 @@ __ieee80211_data_to_8023(struct ieee80211_rx_data *rx) (sdata->vif.type == NL80211_IFTYPE_STATION && sdata->u.mgd.use_4addr))) return -1; - return ieee80211_data_to_8023(rx->skb, dev->dev_addr, sdata->vif.type); + return ieee80211_data_to_8023(rx->skb, sdata->vif.addr, sdata->vif.type); } /* @@ -1453,7 +1452,7 @@ static 
bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx, __le16 fc) * of whether the frame was encrypted or not. */ if (ehdr->h_proto == htons(ETH_P_PAE) && - (compare_ether_addr(ehdr->h_dest, rx->sdata->dev->dev_addr) == 0 || + (compare_ether_addr(ehdr->h_dest, rx->sdata->vif.addr) == 0 || compare_ether_addr(ehdr->h_dest, pae_group_addr) == 0)) return true; @@ -1721,7 +1720,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) /* Frame has reached destination. Don't forward */ if (!is_multicast_ether_addr(hdr->addr1) && - compare_ether_addr(sdata->dev->dev_addr, hdr->addr3) == 0) + compare_ether_addr(sdata->vif.addr, hdr->addr3) == 0) return RX_CONTINUE; mesh_hdr->ttl--; @@ -1738,10 +1737,10 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) if (!fwd_skb && net_ratelimit()) printk(KERN_DEBUG "%s: failed to clone mesh frame\n", - sdata->dev->name); + sdata->name); fwd_hdr = (struct ieee80211_hdr *) fwd_skb->data; - memcpy(fwd_hdr->addr2, sdata->dev->dev_addr, ETH_ALEN); + memcpy(fwd_hdr->addr2, sdata->vif.addr, ETH_ALEN); info = IEEE80211_SKB_CB(fwd_skb); memset(info, 0, sizeof(*info)); info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; @@ -1870,7 +1869,7 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb; struct ieee80211_mgmt *resp; - if (compare_ether_addr(mgmt->da, sdata->dev->dev_addr) != 0) { + if (compare_ether_addr(mgmt->da, sdata->vif.addr) != 0) { /* Not to own unicast address */ return; } @@ -1894,7 +1893,7 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, resp = (struct ieee80211_mgmt *) skb_put(skb, 24); memset(resp, 0, 24); memcpy(resp->da, mgmt->sa, ETH_ALEN); - memcpy(resp->sa, sdata->dev->dev_addr, ETH_ALEN); + memcpy(resp->sa, sdata->vif.addr, ETH_ALEN); memcpy(resp->bssid, sdata->u.mgd.bssid, ETH_ALEN); resp->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); @@ -2274,7 +2273,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, if (!bssid && !sdata->u.mgd.use_4addr) return 0; if (!multicast && - compare_ether_addr(sdata->dev->dev_addr, hdr->addr1) != 0) { + compare_ether_addr(sdata->vif.addr, hdr->addr1) != 0) { if (!(sdata->dev->flags & IFF_PROMISC)) return 0; rx->flags &= ~IEEE80211_RX_RA_MATCH; @@ -2291,7 +2290,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, return 0; rx->flags &= ~IEEE80211_RX_RA_MATCH; } else if (!multicast && - compare_ether_addr(sdata->dev->dev_addr, + compare_ether_addr(sdata->vif.addr, hdr->addr1) != 0) { if (!(sdata->dev->flags & IFF_PROMISC)) return 0; @@ -2308,7 +2307,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, break; case NL80211_IFTYPE_MESH_POINT: if (!multicast && - compare_ether_addr(sdata->dev->dev_addr, + compare_ether_addr(sdata->vif.addr, hdr->addr1) != 0) { if (!(sdata->dev->flags & IFF_PROMISC)) return 0; @@ -2319,11 +2318,11 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_AP: if (!bssid) { - if (compare_ether_addr(sdata->dev->dev_addr, + if (compare_ether_addr(sdata->vif.addr, hdr->addr1)) return 0; } else if (!ieee80211_bssid_match(bssid, - sdata->dev->dev_addr)) { + sdata->vif.addr)) { if (!(rx->flags & IEEE80211_RX_IN_SCAN)) return 0; rx->flags &= ~IEEE80211_RX_RA_MATCH; @@ -2444,7 +2443,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, printk(KERN_DEBUG "%s: failed to copy " "multicast frame for %s\n", wiphy_name(local->hw.wiphy), 
- prev->dev->name); + prev->name); continue; } ieee80211_invoke_rx_handlers(prev, &rx, skb_new, rate); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 4cf387c944bf..d3381ba5f457 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -147,7 +147,7 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) presp = ieee80211_is_probe_resp(fc); if (presp) { /* ignore ProbeResp to foreign address */ - if (memcmp(mgmt->da, sdata->dev->dev_addr, ETH_ALEN)) + if (memcmp(mgmt->da, sdata->vif.addr, ETH_ALEN)) return RX_DROP_MONITOR; presp = true; diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index aa743a895cf9..7733f66ee2c4 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -35,7 +35,7 @@ static void ieee80211_send_refuse_measurement_request(struct ieee80211_sub_if_da if (!skb) { printk(KERN_ERR "%s: failed to allocate buffer for " - "measurement report frame\n", sdata->dev->name); + "measurement report frame\n", sdata->name); return; } @@ -43,7 +43,7 @@ static void ieee80211_send_refuse_measurement_request(struct ieee80211_sub_if_da msr_report = (struct ieee80211_mgmt *)skb_put(skb, 24); memset(msr_report, 0, 24); memcpy(msr_report->da, da, ETH_ALEN); - memcpy(msr_report->sa, sdata->dev->dev_addr, ETH_ALEN); + memcpy(msr_report->sa, sdata->vif.addr, ETH_ALEN); memcpy(msr_report->bssid, bssid, ETH_ALEN); msr_report->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index c58a23eea58c..294f1b8b1545 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -372,7 +372,7 @@ int sta_info_insert(struct sta_info *sta) goto out_free; } - if (WARN_ON(compare_ether_addr(sta->sta.addr, sdata->dev->dev_addr) == 0 || + if (WARN_ON(compare_ether_addr(sta->sta.addr, sdata->vif.addr) == 0 || is_multicast_ether_addr(sta->sta.addr))) { err = -EINVAL; goto out_free; @@ -831,7 +831,7 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, if (time_after(jiffies, sta->last_rx + exp_time)) { #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG "%s: expiring inactive STA %pM\n", - sdata->dev->name, sta->sta.addr); + sdata->name, sta->sta.addr); #endif __sta_info_unlink(&sta); if (sta) @@ -889,7 +889,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %pM aid %d sending %d filtered/%d PS frames " - "since STA not sleeping anymore\n", sdata->dev->name, + "since STA not sleeping anymore\n", sdata->name, sta->sta.addr, sta->sta.aid, sent - buffered, buffered); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ } @@ -948,7 +948,7 @@ void ieee80211_sta_ps_deliver_poll_response(struct sta_info *sta) */ printk(KERN_DEBUG "%s: STA %pM sent PS Poll even " "though there are no buffered frames for it\n", - sdata->dev->name, sta->sta.addr); + sdata->name, sta->sta.addr); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ } } diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 32fe327acf4e..b4608f11a40f 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -168,7 +168,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) for_each_sta_info(local, hdr->addr1, sta, tmp) { /* skip wrong virtual interface */ - if (memcmp(hdr->addr2, sta->sdata->dev->dev_addr, ETH_ALEN)) + if (memcmp(hdr->addr2, sta->sdata->vif.addr, ETH_ALEN)) continue; if (!(info->flags & IEEE80211_TX_STAT_ACK) && diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 
b913bfc34a9f..ac48c86ae6b3 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -223,7 +223,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: dropped data frame to not " "associated station %pM\n", - tx->dev->name, hdr->addr1); + tx->sdata->name, hdr->addr1); #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ I802_DEBUG_INC(tx->local->tx_handlers_drop_not_assoc); return TX_DROP; @@ -331,7 +331,7 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG if (net_ratelimit()) printk(KERN_DEBUG "%s: BC TX buffer full - dropping the oldest frame\n", - tx->dev->name); + tx->sdata->name); #endif dev_kfree_skb(skb_dequeue(&tx->sdata->bss->ps_bc_buf)); } else @@ -391,7 +391,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) if (net_ratelimit()) { printk(KERN_DEBUG "%s: STA %pM TX " "buffer full - dropping oldest frame\n", - tx->dev->name, sta->sta.addr); + tx->sdata->name, sta->sta.addr); } #endif dev_kfree_skb(old); @@ -416,7 +416,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG else if (unlikely(staflags & WLAN_STA_PS_STA)) { printk(KERN_DEBUG "%s: STA %pM in PS mode, but pspoll " - "set -> send frame\n", tx->dev->name, + "set -> send frame\n", tx->sdata->name, sta->sta.addr); } #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ @@ -549,7 +549,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) "%s: Dropped data frame as no usable bitrate found while " "scanning and associated. Target station: " "%pM on %d GHz band\n", - tx->dev->name, hdr->addr1, + tx->sdata->name, hdr->addr1, tx->channel->band ? 5 : 2)) return TX_DROP; @@ -1021,7 +1021,6 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, memset(tx, 0, sizeof(*tx)); tx->skb = skb; - tx->dev = sdata->dev; /* use original interface */ tx->local = local; tx->sdata = sdata; tx->channel = local->hw.conf.channel; @@ -1474,7 +1473,7 @@ static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, continue; if (tmp_sdata->vif.type != NL80211_IFTYPE_AP) continue; - if (compare_ether_addr(tmp_sdata->dev->dev_addr, + if (compare_ether_addr(tmp_sdata->vif.addr, hdr->addr2) == 0) { sdata = tmp_sdata; break; @@ -1638,7 +1637,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); /* RA TA DA SA */ memcpy(hdr.addr1, sta->sta.addr, ETH_ALEN); - memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); + memcpy(hdr.addr2, sdata->vif.addr, ETH_ALEN); memcpy(hdr.addr3, skb->data, ETH_ALEN); memcpy(hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN); hdrlen = 30; @@ -1652,7 +1651,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS); /* DA BSSID SA */ memcpy(hdr.addr1, skb->data, ETH_ALEN); - memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); + memcpy(hdr.addr2, sdata->vif.addr, ETH_ALEN); memcpy(hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN); hdrlen = 24; break; @@ -1660,7 +1659,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); /* RA TA DA SA */ memcpy(hdr.addr1, sdata->u.wds.remote_addr, ETH_ALEN); - memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); + memcpy(hdr.addr2, sdata->vif.addr, ETH_ALEN); memcpy(hdr.addr3, skb->data, ETH_ALEN); memcpy(hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN); hdrlen = 30; @@ -1674,8 +1673,8 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, goto fail; } - if 
(compare_ether_addr(dev->dev_addr, - skb->data + ETH_ALEN) == 0) { + if (compare_ether_addr(sdata->vif.addr, + skb->data + ETH_ALEN) == 0) { hdrlen = ieee80211_fill_mesh_addresses(&hdr, &fc, skb->data, skb->data + ETH_ALEN); meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr, @@ -1705,7 +1704,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, } } hdrlen = ieee80211_fill_mesh_addresses(&hdr, &fc, - mesh_da, dev->dev_addr); + mesh_da, sdata->vif.addr); rcu_read_unlock(); if (is_mesh_mcast) meshhdrlen = @@ -1730,7 +1729,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, if (sdata->u.mgd.use_4addr && ethertype != ETH_P_PAE) { fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); /* RA TA DA SA */ - memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); + memcpy(hdr.addr2, sdata->vif.addr, ETH_ALEN); memcpy(hdr.addr3, skb->data, ETH_ALEN); memcpy(hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN); hdrlen = 30; @@ -1781,7 +1780,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, unlikely(!is_multicast_ether_addr(hdr.addr1) && !(sta_flags & WLAN_STA_AUTHORIZED) && !(ethertype == ETH_P_PAE && - compare_ether_addr(dev->dev_addr, + compare_ether_addr(sdata->vif.addr, skb->data + ETH_ALEN) == 0))) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG if (net_ratelimit()) @@ -2145,8 +2144,8 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_BEACON); memset(mgmt->da, 0xff, ETH_ALEN); - memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); - memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); + memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); mgmt->u.beacon.beacon_int = cpu_to_le16(sdata->vif.bss_conf.beacon_int); mgmt->u.beacon.capab_info = 0x0; /* 0x0 for MPs */ diff --git a/net/mac80211/util.c b/net/mac80211/util.c index d09f78bb2442..d45760eae6c7 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -469,7 +469,7 @@ void ieee80211_iterate_active_interfaces( break; } if (netif_running(sdata->dev)) - iterator(data, sdata->dev->dev_addr, + iterator(data, sdata->vif.addr, &sdata->vif); } @@ -503,7 +503,7 @@ void ieee80211_iterate_active_interfaces_atomic( break; } if (netif_running(sdata->dev)) - iterator(data, sdata->dev->dev_addr, + iterator(data, sdata->vif.addr, &sdata->vif); } @@ -848,7 +848,7 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, sizeof(*mgmt) + 6 + extra_len); if (!skb) { printk(KERN_DEBUG "%s: failed to allocate buffer for auth " - "frame\n", sdata->dev->name); + "frame\n", sdata->name); return; } skb_reserve(skb, local->hw.extra_tx_headroom); @@ -858,7 +858,7 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_AUTH); memcpy(mgmt->da, bssid, ETH_ALEN); - memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); memcpy(mgmt->bssid, bssid, ETH_ALEN); mgmt->u.auth.auth_alg = cpu_to_le16(auth_alg); mgmt->u.auth.auth_transaction = cpu_to_le16(transaction); @@ -949,7 +949,7 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, ie_len); if (!skb) { printk(KERN_DEBUG "%s: failed to allocate buffer for probe " - "request\n", sdata->dev->name); + "request\n", sdata->name); return; } skb_reserve(skb, local->hw.extra_tx_headroom); @@ -958,7 +958,7 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, memset(mgmt, 0, 24); mgmt->frame_control = 
cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_REQ); - memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); if (dst) { memcpy(mgmt->da, dst, ETH_ALEN); memcpy(mgmt->bssid, dst, ETH_ALEN); @@ -1051,7 +1051,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) netif_running(sdata->dev)) { conf.vif = &sdata->vif; conf.type = sdata->vif.type; - conf.mac_addr = sdata->dev->dev_addr; + conf.mac_addr = sdata->vif.addr; res = drv_add_interface(local, &conf); } } -- cgit v1.2.2 From 12375ef933fa8271396ed0c1e318cb1bd2e2689d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 25 Nov 2009 20:30:31 +0100 Subject: mac80211: trace interface name It's not all that useful to have the vif/sdata pointer, we'd rather refer to the interfaces by their name. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-rx.c | 6 ++---- net/mac80211/agg-tx.c | 7 +++---- net/mac80211/driver-ops.h | 29 +++++++++++++++-------------- net/mac80211/driver-trace.h | 38 ++++++++++++++++++++------------------ net/mac80211/key.c | 4 ++-- net/mac80211/main.c | 3 +-- net/mac80211/pm.c | 2 +- net/mac80211/rx.c | 2 +- net/mac80211/sta_info.c | 6 +++--- net/mac80211/util.c | 2 +- 10 files changed, 49 insertions(+), 50 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 37ecdeddd5ac..a978e666ed6f 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -41,8 +41,7 @@ void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, sta->sta.addr, tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - if (drv_ampdu_action(local, &sta->sdata->vif, - IEEE80211_AMPDU_RX_STOP, + if (drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_STOP, &sta->sta, tid, NULL)) printk(KERN_DEBUG "HW problem - can not stop rx " "aggregation for tid %d\n", tid); @@ -280,8 +279,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, goto end; } - ret = drv_ampdu_action(local, &sta->sdata->vif, - IEEE80211_AMPDU_RX_START, + ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_START, &sta->sta, tid, &start_seq_num); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Rx A-MPDU request on tid %d result %d\n", tid, ret); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 209be60564ce..ceda36618d3c 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -144,7 +144,7 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, *state = HT_AGG_STATE_REQ_STOP_BA_MSK | (initiator << HT_AGG_STATE_INITIATOR_SHIFT); - ret = drv_ampdu_action(local, &sta->sdata->vif, + ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_TX_STOP, &sta->sta, tid, NULL); @@ -303,8 +303,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) start_seq_num = sta->tid_seq[tid]; - ret = drv_ampdu_action(local, &sdata->vif, - IEEE80211_AMPDU_TX_START, + ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START, pubsta, tid, &start_seq_num); if (ret) { @@ -420,7 +419,7 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, ieee80211_agg_splice_finish(local, sta, tid); spin_unlock(&local->ampdu_lock); - drv_ampdu_action(local, &sta->sdata->vif, + drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_TX_OPERATIONAL, &sta->sta, tid, NULL); } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index a4002657dac5..727e4cf7b8a6 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -39,7 +39,7 @@ static inline int 
drv_add_interface(struct ieee80211_local *local, struct ieee80211_if_init_conf *conf) { int ret = local->ops->add_interface(&local->hw, conf); - trace_drv_add_interface(local, conf->vif, ret); + trace_drv_add_interface(local, vif_to_sdata(conf->vif), ret); return ret; } @@ -47,7 +47,7 @@ static inline void drv_remove_interface(struct ieee80211_local *local, struct ieee80211_if_init_conf *conf) { local->ops->remove_interface(&local->hw, conf); - trace_drv_remove_interface(local, conf->vif); + trace_drv_remove_interface(local, vif_to_sdata(conf->vif)); } static inline int drv_config(struct ieee80211_local *local, u32 changed) @@ -58,13 +58,13 @@ static inline int drv_config(struct ieee80211_local *local, u32 changed) } static inline void drv_bss_info_changed(struct ieee80211_local *local, - struct ieee80211_vif *vif, + struct ieee80211_sub_if_data *sdata, struct ieee80211_bss_conf *info, u32 changed) { if (local->ops->bss_info_changed) - local->ops->bss_info_changed(&local->hw, vif, info, changed); - trace_drv_bss_info_changed(local, vif, info, changed); + local->ops->bss_info_changed(&local->hw, &sdata->vif, info, changed); + trace_drv_bss_info_changed(local, sdata, info, changed); } static inline u64 drv_prepare_multicast(struct ieee80211_local *local, @@ -106,12 +106,13 @@ static inline int drv_set_tim(struct ieee80211_local *local, } static inline int drv_set_key(struct ieee80211_local *local, - enum set_key_cmd cmd, struct ieee80211_vif *vif, + enum set_key_cmd cmd, + struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta, struct ieee80211_key_conf *key) { - int ret = local->ops->set_key(&local->hw, cmd, vif, sta, key); - trace_drv_set_key(local, cmd, vif, sta, key, ret); + int ret = local->ops->set_key(&local->hw, cmd, &sdata->vif, sta, key); + trace_drv_set_key(local, cmd, sdata, sta, key, ret); return ret; } @@ -179,13 +180,13 @@ static inline int drv_set_rts_threshold(struct ieee80211_local *local, } static inline void drv_sta_notify(struct ieee80211_local *local, - struct ieee80211_vif *vif, + struct ieee80211_sub_if_data *sdata, enum sta_notify_cmd cmd, struct ieee80211_sta *sta) { if (local->ops->sta_notify) - local->ops->sta_notify(&local->hw, vif, cmd, sta); - trace_drv_sta_notify(local, vif, cmd, sta); + local->ops->sta_notify(&local->hw, &sdata->vif, cmd, sta); + trace_drv_sta_notify(local, sdata, cmd, sta); } static inline int drv_conf_tx(struct ieee80211_local *local, u16 queue, @@ -239,16 +240,16 @@ static inline int drv_tx_last_beacon(struct ieee80211_local *local) } static inline int drv_ampdu_action(struct ieee80211_local *local, - struct ieee80211_vif *vif, + struct ieee80211_sub_if_data *sdata, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, u16 *ssn) { int ret = -EOPNOTSUPP; if (local->ops->ampdu_action) - ret = local->ops->ampdu_action(&local->hw, vif, action, + ret = local->ops->ampdu_action(&local->hw, &sdata->vif, action, sta, tid, ssn); - trace_drv_ampdu_action(local, vif, action, sta, tid, ssn, ret); + trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn, ret); return ret; } diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 7b155b947209..ee2d19a25ce1 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -25,10 +25,12 @@ static inline void trace_ ## name(proto) {} #define STA_PR_FMT " sta:%pM" #define STA_PR_ARG __entry->sta_addr -#define VIF_ENTRY __field(enum nl80211_iftype, vif_type) __field(void *, vif) -#define VIF_ASSIGN __entry->vif_type = vif ? 
vif->type : 0; __entry->vif = vif -#define VIF_PR_FMT " vif:%p(%d)" -#define VIF_PR_ARG __entry->vif, __entry->vif_type +#define VIF_ENTRY __field(enum nl80211_iftype, vif_type) __field(void *, sdata) \ + __string(vif_name, sdata->dev ? sdata->dev->name : "") +#define VIF_ASSIGN __entry->vif_type = sdata->vif.type; __entry->sdata = sdata; \ + __assign_str(vif_name, sdata->dev ? sdata->dev->name : "") +#define VIF_PR_FMT " vif:%s(%d)" +#define VIF_PR_ARG __get_str(vif_name), __entry->vif_type TRACE_EVENT(drv_start, TP_PROTO(struct ieee80211_local *local, int ret), @@ -70,10 +72,10 @@ TRACE_EVENT(drv_stop, TRACE_EVENT(drv_add_interface, TP_PROTO(struct ieee80211_local *local, - struct ieee80211_vif *vif, + struct ieee80211_sub_if_data *sdata, int ret), - TP_ARGS(local, vif, ret), + TP_ARGS(local, sdata, ret), TP_STRUCT__entry( LOCAL_ENTRY @@ -85,7 +87,7 @@ TRACE_EVENT(drv_add_interface, TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; - memcpy(__entry->addr, vif->addr, 6); + memcpy(__entry->addr, sdata->vif.addr, 6); __entry->ret = ret; ), @@ -96,9 +98,9 @@ TRACE_EVENT(drv_add_interface, ); TRACE_EVENT(drv_remove_interface, - TP_PROTO(struct ieee80211_local *local, struct ieee80211_vif *vif), + TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), - TP_ARGS(local, vif), + TP_ARGS(local, sdata), TP_STRUCT__entry( LOCAL_ENTRY @@ -109,7 +111,7 @@ TRACE_EVENT(drv_remove_interface, TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; - memcpy(__entry->addr, vif->addr, 6); + memcpy(__entry->addr, sdata->vif.addr, 6); ), TP_printk( @@ -163,11 +165,11 @@ TRACE_EVENT(drv_config, TRACE_EVENT(drv_bss_info_changed, TP_PROTO(struct ieee80211_local *local, - struct ieee80211_vif *vif, + struct ieee80211_sub_if_data *sdata, struct ieee80211_bss_conf *info, u32 changed), - TP_ARGS(local, vif, info, changed), + TP_ARGS(local, sdata, info, changed), TP_STRUCT__entry( LOCAL_ENTRY @@ -291,11 +293,11 @@ TRACE_EVENT(drv_set_tim, TRACE_EVENT(drv_set_key, TP_PROTO(struct ieee80211_local *local, - enum set_key_cmd cmd, struct ieee80211_vif *vif, + enum set_key_cmd cmd, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta, struct ieee80211_key_conf *key, int ret), - TP_ARGS(local, cmd, vif, sta, key, ret), + TP_ARGS(local, cmd, sdata, sta, key, ret), TP_STRUCT__entry( LOCAL_ENTRY @@ -489,11 +491,11 @@ TRACE_EVENT(drv_set_rts_threshold, TRACE_EVENT(drv_sta_notify, TP_PROTO(struct ieee80211_local *local, - struct ieee80211_vif *vif, + struct ieee80211_sub_if_data *sdata, enum sta_notify_cmd cmd, struct ieee80211_sta *sta), - TP_ARGS(local, vif, cmd, sta), + TP_ARGS(local, sdata, cmd, sta), TP_STRUCT__entry( LOCAL_ENTRY @@ -654,12 +656,12 @@ TRACE_EVENT(drv_tx_last_beacon, TRACE_EVENT(drv_ampdu_action, TP_PROTO(struct ieee80211_local *local, - struct ieee80211_vif *vif, + struct ieee80211_sub_if_data *sdata, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, u16 *ssn, int ret), - TP_ARGS(local, vif, action, sta, tid, ssn, ret), + TP_ARGS(local, sdata, action, sta, tid, ssn, ret), TP_STRUCT__entry( LOCAL_ENTRY diff --git a/net/mac80211/key.c b/net/mac80211/key.c index ac1a777674f6..32ee6d0ee34d 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -139,7 +139,7 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) struct ieee80211_sub_if_data, u.ap); - ret = drv_set_key(key->local, SET_KEY, &sdata->vif, sta, &key->conf); + ret = drv_set_key(key->local, SET_KEY, sdata, sta, &key->conf); if (!ret) { spin_lock_bh(&todo_lock); @@ -181,7 +181,7 @@ static void 
ieee80211_key_disable_hw_accel(struct ieee80211_key *key) struct ieee80211_sub_if_data, u.ap); - ret = drv_set_key(key->local, DISABLE_KEY, &sdata->vif, + ret = drv_set_key(key->local, DISABLE_KEY, sdata, sta, &key->conf); if (ret) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index dbf05b4dd003..98320a94c270 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -222,8 +222,7 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, } } - drv_bss_info_changed(local, &sdata->vif, - &sdata->vif.bss_conf, changed); + drv_bss_info_changed(local, sdata, &sdata->vif.bss_conf, changed); } u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 05ccaadb6a0e..05e161c3cbc5 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -65,7 +65,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) struct ieee80211_sub_if_data, u.ap); - drv_sta_notify(local, &sdata->vif, STA_NOTIFY_REMOVE, + drv_sta_notify(local, sdata, STA_NOTIFY_REMOVE, &sta->sta); } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index d0136e3da487..ebab696def5b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1021,7 +1021,7 @@ static void ap_sta_ps_start(struct sta_info *sta) atomic_inc(&sdata->bss->num_sta_ps); set_sta_flags(sta, WLAN_STA_PS_STA); - drv_sta_notify(local, &sdata->vif, STA_NOTIFY_SLEEP, &sta->sta); + drv_sta_notify(local, sdata, STA_NOTIFY_SLEEP, &sta->sta); #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %pM aid %d enters power save mode\n", sdata->name, sta->sta.addr, sta->sta.aid); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 294f1b8b1545..d1a77e79d7a9 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -397,7 +397,7 @@ int sta_info_insert(struct sta_info *sta) struct ieee80211_sub_if_data, u.ap); - drv_sta_notify(local, &sdata->vif, STA_NOTIFY_ADD, &sta->sta); + drv_sta_notify(local, sdata, STA_NOTIFY_ADD, &sta->sta); sdata = sta->sdata; } @@ -537,7 +537,7 @@ static void __sta_info_unlink(struct sta_info **sta) struct ieee80211_sub_if_data, u.ap); - drv_sta_notify(local, &sdata->vif, STA_NOTIFY_REMOVE, + drv_sta_notify(local, sdata, STA_NOTIFY_REMOVE, &(*sta)->sta); sdata = (*sta)->sdata; } @@ -876,7 +876,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) struct ieee80211_local *local = sdata->local; int sent, buffered; - drv_sta_notify(local, &sdata->vif, STA_NOTIFY_AWAKE, &sta->sta); + drv_sta_notify(local, sdata, STA_NOTIFY_AWAKE, &sta->sta); if (!skb_queue_empty(&sta->ps_tx_buf)) sta_info_clear_tim_bit(sta); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index d45760eae6c7..d54dbe8e09ba 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1066,7 +1066,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) struct ieee80211_sub_if_data, u.ap); - drv_sta_notify(local, &sdata->vif, STA_NOTIFY_ADD, + drv_sta_notify(local, sdata, STA_NOTIFY_ADD, &sta->sta); } spin_unlock_irqrestore(&local->sta_lock, flags); -- cgit v1.2.2 From 59d9cb071d6209f2e8df2d16228cfdc7bab1f2d1 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Thu, 17 Dec 2009 13:54:57 +0100 Subject: mac80211: remove payload alignment warning The payload alignment warning enabled by MAC80211_DEBUG_PACKET_ALIGNMENT is difficult. To fix it, a firmware change is needed but in most cases that's very difficult. So the benefit from the warning is low and most probably it just creates more confusion for people who just enable all warnings (like it did for me). 
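(Short of a firmware fix, a driver can realign the payload in software at the cost of a copy per frame. The helper below is purely illustrative -- it is not part of this series, it assumes a linear skb, and the function name and headroom math are assumptions rather than existing mac80211 API:

static struct sk_buff *rx_realign_payload(struct sk_buff *skb, unsigned int hdrlen)
{
	struct sk_buff *nskb;

	/* nothing to do if the payload already sits on a 4-byte boundary */
	if (!((unsigned long)(skb->data + hdrlen) & 3))
		return skb;

	nskb = dev_alloc_skb(skb->len + 3);
	if (!nskb) {
		dev_kfree_skb(skb);
		return NULL;
	}
	/* a freshly allocated skb->data is suitably aligned; shift it so
	 * that data + hdrlen lands on a 4-byte boundary, then copy */
	skb_reserve(nskb, (4 - (hdrlen & 3)) & 3);
	memcpy(skb_put(nskb, skb->len), skb->data, skb->len);
	dev_kfree_skb(skb);
	return nskb;
}

That per-frame copy is exactly the overhead the alignment warning was meant to help driver authors avoid.)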
Remove the unaligned IP payload warning and the kconfig option. But leave the unaligned packet warning, it will be enabled with MAC80211_VERBOSE_DEBUG. Signed-off-by: Kalle Valo Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/Kconfig | 12 ------------ net/mac80211/rx.c | 27 +++++++-------------------- 2 files changed, 7 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index a10d508b07e1..a952b7f8c648 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -96,18 +96,6 @@ menuconfig MAC80211_DEBUG_MENU ---help--- This option collects various mac80211 debug settings. -config MAC80211_DEBUG_PACKET_ALIGNMENT - bool "Enable packet alignment debugging" - depends on MAC80211_DEBUG_MENU - ---help--- - This option is recommended for driver authors and strongly - discouraged for everybody else, it will trigger a warning - when a driver hands mac80211 a buffer that is aligned in - a way that will cause problems with the IP stack on some - architectures. - - Say N unless you're writing a mac80211 based driver. - config MAC80211_NOINLINE bool "Do not inline TX/RX handlers" depends on MAC80211_DEBUG_MENU diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index ebab696def5b..dbfd684e3e2e 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -361,7 +361,9 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx) * boundary. In the case of regular frames, this simply means aligning the * payload to a four-byte boundary (because either the IP header is directly * contained, or IV/RFC1042 headers that have a length divisible by four are - * in front of it). + * in front of it). If the payload data is not properly aligned and the + * architecture doesn't support efficient unaligned operations, mac80211 + * will align the data. * * With A-MSDU frames, however, the payload data address must yield two modulo * four because there are 14-byte 802.3 headers within the A-MSDU frames that @@ -375,25 +377,10 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx) */ static void ieee80211_verify_alignment(struct ieee80211_rx_data *rx) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; - int hdrlen; - -#ifndef CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT - return; +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + WARN_ONCE((unsigned long)rx->skb->data & 1, + "unaligned packet at 0x%p\n", rx->skb->data); #endif - - if (WARN_ONCE((unsigned long)rx->skb->data & 1, - "unaligned packet at 0x%p\n", rx->skb->data)) - return; - - if (!ieee80211_is_data_present(hdr->frame_control)) - return; - - hdrlen = ieee80211_hdrlen(hdr->frame_control); - if (rx->flags & IEEE80211_RX_AMSDU) - hdrlen += ETH_HLEN; - WARN_ONCE(((unsigned long)(rx->skb->data + hdrlen)) & 3, - "unaligned IP payload at 0x%p\n", rx->skb->data + hdrlen); } @@ -1510,7 +1497,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) if (skb) { int align __maybe_unused; -#if defined(CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT) || !defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS /* * 'align' will only take the values 0 or 2 here * since all frames are required to be aligned -- cgit v1.2.2 From 3b377ea9d4efc94dc52fe41b4dfdb463635ab298 Mon Sep 17 00:00:00 2001 From: "John W. 
Linville" Date: Fri, 18 Dec 2009 17:59:01 -0500 Subject: wireless: support internal statically compiled regulatory database This patch provides infrastructure for machine translation of the regulatory rules database used by CRDA into a C data structure. It includes code for searching that database as an alternative to dynamic regulatory rules updates via CRDA. Most people should use CRDA instead of this infrastructure, but it provides a better alternative than the WIRELESS_OLD_REGULATORY infrastructure (which can now be removed). Signed-off-by: John W. Linville --- net/wireless/.gitignore | 1 + net/wireless/Kconfig | 16 +++++++ net/wireless/Makefile | 6 +++ net/wireless/db.txt | 17 +++++++ net/wireless/genregdb.awk | 118 +++++++++++++++++++++++++++++++++++++++++++++ net/wireless/reg.c | 120 ++++++++++++++++++++++++++++++++++++---------- net/wireless/regdb.h | 7 +++ 7 files changed, 261 insertions(+), 24 deletions(-) create mode 100644 net/wireless/.gitignore create mode 100644 net/wireless/db.txt create mode 100644 net/wireless/genregdb.awk create mode 100644 net/wireless/regdb.h (limited to 'net') diff --git a/net/wireless/.gitignore b/net/wireless/.gitignore new file mode 100644 index 000000000000..c33451b896d9 --- /dev/null +++ b/net/wireless/.gitignore @@ -0,0 +1 @@ +regdb.c diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 90e93a5701aa..8419971f07c5 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -109,6 +109,22 @@ config WIRELESS_OLD_REGULATORY Say N and if you say Y, please tell us why. The default is N. +config CFG80211_INTERNAL_REGDB + bool "use statically compiled regulatory rules database" if EMBEDDED + default n + depends on CFG80211 + ---help--- + This option generates an internal data structure representing + the wireless regulatory rules described in net/wireless/db.txt + and includes code to query that database. This is an alternative + to using CRDA for defining regulatory rules for the kernel. + + For details see: + + http://wireless.kernel.org/en/developers/Regulatory + + Most distributions have a CRDA package. So if unsure, say N. + config CFG80211_WEXT bool "cfg80211 wireless extensions compatibility" depends on CFG80211 diff --git a/net/wireless/Makefile b/net/wireless/Makefile index f07c8dc7aab2..e77e508126fa 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -13,5 +13,11 @@ cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o cfg80211-y += mlme.o ibss.o sme.o chan.o ethtool.o cfg80211-$(CONFIG_CFG80211_DEBUGFS) += debugfs.o cfg80211-$(CONFIG_CFG80211_WEXT) += wext-compat.o wext-sme.o +cfg80211-$(CONFIG_CFG80211_INTERNAL_REGDB) += regdb.o ccflags-y += -D__CHECK_ENDIAN__ + +$(obj)/regdb.c: $(src)/db.txt $(src)/genregdb.awk + @$(AWK) -f $(srctree)/$(src)/genregdb.awk < $< > $@ + +clean-files := regdb.c diff --git a/net/wireless/db.txt b/net/wireless/db.txt new file mode 100644 index 000000000000..a2fc3a09ccdc --- /dev/null +++ b/net/wireless/db.txt @@ -0,0 +1,17 @@ +# +# This file is a placeholder to prevent accidental build breakage if someone +# enables CONFIG_CFG80211_INTERNAL_REGDB. Almost no one actually needs to +# enable that build option. +# +# You should be using CRDA instead. It is even better if you use the CRDA +# package provided by your distribution, since they will probably keep it +# up-to-date on your behalf. 
+# +# If you _really_ intend to use CONFIG_CFG80211_INTERNAL_REGDB then you will +# need to replace this file with one containing appropriately formatted +# regulatory rules that cover the regulatory domains you will be using. Your +# best option is to extract the db.txt file from the wireless-regdb git +# repository: +# +# git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-regdb.git +# diff --git a/net/wireless/genregdb.awk b/net/wireless/genregdb.awk new file mode 100644 index 000000000000..8316cf075ce9 --- /dev/null +++ b/net/wireless/genregdb.awk @@ -0,0 +1,118 @@ +#!/usr/bin/awk -f +# +# genregdb.awk -- generate regdb.c from db.txt +# +# Actually, it reads from stdin (presumed to be db.txt) and writes +# to stdout (presumed to be regdb.c), but close enough... +# +# Copyright 2009 John W. Linville +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. +# + +BEGIN { + active = 0 + rules = 0; + print "/*" + print " * DO NOT EDIT -- file generated from data in db.txt" + print " */" + print "" + print "#include " + print "#include " + print "" + regdb = "const struct ieee80211_regdomain *reg_regdb[] = {\n" +} + +/^[ \t]*#/ { + /* Ignore */ +} + +!active && /^[ \t]*$/ { + /* Ignore */ +} + +!active && /country/ { + country=$2 + sub(/:/, "", country) + printf "static const struct ieee80211_regdomain regdom_%s = {\n", country + printf "\t.alpha2 = \"%s\",\n", country + printf "\t.reg_rules = {\n" + active = 1 + regdb = regdb "\t®dom_" country ",\n" +} + +active && /^[ \t]*\(/ { + start = $1 + sub(/\(/, "", start) + end = $3 + bw = $5 + sub(/\),/, "", bw) + gain = $6 + sub(/\(/, "", gain) + sub(/,/, "", gain) + power = $7 + sub(/\)/, "", power) + sub(/,/, "", power) + # power might be in mW... + units = $8 + sub(/\)/, "", units) + sub(/,/, "", units) + if (units == "mW") { + if (power == 100) { + power = 20 + } else if (power == 200) { + power = 23 + } else if (power == 500) { + power = 27 + } else if (power == 1000) { + power = 30 + } else { + print "Unknown power value in database!" 
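# (note: the mW-to-dBm table above is just 10*log10 of the milliwatt value, rounded:
#  100 mW = 20 dBm, 200 mW = 23 dBm, 500 mW = 27 dBm, 1000 mW = 30 dBm)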
+ } + } + flagstr = "" + for (i=8; i<=NF; i++) + flagstr = flagstr $i + split(flagstr, flagarray, ",") + flags = "" + for (arg in flagarray) { + if (flagarray[arg] == "NO-OFDM") { + flags = flags "\n\t\t\tNL80211_RRF_NO_OFDM | " + } else if (flagarray[arg] == "NO-CCK") { + flags = flags "\n\t\t\tNL80211_RRF_NO_CCK | " + } else if (flagarray[arg] == "NO-INDOOR") { + flags = flags "\n\t\t\tNL80211_RRF_NO_INDOOR | " + } else if (flagarray[arg] == "NO-OUTDOOR") { + flags = flags "\n\t\t\tNL80211_RRF_NO_OUTDOOR | " + } else if (flagarray[arg] == "DFS") { + flags = flags "\n\t\t\tNL80211_RRF_DFS | " + } else if (flagarray[arg] == "PTP-ONLY") { + flags = flags "\n\t\t\tNL80211_RRF_PTP_ONLY | " + } else if (flagarray[arg] == "PTMP-ONLY") { + flags = flags "\n\t\t\tNL80211_RRF_PTMP_ONLY | " + } else if (flagarray[arg] == "PASSIVE-SCAN") { + flags = flags "\n\t\t\tNL80211_RRF_PASSIVE_SCAN | " + } else if (flagarray[arg] == "NO-IBSS") { + flags = flags "\n\t\t\tNL80211_RRF_NO_IBSS | " + } + } + flags = flags "0" + printf "\t\tREG_RULE(%d, %d, %d, %d, %d, %s),\n", start, end, bw, gain, power, flags + rules++ +} + +active && /^[ \t]*$/ { + active = 0 + printf "\t},\n" + printf "\t.n_reg_rules = %d\n", rules + printf "};\n\n" + rules = 0; +} + +END { + print regdb "};" + print "" + print "int reg_regdb_size = ARRAY_SIZE(reg_regdb);" +} diff --git a/net/wireless/reg.c b/net/wireless/reg.c index c01470e7de15..65f86264f7bb 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -40,6 +40,7 @@ #include #include "core.h" #include "reg.h" +#include "regdb.h" #include "nl80211.h" /* Receipt of information from last regulatory request */ @@ -360,6 +361,98 @@ static bool country_ie_integrity_changes(u32 checksum) return false; } +static int reg_copy_regd(const struct ieee80211_regdomain **dst_regd, + const struct ieee80211_regdomain *src_regd) +{ + struct ieee80211_regdomain *regd; + int size_of_regd = 0; + unsigned int i; + + size_of_regd = sizeof(struct ieee80211_regdomain) + + ((src_regd->n_reg_rules + 1) * sizeof(struct ieee80211_reg_rule)); + + regd = kzalloc(size_of_regd, GFP_KERNEL); + if (!regd) + return -ENOMEM; + + memcpy(regd, src_regd, sizeof(struct ieee80211_regdomain)); + + for (i = 0; i < src_regd->n_reg_rules; i++) + memcpy(®d->reg_rules[i], &src_regd->reg_rules[i], + sizeof(struct ieee80211_reg_rule)); + + *dst_regd = regd; + return 0; +} + +#ifdef CONFIG_CFG80211_INTERNAL_REGDB +struct reg_regdb_search_request { + char alpha2[2]; + struct list_head list; +}; + +static LIST_HEAD(reg_regdb_search_list); +static DEFINE_SPINLOCK(reg_regdb_search_lock); + +static void reg_regdb_search(struct work_struct *work) +{ + struct reg_regdb_search_request *request; + const struct ieee80211_regdomain *curdom, *regdom; + int i, r; + + spin_lock(®_regdb_search_lock); + while (!list_empty(®_regdb_search_list)) { + request = list_first_entry(®_regdb_search_list, + struct reg_regdb_search_request, + list); + list_del(&request->list); + + for (i=0; ialpha2, curdom->alpha2, 2)) { + r = reg_copy_regd(®dom, curdom); + if (r) + break; + spin_unlock(®_regdb_search_lock); + mutex_lock(&cfg80211_mutex); + set_regdom(regdom); + mutex_unlock(&cfg80211_mutex); + spin_lock(®_regdb_search_lock); + break; + } + } + + kfree(request); + } + spin_unlock(®_regdb_search_lock); +} + +static DECLARE_WORK(reg_regdb_work, reg_regdb_search); + +static void reg_regdb_query(const char *alpha2) +{ + struct reg_regdb_search_request *request; + + if (!alpha2) + return; + + request = kzalloc(sizeof(struct reg_regdb_search_request), 
GFP_KERNEL); + if (!request) + return; + + memcpy(request->alpha2, alpha2, 2); + + spin_lock(®_regdb_search_lock); + list_add_tail(&request->list, ®_regdb_search_list); + spin_unlock(®_regdb_search_lock); + + schedule_work(®_regdb_work); +} +#else +static inline void reg_regdb_query(const char *alpha2) {} +#endif /* CONFIG_CFG80211_INTERNAL_REGDB */ + /* * This lets us keep regulatory code which is updated on a regulatory * basis in userspace. @@ -379,6 +472,9 @@ static int call_crda(const char *alpha2) printk(KERN_INFO "cfg80211: Calling CRDA to update world " "regulatory domain\n"); + /* query internal regulatory database (if it exists) */ + reg_regdb_query(alpha2); + country_env[8] = alpha2[0]; country_env[9] = alpha2[1]; @@ -1367,30 +1463,6 @@ void wiphy_apply_custom_regulatory(struct wiphy *wiphy, } EXPORT_SYMBOL(wiphy_apply_custom_regulatory); -static int reg_copy_regd(const struct ieee80211_regdomain **dst_regd, - const struct ieee80211_regdomain *src_regd) -{ - struct ieee80211_regdomain *regd; - int size_of_regd = 0; - unsigned int i; - - size_of_regd = sizeof(struct ieee80211_regdomain) + - ((src_regd->n_reg_rules + 1) * sizeof(struct ieee80211_reg_rule)); - - regd = kzalloc(size_of_regd, GFP_KERNEL); - if (!regd) - return -ENOMEM; - - memcpy(regd, src_regd, sizeof(struct ieee80211_regdomain)); - - for (i = 0; i < src_regd->n_reg_rules; i++) - memcpy(®d->reg_rules[i], &src_regd->reg_rules[i], - sizeof(struct ieee80211_reg_rule)); - - *dst_regd = regd; - return 0; -} - /* * Return value which can be used by ignore_request() to indicate * it has been determined we should intersect two regulatory domains diff --git a/net/wireless/regdb.h b/net/wireless/regdb.h new file mode 100644 index 000000000000..818222c92513 --- /dev/null +++ b/net/wireless/regdb.h @@ -0,0 +1,7 @@ +#ifndef __REGDB_H__ +#define __REGDB_H__ + +extern const struct ieee80211_regdomain *reg_regdb[]; +extern int reg_regdb_size; + +#endif /* __REGDB_H__ */ -- cgit v1.2.2 From ca99861d5421c91f5a8fd3a77acb4b7be14f119d Mon Sep 17 00:00:00 2001 From: gregor kowski Date: Wed, 9 Dec 2009 23:25:05 +0100 Subject: mac80211 : fix a race with update_tkip_key The mac80211 tkip code won't call update_tkip_key, if rx packets are received without KEY_FLAG_UPLOADED_TO_HARDWARE. This can happen on first packet because the hardware key stuff is called asynchronously with todo workqueue. This patch workaround that by tracking if we sent the key to the driver. Signed-off-by: Gregor Kowski Signed-off-by: John W. 
Linville --- net/mac80211/key.h | 8 +++++++- net/mac80211/tkip.c | 38 ++++++++++++++++++++------------------ 2 files changed, 27 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/mac80211/key.h b/net/mac80211/key.h index a49f93b79e92..bdc2968c2bbe 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -59,11 +59,17 @@ enum ieee80211_internal_key_flags { KEY_FLAG_TODO_DEFMGMTKEY = BIT(6), }; +enum ieee80211_internal_tkip_state { + TKIP_STATE_NOT_INIT, + TKIP_STATE_PHASE1_DONE, + TKIP_STATE_PHASE1_HW_UPLOADED, +}; + struct tkip_ctx { u32 iv32; u16 iv16; u16 p1k[5]; - int initialized; + enum ieee80211_internal_tkip_state state; }; struct ieee80211_key { diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c index 4921d724b6c7..b73454a507f9 100644 --- a/net/mac80211/tkip.c +++ b/net/mac80211/tkip.c @@ -100,7 +100,7 @@ static void tkip_mixing_phase1(const u8 *tk, struct tkip_ctx *ctx, p1k[3] += tkipS(p1k[2] ^ get_unaligned_le16(tk + 12 + j)); p1k[4] += tkipS(p1k[3] ^ get_unaligned_le16(tk + 0 + j)) + i; } - ctx->initialized = 1; + ctx->state = TKIP_STATE_PHASE1_DONE; } static void tkip_mixing_phase2(const u8 *tk, struct tkip_ctx *ctx, @@ -183,7 +183,7 @@ void ieee80211_get_tkip_key(struct ieee80211_key_conf *keyconf, /* Update the p1k only when the iv16 in the packet wraps around, this * might occur after the wrap around of iv16 in the key in case of * fragmented packets. */ - if (iv16 == 0 || !ctx->initialized) + if (iv16 == 0 || ctx->state == TKIP_STATE_NOT_INIT) tkip_mixing_phase1(tk, ctx, hdr->addr2, iv32); if (type == IEEE80211_TKIP_P1_KEY) { @@ -209,7 +209,7 @@ void ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm, const u8 *tk = &key->conf.key[NL80211_TKIP_DATA_OFFSET_ENCR_KEY]; /* Calculate per-packet key */ - if (ctx->iv16 == 0 || !ctx->initialized) + if (ctx->iv16 == 0 || ctx->state == TKIP_STATE_NOT_INIT) tkip_mixing_phase1(tk, ctx, ta, ctx->iv32); tkip_mixing_phase2(tk, ctx, ctx->iv16, rc4key); @@ -259,7 +259,7 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, if ((keyid >> 6) != key->conf.keyidx) return TKIP_DECRYPT_INVALID_KEYIDX; - if (key->u.tkip.rx[queue].initialized && + if (key->u.tkip.rx[queue].state != TKIP_STATE_NOT_INIT && (iv32 < key->u.tkip.rx[queue].iv32 || (iv32 == key->u.tkip.rx[queue].iv32 && iv16 <= key->u.tkip.rx[queue].iv16))) { @@ -275,11 +275,11 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, if (only_iv) { res = TKIP_DECRYPT_OK; - key->u.tkip.rx[queue].initialized = 1; + key->u.tkip.rx[queue].state = TKIP_STATE_PHASE1_HW_UPLOADED; goto done; } - if (!key->u.tkip.rx[queue].initialized || + if (key->u.tkip.rx[queue].state == TKIP_STATE_NOT_INIT || key->u.tkip.rx[queue].iv32 != iv32) { /* IV16 wrapped around - perform TKIP phase 1 */ tkip_mixing_phase1(tk, &key->u.tkip.rx[queue], ta, iv32); @@ -299,18 +299,20 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, printk("\n"); } #endif - if (key->local->ops->update_tkip_key && - key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) { - static const u8 bcast[ETH_ALEN] = - {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - const u8 *sta_addr = key->sta->sta.addr; - - if (is_multicast_ether_addr(ra)) - sta_addr = bcast; - - drv_update_tkip_key(key->local, &key->conf, sta_addr, - iv32, key->u.tkip.rx[queue].p1k); - } + } + if (key->local->ops->update_tkip_key && + key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && + key->u.tkip.rx[queue].state != TKIP_STATE_PHASE1_HW_UPLOADED) { + static const u8 bcast[ETH_ALEN] = + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + const u8 
*sta_addr = key->sta->sta.addr; + + if (is_multicast_ether_addr(ra)) + sta_addr = bcast; + + drv_update_tkip_key(key->local, &key->conf, sta_addr, + iv32, key->u.tkip.rx[queue].p1k); + key->u.tkip.rx[queue].state = TKIP_STATE_PHASE1_HW_UPLOADED; } tkip_mixing_phase2(tk, &key->u.tkip.rx[queue], iv16, rc4key); -- cgit v1.2.2 From eaf85ca7fecb218fc41ff57c1642ca73b097aabb Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Tue, 1 Dec 2009 10:18:37 +0800 Subject: wireless: add ieee80211_amsdu_to_8023s Move the A-MSDU handling code from mac80211 to cfg80211 so that more drivers can use it. The new created function ieee80211_amsdu_to_8023s converts an A-MSDU frame to a list of 802.3 frames. Cc: Johannes Berg Signed-off-by: Zhu Yi Signed-off-by: John W. Linville --- net/mac80211/rx.c | 106 ++++++++++------------------------------------------ net/wireless/util.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 117 insertions(+), 88 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index dbfd684e3e2e..a182e423109b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1541,16 +1541,10 @@ static ieee80211_rx_result debug_noinline ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) { struct net_device *dev = rx->sdata->dev; - struct ieee80211_local *local = rx->local; - u16 ethertype; - u8 *payload; - struct sk_buff *skb = rx->skb, *frame = NULL; + struct sk_buff *skb = rx->skb; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; __le16 fc = hdr->frame_control; - const struct ethhdr *eth; - int remaining, err; - u8 dst[ETH_ALEN]; - u8 src[ETH_ALEN]; + struct sk_buff_head frame_list; if (unlikely(!ieee80211_is_data(fc))) return RX_CONTINUE; @@ -1561,94 +1555,34 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) if (!(rx->flags & IEEE80211_RX_AMSDU)) return RX_CONTINUE; - err = __ieee80211_data_to_8023(rx); - if (unlikely(err)) + if (ieee80211_has_a4(hdr->frame_control) && + rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && + !rx->sdata->u.vlan.sta) return RX_DROP_UNUSABLE; - skb->dev = dev; - - dev->stats.rx_packets++; - dev->stats.rx_bytes += skb->len; - - /* skip the wrapping header */ - eth = (struct ethhdr *) skb_pull(skb, sizeof(struct ethhdr)); - if (!eth) + if (is_multicast_ether_addr(hdr->addr1) && + ((rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && + rx->sdata->u.vlan.sta) || + (rx->sdata->vif.type == NL80211_IFTYPE_STATION && + rx->sdata->u.mgd.use_4addr))) return RX_DROP_UNUSABLE; - while (skb != frame) { - u8 padding; - __be16 len = eth->h_proto; - unsigned int subframe_len = sizeof(struct ethhdr) + ntohs(len); - - remaining = skb->len; - memcpy(dst, eth->h_dest, ETH_ALEN); - memcpy(src, eth->h_source, ETH_ALEN); + skb->dev = dev; + __skb_queue_head_init(&frame_list); - padding = ((4 - subframe_len) & 0x3); - /* the last MSDU has no padding */ - if (subframe_len > remaining) - return RX_DROP_UNUSABLE; + ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr, + rx->sdata->vif.type, + rx->local->hw.extra_tx_headroom); - skb_pull(skb, sizeof(struct ethhdr)); - /* if last subframe reuse skb */ - if (remaining <= subframe_len + padding) - frame = skb; - else { - /* - * Allocate and reserve two bytes more for payload - * alignment since sizeof(struct ethhdr) is 14. 
- */ - frame = dev_alloc_skb( - ALIGN(local->hw.extra_tx_headroom, 4) + - subframe_len + 2); - - if (frame == NULL) - return RX_DROP_UNUSABLE; - - skb_reserve(frame, - ALIGN(local->hw.extra_tx_headroom, 4) + - sizeof(struct ethhdr) + 2); - memcpy(skb_put(frame, ntohs(len)), skb->data, - ntohs(len)); - - eth = (struct ethhdr *) skb_pull(skb, ntohs(len) + - padding); - if (!eth) { - dev_kfree_skb(frame); - return RX_DROP_UNUSABLE; - } - } - - skb_reset_network_header(frame); - frame->dev = dev; - frame->priority = skb->priority; - rx->skb = frame; - - payload = frame->data; - ethertype = (payload[6] << 8) | payload[7]; - - if (likely((compare_ether_addr(payload, rfc1042_header) == 0 && - ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) || - compare_ether_addr(payload, - bridge_tunnel_header) == 0)) { - /* remove RFC1042 or Bridge-Tunnel - * encapsulation and replace EtherType */ - skb_pull(frame, 6); - memcpy(skb_push(frame, ETH_ALEN), src, ETH_ALEN); - memcpy(skb_push(frame, ETH_ALEN), dst, ETH_ALEN); - } else { - memcpy(skb_push(frame, sizeof(__be16)), - &len, sizeof(__be16)); - memcpy(skb_push(frame, ETH_ALEN), src, ETH_ALEN); - memcpy(skb_push(frame, ETH_ALEN), dst, ETH_ALEN); - } + while (!skb_queue_empty(&frame_list)) { + rx->skb = __skb_dequeue(&frame_list); if (!ieee80211_frame_allowed(rx, fc)) { - if (skb == frame) /* last frame */ - return RX_DROP_UNUSABLE; - dev_kfree_skb(frame); + dev_kfree_skb(rx->skb); continue; } + dev->stats.rx_packets++; + dev->stats.rx_bytes += rx->skb->len; ieee80211_deliver_skb(rx); } diff --git a/net/wireless/util.c b/net/wireless/util.c index a3c841a255db..23557c1d0a9c 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -285,7 +285,7 @@ static int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr) } } -int ieee80211_data_to_8023(struct sk_buff *skb, u8 *addr, +int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, enum nl80211_iftype iftype) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; @@ -383,7 +383,7 @@ int ieee80211_data_to_8023(struct sk_buff *skb, u8 *addr, } EXPORT_SYMBOL(ieee80211_data_to_8023); -int ieee80211_data_from_8023(struct sk_buff *skb, u8 *addr, +int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, enum nl80211_iftype iftype, u8 *bssid, bool qos) { struct ieee80211_hdr hdr; @@ -497,6 +497,101 @@ int ieee80211_data_from_8023(struct sk_buff *skb, u8 *addr, } EXPORT_SYMBOL(ieee80211_data_from_8023); + +void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, + const u8 *addr, enum nl80211_iftype iftype, + const unsigned int extra_headroom) +{ + struct sk_buff *frame = NULL; + u16 ethertype; + u8 *payload; + const struct ethhdr *eth; + int remaining, err; + u8 dst[ETH_ALEN], src[ETH_ALEN]; + + err = ieee80211_data_to_8023(skb, addr, iftype); + if (err) + goto out; + + /* skip the wrapping header */ + eth = (struct ethhdr *) skb_pull(skb, sizeof(struct ethhdr)); + if (!eth) + goto out; + + while (skb != frame) { + u8 padding; + __be16 len = eth->h_proto; + unsigned int subframe_len = sizeof(struct ethhdr) + ntohs(len); + + remaining = skb->len; + memcpy(dst, eth->h_dest, ETH_ALEN); + memcpy(src, eth->h_source, ETH_ALEN); + + padding = (4 - subframe_len) & 0x3; + /* the last MSDU has no padding */ + if (subframe_len > remaining) + goto purge; + + skb_pull(skb, sizeof(struct ethhdr)); + /* reuse skb for the last subframe */ + if (remaining <= subframe_len + padding) + frame = skb; + else { + unsigned int hlen = ALIGN(extra_headroom, 4); + /* + * Allocate and 
reserve two bytes more for payload + * alignment since sizeof(struct ethhdr) is 14. + */ + frame = dev_alloc_skb(hlen + subframe_len + 2); + if (!frame) + goto purge; + + skb_reserve(frame, hlen + sizeof(struct ethhdr) + 2); + memcpy(skb_put(frame, ntohs(len)), skb->data, + ntohs(len)); + + eth = (struct ethhdr *)skb_pull(skb, ntohs(len) + + padding); + if (!eth) { + dev_kfree_skb(frame); + goto purge; + } + } + + skb_reset_network_header(frame); + frame->dev = skb->dev; + frame->priority = skb->priority; + + payload = frame->data; + ethertype = (payload[6] << 8) | payload[7]; + + if (likely((compare_ether_addr(payload, rfc1042_header) == 0 && + ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) || + compare_ether_addr(payload, + bridge_tunnel_header) == 0)) { + /* remove RFC1042 or Bridge-Tunnel + * encapsulation and replace EtherType */ + skb_pull(frame, 6); + memcpy(skb_push(frame, ETH_ALEN), src, ETH_ALEN); + memcpy(skb_push(frame, ETH_ALEN), dst, ETH_ALEN); + } else { + memcpy(skb_push(frame, sizeof(__be16)), &len, + sizeof(__be16)); + memcpy(skb_push(frame, ETH_ALEN), src, ETH_ALEN); + memcpy(skb_push(frame, ETH_ALEN), dst, ETH_ALEN); + } + __skb_queue_tail(list, frame); + } + + return; + + purge: + __skb_queue_purge(list); + out: + dev_kfree_skb(skb); +} +EXPORT_SYMBOL(ieee80211_amsdu_to_8023s); + /* Given a data frame determine the 802.1p/1d tag to use. */ unsigned int cfg80211_classify8021d(struct sk_buff *skb) { -- cgit v1.2.2 From 0f78231bffb868a30e8533aace142213266bb811 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 1 Dec 2009 13:37:02 +0100 Subject: mac80211: enable spatial multiplexing powersave Enable spatial multiplexing in mac80211 by telling the driver what to do and, where necessary, sending action frames to the AP to update the requested SMPS mode. Also includes a trivial implementation for hwsim that just logs the requested mode. For now, the userspace interface is in debugfs only, and let you toggle the requested mode at any time. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 49 +++++++++++++++++++ net/mac80211/debugfs_netdev.c | 111 ++++++++++++++++++++++++++++++++++++++++-- net/mac80211/driver-trace.h | 2 + net/mac80211/ht.c | 47 ++++++++++++++++++ net/mac80211/ieee80211_i.h | 14 ++++++ net/mac80211/main.c | 24 +++++++++ net/mac80211/mlme.c | 63 ++++++++++++++++++++++-- net/mac80211/status.c | 38 +++++++++++++++ net/mac80211/util.c | 74 ++++++++++++++++++++++++++++ 9 files changed, 415 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index fcfa1bf776a7..8c35418d1c96 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1318,6 +1318,50 @@ static int ieee80211_testmode_cmd(struct wiphy *wiphy, void *data, int len) } #endif +int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata, + enum ieee80211_smps_mode smps_mode) +{ + const u8 *ap; + enum ieee80211_smps_mode old_req; + int err; + + old_req = sdata->u.mgd.req_smps; + sdata->u.mgd.req_smps = smps_mode; + + if (old_req == smps_mode && + smps_mode != IEEE80211_SMPS_AUTOMATIC) + return 0; + + /* + * If not associated, or current association is not an HT + * association, there's no need to send an action frame. 
+ */ + if (!sdata->u.mgd.associated || + sdata->local->oper_channel_type == NL80211_CHAN_NO_HT) { + mutex_lock(&sdata->local->iflist_mtx); + ieee80211_recalc_smps(sdata->local, sdata); + mutex_unlock(&sdata->local->iflist_mtx); + return 0; + } + + ap = sdata->u.mgd.associated->cbss.bssid; + + if (smps_mode == IEEE80211_SMPS_AUTOMATIC) { + if (sdata->u.mgd.powersave) + smps_mode = IEEE80211_SMPS_DYNAMIC; + else + smps_mode = IEEE80211_SMPS_OFF; + } + + /* send SM PS frame to AP */ + err = ieee80211_send_smps_action(sdata, smps_mode, + ap, ap); + if (err) + sdata->u.mgd.req_smps = old_req; + + return err; +} + static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, bool enabled, int timeout) { @@ -1335,6 +1379,11 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, sdata->u.mgd.powersave = enabled; conf->dynamic_ps_timeout = timeout; + /* no change, but if automatic follow powersave */ + mutex_lock(&sdata->u.mgd.mtx); + __ieee80211_request_smps(sdata, sdata->u.mgd.req_smps); + mutex_unlock(&sdata->u.mgd.mtx); + if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 5d9c797635a9..355983503885 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -41,6 +41,30 @@ static ssize_t ieee80211_if_read( return ret; } +static ssize_t ieee80211_if_write( + struct ieee80211_sub_if_data *sdata, + const char __user *userbuf, + size_t count, loff_t *ppos, + ssize_t (*write)(struct ieee80211_sub_if_data *, const char *, int)) +{ + u8 *buf; + ssize_t ret = -ENODEV; + + buf = kzalloc(count, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + if (copy_from_user(buf, userbuf, count)) + return -EFAULT; + + rtnl_lock(); + if (sdata->dev->reg_state == NETREG_REGISTERED) + ret = (*write)(sdata, buf, count); + rtnl_unlock(); + + return ret; +} + #define IEEE80211_IF_FMT(name, field, format_string) \ static ssize_t ieee80211_if_fmt_##name( \ const struct ieee80211_sub_if_data *sdata, char *buf, \ @@ -71,7 +95,7 @@ static ssize_t ieee80211_if_fmt_##name( \ return scnprintf(buf, buflen, "%pM\n", sdata->field); \ } -#define __IEEE80211_IF_FILE(name) \ +#define __IEEE80211_IF_FILE(name, _write) \ static ssize_t ieee80211_if_read_##name(struct file *file, \ char __user *userbuf, \ size_t count, loff_t *ppos) \ @@ -82,12 +106,24 @@ static ssize_t ieee80211_if_read_##name(struct file *file, \ } \ static const struct file_operations name##_ops = { \ .read = ieee80211_if_read_##name, \ + .write = (_write), \ .open = mac80211_open_file_generic, \ } +#define __IEEE80211_IF_FILE_W(name) \ +static ssize_t ieee80211_if_write_##name(struct file *file, \ + const char __user *userbuf, \ + size_t count, loff_t *ppos) \ +{ \ + return ieee80211_if_write(file->private_data, userbuf, count, \ + ppos, ieee80211_if_parse_##name); \ +} \ +__IEEE80211_IF_FILE(name, ieee80211_if_write_##name) + + #define IEEE80211_IF_FILE(name, field, format) \ IEEE80211_IF_FMT_##format(name, field) \ - __IEEE80211_IF_FILE(name) + __IEEE80211_IF_FILE(name, NULL) /* common attributes */ IEEE80211_IF_FILE(drop_unencrypted, drop_unencrypted, DEC); @@ -99,6 +135,70 @@ IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC); IEEE80211_IF_FILE(aid, u.mgd.aid, DEC); IEEE80211_IF_FILE(capab, u.mgd.capab, HEX); +static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata, + enum ieee80211_smps_mode smps_mode) +{ + struct ieee80211_local *local = sdata->local; + int 
err; + + if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_STATIC_SMPS) && + smps_mode == IEEE80211_SMPS_STATIC) + return -EINVAL; + + /* auto should be dynamic if in PS mode */ + if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS) && + (smps_mode == IEEE80211_SMPS_DYNAMIC || + smps_mode == IEEE80211_SMPS_AUTOMATIC)) + return -EINVAL; + + /* supported only on managed interfaces for now */ + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return -EOPNOTSUPP; + + mutex_lock(&local->iflist_mtx); + err = __ieee80211_request_smps(sdata, smps_mode); + mutex_unlock(&local->iflist_mtx); + + return err; +} + +static const char *smps_modes[IEEE80211_SMPS_NUM_MODES] = { + [IEEE80211_SMPS_AUTOMATIC] = "auto", + [IEEE80211_SMPS_OFF] = "off", + [IEEE80211_SMPS_STATIC] = "static", + [IEEE80211_SMPS_DYNAMIC] = "dynamic", +}; + +static ssize_t ieee80211_if_fmt_smps(const struct ieee80211_sub_if_data *sdata, + char *buf, int buflen) +{ + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return -EOPNOTSUPP; + + return snprintf(buf, buflen, "request: %s\nused: %s\n", + smps_modes[sdata->u.mgd.req_smps], + smps_modes[sdata->u.mgd.ap_smps]); +} + +static ssize_t ieee80211_if_parse_smps(struct ieee80211_sub_if_data *sdata, + const char *buf, int buflen) +{ + enum ieee80211_smps_mode mode; + + for (mode = 0; mode < IEEE80211_SMPS_NUM_MODES; mode++) { + if (strncmp(buf, smps_modes[mode], buflen) == 0) { + int err = ieee80211_set_smps(sdata, mode); + if (!err) + return buflen; + return err; + } + } + + return -EINVAL; +} + +__IEEE80211_IF_FILE_W(smps); + /* AP attributes */ IEEE80211_IF_FILE(num_sta_ps, u.ap.num_sta_ps, ATOMIC); IEEE80211_IF_FILE(dtim_count, u.ap.dtim_count, DEC); @@ -109,7 +209,7 @@ static ssize_t ieee80211_if_fmt_num_buffered_multicast( return scnprintf(buf, buflen, "%u\n", skb_queue_len(&sdata->u.ap.ps_bc_buf)); } -__IEEE80211_IF_FILE(num_buffered_multicast); +__IEEE80211_IF_FILE(num_buffered_multicast, NULL); /* WDS attributes */ IEEE80211_IF_FILE(peer, u.wds.remote_addr, MAC); @@ -158,6 +258,10 @@ IEEE80211_IF_FILE(dot11MeshHWMPRootMode, debugfs_create_file(#name, 0400, sdata->debugfs.dir, \ sdata, &name##_ops); +#define DEBUGFS_ADD_MODE(name, mode) \ + debugfs_create_file(#name, mode, sdata->debugfs.dir, \ + sdata, &name##_ops); + static void add_sta_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(drop_unencrypted, sta); @@ -167,6 +271,7 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(bssid, sta); DEBUGFS_ADD(aid, sta); DEBUGFS_ADD(capab, sta); + DEBUGFS_ADD_MODE(smps, 0600); } static void add_ap_files(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index ee2d19a25ce1..7a849b920165 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -140,6 +140,7 @@ TRACE_EVENT(drv_config, __field(u8, short_frame_max_tx_count) __field(int, center_freq) __field(int, channel_type) + __field(int, smps) ), TP_fast_assign( @@ -155,6 +156,7 @@ TRACE_EVENT(drv_config, __entry->short_frame_max_tx_count = local->hw.conf.short_frame_max_tx_count; __entry->center_freq = local->hw.conf.channel->center_freq; __entry->channel_type = local->hw.conf.channel_type; + __entry->smps = local->hw.conf.smps_mode; ), TP_printk( diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 45ebd062a2fb..63b8f86b7f16 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -166,3 +166,50 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, spin_unlock_bh(&sta->lock); } } + +int 
ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, + enum ieee80211_smps_mode smps, const u8 *da, + const u8 *bssid) +{ + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb; + struct ieee80211_mgmt *action_frame; + + /* 27 = header + category + action + smps mode */ + skb = dev_alloc_skb(27 + local->hw.extra_tx_headroom); + if (!skb) + return -ENOMEM; + + skb_reserve(skb, local->hw.extra_tx_headroom); + action_frame = (void *)skb_put(skb, 27); + memcpy(action_frame->da, da, ETH_ALEN); + memcpy(action_frame->sa, sdata->dev->dev_addr, ETH_ALEN); + memcpy(action_frame->bssid, bssid, ETH_ALEN); + action_frame->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); + action_frame->u.action.category = WLAN_CATEGORY_HT; + action_frame->u.action.u.ht_smps.action = WLAN_HT_ACTION_SMPS; + switch (smps) { + case IEEE80211_SMPS_AUTOMATIC: + case IEEE80211_SMPS_NUM_MODES: + WARN_ON(1); + case IEEE80211_SMPS_OFF: + action_frame->u.action.u.ht_smps.smps_control = + WLAN_HT_SMPS_CONTROL_DISABLED; + break; + case IEEE80211_SMPS_STATIC: + action_frame->u.action.u.ht_smps.smps_control = + WLAN_HT_SMPS_CONTROL_STATIC; + break; + case IEEE80211_SMPS_DYNAMIC: + action_frame->u.action.u.ht_smps.smps_control = + WLAN_HT_SMPS_CONTROL_DYNAMIC; + break; + } + + /* we'll do more on status of this frame */ + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + ieee80211_tx_skb(sdata, skb); + + return 0; +} diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 178e329f9257..e63aecbddfbe 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -297,6 +297,8 @@ struct ieee80211_if_managed { unsigned long timers_running; /* used for quiesce/restart */ bool powersave; /* powersave requested for this iface */ + enum ieee80211_smps_mode req_smps, /* requested smps mode */ + ap_smps; /* smps mode AP thinks we're in */ unsigned long request; @@ -587,6 +589,9 @@ struct ieee80211_local { /* used for uploading changed mc list */ struct work_struct reconfig_filter; + /* used to reconfigure hardware SM PS */ + struct work_struct recalc_smps; + /* aggregated multicast list */ struct dev_addr_list *mc_list; int mc_count; @@ -760,6 +765,8 @@ struct ieee80211_local { int user_power_level; /* in dBm */ int power_constr_level; /* in dBm */ + enum ieee80211_smps_mode smps_mode; + struct work_struct restart_work; #ifdef CONFIG_MAC80211_DEBUGFS @@ -978,6 +985,9 @@ void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u1 void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, const u8 *da, u16 tid, u16 initiator, u16 reason_code); +int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, + enum ieee80211_smps_mode smps, const u8 *da, + const u8 *bssid); void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid, u16 initiator, u16 reason); @@ -1088,6 +1098,10 @@ void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, u32 ieee80211_sta_get_rates(struct ieee80211_local *local, struct ieee802_11_elems *elems, enum ieee80211_band band); +int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata, + enum ieee80211_smps_mode smps_mode); +void ieee80211_recalc_smps(struct ieee80211_local *local, + struct ieee80211_sub_if_data *forsdata); #ifdef CONFIG_MAC80211_NOINLINE #define debug_noinline noinline diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 98320a94c270..e1293e8ed83a 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ 
-113,6 +113,18 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) changed |= IEEE80211_CONF_CHANGE_CHANNEL; } + if (!conf_is_ht(&local->hw.conf)) { + /* + * mac80211.h documents that this is only valid + * when the channel is set to an HT type, and + * that otherwise STATIC is used. + */ + local->hw.conf.smps_mode = IEEE80211_SMPS_STATIC; + } else if (local->hw.conf.smps_mode != local->smps_mode) { + local->hw.conf.smps_mode = local->smps_mode; + changed |= IEEE80211_CONF_CHANGE_SMPS; + } + if (scan_chan) power = chan->max_power; else @@ -297,6 +309,16 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw) } EXPORT_SYMBOL(ieee80211_restart_hw); +static void ieee80211_recalc_smps_work(struct work_struct *work) +{ + struct ieee80211_local *local = + container_of(work, struct ieee80211_local, recalc_smps); + + mutex_lock(&local->iflist_mtx); + ieee80211_recalc_smps(local, NULL); + mutex_unlock(&local->iflist_mtx); +} + struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, const struct ieee80211_ops *ops) { @@ -370,6 +392,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, INIT_WORK(&local->restart_work, ieee80211_restart_work); INIT_WORK(&local->reconfig_filter, ieee80211_reconfig_filter); + INIT_WORK(&local->recalc_smps, ieee80211_recalc_smps_work); + local->smps_mode = IEEE80211_SMPS_OFF; INIT_WORK(&local->dynamic_ps_enable_work, ieee80211_dynamic_ps_enable_work); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index cd5dcc3d8c2b..0a762a9ba4df 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -398,6 +398,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, __le16 tmp; u32 flags = local->hw.conf.channel->flags; + /* determine capability flags */ + switch (ht_info->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: if (flags & IEEE80211_CHAN_NO_HT40PLUS) { @@ -413,17 +415,64 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, break; } - tmp = cpu_to_le16(cap); - pos = skb_put(skb, sizeof(struct ieee80211_ht_cap)+2); + /* set SM PS mode properly */ + cap &= ~IEEE80211_HT_CAP_SM_PS; + /* new association always uses requested smps mode */ + if (ifmgd->req_smps == IEEE80211_SMPS_AUTOMATIC) { + if (ifmgd->powersave) + ifmgd->ap_smps = IEEE80211_SMPS_DYNAMIC; + else + ifmgd->ap_smps = IEEE80211_SMPS_OFF; + } else + ifmgd->ap_smps = ifmgd->req_smps; + + switch (ifmgd->ap_smps) { + case IEEE80211_SMPS_AUTOMATIC: + case IEEE80211_SMPS_NUM_MODES: + WARN_ON(1); + case IEEE80211_SMPS_OFF: + cap |= WLAN_HT_CAP_SM_PS_DISABLED << + IEEE80211_HT_CAP_SM_PS_SHIFT; + break; + case IEEE80211_SMPS_STATIC: + cap |= WLAN_HT_CAP_SM_PS_STATIC << + IEEE80211_HT_CAP_SM_PS_SHIFT; + break; + case IEEE80211_SMPS_DYNAMIC: + cap |= WLAN_HT_CAP_SM_PS_DYNAMIC << + IEEE80211_HT_CAP_SM_PS_SHIFT; + break; + } + + /* reserve and fill IE */ + + pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2); *pos++ = WLAN_EID_HT_CAPABILITY; *pos++ = sizeof(struct ieee80211_ht_cap); memset(pos, 0, sizeof(struct ieee80211_ht_cap)); + + /* capability flags */ + tmp = cpu_to_le16(cap); memcpy(pos, &tmp, sizeof(u16)); pos += sizeof(u16); - /* TODO: needs a define here for << 2 */ + + /* AMPDU parameters */ *pos++ = sband->ht_cap.ampdu_factor | - (sband->ht_cap.ampdu_density << 2); + (sband->ht_cap.ampdu_density << + IEEE80211_HT_AMPDU_PARM_DENSITY_SHIFT); + + /* MCS set */ memcpy(pos, &sband->ht_cap.mcs, sizeof(sband->ht_cap.mcs)); + pos += sizeof(sband->ht_cap.mcs); + + /* extended capabilities */ + pos 
+= sizeof(__le16); + + /* BF capabilities */ + pos += sizeof(__le32); + + /* antenna selection */ + pos += sizeof(u8); } IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; @@ -932,6 +981,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, mutex_lock(&local->iflist_mtx); ieee80211_recalc_ps(local, -1); + ieee80211_recalc_smps(local, sdata); mutex_unlock(&local->iflist_mtx); netif_start_queue(sdata->dev); @@ -2327,6 +2377,11 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) ifmgd->flags |= IEEE80211_STA_WMM_ENABLED; mutex_init(&ifmgd->mtx); + + if (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS) + ifmgd->req_smps = IEEE80211_SMPS_AUTOMATIC; + else + ifmgd->req_smps = IEEE80211_SMPS_OFF; } /* scan finished notification */ diff --git a/net/mac80211/status.c b/net/mac80211/status.c index b4608f11a40f..0c0850d37dda 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -134,6 +134,40 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, dev_kfree_skb(skb); } +static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb) +{ + struct ieee80211_mgmt *mgmt = (void *) skb->data; + struct ieee80211_local *local = sta->local; + struct ieee80211_sub_if_data *sdata = sta->sdata; + + if (ieee80211_is_action(mgmt->frame_control) && + sdata->vif.type == NL80211_IFTYPE_STATION && + mgmt->u.action.category == WLAN_CATEGORY_HT && + mgmt->u.action.u.ht_smps.action == WLAN_HT_ACTION_SMPS) { + /* + * This update looks racy, but isn't -- if we come + * here we've definitely got a station that we're + * talking to, and on a managed interface that can + * only be the AP. And the only other place updating + * this variable is before we're associated. + */ + switch (mgmt->u.action.u.ht_smps.smps_control) { + case WLAN_HT_SMPS_CONTROL_DYNAMIC: + sta->sdata->u.mgd.ap_smps = IEEE80211_SMPS_DYNAMIC; + break; + case WLAN_HT_SMPS_CONTROL_STATIC: + sta->sdata->u.mgd.ap_smps = IEEE80211_SMPS_STATIC; + break; + case WLAN_HT_SMPS_CONTROL_DISABLED: + default: /* shouldn't happen since we don't send that */ + sta->sdata->u.mgd.ap_smps = IEEE80211_SMPS_OFF; + break; + } + + ieee80211_queue_work(&local->hw, &local->recalc_smps); + } +} + void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) { struct sk_buff *skb2; @@ -210,6 +244,10 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) rate_control_tx_status(local, sband, sta, skb); if (ieee80211_vif_is_mesh(&sta->sdata->vif)) ieee80211s_update_metric(local, sta, skb); + + if (!(info->flags & IEEE80211_TX_CTL_INJECTED) && + (info->flags & IEEE80211_TX_STAT_ACK)) + ieee80211_frame_acked(sta, skb); } rcu_read_unlock(); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index d54dbe8e09ba..086ef6257b4b 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1170,3 +1170,77 @@ int ieee80211_reconfig(struct ieee80211_local *local) return 0; } +static int check_mgd_smps(struct ieee80211_if_managed *ifmgd, + enum ieee80211_smps_mode *smps_mode) +{ + if (ifmgd->associated) { + *smps_mode = ifmgd->ap_smps; + + if (*smps_mode == IEEE80211_SMPS_AUTOMATIC) { + if (ifmgd->powersave) + *smps_mode = IEEE80211_SMPS_DYNAMIC; + else + *smps_mode = IEEE80211_SMPS_OFF; + } + + return 1; + } + + return 0; +} + +/* must hold iflist_mtx */ +void ieee80211_recalc_smps(struct ieee80211_local *local, + struct ieee80211_sub_if_data *forsdata) +{ + struct ieee80211_sub_if_data *sdata; + enum ieee80211_smps_mode smps_mode = 
IEEE80211_SMPS_OFF; + int count = 0; + + if (forsdata) + WARN_ON(!mutex_is_locked(&forsdata->u.mgd.mtx)); + + WARN_ON(!mutex_is_locked(&local->iflist_mtx)); + + /* + * This function could be improved to handle multiple + * interfaces better, but right now it makes any + * non-station interfaces force SM PS to be turned + * off. If there are multiple station interfaces it + * could also use the best possible mode, e.g. if + * one is in static and the other in dynamic then + * dynamic is ok. + */ + + list_for_each_entry(sdata, &local->interfaces, list) { + if (!netif_running(sdata->dev)) + continue; + if (sdata->vif.type != NL80211_IFTYPE_STATION) + goto set; + if (sdata != forsdata) { + /* + * This nested is ok -- we are holding the iflist_mtx + * so can't get here twice or so. But it's required + * since normally we acquire it first and then the + * iflist_mtx. + */ + mutex_lock_nested(&sdata->u.mgd.mtx, SINGLE_DEPTH_NESTING); + count += check_mgd_smps(&sdata->u.mgd, &smps_mode); + mutex_unlock(&sdata->u.mgd.mtx); + } else + count += check_mgd_smps(&sdata->u.mgd, &smps_mode); + + if (count > 1) { + smps_mode = IEEE80211_SMPS_OFF; + break; + } + } + + if (smps_mode == local->smps_mode) + return; + + set: + local->smps_mode = smps_mode; + /* changed flag is auto-detected for this */ + ieee80211_hw_config(local, 0); +} -- cgit v1.2.2 From f38fd12fa7b7b98e158a9b31d388da34eef25c22 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 1 Dec 2009 18:29:42 +0100 Subject: mac80211: allow disabling 40MHz on 2.4GHz In some situations it is required that a system be configured with no support for 40 MHz channels in the 2.4 GHz band. Rather than imposing any such restrictions on everybody, allow configuration a system like that with a module parameter. It is writable at runtime but only takes effect at the time of the next association. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/main.c | 6 ++++++ net/mac80211/mlme.c | 6 ++++++ net/mac80211/util.c | 14 +++++++++++--- 4 files changed, 25 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e63aecbddfbe..6fb3f7181536 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -881,6 +881,8 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, void ieee80211_configure_filter(struct ieee80211_local *local); u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata); +extern bool ieee80211_disable_40mhz_24ghz; + /* STA code */ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata); int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/main.c b/net/mac80211/main.c index e1293e8ed83a..25f52098b636 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -33,6 +33,12 @@ #include "cfg.h" #include "debugfs.h" + +bool ieee80211_disable_40mhz_24ghz; +module_param(ieee80211_disable_40mhz_24ghz, bool, 0644); +MODULE_PARM_DESC(ieee80211_disable_40mhz_24ghz, + "Disable 40MHz support in the 2.4GHz band"); + void ieee80211_configure_filter(struct ieee80211_local *local) { u64 mc; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0a762a9ba4df..a7472c979c63 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -400,6 +400,12 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, /* determine capability flags */ + if (ieee80211_disable_40mhz_24ghz && + sband->band == IEEE80211_BAND_2GHZ) { + cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; + cap &= ~IEEE80211_HT_CAP_SGI_40; + } + switch (ht_info->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: if (flags & IEEE80211_CHAN_NO_HT40PLUS) { diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 086ef6257b4b..acb6626ad0a4 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -908,16 +908,24 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, } if (sband->ht_cap.ht_supported) { - __le16 tmp = cpu_to_le16(sband->ht_cap.cap); + u16 cap = sband->ht_cap.cap; + __le16 tmp; + + if (ieee80211_disable_40mhz_24ghz && + sband->band == IEEE80211_BAND_2GHZ) { + cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; + cap &= ~IEEE80211_HT_CAP_SGI_40; + } *pos++ = WLAN_EID_HT_CAPABILITY; *pos++ = sizeof(struct ieee80211_ht_cap); memset(pos, 0, sizeof(struct ieee80211_ht_cap)); + tmp = cpu_to_le16(cap); memcpy(pos, &tmp, sizeof(u16)); pos += sizeof(u16); - /* TODO: needs a define here for << 2 */ *pos++ = sband->ht_cap.ampdu_factor | - (sband->ht_cap.ampdu_density << 2); + (sband->ht_cap.ampdu_density << + IEEE80211_HT_AMPDU_PARM_DENSITY_SHIFT); memcpy(pos, &sband->ht_cap.mcs, sizeof(sband->ht_cap.mcs)); pos += sizeof(sband->ht_cap.mcs); pos += 2 + 4 + 1; /* ext info, BF cap, antsel */ -- cgit v1.2.2 From 5fba4af32ceeb935b3926714df9a64a33c2c9cf5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 2 Dec 2009 12:43:42 +0100 Subject: cfg80211: avoid sending spurious deauth to userspace Before commit ca9034592823e8179511e48a78731f95bfdd766c Author: Holger Schurig Date: Tue Oct 13 13:45:28 2009 +0200 cfg80211: remove warning in deauth case we assumed that drivers never give us spurious deauth frames because they filter them out based on the auth state they keep track of. 
This turned out to be racy, because userspace might deauth while the AP is also sending a deauth frame, so the warning was removed. However, in that case we should not tell userspace about the AP's frame if it requested deauth "first", where "first" means it came to cfg80211 first. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/mlme.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 1001db4912f7..acaeaa784d68 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -137,22 +137,23 @@ void __cfg80211_send_deauth(struct net_device *dev, struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; const u8 *bssid = mgmt->bssid; int i; + bool found = false; ASSERT_WDEV_LOCK(wdev); - nl80211_send_deauth(rdev, dev, buf, len, GFP_KERNEL); - if (wdev->current_bss && memcmp(wdev->current_bss->pub.bssid, bssid, ETH_ALEN) == 0) { cfg80211_unhold_bss(wdev->current_bss); cfg80211_put_bss(&wdev->current_bss->pub); wdev->current_bss = NULL; + found = true; } else for (i = 0; i < MAX_AUTH_BSSES; i++) { if (wdev->auth_bsses[i] && memcmp(wdev->auth_bsses[i]->pub.bssid, bssid, ETH_ALEN) == 0) { cfg80211_unhold_bss(wdev->auth_bsses[i]); cfg80211_put_bss(&wdev->auth_bsses[i]->pub); wdev->auth_bsses[i] = NULL; + found = true; break; } if (wdev->authtry_bsses[i] && @@ -160,10 +161,16 @@ void __cfg80211_send_deauth(struct net_device *dev, cfg80211_unhold_bss(wdev->authtry_bsses[i]); cfg80211_put_bss(&wdev->authtry_bsses[i]->pub); wdev->authtry_bsses[i] = NULL; + found = true; break; } } + if (!found) + return; + + nl80211_send_deauth(rdev, dev, buf, len, GFP_KERNEL); + if (wdev->sme_state == CFG80211_SME_CONNECTED) { u16 reason_code; bool from_ap; -- cgit v1.2.2 From 5d1ec85f00e999dba61bdcbd959d62439d418e56 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 2 Dec 2009 12:43:43 +0100 Subject: mac80211: dont try to use existing sta for AP Clean out some cruft that could use an already existing sta_info struct -- that case cannot happen. Also, there's a bug there -- if allocation/insertion fails then it is possible that we are left in a lingering state where mac80211 waits for the AP, cfg80211 waits for mac80211, but the AP has already replied. Since there's no way to indicate an internal error, pretend there was a timeout, i.e. that the AP never responded. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/mlme.c | 57 ++++++++++++++++++++++------------------------------- 1 file changed, 24 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a7472c979c63..5174bfc5710d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -75,6 +75,9 @@ enum rx_mgmt_action { /* caller must call cfg80211_send_disassoc() */ RX_MGMT_CFG80211_DISASSOC, + /* caller must tell cfg80211 about internal error */ + RX_MGMT_CFG80211_ASSOC_ERROR, + /* caller must call cfg80211_auth_timeout() & free work */ RX_MGMT_CFG80211_AUTH_TO, @@ -1479,8 +1482,8 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; u8 *pos; u32 changed = 0; - int i, j; - bool have_higher_than_11mbit = false, newsta = false; + int i, j, err; + bool have_higher_than_11mbit = false; u16 ap_ht_cap_flags; /* @@ -1542,30 +1545,18 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, printk(KERN_DEBUG "%s: associated\n", sdata->name); ifmgd->aid = aid; - rcu_read_lock(); - - /* Add STA entry for the AP */ - sta = sta_info_get(sdata, wk->bss->cbss.bssid); + sta = sta_info_alloc(sdata, wk->bss->cbss.bssid, GFP_KERNEL); if (!sta) { - newsta = true; - - rcu_read_unlock(); - - sta = sta_info_alloc(sdata, wk->bss->cbss.bssid, GFP_KERNEL); - if (!sta) { - printk(KERN_DEBUG "%s: failed to alloc STA entry for" - " the AP\n", sdata->name); - return RX_MGMT_NONE; - } - - set_sta_flags(sta, WLAN_STA_AUTH | WLAN_STA_ASSOC | - WLAN_STA_ASSOC_AP); - if (!(ifmgd->flags & IEEE80211_STA_CONTROL_PORT)) - set_sta_flags(sta, WLAN_STA_AUTHORIZED); - - rcu_read_lock(); + printk(KERN_DEBUG "%s: failed to alloc STA entry for" + " the AP\n", sdata->name); + return RX_MGMT_CFG80211_ASSOC_ERROR; } + set_sta_flags(sta, WLAN_STA_AUTH | WLAN_STA_ASSOC | + WLAN_STA_ASSOC_AP); + if (!(ifmgd->flags & IEEE80211_STA_CONTROL_PORT)) + set_sta_flags(sta, WLAN_STA_AUTHORIZED); + rates = 0; basic_rates = 0; sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; @@ -1628,18 +1619,14 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, if (elems.wmm_param) set_sta_flags(sta, WLAN_STA_WME); - if (newsta) { - int err = sta_info_insert(sta); - if (err) { - printk(KERN_DEBUG "%s: failed to insert STA entry for" - " the AP (error %d)\n", sdata->name, err); - rcu_read_unlock(); - return RX_MGMT_NONE; - } + err = sta_info_insert(sta); + sta = NULL; + if (err) { + printk(KERN_DEBUG "%s: failed to insert STA entry for" + " the AP (error %d)\n", sdata->name, err); + return RX_MGMT_CFG80211_ASSOC_ERROR; } - rcu_read_unlock(); - if (elems.wmm_param) ieee80211_sta_wmm_params(local, ifmgd, elems.wmm_param, elems.wmm_param_len); @@ -2084,6 +2071,10 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, case RX_MGMT_CFG80211_DEAUTH: cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); break; + case RX_MGMT_CFG80211_ASSOC_ERROR: + /* an internal error -- pretend timeout for now */ + cfg80211_send_assoc_timeout(sdata->dev, mgmt->bssid); + break; default: WARN(1, "unexpected: %d", rma); } -- cgit v1.2.2 From 2c7e6bc9ac7cb518cf037495932d80f71a1596f2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 4 Dec 2009 09:26:38 +0100 Subject: mac80211: disallow fixing bitrates with hw rate control When hw rate control is used, these parameters have no meaning because the hardware cannot get at them right now, so disallow setting them. Also clean up the function a bit. 
Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 8c35418d1c96..f07c4abefe56 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1399,15 +1399,25 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - int i, err = -EINVAL; + int i; u32 target_rate; struct ieee80211_supported_band *sband; + /* + * This _could_ be supported by providing a hook for + * drivers for this function, but at this point it + * doesn't seem worth bothering. + */ + if (local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) + return -EOPNOTSUPP; + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - /* target_rate = -1, rate->fixed = 0 means auto only, so use all rates + /* + * target_rate = -1, rate->fixed = 0 means auto only, so use all rates * target_rate = X, rate->fixed = 1 means only rate X - * target_rate = X, rate->fixed = 0 means all rates <= X */ + * target_rate = X, rate->fixed = 0 means all rates <= X + */ sdata->max_ratectrl_rateidx = -1; sdata->force_unicast_rateidx = -1; @@ -1418,20 +1428,18 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, else return 0; - for (i=0; i< sband->n_bitrates; i++) { - struct ieee80211_rate *brate = &sband->bitrates[i]; - int this_rate = brate->bitrate; + for (i = 0; i< sband->n_bitrates; i++) { + if (target_rate != sband->bitrates[i].bitrate) + continue; - if (target_rate == this_rate) { - sdata->max_ratectrl_rateidx = i; - if (mask->fixed) - sdata->force_unicast_rateidx = i; - err = 0; - break; - } + /* requested bitrate found */ + sdata->max_ratectrl_rateidx = i; + if (mask->fixed) + sdata->force_unicast_rateidx = i; + return 0; } - return err; + return -EINVAL; } struct cfg80211_ops mac80211_config_ops = { -- cgit v1.2.2 From cb136f54ee11f9ead716b5ad0b10fa1c1b05271d Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 7 Dec 2009 14:01:28 -0500 Subject: mac80211: make debugfs mcs set entry reflect 16 bits The MCS set is 16 bits so when debugging ensure the full 16 bits are represented. Current reading would make you think its only 8 bits. Cc: johannes@sipsolutions.net Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/mac80211/debugfs_sta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 374ff6f98a9c..9a1d1e40eca8 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -168,7 +168,7 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf, p += scnprintf(p, sizeof(buf) + buf - p, "ht %ssupported\n", htc->ht_supported ? "" : "not "); if (htc->ht_supported) { - p += scnprintf(p, sizeof(buf)+buf-p, "cap: %#.2x\n", htc->cap); + p += scnprintf(p, sizeof(buf)+buf-p, "cap: %#.4x\n", htc->cap); p += scnprintf(p, sizeof(buf)+buf-p, "ampdu factor/density: %d/%d\n", htc->ampdu_factor, htc->ampdu_density); p += scnprintf(p, sizeof(buf)+buf-p, "MCS mask:"); -- cgit v1.2.2 From 7db94e210351e8578d4a98fed3edd4df5f10ae9d Mon Sep 17 00:00:00 2001 From: "Luis R. 
Rodriguez" Date: Mon, 7 Dec 2009 14:54:45 -0500 Subject: mac80211: parse the HT capabilities info through debugfs When debugging you want to be lazy and not have to parse bits yourself so let mac80211 debugfs do the parsing for you. This is what I get against my WRT610N: root@tux:~# cat /sys/kernel/debug/ieee80211/phy0/stations/00\:22\:6b\:aa\:bb\:01/ht_capa ht supported cap: 0x000e HT20/HT40 SM Power Save disabled No RX STBC Max AMSDU length: 7935 bytes No DSSS/CCK HT40 ampdu factor/density: 2/6 MCS mask: ff ff 00 00 00 00 00 00 00 00 MCS rx highest: 0 MCS tx params: 0 Cc: johannes@sipsolutions.net Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/mac80211/debugfs_sta.c | 48 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 9a1d1e40eca8..c833b6ce9902 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -160,7 +160,12 @@ STA_OPS(agg_status); static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { - char buf[200], *p = buf; +#define PRINT_HT_CAP(_cond, _str) \ + do { \ + if (_cond) \ + p += scnprintf(p, sizeof(buf)+buf-p, "\t" _str "\n"); \ + } while (0) + char buf[1024], *p = buf; int i; struct sta_info *sta = file->private_data; struct ieee80211_sta_ht_cap *htc = &sta->sta.ht_cap; @@ -169,6 +174,47 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf, htc->ht_supported ? "" : "not "); if (htc->ht_supported) { p += scnprintf(p, sizeof(buf)+buf-p, "cap: %#.4x\n", htc->cap); + + PRINT_HT_CAP((htc->cap & BIT(0)), "RX LDCP"); + PRINT_HT_CAP((htc->cap & BIT(1)), "HT20/HT40"); + PRINT_HT_CAP(!(htc->cap & BIT(1)), "HT20"); + + PRINT_HT_CAP(((htc->cap >> 2) & 0x3) == 0, "Static SM Power Save"); + PRINT_HT_CAP(((htc->cap >> 2) & 0x3) == 1, "Dynamic SM Power Save"); + PRINT_HT_CAP(((htc->cap >> 2) & 0x3) == 3, "SM Power Save disabled"); + + PRINT_HT_CAP((htc->cap & BIT(4)), "RX Greenfield"); + PRINT_HT_CAP((htc->cap & BIT(5)), "RX HT20 SGI"); + PRINT_HT_CAP((htc->cap & BIT(6)), "RX HT40 SGI"); + PRINT_HT_CAP((htc->cap & BIT(7)), "TX STBC"); + + PRINT_HT_CAP(((htc->cap >> 8) & 0x3) == 0, "No RX STBC"); + PRINT_HT_CAP(((htc->cap >> 8) & 0x3) == 1, "RX STBC 1-stream"); + PRINT_HT_CAP(((htc->cap >> 8) & 0x3) == 2, "RX STBC 2-streams"); + PRINT_HT_CAP(((htc->cap >> 8) & 0x3) == 3, "RX STBC 3-streams"); + + PRINT_HT_CAP((htc->cap & BIT(10)), "HT Delayed Block Ack"); + + PRINT_HT_CAP((htc->cap & BIT(11)), "Max AMSDU length: " + "3839 bytes"); + PRINT_HT_CAP(!(htc->cap & BIT(11)), "Max AMSDU length: " + "7935 bytes"); + + /* + * For beacons and probe response this would mean the BSS + * does or does not allow the usage of DSSS/CCK HT40. + * Otherwise it means the STA does or does not use + * DSSS/CCK HT40. + */ + PRINT_HT_CAP((htc->cap & BIT(12)), "DSSS/CCK HT40"); + PRINT_HT_CAP(!(htc->cap & BIT(12)), "No DSSS/CCK HT40"); + + /* BIT(13) is reserved */ + + PRINT_HT_CAP((htc->cap & BIT(14)), "40 MHz Intolerant"); + + PRINT_HT_CAP((htc->cap & BIT(15)), "L-SIG TXOP protection"); + p += scnprintf(p, sizeof(buf)+buf-p, "ampdu factor/density: %d/%d\n", htc->ampdu_factor, htc->ampdu_density); p += scnprintf(p, sizeof(buf)+buf-p, "MCS mask:"); -- cgit v1.2.2 From 9da3e068142ec7856b2f13261dcf0660fad32b61 Mon Sep 17 00:00:00 2001 From: "Luis R. 
Rodriguez" Date: Mon, 7 Dec 2009 15:57:50 -0500 Subject: mac80211: only bother printing highest data rate on debugfs if its set IEEE-802.11n spec says the RX highest data rate field does not specify the highest supported RX data rate if its not set. Ignore it if not set then. Refer to section 7.3.56.4 Cc: johannes@sipsolutions.net Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/mac80211/debugfs_sta.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index c833b6ce9902..0d4a759ba72c 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -218,11 +218,19 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf, p += scnprintf(p, sizeof(buf)+buf-p, "ampdu factor/density: %d/%d\n", htc->ampdu_factor, htc->ampdu_density); p += scnprintf(p, sizeof(buf)+buf-p, "MCS mask:"); + for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) p += scnprintf(p, sizeof(buf)+buf-p, " %.2x", htc->mcs.rx_mask[i]); - p += scnprintf(p, sizeof(buf)+buf-p, "\nMCS rx highest: %d\n", - le16_to_cpu(htc->mcs.rx_highest)); + p += scnprintf(p, sizeof(buf)+buf-p, "\n"); + + /* If not set this is meaningless */ + if (le16_to_cpu(htc->mcs.rx_highest)) { + p += scnprintf(p, sizeof(buf)+buf-p, + "MCS rx highest: %d Mbps\n", + le16_to_cpu(htc->mcs.rx_highest)); + } + p += scnprintf(p, sizeof(buf)+buf-p, "MCS tx params: %x\n", htc->mcs.tx_params); } -- cgit v1.2.2 From 12d50c46dc0f7fd2e625c4befaa5fa5740a7a594 Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Tue, 8 Dec 2009 22:26:13 +0000 Subject: tcp: Remove check in __tcp_push_pending_frames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tcp_push checks tcp_send_head and calls __tcp_push_pending_frames, which again checks tcp_send_head, and this unnecessary check is done for every other caller of __tcp_push_pending_frames. Remove tcp_send_head check in __tcp_push_pending_frames and add the check to tcp_push_pending_frames. Other functions call __tcp_push_pending_frames only when tcp_send_head would evaluate to true. Signed-off-by: Krishna Kumar Acked-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 383ce237640f..12b2af36eab8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1794,11 +1794,6 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, int nonagle) { - struct sk_buff *skb = tcp_send_head(sk); - - if (!skb) - return; - /* If we are closed, the bytes will have to remain here. * In time closedown will finish, we empty the write queue and * all will be happy. -- cgit v1.2.2 From afeca340c078e17ca233b3c68c3c3a70c56bfe1d Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Thu, 10 Dec 2009 07:16:52 +0000 Subject: tcp: Remove unrequired operations in tcp_push() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove unrequired operations in tcp_push() Changelog: Removed a temporary skb variable from tcp_push() Signed-off-by: Krishna Kumar Acked-by: Ilpo Järvinen Signed-off-by: David S. 
Miller --- net/ipv4/tcp.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b0a26bb25e2e..8a3f05adb39b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -536,8 +536,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb) tp->nonagle &= ~TCP_NAGLE_PUSH; } -static inline void tcp_mark_urg(struct tcp_sock *tp, int flags, - struct sk_buff *skb) +static inline void tcp_mark_urg(struct tcp_sock *tp, int flags) { if (flags & MSG_OOB) tp->snd_up = tp->write_seq; @@ -546,13 +545,13 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags, static inline void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle) { - struct tcp_sock *tp = tcp_sk(sk); - if (tcp_send_head(sk)) { - struct sk_buff *skb = tcp_write_queue_tail(sk); + struct tcp_sock *tp = tcp_sk(sk); + if (!(flags & MSG_MORE) || forced_push(tp)) - tcp_mark_push(tp, skb); - tcp_mark_urg(tp, flags, skb); + tcp_mark_push(tp, tcp_write_queue_tail(sk)); + + tcp_mark_urg(tp, flags); __tcp_push_pending_frames(sk, mss_now, (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle); } -- cgit v1.2.2 From def87cf42069a6d4fd42a2ede8f19c620a292568 Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Thu, 10 Dec 2009 07:16:59 +0000 Subject: tcp: Slightly optimize tcp_sendmsg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Slightly optimize tcp_sendmsg since NETIF_F_SG is used many times iteratively in the loop. The only other modification is to change: } else if (i == MAX_SKB_FRAGS || (!i && !(sk->sk_route_caps & NETIF_F_SG))) { to: } else if (i == MAX_SKB_FRAGS || !sg) { The reason why this change is correct: this code (other than the MAX_SKB_FRAGS case) executes only due to the else part of: "if (skb_tailroom(skb) > 0) {" - i.e. there was no space in the skb to put the data inline. Hence SG is false is a sufficient condition, and there is no way a fragment can be added to the skb. Changelog: - Added the above explanation for the change Signed-off-by: Krishna Kumar Acked-by: Ilpo Järvinen Signed-off-by: David S. 
Miller --- net/ipv4/tcp.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8a3f05adb39b..d5d69ea8f249 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -876,12 +876,12 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, #define TCP_PAGE(sk) (sk->sk_sndmsg_page) #define TCP_OFF(sk) (sk->sk_sndmsg_off) -static inline int select_size(struct sock *sk) +static inline int select_size(struct sock *sk, int sg) { struct tcp_sock *tp = tcp_sk(sk); int tmp = tp->mss_cache; - if (sk->sk_route_caps & NETIF_F_SG) { + if (sg) { if (sk_can_gso(sk)) tmp = 0; else { @@ -905,7 +905,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, struct sk_buff *skb; int iovlen, flags; int mss_now, size_goal; - int err, copied; + int sg, err, copied; long timeo; lock_sock(sk); @@ -933,6 +933,8 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto out_err; + sg = sk->sk_route_caps & NETIF_F_SG; + while (--iovlen >= 0) { int seglen = iov->iov_len; unsigned char __user *from = iov->iov_base; @@ -958,8 +960,9 @@ new_segment: if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf; - skb = sk_stream_alloc_skb(sk, select_size(sk), - sk->sk_allocation); + skb = sk_stream_alloc_skb(sk, + select_size(sk, sg), + sk->sk_allocation); if (!skb) goto wait_for_memory; @@ -996,9 +999,7 @@ new_segment: /* We can extend the last page * fragment. */ merge = 1; - } else if (i == MAX_SKB_FRAGS || - (!i && - !(sk->sk_route_caps & NETIF_F_SG))) { + } else if (i == MAX_SKB_FRAGS || !sg) { /* Need to add new fragment and cannot * do this because interface is non-SG, * or because all the page slots are -- cgit v1.2.2 From 068a2de57ddf4f472e32e7af868613c574ad1d88 Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Wed, 9 Dec 2009 20:59:58 +0000 Subject: net: release dst entry while cache-hot for GSO case too Non-GSO code drops dst entry for performance reasons, but the same is missing for GSO code. Drop dst while cache-hot for GSO case too. Signed-off-by: Krishna Kumar Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index be9924f60ec3..a8d68cdedbbe 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1853,6 +1853,14 @@ gso: skb->next = nskb->next; nskb->next = NULL; + + /* + * If device doesnt need nskb->dst, release it right now while + * its hot in this cpu cache + */ + if (dev->priv_flags & IFF_XMIT_DST_RELEASE) + skb_dst_drop(nskb); + rc = ops->ndo_start_xmit(nskb, dev); if (unlikely(rc != NETDEV_TX_OK)) { if (rc & ~NETDEV_TX_MASK) -- cgit v1.2.2 From 31d12926e37291970dd4f6e9940df3897766a81d Mon Sep 17 00:00:00 2001 From: laurent chavey Date: Tue, 15 Dec 2009 11:15:28 +0000 Subject: net: Add rtnetlink init_rcvwnd to set the TCP initial receive window Add rtnetlink init_rcvwnd to set the TCP initial receive window size advertised by passive and active TCP connections. The current Linux TCP implementation limits the advertised TCP initial receive window to the one prescribed by slow start. For short lived TCP connections used for transaction type of traffic (i.e. http requests), bounding the advertised TCP initial receive window results in increased latency to complete the transaction. 
Support for setting initial congestion window is already supported using rtnetlink init_cwnd, but the feature is useless without the ability to set a larger TCP initial receive window. The rtnetlink init_rcvwnd allows increasing the TCP initial receive window, allowing TCP connection to advertise larger TCP receive window than the ones bounded by slow start. Signed-off-by: Laurent Chavey Signed-off-by: David S. Miller --- net/ipv4/syncookies.c | 3 ++- net/ipv4/tcp_output.c | 17 +++++++++++++---- net/ipv6/syncookies.c | 3 ++- 3 files changed, 17 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 66fd80ef2473..5c24db4a3c91 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -358,7 +358,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, tcp_select_initial_window(tcp_full_space(sk), req->mss, &req->rcv_wnd, &req->window_clamp, - ireq->wscale_ok, &rcv_wscale); + ireq->wscale_ok, &rcv_wscale, + dst_metric(&rt->u.dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 12b2af36eab8..4a1605d3f909 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -183,7 +183,8 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) */ void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, __u32 *window_clamp, - int wscale_ok, __u8 *rcv_wscale) + int wscale_ok, __u8 *rcv_wscale, + __u32 init_rcv_wnd) { unsigned int space = (__space < 0 ? 0 : __space); @@ -232,7 +233,13 @@ void tcp_select_initial_window(int __space, __u32 mss, init_cwnd = 2; else if (mss > 1460) init_cwnd = 3; - if (*rcv_wnd > init_cwnd * mss) + /* when initializing use the value from init_rcv_wnd + * rather than the default from above + */ + if (init_rcv_wnd && + (*rcv_wnd > init_rcv_wnd * mss)) + *rcv_wnd = init_rcv_wnd * mss; + else if (*rcv_wnd > init_cwnd * mss) *rcv_wnd = init_cwnd * mss; } @@ -2417,7 +2424,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, &req->rcv_wnd, &req->window_clamp, ireq->wscale_ok, - &rcv_wscale); + &rcv_wscale, + dst_metric(dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; } @@ -2544,7 +2552,8 @@ static void tcp_connect_init(struct sock *sk) &tp->rcv_wnd, &tp->window_clamp, sysctl_tcp_window_scaling, - &rcv_wscale); + &rcv_wscale, + dst_metric(dst, RTAX_INITRWND)); tp->rx_opt.rcv_wscale = rcv_wscale; tp->rcv_ssthresh = tp->rcv_wnd; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 7208a06576c6..34d1f0690d7e 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -269,7 +269,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW); tcp_select_initial_window(tcp_full_space(sk), req->mss, &req->rcv_wnd, &req->window_clamp, - ireq->wscale_ok, &rcv_wscale); + ireq->wscale_ok, &rcv_wscale, + dst_metric(dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; -- cgit v1.2.2 From f83d664eef180478c2dc0a0099e9d7bc1c8177ff Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Fri, 25 Dec 2009 16:35:43 -0800 Subject: wireless: fix comments in genregdb.awk Apparently some awk versions choke on C-style comments -- who knew? :-) Signed-off-by: John W. Linville Signed-off-by: David S. 
Miller --- net/wireless/genregdb.awk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/genregdb.awk b/net/wireless/genregdb.awk index 8316cf075ce9..3cc9e69880a8 100644 --- a/net/wireless/genregdb.awk +++ b/net/wireless/genregdb.awk @@ -26,11 +26,11 @@ BEGIN { } /^[ \t]*#/ { - /* Ignore */ + # Ignore } !active && /^[ \t]*$/ { - /* Ignore */ + # Ignore } !active && /country/ { -- cgit v1.2.2 From bf9ae5386bca8836c16e69ab8fdbe46767d7452a Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Sat, 26 Dec 2009 11:50:59 +0000 Subject: llc: use dev_hard_header Using dev_hard_header allows us to use LLC with VLANs and potentially other Ethernet/TokernRing specific encapsulations. It also removes code duplication between LLC and Ethernet/TokenRing core code. Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 7 +++---- net/ethernet/eth.c | 6 +++--- net/llc/llc_output.c | 45 ++++++++------------------------------------- 3 files changed, 14 insertions(+), 44 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index b7889782047e..77a49ffdd0ef 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -263,11 +263,10 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, vhdr->h_vlan_TCI = htons(vlan_tci); /* - * Set the protocol type. For a packet of type ETH_P_802_3 we - * put the length in here instead. It is up to the 802.2 - * layer to carry protocol information. + * Set the protocol type. For a packet of type ETH_P_802_3/2 we + * put the length in here instead. */ - if (type != ETH_P_802_3) + if (type != ETH_P_802_3 && type != ETH_P_802_2) vhdr->h_vlan_encapsulated_proto = htons(type); else vhdr->h_vlan_encapsulated_proto = htons(len); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index dd3db88f8f0a..205a1c12f3c0 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -73,8 +73,8 @@ __setup("ether=", netdev_boot_setup); * @len: packet length (<= skb->len) * * - * Set the protocol type. For a packet of type ETH_P_802_3 we put the length - * in here instead. It is up to the 802.2 layer to carry protocol information. + * Set the protocol type. For a packet of type ETH_P_802_3/2 we put the length + * in here instead. 
*/ int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, @@ -82,7 +82,7 @@ int eth_header(struct sk_buff *skb, struct net_device *dev, { struct ethhdr *eth = (struct ethhdr *)skb_push(skb, ETH_HLEN); - if (type != ETH_P_802_3) + if (type != ETH_P_802_3 && type != ETH_P_802_2) eth->h_proto = htons(type); else eth->h_proto = htons(len); diff --git a/net/llc/llc_output.c b/net/llc/llc_output.c index 754f4fedc852..b38a1079a98e 100644 --- a/net/llc/llc_output.c +++ b/net/llc/llc_output.c @@ -33,48 +33,19 @@ int llc_mac_hdr_init(struct sk_buff *skb, const unsigned char *sa, const unsigned char *da) { - int rc = 0; + int rc = -EINVAL; switch (skb->dev->type) { -#ifdef CONFIG_TR - case ARPHRD_IEEE802_TR: { - struct net_device *dev = skb->dev; - struct trh_hdr *trh; - - skb_push(skb, sizeof(*trh)); - skb_reset_mac_header(skb); - trh = tr_hdr(skb); - trh->ac = AC; - trh->fc = LLC_FRAME; - if (sa) - memcpy(trh->saddr, sa, dev->addr_len); - else - memset(trh->saddr, 0, dev->addr_len); - if (da) { - memcpy(trh->daddr, da, dev->addr_len); - tr_source_route(skb, trh, dev); - skb_reset_mac_header(skb); - } - break; - } -#endif + case ARPHRD_IEEE802_TR: case ARPHRD_ETHER: - case ARPHRD_LOOPBACK: { - unsigned short len = skb->len; - struct ethhdr *eth; - - skb_push(skb, sizeof(*eth)); - skb_reset_mac_header(skb); - eth = eth_hdr(skb); - eth->h_proto = htons(len); - memcpy(eth->h_dest, da, ETH_ALEN); - memcpy(eth->h_source, sa, ETH_ALEN); + case ARPHRD_LOOPBACK: + rc = dev_hard_header(skb, skb->dev, ETH_P_802_2, da, sa, + skb->len); + if (rc > 0) + rc = 0; break; - } default: - printk(KERN_WARNING "device type not supported: %d\n", - skb->dev->type); - rc = -EINVAL; + WARN(1, "device type not supported: %d\n", skb->dev->type); } return rc; } -- cgit v1.2.2 From e5cd6fe391aa8c93560bb7ffdfe334cf4d0a02e4 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Sat, 26 Dec 2009 11:51:00 +0000 Subject: llc: add support for LLC_OPT_PKTINFO Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- net/llc/af_llc.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'net') diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 3a66546cad06..ac691fe08076 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -47,6 +47,10 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout); #define dprintk(args...) #endif +/* Maybe we'll add some more in the future. */ +#define LLC_CMSG_PKTINFO 1 + + /** * llc_ui_next_link_no - return the next unused link number for a sap * @sap: Address of sap to get link number from. @@ -591,6 +595,20 @@ static int llc_wait_data(struct sock *sk, long timeo) return rc; } +static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb) +{ + struct llc_sock *llc = llc_sk(skb->sk); + + if (llc->cmsg_flags & LLC_CMSG_PKTINFO) { + struct llc_pktinfo info; + + info.lpi_ifindex = llc_sk(skb->sk)->dev->ifindex; + llc_pdu_decode_dsap(skb, &info.lpi_sap); + llc_pdu_decode_da(skb, info.lpi_mac); + put_cmsg(msg, SOL_LLC, LLC_OPT_PKTINFO, sizeof(info), &info); + } +} + /** * llc_ui_accept - accept a new incoming connection. * @sock: Socket which connections arrive on. 
@@ -812,6 +830,8 @@ copy_uaddr: memcpy(uaddr, llc_ui_skb_cb(skb), sizeof(*uaddr)); msg->msg_namelen = sizeof(*uaddr); } + if (llc_sk(sk)->cmsg_flags) + llc_cmsg_rcv(msg, skb); goto out; } @@ -1030,6 +1050,12 @@ static int llc_ui_setsockopt(struct socket *sock, int level, int optname, goto out; llc->rw = opt; break; + case LLC_OPT_PKTINFO: + if (opt) + llc->cmsg_flags |= LLC_CMSG_PKTINFO; + else + llc->cmsg_flags &= ~LLC_CMSG_PKTINFO; + break; default: rc = -ENOPROTOOPT; goto out; @@ -1083,6 +1109,9 @@ static int llc_ui_getsockopt(struct socket *sock, int level, int optname, val = llc->k; break; case LLC_OPT_RX_WIN: val = llc->rw; break; + case LLC_OPT_PKTINFO: + val = (llc->cmsg_flags & LLC_CMSG_PKTINFO) != 0; + break; default: rc = -ENOPROTOOPT; goto out; -- cgit v1.2.2 From abf9d537fea225af60762640361af7fb233b3103 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Sat, 26 Dec 2009 11:51:01 +0000 Subject: llc: add support for SO_BINDTODEVICE Using bind(MAC address) with LLC sockets has O(n) complexity, where n is the number of interfaces. To overcome this, we add support for SO_BINDTODEVICE, which drops the complexity to O(1). Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- net/llc/af_llc.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index ac691fe08076..c4d1a1da813c 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -259,7 +259,14 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) if (!sock_flag(sk, SOCK_ZAPPED)) goto out; rc = -ENODEV; - llc->dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd); + if (sk->sk_bound_dev_if) { + llc->dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); + if (llc->dev && addr->sllc_arphrd != llc->dev->type) { + dev_put(llc->dev); + llc->dev = NULL; + } + } else + llc->dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd); if (!llc->dev) goto out; rc = -EUSERS; @@ -310,7 +317,25 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) goto out; rc = -ENODEV; rtnl_lock(); - llc->dev = dev_getbyhwaddr(&init_net, addr->sllc_arphrd, addr->sllc_mac); + if (sk->sk_bound_dev_if) { + llc->dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); + if (llc->dev) { + if (!addr->sllc_arphrd) + addr->sllc_arphrd = llc->dev->type; + if (llc_mac_null(addr->sllc_mac)) + memcpy(addr->sllc_mac, llc->dev->dev_addr, + IFHWADDRLEN); + if (addr->sllc_arphrd != llc->dev->type || + !llc_mac_match(addr->sllc_mac, + llc->dev->dev_addr)) { + rc = -EINVAL; + dev_put(llc->dev); + llc->dev = NULL; + } + } + } else + llc->dev = dev_getbyhwaddr(&init_net, addr->sllc_arphrd, + addr->sllc_mac); rtnl_unlock(); if (!llc->dev) goto out; -- cgit v1.2.2 From b76f5a8427ac2928c07fa4ff2144bb8db072c240 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Sat, 26 Dec 2009 11:51:02 +0000 Subject: llc: convert the socket list to RCU locking For the reclamation phase we use the SLAB_DESTROY_BY_RCU mechanism, which requires some extra checks in the lookup code: a) If the current socket was released, reallocated & inserted in another list, it will short-circuit the iteration of the current list, thus we need to restart the lookup. b) If the current socket was released, reallocated & inserted in the same list, we just need to recheck that it matches the look-up criteria and, if not, we can skip to the next element.
In this case there is no need to restart the lookup, since sockets are inserted at the start of the list and the worst that will happen is that we will iterate through some of the list elements more than once. Note that the /proc and multicast delivery paths have not yet been converted to RCU; they still use spinlocks for protection. Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- net/llc/af_llc.c | 1 + net/llc/llc_conn.c | 93 ++++++++++++++++++++++++++++++++++++------------------ net/llc/llc_core.c | 5 +-- net/llc/llc_proc.c | 22 ++++++------- net/llc/llc_sap.c | 66 ++++++++++++++++++++++++-------------- 5 files changed, 120 insertions(+), 67 deletions(-) (limited to 'net') diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index c4d1a1da813c..f49f3dd6fbd3 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -140,6 +140,7 @@ static struct proto llc_proto = { .name = "LLC", .owner = THIS_MODULE, .obj_size = sizeof(struct llc_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, }; /** diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index c6bab39b018e..77bb3816655e 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -468,6 +468,19 @@ static int llc_exec_conn_trans_actions(struct sock *sk, return rc; } +static inline bool llc_estab_match(const struct llc_sap *sap, + const struct llc_addr *daddr, + const struct llc_addr *laddr, + const struct sock *sk) +{ + struct llc_sock *llc = llc_sk(sk); + + return llc->laddr.lsap == laddr->lsap && + llc->daddr.lsap == daddr->lsap && + llc_mac_match(llc->laddr.mac, laddr->mac) && + llc_mac_match(llc->daddr.mac, daddr->mac); +} + /** * __llc_lookup_established - Finds connection for the remote/local sap/mac * @sap: SAP @@ -484,23 +497,26 @@ static struct sock *__llc_lookup_established(struct llc_sap *sap, struct llc_addr *laddr) { struct sock *rc; - struct hlist_node *node; - - read_lock(&sap->sk_list.lock); - sk_for_each(rc, node, &sap->sk_list.list) { - struct llc_sock *llc = llc_sk(rc); - - if (llc->laddr.lsap == laddr->lsap && - llc->daddr.lsap == daddr->lsap && - llc_mac_match(llc->laddr.mac, laddr->mac) && - llc_mac_match(llc->daddr.mac, daddr->mac)) { - sock_hold(rc); + struct hlist_nulls_node *node; + + rcu_read_lock(); +again: + sk_nulls_for_each_rcu(rc, node, &sap->sk_list) { + if (llc_estab_match(sap, daddr, laddr, rc)) { + /* Extra checks required by SLAB_DESTROY_BY_RCU */ + if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) + goto again; + if (unlikely(llc_sk(rc)->sap != sap || + !llc_estab_match(sap, daddr, laddr, rc))) { + sock_put(rc); + continue; + } goto found; } } rc = NULL; found: - read_unlock(&sap->sk_list.lock); + rcu_read_unlock(); return rc; } @@ -516,6 +532,18 @@ struct sock *llc_lookup_established(struct llc_sap *sap, return sk; } +static inline bool llc_listener_match(const struct llc_sap *sap, + const struct llc_addr *laddr, + const struct sock *sk) +{ + struct llc_sock *llc = llc_sk(sk); + + return sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN && + llc->laddr.lsap == laddr->lsap && + (llc_mac_match(llc->laddr.mac, laddr->mac) || + llc_mac_null(llc->laddr.mac)); +} + /** * llc_lookup_listener - Finds listener for local MAC + SAP * @sap: SAP @@ -530,23 +558,26 @@ static struct sock *llc_lookup_listener(struct llc_sap *sap, struct llc_addr *laddr) { struct sock *rc; - struct hlist_node *node; - - read_lock(&sap->sk_list.lock); - sk_for_each(rc, node, &sap->sk_list.list) { - struct llc_sock *llc = llc_sk(rc); - - if (rc->sk_type == SOCK_STREAM && rc->sk_state == TCP_LISTEN && - llc->laddr.lsap == laddr->lsap
&& - (llc_mac_match(llc->laddr.mac, laddr->mac) || - llc_mac_null(llc->laddr.mac))) { - sock_hold(rc); + struct hlist_nulls_node *node; + + rcu_read_lock(); +again: + sk_nulls_for_each_rcu(rc, node, &sap->sk_list) { + if (llc_listener_match(sap, laddr, rc)) { + /* Extra checks required by SLAB_DESTROY_BY_RCU */ + if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) + goto again; + if (unlikely(llc_sk(rc)->sap != sap || + !llc_listener_match(sap, laddr, rc))) { + sock_put(rc); + continue; + } goto found; } } rc = NULL; found: - read_unlock(&sap->sk_list.lock); + rcu_read_unlock(); return rc; } @@ -652,10 +683,10 @@ static int llc_find_offset(int state, int ev_type) void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk) { llc_sap_hold(sap); - write_lock_bh(&sap->sk_list.lock); + spin_lock_bh(&sap->sk_lock); llc_sk(sk)->sap = sap; - sk_add_node(sk, &sap->sk_list.list); - write_unlock_bh(&sap->sk_list.lock); + sk_nulls_add_node_rcu(sk, &sap->sk_list); + spin_unlock_bh(&sap->sk_lock); } /** @@ -663,14 +694,14 @@ void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk) * @sap: SAP * @sk: socket * - * This function removes a connection from sk_list.list of a SAP if + * This function removes a connection from sk_list of a SAP if * the connection was in this list. */ void llc_sap_remove_socket(struct llc_sap *sap, struct sock *sk) { - write_lock_bh(&sap->sk_list.lock); - sk_del_node_init(sk); - write_unlock_bh(&sap->sk_list.lock); + spin_lock_bh(&sap->sk_lock); + sk_nulls_del_node_init_rcu(sk); + spin_unlock_bh(&sap->sk_lock); llc_sap_put(sap); } diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index ff4c0ab96a69..5276b9722077 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -37,7 +37,8 @@ static struct llc_sap *llc_sap_alloc(void) if (sap) { /* sap->laddr.mac - leave as a null, it's filled by bind */ sap->state = LLC_SAP_STATE_ACTIVE; - rwlock_init(&sap->sk_list.lock); + spin_lock_init(&sap->sk_lock); + INIT_HLIST_NULLS_HEAD(&sap->sk_list, 0); atomic_set(&sap->refcnt, 1); } return sap; @@ -142,7 +143,7 @@ out: */ void llc_sap_close(struct llc_sap *sap) { - WARN_ON(!hlist_empty(&sap->sk_list.list)); + WARN_ON(!hlist_nulls_empty(&sap->sk_list)); llc_del_sap(sap); kfree(sap); } diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index be47ac427f6b..6b3d033b3236 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -34,19 +34,19 @@ static struct sock *llc_get_sk_idx(loff_t pos) { struct list_head *sap_entry; struct llc_sap *sap; - struct hlist_node *node; + struct hlist_nulls_node *node; struct sock *sk = NULL; list_for_each(sap_entry, &llc_sap_list) { sap = list_entry(sap_entry, struct llc_sap, node); - read_lock_bh(&sap->sk_list.lock); - sk_for_each(sk, node, &sap->sk_list.list) { + spin_lock_bh(&sap->sk_lock); + sk_nulls_for_each(sk, node, &sap->sk_list) { if (!pos) goto found; --pos; } - read_unlock_bh(&sap->sk_list.lock); + spin_unlock_bh(&sap->sk_lock); } sk = NULL; found: @@ -73,25 +73,25 @@ static void *llc_seq_next(struct seq_file *seq, void *v, loff_t *pos) goto out; } sk = v; - next = sk_next(sk); + next = sk_nulls_next(sk); if (next) { sk = next; goto out; } llc = llc_sk(sk); sap = llc->sap; - read_unlock_bh(&sap->sk_list.lock); + spin_unlock_bh(&sap->sk_lock); sk = NULL; for (;;) { if (sap->node.next == &llc_sap_list) break; sap = list_entry(sap->node.next, struct llc_sap, node); - read_lock_bh(&sap->sk_list.lock); - if (!hlist_empty(&sap->sk_list.list)) { - sk = sk_head(&sap->sk_list.list); + spin_lock_bh(&sap->sk_lock); + if 
(!hlist_nulls_empty(&sap->sk_list)) { + sk = sk_nulls_head(&sap->sk_list); break; } - read_unlock_bh(&sap->sk_list.lock); + spin_unlock_bh(&sap->sk_lock); } out: return sk; @@ -104,7 +104,7 @@ static void llc_seq_stop(struct seq_file *seq, void *v) struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - read_unlock_bh(&sap->sk_list.lock); + spin_unlock_bh(&sap->sk_lock); } read_unlock_bh(&llc_sap_list_lock); } diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 008de1fc42ca..39760d013ce2 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -297,6 +297,17 @@ static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb, llc_sap_state_process(sap, skb); } +static inline bool llc_dgram_match(const struct llc_sap *sap, + const struct llc_addr *laddr, + const struct sock *sk) +{ + struct llc_sock *llc = llc_sk(sk); + + return sk->sk_type == SOCK_DGRAM && + llc->laddr.lsap == laddr->lsap && + llc_mac_match(llc->laddr.mac, laddr->mac); +} + /** * llc_lookup_dgram - Finds dgram socket for the local sap/mac * @sap: SAP @@ -309,25 +320,41 @@ static struct sock *llc_lookup_dgram(struct llc_sap *sap, const struct llc_addr *laddr) { struct sock *rc; - struct hlist_node *node; - - read_lock_bh(&sap->sk_list.lock); - sk_for_each(rc, node, &sap->sk_list.list) { - struct llc_sock *llc = llc_sk(rc); - - if (rc->sk_type == SOCK_DGRAM && - llc->laddr.lsap == laddr->lsap && - llc_mac_match(llc->laddr.mac, laddr->mac)) { - sock_hold(rc); + struct hlist_nulls_node *node; + + rcu_read_lock_bh(); +again: + sk_nulls_for_each_rcu(rc, node, &sap->sk_list) { + if (llc_dgram_match(sap, laddr, rc)) { + /* Extra checks required by SLAB_DESTROY_BY_RCU */ + if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) + goto again; + if (unlikely(llc_sk(rc)->sap != sap || + !llc_dgram_match(sap, laddr, rc))) { + sock_put(rc); + continue; + } goto found; } } rc = NULL; found: - read_unlock_bh(&sap->sk_list.lock); + rcu_read_unlock_bh(); return rc; } +static inline bool llc_mcast_match(const struct llc_sap *sap, + const struct llc_addr *laddr, + const struct sk_buff *skb, + const struct sock *sk) +{ + struct llc_sock *llc = llc_sk(sk); + + return sk->sk_type == SOCK_DGRAM && + llc->laddr.lsap == laddr->lsap && + llc->dev == skb->dev; +} + /** * llc_sap_mcast - Deliver multicast PDU's to all matching datagram sockets. * @sap: SAP @@ -341,20 +368,13 @@ static void llc_sap_mcast(struct llc_sap *sap, struct sk_buff *skb) { struct sock *sk; - struct hlist_node *node; + struct hlist_nulls_node *node; - read_lock_bh(&sap->sk_list.lock); - sk_for_each(sk, node, &sap->sk_list.list) { - struct llc_sock *llc = llc_sk(sk); + spin_lock_bh(&sap->sk_lock); + sk_nulls_for_each_rcu(sk, node, &sap->sk_list) { struct sk_buff *skb1; - if (sk->sk_type != SOCK_DGRAM) - continue; - - if (llc->laddr.lsap != laddr->lsap) - continue; - - if (llc->dev != skb->dev) + if (!llc_mcast_match(sap, laddr, skb, sk)) continue; skb1 = skb_clone(skb, GFP_ATOMIC); @@ -365,7 +385,7 @@ static void llc_sap_mcast(struct llc_sap *sap, llc_sap_rcv(sap, skb1, sk); sock_put(sk); } - read_unlock_bh(&sap->sk_list.lock); + spin_unlock_bh(&sap->sk_lock); } -- cgit v1.2.2 From 0f7b67dd9e1192976f5e5a78934c7a339ff7c45f Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Sat, 26 Dec 2009 11:51:03 +0000 Subject: llc: optimize multicast delivery Optimize multicast delivery by doing the actual delivery without holding the lock. Based on the same approach used in UDP code. Signed-off-by: Octavian Purdila Signed-off-by: David S. 
Miller --- net/llc/llc_sap.c | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 39760d013ce2..94790e60d072 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -355,6 +355,24 @@ static inline bool llc_mcast_match(const struct llc_sap *sap, llc->dev == skb->dev; } +static void llc_do_mcast(struct llc_sap *sap, struct sk_buff *skb, + struct sock **stack, int count) +{ + struct sk_buff *skb1; + int i; + + for (i = 0; i < count; i++) { + skb1 = skb_clone(skb, GFP_ATOMIC); + if (!skb1) { + sock_put(stack[i]); + continue; + } + + llc_sap_rcv(sap, skb1, stack[i]); + sock_put(stack[i]); + } +} + /** * llc_sap_mcast - Deliver multicast PDU's to all matching datagram sockets. * @sap: SAP @@ -367,25 +385,27 @@ static void llc_sap_mcast(struct llc_sap *sap, const struct llc_addr *laddr, struct sk_buff *skb) { - struct sock *sk; + int i = 0, count = 256 / sizeof(struct sock *); + struct sock *sk, *stack[count]; struct hlist_nulls_node *node; spin_lock_bh(&sap->sk_lock); sk_nulls_for_each_rcu(sk, node, &sap->sk_list) { - struct sk_buff *skb1; if (!llc_mcast_match(sap, laddr, skb, sk)) continue; - skb1 = skb_clone(skb, GFP_ATOMIC); - if (!skb1) - break; - sock_hold(sk); - llc_sap_rcv(sap, skb1, sk); - sock_put(sk); + if (i < count) + stack[i++] = sk; + else { + llc_do_mcast(sap, skb, stack, i); + i = 0; + } } spin_unlock_bh(&sap->sk_lock); + + llc_do_mcast(sap, skb, stack, i); } -- cgit v1.2.2 From 6d2e3ea284463d5ab34e9cf2a41d0b8627b95d02 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Sat, 26 Dec 2009 11:51:04 +0000 Subject: llc: use a device based hash table to speed up multicast delivery This patch adds a per-SAP, device-based hash table to solve the multicast delivery scalability issue when we have a large number of interfaces and a large number of sockets bound to the same SAP. Signed-off-by: Octavian Purdila Signed-off-by: David S.
Miller --- net/llc/llc_conn.c | 10 +++++++++- net/llc/llc_sap.c | 8 ++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 77bb3816655e..10cdfe2db830 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -682,10 +682,15 @@ static int llc_find_offset(int state, int ev_type) */ void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk) { + struct llc_sock *llc = llc_sk(sk); + struct hlist_head *dev_hb = llc_sk_dev_hash(sap, llc->dev->ifindex); + llc_sap_hold(sap); - spin_lock_bh(&sap->sk_lock); llc_sk(sk)->sap = sap; + + spin_lock_bh(&sap->sk_lock); sk_nulls_add_node_rcu(sk, &sap->sk_list); + hlist_add_head(&llc->dev_hash_node, dev_hb); spin_unlock_bh(&sap->sk_lock); } @@ -699,8 +704,11 @@ void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk) */ void llc_sap_remove_socket(struct llc_sap *sap, struct sock *sk) { + struct llc_sock *llc = llc_sk(sk); + spin_lock_bh(&sap->sk_lock); sk_nulls_del_node_init_rcu(sk); + hlist_del(&llc->dev_hash_node); spin_unlock_bh(&sap->sk_lock); llc_sap_put(sap); } diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 94790e60d072..94cb706f6cc4 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -387,10 +387,14 @@ static void llc_sap_mcast(struct llc_sap *sap, { int i = 0, count = 256 / sizeof(struct sock *); struct sock *sk, *stack[count]; - struct hlist_nulls_node *node; + struct hlist_node *node; + struct llc_sock *llc; + struct hlist_head *dev_hb = llc_sk_dev_hash(sap, skb->dev->ifindex); spin_lock_bh(&sap->sk_lock); - sk_nulls_for_each_rcu(sk, node, &sap->sk_list) { + hlist_for_each_entry(llc, node, dev_hb, dev_hash_node) { + + sk = &llc->sk; if (!llc_mcast_match(sap, laddr, skb, sk)) continue; -- cgit v1.2.2 From 52d58aef5ee460fedd7f250f05e79081019f2c79 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Sat, 26 Dec 2009 11:51:05 +0000 Subject: llc: replace the socket list with a local address based hash For the cases where a lot of interfaces are used in conjunction with a lot of LLC sockets bound to the same SAP, the iteration of the socket list becomes prohibitively expensive. Replacing the list with a local address based hash significantly improves the bind and listener lookup operations as well as the datagram delivery. Connected socket delivery is also improved, but this patch does not address the case where we have lots of sockets with the same local address connected to different remote addresses. In order to keep the socket sanity checks alive and fast, a socket counter was added to the SAP structure. Signed-off-by: Octavian Purdila Signed-off-by: David S.
Miller --- net/llc/llc_conn.c | 70 +++++++++++++++++++++++++++++++++++++++--------------- net/llc/llc_core.c | 6 +++-- net/llc/llc_proc.c | 44 +++++++++++++++++++++++----------- net/llc/llc_sap.c | 11 ++++++++- 4 files changed, 95 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 10cdfe2db830..a8dde9b010da 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -498,10 +498,12 @@ static struct sock *__llc_lookup_established(struct llc_sap *sap, { struct sock *rc; struct hlist_nulls_node *node; + int slot = llc_sk_laddr_hashfn(sap, laddr); + struct hlist_nulls_head *laddr_hb = &sap->sk_laddr_hash[slot]; rcu_read_lock(); again: - sk_nulls_for_each_rcu(rc, node, &sap->sk_list) { + sk_nulls_for_each_rcu(rc, node, laddr_hb) { if (llc_estab_match(sap, daddr, laddr, rc)) { /* Extra checks required by SLAB_DESTROY_BY_RCU */ if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) @@ -515,6 +517,13 @@ again: } } rc = NULL; + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (unlikely(get_nulls_value(node) != slot)) + goto again; found: rcu_read_unlock(); return rc; @@ -540,29 +549,20 @@ static inline bool llc_listener_match(const struct llc_sap *sap, return sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN && llc->laddr.lsap == laddr->lsap && - (llc_mac_match(llc->laddr.mac, laddr->mac) || - llc_mac_null(llc->laddr.mac)); + llc_mac_match(llc->laddr.mac, laddr->mac); } -/** - * llc_lookup_listener - Finds listener for local MAC + SAP - * @sap: SAP - * @laddr: address of local LLC (MAC + SAP) - * - * Search connection list of the SAP and finds connection listening on - * local mac, and local sap. Returns pointer for parent socket found, - * %NULL otherwise. - * Caller has to make sure local_bh is disabled. - */ -static struct sock *llc_lookup_listener(struct llc_sap *sap, - struct llc_addr *laddr) +static struct sock *__llc_lookup_listener(struct llc_sap *sap, + struct llc_addr *laddr) { struct sock *rc; struct hlist_nulls_node *node; + int slot = llc_sk_laddr_hashfn(sap, laddr); + struct hlist_nulls_head *laddr_hb = &sap->sk_laddr_hash[slot]; rcu_read_lock(); again: - sk_nulls_for_each_rcu(rc, node, &sap->sk_list) { + sk_nulls_for_each_rcu(rc, node, laddr_hb) { if (llc_listener_match(sap, laddr, rc)) { /* Extra checks required by SLAB_DESTROY_BY_RCU */ if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) @@ -576,11 +576,40 @@ again: } } rc = NULL; + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (unlikely(get_nulls_value(node) != slot)) + goto again; found: rcu_read_unlock(); return rc; } +/** + * llc_lookup_listener - Finds listener for local MAC + SAP + * @sap: SAP + * @laddr: address of local LLC (MAC + SAP) + * + * Search connection list of the SAP and finds connection listening on + * local mac, and local sap. Returns pointer for parent socket found, + * %NULL otherwise. + * Caller has to make sure local_bh is disabled. 
+ */ +static struct sock *llc_lookup_listener(struct llc_sap *sap, + struct llc_addr *laddr) +{ + static struct llc_addr null_addr; + struct sock *rc = __llc_lookup_listener(sap, laddr); + + if (!rc) + rc = __llc_lookup_listener(sap, &null_addr); + + return rc; +} + static struct sock *__llc_lookup(struct llc_sap *sap, struct llc_addr *daddr, struct llc_addr *laddr) @@ -678,18 +707,20 @@ static int llc_find_offset(int state, int ev_type) * @sap: SAP * @sk: socket * - * This function adds a socket to sk_list of a SAP. + * This function adds a socket to the hash tables of a SAP. */ void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk) { struct llc_sock *llc = llc_sk(sk); struct hlist_head *dev_hb = llc_sk_dev_hash(sap, llc->dev->ifindex); + struct hlist_nulls_head *laddr_hb = llc_sk_laddr_hash(sap, &llc->laddr); llc_sap_hold(sap); llc_sk(sk)->sap = sap; spin_lock_bh(&sap->sk_lock); - sk_nulls_add_node_rcu(sk, &sap->sk_list); + sap->sk_count++; + sk_nulls_add_node_rcu(sk, laddr_hb); hlist_add_head(&llc->dev_hash_node, dev_hb); spin_unlock_bh(&sap->sk_lock); } @@ -699,7 +730,7 @@ void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk) * @sap: SAP * @sk: socket * - * This function removes a connection from sk_list of a SAP if + * This function removes a connection from the hash tables of a SAP if * the connection was in this list. */ void llc_sap_remove_socket(struct llc_sap *sap, struct sock *sk) @@ -709,6 +740,7 @@ void llc_sap_remove_socket(struct llc_sap *sap, struct sock *sk) spin_lock_bh(&sap->sk_lock); sk_nulls_del_node_init_rcu(sk); hlist_del(&llc->dev_hash_node); + sap->sk_count--; spin_unlock_bh(&sap->sk_lock); llc_sap_put(sap); } diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 5276b9722077..0c9ef8bc7655 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -33,12 +33,14 @@ DEFINE_RWLOCK(llc_sap_list_lock); static struct llc_sap *llc_sap_alloc(void) { struct llc_sap *sap = kzalloc(sizeof(*sap), GFP_ATOMIC); + int i; if (sap) { /* sap->laddr.mac - leave as a null, it's filled by bind */ sap->state = LLC_SAP_STATE_ACTIVE; spin_lock_init(&sap->sk_lock); - INIT_HLIST_NULLS_HEAD(&sap->sk_list, 0); + for (i = 0; i < LLC_SK_LADDR_HASH_ENTRIES; i++) + INIT_HLIST_NULLS_HEAD(&sap->sk_laddr_hash[i], i); atomic_set(&sap->refcnt, 1); } return sap; @@ -143,7 +145,7 @@ out: */ void llc_sap_close(struct llc_sap *sap) { - WARN_ON(!hlist_nulls_empty(&sap->sk_list)); + WARN_ON(sap->sk_count); llc_del_sap(sap); kfree(sap); } diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index 6b3d033b3236..09dec6307206 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -34,17 +34,22 @@ static struct sock *llc_get_sk_idx(loff_t pos) { struct list_head *sap_entry; struct llc_sap *sap; - struct hlist_nulls_node *node; struct sock *sk = NULL; + int i; list_for_each(sap_entry, &llc_sap_list) { sap = list_entry(sap_entry, struct llc_sap, node); spin_lock_bh(&sap->sk_lock); - sk_nulls_for_each(sk, node, &sap->sk_list) { - if (!pos) - goto found; - --pos; + for (i = 0; i < LLC_SK_LADDR_HASH_ENTRIES; i++) { + struct hlist_nulls_head *head = &sap->sk_laddr_hash[i]; + struct hlist_nulls_node *node; + + sk_nulls_for_each(sk, node, head) { + if (!pos) + goto found; /* keep the lock */ + --pos; + } } spin_unlock_bh(&sap->sk_lock); } @@ -61,6 +66,19 @@ static void *llc_seq_start(struct seq_file *seq, loff_t *pos) return l ? 
llc_get_sk_idx(--l) : SEQ_START_TOKEN; } +static struct sock *laddr_hash_next(struct llc_sap *sap, int bucket) +{ + struct hlist_nulls_node *node; + struct sock *sk = NULL; + + while (++bucket < LLC_SK_LADDR_HASH_ENTRIES) + sk_nulls_for_each(sk, node, &sap->sk_laddr_hash[bucket]) + goto out; + +out: + return sk; +} + static void *llc_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct sock* sk, *next; @@ -80,17 +98,15 @@ static void *llc_seq_next(struct seq_file *seq, void *v, loff_t *pos) } llc = llc_sk(sk); sap = llc->sap; + sk = laddr_hash_next(sap, llc_sk_laddr_hashfn(sap, &llc->laddr)); + if (sk) + goto out; spin_unlock_bh(&sap->sk_lock); - sk = NULL; - for (;;) { - if (sap->node.next == &llc_sap_list) - break; - sap = list_entry(sap->node.next, struct llc_sap, node); + list_for_each_entry_continue(sap, &llc_sap_list, node) { spin_lock_bh(&sap->sk_lock); - if (!hlist_nulls_empty(&sap->sk_list)) { - sk = sk_nulls_head(&sap->sk_list); - break; - } + sk = laddr_hash_next(sap, -1); + if (sk) + break; /* keep the lock */ spin_unlock_bh(&sap->sk_lock); } out: diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 94cb706f6cc4..ad6e6e1cf22f 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -321,10 +321,12 @@ static struct sock *llc_lookup_dgram(struct llc_sap *sap, { struct sock *rc; struct hlist_nulls_node *node; + int slot = llc_sk_laddr_hashfn(sap, laddr); + struct hlist_nulls_head *laddr_hb = &sap->sk_laddr_hash[slot]; rcu_read_lock_bh(); again: - sk_nulls_for_each_rcu(rc, node, &sap->sk_list) { + sk_nulls_for_each_rcu(rc, node, laddr_hb) { if (llc_dgram_match(sap, laddr, rc)) { /* Extra checks required by SLAB_DESTROY_BY_RCU */ if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) @@ -338,6 +340,13 @@ again: } } rc = NULL; + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (unlikely(get_nulls_value(node) != slot)) + goto again; found: rcu_read_unlock_bh(); return rc; -- cgit v1.2.2 From 8beb9ab6c2df203e8d68cb1f48cf42604a6bed86 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Sat, 26 Dec 2009 11:51:06 +0000 Subject: llc: convert llc_sap_list to RCU Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- net/llc/llc_core.c | 46 ++++++++++++++-------------------------------- net/llc/llc_proc.c | 11 ++++------- 2 files changed, 18 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 0c9ef8bc7655..78167e81dfeb 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -23,7 +23,7 @@ #include LIST_HEAD(llc_sap_list); -DEFINE_RWLOCK(llc_sap_list_lock); +DEFINE_SPINLOCK(llc_sap_list_lock); /** * llc_sap_alloc - allocates and initializes sap. @@ -46,30 +46,6 @@ static struct llc_sap *llc_sap_alloc(void) return sap; } -/** - * llc_add_sap - add sap to station list - * @sap: Address of the sap - * - * Adds a sap to the LLC's station sap list. - */ -static void llc_add_sap(struct llc_sap *sap) -{ - list_add_tail(&sap->node, &llc_sap_list); -} - -/** - * llc_del_sap - del sap from station list - * @sap: Address of the sap - * - * Removes a sap to the LLC's station sap list. 
- */ -static void llc_del_sap(struct llc_sap *sap) -{ - write_lock_bh(&llc_sap_list_lock); - list_del(&sap->node); - write_unlock_bh(&llc_sap_list_lock); -} - static struct llc_sap *__llc_sap_find(unsigned char sap_value) { struct llc_sap* sap; @@ -93,13 +69,13 @@ out: */ struct llc_sap *llc_sap_find(unsigned char sap_value) { - struct llc_sap* sap; + struct llc_sap *sap; - read_lock_bh(&llc_sap_list_lock); + rcu_read_lock_bh(); sap = __llc_sap_find(sap_value); if (sap) llc_sap_hold(sap); - read_unlock_bh(&llc_sap_list_lock); + rcu_read_unlock_bh(); return sap; } @@ -120,7 +96,7 @@ struct llc_sap *llc_sap_open(unsigned char lsap, { struct llc_sap *sap = NULL; - write_lock_bh(&llc_sap_list_lock); + spin_lock_bh(&llc_sap_list_lock); if (__llc_sap_find(lsap)) /* SAP already exists */ goto out; sap = llc_sap_alloc(); @@ -128,9 +104,9 @@ struct llc_sap *llc_sap_open(unsigned char lsap, goto out; sap->laddr.lsap = lsap; sap->rcv_func = func; - llc_add_sap(sap); + list_add_tail_rcu(&sap->node, &llc_sap_list); out: - write_unlock_bh(&llc_sap_list_lock); + spin_unlock_bh(&llc_sap_list_lock); return sap; } @@ -146,7 +122,13 @@ out: void llc_sap_close(struct llc_sap *sap) { WARN_ON(sap->sk_count); - llc_del_sap(sap); + + spin_lock_bh(&llc_sap_list_lock); + list_del_rcu(&sap->node); + spin_unlock_bh(&llc_sap_list_lock); + + synchronize_rcu(); + kfree(sap); } diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index 09dec6307206..7af1ff2d1f19 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -32,14 +32,11 @@ static void llc_ui_format_mac(struct seq_file *seq, u8 *addr) static struct sock *llc_get_sk_idx(loff_t pos) { - struct list_head *sap_entry; struct llc_sap *sap; struct sock *sk = NULL; int i; - list_for_each(sap_entry, &llc_sap_list) { - sap = list_entry(sap_entry, struct llc_sap, node); - + list_for_each_entry_rcu(sap, &llc_sap_list, node) { spin_lock_bh(&sap->sk_lock); for (i = 0; i < LLC_SK_LADDR_HASH_ENTRIES; i++) { struct hlist_nulls_head *head = &sap->sk_laddr_hash[i]; @@ -62,7 +59,7 @@ static void *llc_seq_start(struct seq_file *seq, loff_t *pos) { loff_t l = *pos; - read_lock_bh(&llc_sap_list_lock); + rcu_read_lock_bh(); return l ? llc_get_sk_idx(--l) : SEQ_START_TOKEN; } @@ -102,7 +99,7 @@ static void *llc_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (sk) goto out; spin_unlock_bh(&sap->sk_lock); - list_for_each_entry_continue(sap, &llc_sap_list, node) { + list_for_each_entry_continue_rcu(sap, &llc_sap_list, node) { spin_lock_bh(&sap->sk_lock); sk = laddr_hash_next(sap, -1); if (sk) @@ -122,7 +119,7 @@ static void llc_seq_stop(struct seq_file *seq, void *v) spin_unlock_bh(&sap->sk_lock); } - read_unlock_bh(&llc_sap_list_lock); + rcu_read_unlock_bh(); } static int llc_seq_socket_show(struct seq_file *seq, void *v) -- cgit v1.2.2 From 3100aa9d74db9c6d8d9a3b6421721fc1aef4728f Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Sat, 26 Dec 2009 11:51:08 +0000 Subject: llc: fix SAP reference counting w.r.t. socket handling The SAP ref counter gets decremented twice when deleting a socket, although for all but the first socket of a SAP the SAP ref counter was incremented only once. Signed-off-by: Octavian Purdila Signed-off-by: David S. 
Miller --- net/llc/af_llc.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index f49f3dd6fbd3..e35d907fba2c 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -197,10 +197,8 @@ static int llc_ui_release(struct socket *sock) llc->laddr.lsap, llc->daddr.lsap); if (!llc_send_disc(sk)) llc_ui_wait_for_disc(sk, sk->sk_rcvtimeo); - if (!sock_flag(sk, SOCK_ZAPPED)) { - llc_sap_put(llc->sap); + if (!sock_flag(sk, SOCK_ZAPPED)) llc_sap_remove_socket(llc->sap, sk); - } release_sock(sk); if (llc->dev) dev_put(llc->dev); @@ -352,7 +350,6 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) rc = -EBUSY; /* some other network layer is using the sap */ if (!sap) goto out; - llc_sap_hold(sap); } else { struct llc_addr laddr, daddr; struct sock *ask; -- cgit v1.2.2 From baeb66fe2306783e3b9a492b03882f2e249b2eeb Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Fri, 18 Dec 2009 17:59:02 -0500 Subject: wireless: remove CONFIG_WIRELESS_OLD_REGULATORY This is no longer needed with the availability of CONFIG_CFG80211_INTERNAL_REGDB. Signed-off-by: John W. Linville --- net/wireless/Kconfig | 15 --------- net/wireless/nl80211.c | 6 ---- net/wireless/reg.c | 89 ++------------------------------------------------ 3 files changed, 2 insertions(+), 108 deletions(-) (limited to 'net') diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 8419971f07c5..d0ee29063e5d 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -94,21 +94,6 @@ config CFG80211_DEBUGFS If unsure, say N. -config WIRELESS_OLD_REGULATORY - bool "Old wireless static regulatory definitions" - default n - depends on CFG80211 - ---help--- - This option enables the old static regulatory information - and uses it within the new framework. This option is available - for historical reasons and it is advised to leave it off. - - For details see: - - http://wireless.kernel.org/en/developers/Regulatory - - Say N and if you say Y, please tell us why. The default is N. - config CFG80211_INTERNAL_REGDB bool "use statically compiled regulatory rules database" if EMBEDDED default n diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7cb0d647fc34..60f854377f90 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2550,12 +2550,6 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) data = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]); -#ifdef CONFIG_WIRELESS_OLD_REGULATORY - /* We ignore world regdom requests with the old regdom setup */ - if (is_world_regdom(data)) - return -EINVAL; -#endif - r = regulatory_hint_user(data); return r; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index dc13c3ffeca6..87ea60d84c3c 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -129,78 +129,6 @@ static char *ieee80211_regdom = "00"; module_param(ieee80211_regdom, charp, 0444); MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); -#ifdef CONFIG_WIRELESS_OLD_REGULATORY -/* - * We assume 40 MHz bandwidth for the old regulatory work. 
- * We make emphasis we are using the exact same frequencies - * as before - */ - -static const struct ieee80211_regdomain us_regdom = { - .n_reg_rules = 6, - .alpha2 = "US", - .reg_rules = { - /* IEEE 802.11b/g, channels 1..11 */ - REG_RULE(2412-10, 2462+10, 40, 6, 27, 0), - /* IEEE 802.11a, channel 36..48 */ - REG_RULE(5180-10, 5240+10, 40, 6, 17, 0), - /* IEEE 802.11a, channels 48..64 */ - REG_RULE(5260-10, 5320+10, 40, 6, 20, NL80211_RRF_DFS), - /* IEEE 802.11a, channels 100..124 */ - REG_RULE(5500-10, 5590+10, 40, 6, 20, NL80211_RRF_DFS), - /* IEEE 802.11a, channels 132..144 */ - REG_RULE(5660-10, 5700+10, 40, 6, 20, NL80211_RRF_DFS), - /* IEEE 802.11a, channels 149..165, outdoor */ - REG_RULE(5745-10, 5825+10, 40, 6, 30, 0), - } -}; - -static const struct ieee80211_regdomain jp_regdom = { - .n_reg_rules = 6, - .alpha2 = "JP", - .reg_rules = { - /* IEEE 802.11b/g, channels 1..11 */ - REG_RULE(2412-10, 2462+10, 40, 6, 20, 0), - /* IEEE 802.11b/g, channels 12..13 */ - REG_RULE(2467-10, 2472+10, 20, 6, 20, 0), - /* IEEE 802.11b/g, channel 14 */ - REG_RULE(2484-10, 2484+10, 20, 6, 20, NL80211_RRF_NO_OFDM), - /* IEEE 802.11a, channels 36..48 */ - REG_RULE(5180-10, 5240+10, 40, 6, 20, 0), - /* IEEE 802.11a, channels 52..64 */ - REG_RULE(5260-10, 5320+10, 40, 6, 20, NL80211_RRF_DFS), - /* IEEE 802.11a, channels 100..144 */ - REG_RULE(5500-10, 5700+10, 40, 6, 23, NL80211_RRF_DFS), - } -}; - -static const struct ieee80211_regdomain *static_regdom(char *alpha2) -{ - if (alpha2[0] == 'U' && alpha2[1] == 'S') - return &us_regdom; - if (alpha2[0] == 'J' && alpha2[1] == 'P') - return &jp_regdom; - /* Use world roaming rules for "EU", since it was a pseudo - domain anyway... */ - if (alpha2[0] == 'E' && alpha2[1] == 'U') - return &world_regdom; - /* Default, world roaming rules */ - return &world_regdom; -} - -static bool is_old_static_regdom(const struct ieee80211_regdomain *rd) -{ - if (rd == &us_regdom || rd == &jp_regdom || rd == &world_regdom) - return true; - return false; -} -#else -static inline bool is_old_static_regdom(const struct ieee80211_regdomain *rd) -{ - return false; -} -#endif - static void reset_regdomains(void) { /* avoid freeing static information or freeing something twice */ @@ -210,8 +138,6 @@ static void reset_regdomains(void) cfg80211_world_regdom = NULL; if (cfg80211_regdomain == &world_regdom) cfg80211_regdomain = NULL; - if (is_old_static_regdom(cfg80211_regdomain)) - cfg80211_regdomain = NULL; kfree(cfg80211_regdomain); kfree(cfg80211_world_regdom); @@ -1490,8 +1416,6 @@ static int ignore_request(struct wiphy *wiphy, return REG_INTERSECT; case NL80211_REGDOM_SET_BY_DRIVER: if (last_request->initiator == NL80211_REGDOM_SET_BY_CORE) { - if (is_old_static_regdom(cfg80211_regdomain)) - return 0; if (regdom_changes(pending_request->alpha2)) return 0; return -EALREADY; @@ -1528,8 +1452,7 @@ static int ignore_request(struct wiphy *wiphy, return -EAGAIN; } - if (!is_old_static_regdom(cfg80211_regdomain) && - !regdom_changes(pending_request->alpha2)) + if (!regdom_changes(pending_request->alpha2)) return -EALREADY; return 0; @@ -2111,8 +2034,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) * If someone else asked us to change the rd lets only bother * checking if the alpha2 changes if CRDA was already called */ - if (!is_old_static_regdom(cfg80211_regdomain) && - !regdom_changes(rd->alpha2)) + if (!regdom_changes(rd->alpha2)) return -EINVAL; } @@ -2311,15 +2233,8 @@ int regulatory_init(void) spin_lock_init(®_requests_lock); 
spin_lock_init(®_pending_beacons_lock); -#ifdef CONFIG_WIRELESS_OLD_REGULATORY - cfg80211_regdomain = static_regdom(ieee80211_regdom); - - printk(KERN_INFO "cfg80211: Using static regulatory domain info\n"); - print_regdomain_info(cfg80211_regdomain); -#else cfg80211_regdomain = cfg80211_world_regdom; -#endif /* We always try to get an update for the static regdomain */ err = regulatory_hint_core(cfg80211_regdomain->alpha2); if (err) { -- cgit v1.2.2 From 9607e6b66a0d25ca63b70d54a4283fa13d8f7c9d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 23 Dec 2009 13:15:31 +0100 Subject: mac80211: add ieee80211_sdata_running Instead of always using netif_running(sdata->dev) use ieee80211_sdata_running(sdata) now which is just an inline containing netif_running() for now. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 6 ++---- net/mac80211/ibss.c | 4 ++-- net/mac80211/ieee80211_i.h | 5 +++++ net/mac80211/iface.c | 10 +++++----- net/mac80211/key.c | 4 ++-- net/mac80211/main.c | 2 +- net/mac80211/mesh.c | 2 +- net/mac80211/mlme.c | 8 ++++---- net/mac80211/pm.c | 2 +- net/mac80211/rx.c | 6 +++--- net/mac80211/scan.c | 12 ++++++------ net/mac80211/sta_info.c | 2 +- net/mac80211/status.c | 2 +- net/mac80211/tx.c | 2 +- net/mac80211/util.c | 10 +++++----- 15 files changed, 40 insertions(+), 37 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 63843e3e576a..fdac1bcbfcc0 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -78,17 +78,15 @@ static int ieee80211_change_iface(struct wiphy *wiphy, enum nl80211_iftype type, u32 *flags, struct vif_params *params) { - struct ieee80211_sub_if_data *sdata; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); int ret; - if (netif_running(dev)) + if (ieee80211_sdata_running(sdata)) return -EBUSY; if (!nl80211_params_check(type, params)) return -EINVAL; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - ret = ieee80211_if_change_type(sdata, type); if (ret) return ret; diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index ef6c6b2401d1..3a61f3ba85c9 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -744,7 +744,7 @@ static void ieee80211_ibss_work(struct work_struct *work) if (WARN_ON(local->suspended)) return; - if (!netif_running(sdata->dev)) + if (!ieee80211_sdata_running(sdata)) return; if (local->scanning) @@ -827,7 +827,7 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local) mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { - if (!netif_running(sdata->dev)) + if (!ieee80211_sdata_running(sdata)) continue; if (sdata->vif.type != NL80211_IFTYPE_ADHOC) continue; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 88b0ba6c7484..adeae03c26a3 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -960,6 +960,11 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local); u32 __ieee80211_recalc_idle(struct ieee80211_local *local); void ieee80211_recalc_idle(struct ieee80211_local *local); +static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata) +{ + return netif_running(sdata->dev); +} + /* tx handling */ void ieee80211_clear_tx_pending(struct ieee80211_local *local); void ieee80211_tx_pending(unsigned long data); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index a6e6da3cab70..1ceca14331d4 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -65,7 +65,7 @@ static int ieee80211_change_mac(struct 
net_device *dev, void *addr) struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); int ret; - if (netif_running(dev)) + if (ieee80211_sdata_running(sdata)) return -EBUSY; ret = eth_mac_addr(dev, addr); @@ -111,7 +111,7 @@ static int ieee80211_open(struct net_device *dev) list_for_each_entry(nsdata, &local->interfaces, list) { struct net_device *ndev = nsdata->dev; - if (ndev != dev && netif_running(ndev)) { + if (ndev != dev && ieee80211_sdata_running(nsdata)) { /* * Allow only a single IBSS interface to be up at any * time. This is restricted because beacon distribution @@ -197,7 +197,7 @@ static int ieee80211_open(struct net_device *dev) struct net_device *ndev = nsdata->dev; /* - * No need to check netif_running since we do not allow + * No need to check running since we do not allow * it to start up with this invalid address. */ if (compare_ether_addr(null_addr, ndev->dev_addr) == 0) { @@ -756,7 +756,7 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, * and goes into the requested mode. */ - if (netif_running(sdata->dev)) + if (ieee80211_sdata_running(sdata)) return -EBUSY; /* Purge and reset type-dependent state. */ @@ -930,7 +930,7 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) return ieee80211_idle_off(local, "scanning"); list_for_each_entry(sdata, &local->interfaces, list) { - if (!netif_running(sdata->dev)) + if (!ieee80211_sdata_running(sdata)) continue; /* do not count disabled managed interfaces */ if (sdata->vif.type == NL80211_IFTYPE_STATION && diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 32ee6d0ee34d..8160d9c5372e 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -443,7 +443,7 @@ void ieee80211_key_link(struct ieee80211_key *key, add_todo(old_key, KEY_FLAG_TODO_DELETE); add_todo(key, KEY_FLAG_TODO_ADD_DEBUGFS); - if (netif_running(sdata->dev)) + if (ieee80211_sdata_running(sdata)) add_todo(key, KEY_FLAG_TODO_HWACCEL_ADD); spin_unlock_irqrestore(&sdata->local->key_lock, flags); @@ -509,7 +509,7 @@ void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata) { ASSERT_RTNL(); - if (WARN_ON(!netif_running(sdata->dev))) + if (WARN_ON(!ieee80211_sdata_running(sdata))) return; ieee80211_todo_for_each_key(sdata, KEY_FLAG_TODO_HWACCEL_ADD); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index d4426748ab10..e93bc558d785 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -212,7 +212,7 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, } if (changed & BSS_CHANGED_BEACON_ENABLED) { - if (local->quiescing || !netif_running(sdata->dev) || + if (local->quiescing || !ieee80211_sdata_running(sdata) || test_bit(SCAN_SW_SCANNING, &local->scanning)) { sdata->vif.bss_conf.enable_beacon = false; } else { diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index e0bd85e3d4b6..61080c5fad50 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -645,7 +645,7 @@ static void ieee80211_mesh_work(struct work_struct *work) struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct sk_buff *skb; - if (!netif_running(sdata->dev)) + if (!ieee80211_sdata_running(sdata)) return; if (local->scanning) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2f9ed8b9c3f0..7f9909340c09 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -604,7 +604,7 @@ static void ieee80211_chswitch_work(struct work_struct *work) container_of(work, struct ieee80211_sub_if_data, u.mgd.chswitch_work); struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - if (!netif_running(sdata->dev)) + if 
(!ieee80211_sdata_running(sdata)) return; mutex_lock(&ifmgd->mtx); @@ -750,7 +750,7 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) } list_for_each_entry(sdata, &local->interfaces, list) { - if (!netif_running(sdata->dev)) + if (!ieee80211_sdata_running(sdata)) continue; if (sdata->vif.type != NL80211_IFTYPE_STATION) continue; @@ -1263,7 +1263,7 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; bool already = false; - if (!netif_running(sdata->dev)) + if (!ieee80211_sdata_running(sdata)) return; if (sdata->local->scanning) @@ -2118,7 +2118,7 @@ static void ieee80211_sta_work(struct work_struct *work) enum rx_mgmt_action rma; bool anybusy = false; - if (!netif_running(sdata->dev)) + if (!ieee80211_sdata_running(sdata)) return; if (local->scanning) diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 05e161c3cbc5..913dc7e3b29e 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -93,7 +93,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) break; } - if (!netif_running(sdata->dev)) + if (!ieee80211_sdata_running(sdata)) continue; /* disable beaconing */ diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6cbf1a7b3157..f60dfca52196 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -289,7 +289,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) continue; - if (!netif_running(sdata->dev)) + if (!ieee80211_sdata_running(sdata)) continue; if (prev_dev) { @@ -2056,7 +2056,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, skb->protocol = htons(ETH_P_802_2); list_for_each_entry_rcu(sdata, &local->interfaces, list) { - if (!netif_running(sdata->dev)) + if (!ieee80211_sdata_running(sdata)) continue; if (sdata->vif.type != NL80211_IFTYPE_MONITOR || @@ -2318,7 +2318,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, } if (!found_sta) { list_for_each_entry_rcu(sdata, &local->interfaces, list) { - if (!netif_running(sdata->dev)) + if (!ieee80211_sdata_running(sdata)) continue; if (sdata->vif.type == NL80211_IFTYPE_MONITOR || diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 66da0ab1d8fa..ae1830056521 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -346,7 +346,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { - if (!netif_running(sdata->dev)) + if (!ieee80211_sdata_running(sdata)) continue; /* Tell AP we're back */ @@ -396,7 +396,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { - if (!netif_running(sdata->dev)) + if (!ieee80211_sdata_running(sdata)) continue; /* disable beaconing */ @@ -526,7 +526,7 @@ static int ieee80211_scan_state_decision(struct ieee80211_local *local, /* check if at least one STA interface is associated */ mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { - if (!netif_running(sdata->dev)) + if (!ieee80211_sdata_running(sdata)) continue; if (sdata->vif.type == NL80211_IFTYPE_STATION) { @@ -571,7 +571,7 @@ static void ieee80211_scan_state_leave_oper_channel(struct ieee80211_local *loca */ mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { - if (!netif_running(sdata->dev)) + if (!ieee80211_sdata_running(sdata)) continue; if 
(sdata->vif.type == NL80211_IFTYPE_STATION) { @@ -603,7 +603,7 @@ static void ieee80211_scan_state_enter_oper_channel(struct ieee80211_local *loca */ mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { - if (!netif_running(sdata->dev)) + if (!ieee80211_sdata_running(sdata)) continue; /* Tell AP we're back */ @@ -727,7 +727,7 @@ void ieee80211_scan_work(struct work_struct *work) /* * Avoid re-scheduling when the sdata is going away. */ - if (!netif_running(sdata->dev)) { + if (!ieee80211_sdata_running(sdata)) { ieee80211_scan_completed(&local->hw, true); return; } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index d1a77e79d7a9..f039e761aec1 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -367,7 +367,7 @@ int sta_info_insert(struct sta_info *sta) * something inserts a STA (on one CPU) without holding the RTNL * and another CPU turns off the net device. */ - if (unlikely(!netif_running(sdata->dev))) { + if (unlikely(!ieee80211_sdata_running(sdata))) { err = -ENETDOWN; goto out_free; } diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 0c0850d37dda..0ebcdda24200 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -351,7 +351,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) { if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { - if (!netif_running(sdata->dev)) + if (!ieee80211_sdata_running(sdata)) continue; if ((sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) && diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index ac48c86ae6b3..1593a2ffd67a 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1469,7 +1469,7 @@ static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, list_for_each_entry_rcu(tmp_sdata, &local->interfaces, list) { - if (!netif_running(tmp_sdata->dev)) + if (!ieee80211_sdata_running(tmp_sdata)) continue; if (tmp_sdata->vif.type != NL80211_IFTYPE_AP) continue; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index b01972579c7c..5ffe9e831b66 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -468,7 +468,7 @@ void ieee80211_iterate_active_interfaces( case NL80211_IFTYPE_MESH_POINT: break; } - if (netif_running(sdata->dev)) + if (ieee80211_sdata_running(sdata)) iterator(data, sdata->vif.addr, &sdata->vif); } @@ -502,7 +502,7 @@ void ieee80211_iterate_active_interfaces_atomic( case NL80211_IFTYPE_MESH_POINT: break; } - if (netif_running(sdata->dev)) + if (ieee80211_sdata_running(sdata)) iterator(data, sdata->vif.addr, &sdata->vif); } @@ -1056,7 +1056,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) list_for_each_entry(sdata, &local->interfaces, list) { if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN && sdata->vif.type != NL80211_IFTYPE_MONITOR && - netif_running(sdata->dev)) { + ieee80211_sdata_running(sdata)) { conf.vif = &sdata->vif; conf.type = sdata->vif.type; conf.mac_addr = sdata->vif.addr; @@ -1103,7 +1103,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) /* Finally also reconfigure all the BSS information */ list_for_each_entry(sdata, &local->interfaces, list) { u32 changed = ~0; - if (!netif_running(sdata->dev)) + if (!ieee80211_sdata_running(sdata)) continue; switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: @@ -1131,7 +1131,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) /* add back keys */ list_for_each_entry(sdata, &local->interfaces, list) - if (netif_running(sdata->dev)) + if 
(ieee80211_sdata_running(sdata)) ieee80211_enable_keys(sdata); ieee80211_wake_queues_by_reason(hw, -- cgit v1.2.2 From a80f7c0b088187c8471b441d461e937991870661 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 23 Dec 2009 13:15:32 +0100 Subject: mac80211: introduce flush operation We've long lacked a good confirmation that frames have really gone out, e.g. before going off-channel for a scan. Add a flush() operation that drivers can implement to provide that confirmation, and use it in a few places: * before scanning sends the nullfunc frames * after scanning sends the nullfunc frames, if any * when going idle, to send any pending frames Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/driver-ops.h | 7 +++++++ net/mac80211/driver-trace.h | 21 +++++++++++++++++++++ net/mac80211/iface.c | 2 ++ net/mac80211/scan.c | 13 +++++++++++-- 4 files changed, 41 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 727e4cf7b8a6..cbe133bcdf34 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -259,4 +259,11 @@ static inline void drv_rfkill_poll(struct ieee80211_local *local) if (local->ops->rfkill_poll) local->ops->rfkill_poll(&local->hw); } + +static inline void drv_flush(struct ieee80211_local *local, bool drop) +{ + trace_drv_flush(local, drop); + if (local->ops->flush) + local->ops->flush(&local->hw, drop); +} #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 7a849b920165..977cc7528bc6 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -690,6 +690,27 @@ TRACE_EVENT(drv_ampdu_action, LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action, __entry->tid, __entry->ret ) ); + +TRACE_EVENT(drv_flush, + TP_PROTO(struct ieee80211_local *local, bool drop), + + TP_ARGS(local, drop), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(bool, drop) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->drop = drop; + ), + + TP_printk( + LOCAL_PR_FMT " drop:%d", + LOCAL_PR_ARG, __entry->drop + ) +); #endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 1ceca14331d4..389dc8d880f3 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -917,6 +917,8 @@ static u32 ieee80211_idle_on(struct ieee80211_local *local) wiphy_name(local->hw.wiphy)); #endif + drv_flush(local, false); + local->hw.conf.flags |= IEEE80211_CONF_IDLE; return IEEE80211_CONF_CHANGE_IDLE; } diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index ae1830056521..d98c45e5528b 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -418,9 +418,10 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) local->next_scan_state = SCAN_DECISION; local->scan_channel_idx = 0; + drv_flush(local, false); + ieee80211_configure_filter(local); - /* TODO: start scan as soon as all nullfunc frames are ACKed */ ieee80211_queue_delayed_work(&local->hw, &local->scan_work, IEEE80211_CHANNEL_TIME); @@ -584,8 +585,16 @@ static void ieee80211_scan_state_leave_oper_channel(struct ieee80211_local *loca __set_bit(SCAN_OFF_CHANNEL, &local->scanning); + /* + * What if the nullfunc frames didn't arrive? 
+ */ + drv_flush(local, false); + if (local->ops->flush) + *next_delay = 0; + else + *next_delay = HZ / 10; + /* advance to the next channel to be scanned */ - *next_delay = HZ / 10; local->next_scan_state = SCAN_SET_CHANNEL; } -- cgit v1.2.2 From 63f170e0c80a131cdd549fab7afb5036009944fc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 23 Dec 2009 13:15:33 +0100 Subject: mac80211: let cfg80211 manage auth state mac80211 currently hangs on to the auth state by keeping it on the work list. That can lead to confusing behaviour like rejecting scans while authenticated to any AP (but not yet associated.) It also means that it needs to keep track of the work struct while associated for when it gets disassociated (or disassociates.) Change this to free the work struct after the authentication completed successfully and allocate a new one for associating, thereby letting cfg80211 manage the auth state. Another change necessary for this is to tell cfg80211 about all unicast deauth frames sent to mac80211 since now it can no longer check the auth state, but that check was racy anyway. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 3 +- net/mac80211/mlme.c | 173 +++++++++++++++++++-------------------------- 2 files changed, 73 insertions(+), 103 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index adeae03c26a3..e21e0301548b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -228,7 +228,7 @@ struct mesh_preq_queue { }; enum ieee80211_mgd_state { - IEEE80211_MGD_STATE_IDLE, + IEEE80211_MGD_STATE_INVALID, IEEE80211_MGD_STATE_PROBE, IEEE80211_MGD_STATE_AUTH, IEEE80211_MGD_STATE_ASSOC, @@ -285,7 +285,6 @@ struct ieee80211_if_managed { struct mutex mtx; struct ieee80211_bss *associated; - struct ieee80211_mgd_work *old_associate_work; struct list_head work_list; u8 bssid[ETH_ALEN]; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 7f9909340c09..f060bc616203 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -949,11 +949,10 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, } static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgd_work *wk, + struct ieee80211_bss *bss, u32 bss_info_changed) { struct ieee80211_local *local = sdata->local; - struct ieee80211_bss *bss = wk->bss; bss_info_changed |= BSS_CHANGED_ASSOC; /* set timing information */ @@ -966,7 +965,6 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, bss->cbss.capability, bss->has_erp_value, bss->erp_value); sdata->u.mgd.associated = bss; - sdata->u.mgd.old_associate_work = wk; memcpy(sdata->u.mgd.bssid, bss->cbss.bssid, ETH_ALEN); /* just to be sure */ @@ -1090,8 +1088,7 @@ ieee80211_authenticate(struct ieee80211_sub_if_data *sdata, return RX_MGMT_NONE; } -static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, - bool deauth) +static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; @@ -1109,16 +1106,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ifmgd->associated = NULL; memset(ifmgd->bssid, 0, ETH_ALEN); - if (deauth) { - kfree(ifmgd->old_associate_work); - ifmgd->old_associate_work = NULL; - } else { - struct ieee80211_mgd_work *wk = ifmgd->old_associate_work; - - wk->state = IEEE80211_MGD_STATE_IDLE; - list_add(&wk->list, 
&ifmgd->work_list); - } - /* * we need to commit the associated = NULL change because the * scan code uses that to determine whether this iface should @@ -1333,7 +1320,8 @@ EXPORT_SYMBOL(ieee80211_beacon_loss); static void ieee80211_auth_completed(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgd_work *wk) { - wk->state = IEEE80211_MGD_STATE_IDLE; + list_del(&wk->list); + kfree(wk); printk(KERN_DEBUG "%s: authenticated\n", sdata->name); } @@ -1411,7 +1399,6 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, static enum rx_mgmt_action __must_check ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgd_work *wk, struct ieee80211_mgmt *mgmt, size_t len) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; @@ -1423,23 +1410,15 @@ ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, ASSERT_MGD_MTX(ifmgd); - if (wk) - bssid = wk->bss->cbss.bssid; - else - bssid = ifmgd->associated->cbss.bssid; + bssid = ifmgd->associated->cbss.bssid; reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); printk(KERN_DEBUG "%s: deauthenticated from %pM (Reason: %u)\n", sdata->name, bssid, reason_code); - if (!wk) { - ieee80211_set_disassoc(sdata, true); - ieee80211_recalc_idle(sdata->local); - } else { - list_del(&wk->list); - kfree(wk); - } + ieee80211_set_disassoc(sdata); + ieee80211_recalc_idle(sdata->local); return RX_MGMT_CFG80211_DEAUTH; } @@ -1468,7 +1447,7 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, printk(KERN_DEBUG "%s: disassociated from %pM (Reason: %u)\n", sdata->name, mgmt->sa, reason_code); - ieee80211_set_disassoc(sdata, false); + ieee80211_set_disassoc(sdata); ieee80211_recalc_idle(sdata->local); return RX_MGMT_CFG80211_DISASSOC; } @@ -1484,6 +1463,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; struct sta_info *sta; + struct ieee80211_bss *bss = wk->bss; u32 rates, basic_rates; u16 capab_info, status_code, aid; struct ieee802_11_elems elems; @@ -1502,7 +1482,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, if (len < 24 + 6) return RX_MGMT_NONE; - if (memcmp(wk->bss->cbss.bssid, mgmt->sa, ETH_ALEN) != 0) + if (memcmp(bss->cbss.bssid, mgmt->sa, ETH_ALEN) != 0) return RX_MGMT_NONE; capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info); @@ -1532,10 +1512,17 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, return RX_MGMT_NONE; } + /* + * Here the association was either successful or not. 
+ */ + + /* delete work item -- must be before set_associated for PS */ + list_del(&wk->list); + kfree(wk); + if (status_code != WLAN_STATUS_SUCCESS) { printk(KERN_DEBUG "%s: AP denied association (code=%d)\n", sdata->name, status_code); - wk->state = IEEE80211_MGD_STATE_IDLE; return RX_MGMT_CFG80211_ASSOC; } @@ -1553,7 +1540,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, printk(KERN_DEBUG "%s: associated\n", sdata->name); ifmgd->aid = aid; - sta = sta_info_alloc(sdata, wk->bss->cbss.bssid, GFP_KERNEL); + sta = sta_info_alloc(sdata, bss->cbss.bssid, GFP_KERNEL); if (!sta) { printk(KERN_DEBUG "%s: failed to alloc STA entry for" " the AP\n", sdata->name); @@ -1645,18 +1632,14 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, (ifmgd->flags & IEEE80211_STA_WMM_ENABLED) && !(ifmgd->flags & IEEE80211_STA_DISABLE_11N)) changed |= ieee80211_enable_ht(sdata, elems.ht_info_elem, - wk->bss->cbss.bssid, + bss->cbss.bssid, ap_ht_cap_flags); - /* delete work item -- must be before set_associated for PS */ - list_del(&wk->list); - /* set AID and assoc capability, * ieee80211_set_associated() will tell the driver */ bss_conf->aid = aid; bss_conf->assoc_capability = capab_info; - /* this will take ownership of wk */ - ieee80211_set_associated(sdata, wk, changed); + ieee80211_set_associated(sdata, bss, changed); /* * Start timer to probe the connection to the AP now. @@ -1999,8 +1982,7 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, skb->len, rx_status); break; case IEEE80211_STYPE_DEAUTH: - rma = ieee80211_rx_mgmt_deauth(sdata, NULL, - mgmt, skb->len); + rma = ieee80211_rx_mgmt_deauth(sdata, mgmt, skb->len); break; case IEEE80211_STYPE_DISASSOC: rma = ieee80211_rx_mgmt_disassoc(sdata, mgmt, skb->len); @@ -2051,8 +2033,15 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, skb->len, true); break; case IEEE80211_STYPE_DEAUTH: - rma = ieee80211_rx_mgmt_deauth(sdata, wk, mgmt, - skb->len); + if (skb->len >= 24 + 2 /* mgmt + deauth reason */) { + /* + * We get here if we get deauth while + * trying to auth/assoc. Telling cfg80211 + * is handled below, unconditionally. 
+ */ + list_del(&wk->list); + kfree(wk); + } break; } /* @@ -2066,6 +2055,12 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, mutex_unlock(&ifmgd->mtx); + if (skb->len >= 24 + 2 /* mgmt + deauth reason */ && + (fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_DEAUTH) { + WARN_ON(rma != RX_MGMT_NONE); + rma = RX_MGMT_CFG80211_DEAUTH; + } + switch (rma) { case RX_MGMT_NONE: /* no action */ @@ -2116,7 +2111,6 @@ static void ieee80211_sta_work(struct work_struct *work) struct ieee80211_mgd_work *wk, *tmp; LIST_HEAD(free_work); enum rx_mgmt_action rma; - bool anybusy = false; if (!ieee80211_sdata_running(sdata)) return; @@ -2171,7 +2165,7 @@ static void ieee80211_sta_work(struct work_struct *work) printk(KERN_DEBUG "No probe response from AP %pM" " after %dms, disconnecting.\n", bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ); - ieee80211_set_disassoc(sdata, true); + ieee80211_set_disassoc(sdata); ieee80211_recalc_idle(local); mutex_unlock(&ifmgd->mtx); /* @@ -2203,8 +2197,6 @@ static void ieee80211_sta_work(struct work_struct *work) switch (wk->state) { default: WARN_ON(1); - /* fall through */ - case IEEE80211_MGD_STATE_IDLE: /* nothing */ rma = RX_MGMT_NONE; break; @@ -2227,20 +2219,19 @@ static void ieee80211_sta_work(struct work_struct *work) case RX_MGMT_CFG80211_ASSOC_TO: list_del(&wk->list); list_add(&wk->list, &free_work); - wk->tries = rma; /* small abuse but only local */ + /* + * small abuse but only local -- keep the + * action type in wk->timeout while the item + * is on the cleanup list + */ + wk->timeout = rma; break; default: WARN(1, "unexpected: %d", rma); } } - list_for_each_entry(wk, &ifmgd->work_list, list) { - if (wk->state != IEEE80211_MGD_STATE_IDLE) { - anybusy = true; - break; - } - } - if (!anybusy && + if (list_empty(&ifmgd->work_list) && test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifmgd->request)) ieee80211_queue_delayed_work(&local->hw, &local->scan_work, @@ -2249,7 +2240,8 @@ static void ieee80211_sta_work(struct work_struct *work) mutex_unlock(&ifmgd->mtx); list_for_each_entry_safe(wk, tmp, &free_work, list) { - switch (wk->tries) { + /* see above how we're using wk->timeout */ + switch (wk->timeout) { case RX_MGMT_CFG80211_AUTH_TO: cfg80211_send_auth_timeout(sdata->dev, wk->bss->cbss.bssid); @@ -2259,7 +2251,7 @@ static void ieee80211_sta_work(struct work_struct *work) wk->bss->cbss.bssid); break; default: - WARN(1, "unexpected: %d", wk->tries); + WARN(1, "unexpected: %lu", wk->timeout); } list_del(&wk->list); @@ -2487,35 +2479,18 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, struct cfg80211_assoc_request *req) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct ieee80211_mgd_work *wk, *found = NULL; + struct ieee80211_mgd_work *wk; + const u8 *ssid; int i, err; mutex_lock(&ifmgd->mtx); - list_for_each_entry(wk, &ifmgd->work_list, list) { - if (&wk->bss->cbss == req->bss && - wk->state == IEEE80211_MGD_STATE_IDLE) { - found = wk; - break; - } - } - - if (!found) { - err = -ENOLINK; - goto out; - } - - list_del(&found->list); - - wk = krealloc(found, sizeof(*wk) + req->ie_len, GFP_KERNEL); + wk = kzalloc(sizeof(*wk) + req->ie_len, GFP_KERNEL); if (!wk) { - list_add(&found->list, &ifmgd->work_list); err = -ENOMEM; goto out; } - list_add(&wk->list, &ifmgd->work_list); - ifmgd->flags &= ~IEEE80211_STA_DISABLE_11N; for (i = 0; i < req->crypto.n_ciphers_pairwise; i++) @@ -2524,8 +2499,6 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_WEP104) 
ifmgd->flags |= IEEE80211_STA_DISABLE_11N; - sdata->local->oper_channel = req->bss->channel; - ieee80211_hw_config(sdata->local, 0); if (req->ie && req->ie_len) { memcpy(wk->ie, req->ie, req->ie_len); @@ -2533,11 +2506,16 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, } else wk->ie_len = 0; + wk->bss = (void *)req->bss; + + ssid = ieee80211_bss_get_ie(req->bss, WLAN_EID_SSID); + memcpy(wk->ssid, ssid + 2, ssid[1]); + wk->ssid_len = ssid[1]; + if (req->prev_bssid) memcpy(wk->prev_bssid, req->prev_bssid, ETH_ALEN); wk->state = IEEE80211_MGD_STATE_ASSOC; - wk->tries = 0; wk->timeout = jiffies; /* run right away */ if (req->use_mfp) { @@ -2553,6 +2531,10 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, else ifmgd->flags &= ~IEEE80211_STA_CONTROL_PORT; + sdata->local->oper_channel = req->bss->channel; + ieee80211_hw_config(sdata->local, 0); + + list_add(&wk->list, &ifmgd->work_list); ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.work); err = 0; @@ -2568,23 +2550,23 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_mgd_work *wk; - const u8 *bssid = NULL; + const u8 *bssid = req->bss->bssid; bool not_auth_yet = false; mutex_lock(&ifmgd->mtx); if (ifmgd->associated && &ifmgd->associated->cbss == req->bss) { bssid = req->bss->bssid; - ieee80211_set_disassoc(sdata, true); + ieee80211_set_disassoc(sdata); } else list_for_each_entry(wk, &ifmgd->work_list, list) { - if (&wk->bss->cbss == req->bss) { - bssid = req->bss->bssid; - if (wk->state == IEEE80211_MGD_STATE_PROBE) - not_auth_yet = true; - list_del(&wk->list); - kfree(wk); - break; - } + if (wk->state != IEEE80211_MGD_STATE_PROBE) + continue; + if (req->bss != &wk->bss->cbss) + continue; + not_auth_yet = true; + list_del(&wk->list); + kfree(wk); + break; } /* @@ -2601,17 +2583,6 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, return 0; } - /* - * cfg80211 should catch this ... but it's racy since - * we can receive a deauth frame, process it, hand it - * to cfg80211 while that's in a locked section already - * trying to tell us that the user wants to disconnect. - */ - if (!bssid) { - mutex_unlock(&ifmgd->mtx); - return -ENOLINK; - } - mutex_unlock(&ifmgd->mtx); printk(KERN_DEBUG "%s: deauthenticating from %pM by local choice (reason=%d)\n", @@ -2648,7 +2619,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, printk(KERN_DEBUG "%s: disassociating from %pM by local choice (reason=%d)\n", sdata->name, req->bss->bssid, req->reason_code); - ieee80211_set_disassoc(sdata, false); + ieee80211_set_disassoc(sdata); mutex_unlock(&ifmgd->mtx); -- cgit v1.2.2 From f679f65d417c3ea3f91b4bbfb68e3951c9eb8f04 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 23 Dec 2009 13:15:34 +0100 Subject: mac80211: generalise management work a bit As a first step of generalising management work, this renames a few things and puts more information directly into the struct so that auth/assoc need not access the BSS pointer as often -- in fact it can be removed from auth completely. Also since the previous patch made sure a new work item is used for association, we can make the different data a union. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/ieee80211_i.h | 53 +++++++---- net/mac80211/mlme.c | 226 +++++++++++++++++++++++++++------------------ 2 files changed, 172 insertions(+), 107 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e21e0301548b..0339e909e0c4 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -227,31 +227,48 @@ struct mesh_preq_queue { u8 flags; }; -enum ieee80211_mgd_state { - IEEE80211_MGD_STATE_INVALID, - IEEE80211_MGD_STATE_PROBE, - IEEE80211_MGD_STATE_AUTH, - IEEE80211_MGD_STATE_ASSOC, +enum ieee80211_work_type { + IEEE80211_WORK_AUTH_PROBE, + IEEE80211_WORK_AUTH, + IEEE80211_WORK_ASSOC, }; -struct ieee80211_mgd_work { +struct ieee80211_work { struct list_head list; - struct ieee80211_bss *bss; - int ie_len; - u8 prev_bssid[ETH_ALEN]; - u8 ssid[IEEE80211_MAX_SSID_LEN]; - u8 ssid_len; - unsigned long timeout; - enum ieee80211_mgd_state state; - u16 auth_alg, auth_transaction; - int tries; + struct ieee80211_channel *chan; + /* XXX: chan type? -- right now not really needed */ + unsigned long timeout; + enum ieee80211_work_type type; - u8 key[WLAN_KEY_LEN_WEP104]; - u8 key_len, key_idx; + union { + struct { + int tries; + u16 algorithm, transaction; + u8 ssid[IEEE80211_MAX_SSID_LEN]; + u8 ssid_len; + u8 bssid[ETH_ALEN]; + u8 key[WLAN_KEY_LEN_WEP104]; + u8 key_len, key_idx; + bool privacy; + } auth; + struct { + struct ieee80211_bss *bss; + const u8 *supp_rates; + const u8 *ht_information_ie; + int tries; + u16 capability; + u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; + u8 ssid[IEEE80211_MAX_SSID_LEN]; + u8 ssid_len; + u8 supp_rates_len; + bool wmm_used; + } assoc; + }; + int ie_len; /* must be last */ - u8 ie[0]; /* for auth or assoc frame, not probe */ + u8 ie[0]; }; /* flags used in struct ieee80211_if_managed.flags */ diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f060bc616203..c65225f29bb6 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -125,15 +125,15 @@ static int ecw2cw(int ecw) return (1 << ecw) - 1; } -static int ieee80211_compatible_rates(struct ieee80211_bss *bss, +static int ieee80211_compatible_rates(const u8 *supp_rates, int supp_rates_len, struct ieee80211_supported_band *sband, u32 *rates) { int i, j, count; *rates = 0; count = 0; - for (i = 0; i < bss->supp_rates_len; i++) { - int rate = (bss->supp_rates[i] & 0x7F) * 5; + for (i = 0; i < supp_rates_len; i++) { + int rate = (supp_rates[i] & 0x7F) * 5; for (j = 0; j < sband->n_bitrates; j++) if (sband->bitrates[j].bitrate == rate) { @@ -232,7 +232,7 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, /* frame sending functions */ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgd_work *wk) + struct ieee80211_work *wk) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; @@ -248,7 +248,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200 + wk->ie_len + - wk->ssid_len); + wk->assoc.ssid_len); if (!skb) { printk(KERN_DEBUG "%s: failed to allocate buffer for assoc " "frame\n", sdata->name); @@ -267,35 +267,37 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, capab |= WLAN_CAPABILITY_SHORT_PREAMBLE; } - if (wk->bss->cbss.capability & WLAN_CAPABILITY_PRIVACY) + if (wk->assoc.capability & WLAN_CAPABILITY_PRIVACY) capab |= WLAN_CAPABILITY_PRIVACY; - if (wk->bss->wmm_used) + if (wk->assoc.wmm_used) wmm 
= 1; /* get all rates supported by the device and the AP as * some APs don't like getting a superset of their rates * in the association request (e.g. D-Link DAP 1353 in * b-only mode) */ - rates_len = ieee80211_compatible_rates(wk->bss, sband, &rates); + rates_len = ieee80211_compatible_rates(wk->assoc.supp_rates, + wk->assoc.supp_rates_len, + sband, &rates); - if ((wk->bss->cbss.capability & WLAN_CAPABILITY_SPECTRUM_MGMT) && + if ((wk->assoc.capability & WLAN_CAPABILITY_SPECTRUM_MGMT) && (local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT)) capab |= WLAN_CAPABILITY_SPECTRUM_MGMT; mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); memset(mgmt, 0, 24); - memcpy(mgmt->da, wk->bss->cbss.bssid, ETH_ALEN); + memcpy(mgmt->da, wk->assoc.bssid, ETH_ALEN); memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); - memcpy(mgmt->bssid, wk->bss->cbss.bssid, ETH_ALEN); + memcpy(mgmt->bssid, wk->assoc.bssid, ETH_ALEN); - if (!is_zero_ether_addr(wk->prev_bssid)) { + if (!is_zero_ether_addr(wk->assoc.prev_bssid)) { skb_put(skb, 10); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_REASSOC_REQ); mgmt->u.reassoc_req.capab_info = cpu_to_le16(capab); mgmt->u.reassoc_req.listen_interval = cpu_to_le16(local->hw.conf.listen_interval); - memcpy(mgmt->u.reassoc_req.current_ap, wk->prev_bssid, + memcpy(mgmt->u.reassoc_req.current_ap, wk->assoc.prev_bssid, ETH_ALEN); } else { skb_put(skb, 4); @@ -307,10 +309,10 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, } /* SSID */ - ies = pos = skb_put(skb, 2 + wk->ssid_len); + ies = pos = skb_put(skb, 2 + wk->assoc.ssid_len); *pos++ = WLAN_EID_SSID; - *pos++ = wk->ssid_len; - memcpy(pos, wk->ssid, wk->ssid_len); + *pos++ = wk->assoc.ssid_len; + memcpy(pos, wk->assoc.ssid, wk->assoc.ssid_len); /* add all rates which were marked to be used above */ supp_rates_len = rates_len; @@ -392,7 +394,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, */ if (wmm && (ifmgd->flags & IEEE80211_STA_WMM_ENABLED) && sband->ht_cap.ht_supported && - (ht_ie = ieee80211_bss_get_ie(&wk->bss->cbss, WLAN_EID_HT_INFORMATION)) && + (ht_ie = wk->assoc.ht_information_ie) && ht_ie[1] >= sizeof(struct ieee80211_ht_info) && (!(ifmgd->flags & IEEE80211_STA_DISABLE_11N))) { struct ieee80211_ht_info *ht_info = @@ -1003,23 +1005,43 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, netif_carrier_on(sdata->dev); } +static void ieee80211_remove_auth_bss(struct ieee80211_local *local, + struct ieee80211_work *wk) +{ + struct cfg80211_bss *cbss; + u16 capa_val = WLAN_CAPABILITY_ESS; + + if (wk->auth.privacy) + capa_val |= WLAN_CAPABILITY_PRIVACY; + + cbss = cfg80211_get_bss(local->hw.wiphy, wk->chan, wk->auth.bssid, + wk->auth.ssid, wk->auth.ssid_len, + WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_PRIVACY, + capa_val); + if (!cbss) + return; + + cfg80211_unlink_bss(local->hw.wiphy, cbss); + cfg80211_put_bss(cbss); +} + static enum rx_mgmt_action __must_check ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgd_work *wk) + struct ieee80211_work *wk) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; - wk->tries++; - if (wk->tries > IEEE80211_AUTH_MAX_TRIES) { + wk->auth.tries++; + if (wk->auth.tries > IEEE80211_AUTH_MAX_TRIES) { printk(KERN_DEBUG "%s: direct probe to AP %pM timed out\n", - sdata->name, wk->bss->cbss.bssid); + sdata->name, wk->auth.bssid); /* * Most likely AP is not in the range so remove the * bss struct for that AP. 
*/ - cfg80211_unlink_bss(local->hw.wiphy, &wk->bss->cbss); + ieee80211_remove_auth_bss(local, wk); /* * We might have a pending scan which had no chance to run yet @@ -1031,14 +1053,14 @@ ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata, } printk(KERN_DEBUG "%s: direct probe to AP %pM (try %d)\n", - sdata->name, wk->bss->cbss.bssid, - wk->tries); + sdata->name, wk->auth.bssid, wk->auth.tries); /* * Direct probe is sent to broadcast address as some APs * will not answer to direct packet in unassociated state. */ - ieee80211_send_probe_req(sdata, NULL, wk->ssid, wk->ssid_len, NULL, 0); + ieee80211_send_probe_req(sdata, NULL, wk->auth.ssid, wk->auth.ssid_len, + NULL, 0); wk->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; run_again(ifmgd, wk->timeout); @@ -1049,22 +1071,21 @@ ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata, static enum rx_mgmt_action __must_check ieee80211_authenticate(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgd_work *wk) + struct ieee80211_work *wk) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; - wk->tries++; - if (wk->tries > IEEE80211_AUTH_MAX_TRIES) { + wk->auth.tries++; + if (wk->auth.tries > IEEE80211_AUTH_MAX_TRIES) { printk(KERN_DEBUG "%s: authentication with AP %pM" - " timed out\n", - sdata->name, wk->bss->cbss.bssid); + " timed out\n", sdata->name, wk->auth.bssid); /* * Most likely AP is not in the range so remove the * bss struct for that AP. */ - cfg80211_unlink_bss(local->hw.wiphy, &wk->bss->cbss); + ieee80211_remove_auth_bss(local, wk); /* * We might have a pending scan which had no chance to run yet @@ -1076,11 +1097,11 @@ ieee80211_authenticate(struct ieee80211_sub_if_data *sdata, } printk(KERN_DEBUG "%s: authenticate with AP %pM (try %d)\n", - sdata->name, wk->bss->cbss.bssid, wk->tries); + sdata->name, wk->auth.bssid, wk->auth.tries); - ieee80211_send_auth(sdata, 1, wk->auth_alg, wk->ie, wk->ie_len, - wk->bss->cbss.bssid, NULL, 0, 0); - wk->auth_transaction = 2; + ieee80211_send_auth(sdata, 1, wk->auth.algorithm, wk->ie, wk->ie_len, + wk->auth.bssid, NULL, 0, 0); + wk->auth.transaction = 2; wk->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; run_again(ifmgd, wk->timeout); @@ -1176,22 +1197,22 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata) static enum rx_mgmt_action __must_check ieee80211_associate(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgd_work *wk) + struct ieee80211_work *wk) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; - wk->tries++; - if (wk->tries > IEEE80211_ASSOC_MAX_TRIES) { + wk->assoc.tries++; + if (wk->assoc.tries > IEEE80211_ASSOC_MAX_TRIES) { printk(KERN_DEBUG "%s: association with AP %pM" " timed out\n", - sdata->name, wk->bss->cbss.bssid); + sdata->name, wk->assoc.bssid); /* * Most likely AP is not in the range so remove the * bss struct for that AP. 
*/ - cfg80211_unlink_bss(local->hw.wiphy, &wk->bss->cbss); + cfg80211_unlink_bss(local->hw.wiphy, &wk->assoc.bss->cbss); /* * We might have a pending scan which had no chance to run yet @@ -1203,7 +1224,7 @@ ieee80211_associate(struct ieee80211_sub_if_data *sdata, } printk(KERN_DEBUG "%s: associate with AP %pM (try %d)\n", - sdata->name, wk->bss->cbss.bssid, wk->tries); + sdata->name, wk->assoc.bssid, wk->assoc.tries); ieee80211_send_assoc(sdata, wk); wk->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT; @@ -1318,7 +1339,7 @@ void ieee80211_beacon_loss(struct ieee80211_vif *vif) EXPORT_SYMBOL(ieee80211_beacon_loss); static void ieee80211_auth_completed(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgd_work *wk) + struct ieee80211_work *wk) { list_del(&wk->list); kfree(wk); @@ -1327,7 +1348,7 @@ static void ieee80211_auth_completed(struct ieee80211_sub_if_data *sdata, static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgd_work *wk, + struct ieee80211_work *wk, struct ieee80211_mgmt *mgmt, size_t len) { @@ -1338,38 +1359,38 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); if (!elems.challenge) return; - ieee80211_send_auth(sdata, 3, wk->auth_alg, + ieee80211_send_auth(sdata, 3, wk->auth.algorithm, elems.challenge - 2, elems.challenge_len + 2, - wk->bss->cbss.bssid, - wk->key, wk->key_len, wk->key_idx); - wk->auth_transaction = 4; + wk->auth.bssid, wk->auth.key, wk->auth.key_len, + wk->auth.key_idx); + wk->auth.transaction = 4; } static enum rx_mgmt_action __must_check ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgd_work *wk, + struct ieee80211_work *wk, struct ieee80211_mgmt *mgmt, size_t len) { u16 auth_alg, auth_transaction, status_code; - if (wk->state != IEEE80211_MGD_STATE_AUTH) + if (wk->type != IEEE80211_WORK_AUTH) return RX_MGMT_NONE; if (len < 24 + 6) return RX_MGMT_NONE; - if (memcmp(wk->bss->cbss.bssid, mgmt->sa, ETH_ALEN) != 0) + if (memcmp(wk->auth.bssid, mgmt->sa, ETH_ALEN) != 0) return RX_MGMT_NONE; - if (memcmp(wk->bss->cbss.bssid, mgmt->bssid, ETH_ALEN) != 0) + if (memcmp(wk->auth.bssid, mgmt->bssid, ETH_ALEN) != 0) return RX_MGMT_NONE; auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg); auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); status_code = le16_to_cpu(mgmt->u.auth.status_code); - if (auth_alg != wk->auth_alg || - auth_transaction != wk->auth_transaction) + if (auth_alg != wk->auth.algorithm || + auth_transaction != wk->auth.transaction) return RX_MGMT_NONE; if (status_code != WLAN_STATUS_SUCCESS) { @@ -1378,14 +1399,14 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, return RX_MGMT_CFG80211_AUTH; } - switch (wk->auth_alg) { + switch (wk->auth.algorithm) { case WLAN_AUTH_OPEN: case WLAN_AUTH_LEAP: case WLAN_AUTH_FT: ieee80211_auth_completed(sdata, wk); return RX_MGMT_CFG80211_AUTH; case WLAN_AUTH_SHARED_KEY: - if (wk->auth_transaction == 4) { + if (wk->auth.transaction == 4) { ieee80211_auth_completed(sdata, wk); return RX_MGMT_CFG80211_AUTH; } else @@ -1455,7 +1476,7 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, static enum rx_mgmt_action __must_check ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgd_work *wk, + struct ieee80211_work *wk, struct ieee80211_mgmt *mgmt, size_t len, bool reassoc) { @@ -1463,7 +1484,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = 
sdata->local; struct ieee80211_supported_band *sband; struct sta_info *sta; - struct ieee80211_bss *bss = wk->bss; + struct ieee80211_bss *bss = wk->assoc.bss; u32 rates, basic_rates; u16 capab_info, status_code, aid; struct ieee802_11_elems elems; @@ -1693,7 +1714,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgd_work *wk, + struct ieee80211_work *wk, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status) { @@ -1718,11 +1739,11 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, false); /* direct probe may be part of the association flow */ - if (wk && wk->state == IEEE80211_MGD_STATE_PROBE) { + if (wk && wk->type == IEEE80211_WORK_AUTH_PROBE) { printk(KERN_DEBUG "%s: direct probe responded\n", sdata->name); - wk->tries = 0; - wk->state = IEEE80211_MGD_STATE_AUTH; + wk->auth.tries = 0; + wk->type = IEEE80211_WORK_AUTH; WARN_ON(ieee80211_authenticate(sdata, wk) != RX_MGMT_NONE); } @@ -1959,7 +1980,7 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_rx_status *rx_status; struct ieee80211_mgmt *mgmt; - struct ieee80211_mgd_work *wk; + struct ieee80211_work *wk; enum rx_mgmt_action rma = RX_MGMT_NONE; u16 fc; @@ -2013,7 +2034,20 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, } list_for_each_entry(wk, &ifmgd->work_list, list) { - if (memcmp(wk->bss->cbss.bssid, mgmt->bssid, ETH_ALEN) != 0) + const u8 *bssid = NULL; + + switch (wk->type) { + case IEEE80211_WORK_AUTH_PROBE: + case IEEE80211_WORK_AUTH: + bssid = wk->auth.bssid; + break; + case IEEE80211_WORK_ASSOC: + bssid = wk->assoc.bssid; + break; + default: + continue; + } + if (memcmp(bssid, mgmt->bssid, ETH_ALEN) != 0) continue; switch (fc & IEEE80211_FCTL_STYPE) { @@ -2108,7 +2142,7 @@ static void ieee80211_sta_work(struct work_struct *work) struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd; struct sk_buff *skb; - struct ieee80211_mgd_work *wk, *tmp; + struct ieee80211_work *wk, *tmp; LIST_HEAD(free_work); enum rx_mgmt_action rma; @@ -2194,19 +2228,19 @@ static void ieee80211_sta_work(struct work_struct *work) continue; } - switch (wk->state) { + switch (wk->type) { default: WARN_ON(1); /* nothing */ rma = RX_MGMT_NONE; break; - case IEEE80211_MGD_STATE_PROBE: + case IEEE80211_WORK_AUTH_PROBE: rma = ieee80211_direct_probe(sdata, wk); break; - case IEEE80211_MGD_STATE_AUTH: + case IEEE80211_WORK_AUTH: rma = ieee80211_authenticate(sdata, wk); break; - case IEEE80211_MGD_STATE_ASSOC: + case IEEE80211_WORK_ASSOC: rma = ieee80211_associate(sdata, wk); break; } @@ -2243,12 +2277,11 @@ static void ieee80211_sta_work(struct work_struct *work) /* see above how we're using wk->timeout */ switch (wk->timeout) { case RX_MGMT_CFG80211_AUTH_TO: - cfg80211_send_auth_timeout(sdata->dev, - wk->bss->cbss.bssid); + cfg80211_send_auth_timeout(sdata->dev, wk->auth.bssid); break; case RX_MGMT_CFG80211_ASSOC_TO: cfg80211_send_assoc_timeout(sdata->dev, - wk->bss->cbss.bssid); + wk->assoc.bssid); break; default: WARN(1, "unexpected: %lu", wk->timeout); @@ -2415,7 +2448,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; const u8 *ssid; - struct ieee80211_mgd_work *wk; + struct ieee80211_work *wk; u16 
auth_alg; switch (req->auth_type) { @@ -2439,7 +2472,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, if (!wk) return -ENOMEM; - wk->bss = (void *)req->bss; + memcpy(wk->auth.bssid, req->bss->bssid, ETH_ALEN); if (req->ie && req->ie_len) { memcpy(wk->ie, req->ie, req->ie_len); @@ -2447,22 +2480,27 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, if (req->key && req->key_len) { - wk->key_len = req->key_len; - wk->key_idx = req->key_idx; - memcpy(wk->key, req->key, req->key_len); + wk->auth.key_len = req->key_len; + wk->auth.key_idx = req->key_idx; + memcpy(wk->auth.key, req->key, req->key_len); } ssid = ieee80211_bss_get_ie(req->bss, WLAN_EID_SSID); - memcpy(wk->ssid, ssid + 2, ssid[1]); - wk->ssid_len = ssid[1]; + memcpy(wk->auth.ssid, ssid + 2, ssid[1]); + wk->auth.ssid_len = ssid[1]; + + wk->auth.algorithm = auth_alg; + wk->auth.privacy = req->bss->capability & WLAN_CAPABILITY_PRIVACY; - wk->state = IEEE80211_MGD_STATE_PROBE; - wk->auth_alg = auth_alg; + wk->type = IEEE80211_WORK_AUTH_PROBE; wk->timeout = jiffies; /* run right away */ + wk->chan = req->bss->channel; /* * XXX: if still associated need to tell AP that we're going * to sleep and then change channel etc. + * For now switch channel here, later will be handled + * by submitting this as an off-channel work item. */ sdata->local->oper_channel = req->bss->channel; ieee80211_hw_config(sdata->local, 0); @@ -2479,7 +2517,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, struct cfg80211_assoc_request *req) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct ieee80211_mgd_work *wk; + struct ieee80211_work *wk; const u8 *ssid; int i, err; @@ -2506,17 +2544,27 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, } else wk->ie_len = 0; - wk->bss = (void *)req->bss; + wk->assoc.bss = (void *)req->bss; + + memcpy(wk->assoc.bssid, req->bss->bssid, ETH_ALEN); + + wk->assoc.capability = req->bss->capability; + wk->assoc.wmm_used = wk->assoc.bss->wmm_used; + wk->assoc.supp_rates = wk->assoc.bss->supp_rates; + wk->assoc.supp_rates_len = wk->assoc.bss->supp_rates_len; + wk->assoc.ht_information_ie = + ieee80211_bss_get_ie(req->bss, WLAN_EID_HT_INFORMATION); ssid = ieee80211_bss_get_ie(req->bss, WLAN_EID_SSID); - memcpy(wk->ssid, ssid + 2, ssid[1]); - wk->ssid_len = ssid[1]; + memcpy(wk->assoc.ssid, ssid + 2, ssid[1]); + wk->assoc.ssid_len = ssid[1]; if (req->prev_bssid) - memcpy(wk->prev_bssid, req->prev_bssid, ETH_ALEN); + memcpy(wk->assoc.prev_bssid, req->prev_bssid, ETH_ALEN); - wk->state = IEEE80211_MGD_STATE_ASSOC; + wk->type = IEEE80211_WORK_ASSOC; wk->timeout = jiffies; /* run right away */ + wk->chan = req->bss->channel; if (req->use_mfp) { ifmgd->mfp = IEEE80211_MFP_REQUIRED; @@ -2549,7 +2597,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, void *cookie) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct ieee80211_mgd_work *wk; + struct ieee80211_work *wk; const u8 *bssid = req->bss->bssid; bool not_auth_yet = false; @@ -2559,9 +2607,9 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, bssid = req->bss->bssid; ieee80211_set_disassoc(sdata); } else list_for_each_entry(wk, &ifmgd->work_list, list) { - if (wk->state != IEEE80211_MGD_STATE_PROBE) + if (wk->type != IEEE80211_WORK_AUTH_PROBE) continue; - if (req->bss != &wk->bss->cbss) + if (memcmp(req->bss->bssid, wk->auth.bssid, ETH_ALEN)) continue; not_auth_yet = true; list_del(&wk->list); -- cgit v1.2.2 From af6b63741cc4e4dfd575d06beb333b11a8a6e0c0 Mon Sep 17 00:00:00 2001 
From: Johannes Berg Date: Wed, 23 Dec 2009 13:15:35 +0100 Subject: mac80211: generalise work handling In order to use auth/assoc for different purposes other than MLME, it needs to be split up. For other purposes, a generic work handling (potentially on another channel) will be useful. To achieve that, this patch moves much of the MLME work handling out of mlme into a new work API. The API can currently handle probing a specific AP, authentication and association. The MLME previously handled probe/authentication as one step and will continue to do so, but they are separate in the new work handling. Work items are RCU-managed to be able to check for existence of an item for a specific frame in the RX path, but they can be re-used which the MLME right now will do for its combined probe/auth step. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/Makefile | 2 +- net/mac80211/debugfs_netdev.c | 2 - net/mac80211/ieee80211_i.h | 59 ++- net/mac80211/iface.c | 11 +- net/mac80211/main.c | 2 + net/mac80211/mlme.c | 956 +++++++----------------------------------- net/mac80211/rx.c | 5 + net/mac80211/scan.c | 8 +- net/mac80211/work.c | 902 +++++++++++++++++++++++++++++++++++++++ 9 files changed, 1112 insertions(+), 835 deletions(-) create mode 100644 net/mac80211/work.c (limited to 'net') diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 298cfcc1bf8d..5a1f57df7cd6 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -9,7 +9,7 @@ mac80211-y := \ scan.o \ ht.o agg-tx.o agg-rx.o \ ibss.o \ - mlme.o \ + mlme.o work.o \ iface.o \ rate.o \ michael.o \ diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 355983503885..59f6e3bcbd09 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -133,7 +133,6 @@ IEEE80211_IF_FILE(max_ratectrl_rateidx, max_ratectrl_rateidx, DEC); /* STA attributes */ IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC); IEEE80211_IF_FILE(aid, u.mgd.aid, DEC); -IEEE80211_IF_FILE(capab, u.mgd.capab, HEX); static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps_mode) @@ -270,7 +269,6 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(bssid, sta); DEBUGFS_ADD(aid, sta); - DEBUGFS_ADD(capab, sta); DEBUGFS_ADD_MODE(smps, 0600); } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 0339e909e0c4..97b6076b492e 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -228,41 +228,63 @@ struct mesh_preq_queue { }; enum ieee80211_work_type { - IEEE80211_WORK_AUTH_PROBE, + IEEE80211_WORK_DIRECT_PROBE, IEEE80211_WORK_AUTH, IEEE80211_WORK_ASSOC, }; +/** + * enum work_done_result - indicates what to do after work was done + * + * @WORK_DONE_DESTROY: This work item is no longer needed, destroy. + * @WORK_DONE_REQUEUE: This work item was reset to be reused, and + * should be requeued. + */ +enum work_done_result { + WORK_DONE_DESTROY, + WORK_DONE_REQUEUE, +}; + struct ieee80211_work { struct list_head list; + struct rcu_head rcu_head; + + struct ieee80211_sub_if_data *sdata; + + enum work_done_result (*done)(struct ieee80211_work *wk, + struct sk_buff *skb); + struct ieee80211_channel *chan; /* XXX: chan type? 
-- right now not really needed */ + unsigned long timeout; enum ieee80211_work_type type; + u8 filter_ta[ETH_ALEN]; + union { struct { int tries; u16 algorithm, transaction; u8 ssid[IEEE80211_MAX_SSID_LEN]; u8 ssid_len; - u8 bssid[ETH_ALEN]; u8 key[WLAN_KEY_LEN_WEP104]; u8 key_len, key_idx; bool privacy; - } auth; + } probe_auth; struct { struct ieee80211_bss *bss; const u8 *supp_rates; const u8 *ht_information_ie; + enum ieee80211_smps_mode smps; int tries; u16 capability; - u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; + u8 prev_bssid[ETH_ALEN]; u8 ssid[IEEE80211_MAX_SSID_LEN]; u8 ssid_len; u8 supp_rates_len; - bool wmm_used; + bool wmm_used, use_11n; } assoc; }; @@ -276,17 +298,11 @@ enum ieee80211_sta_flags { IEEE80211_STA_BEACON_POLL = BIT(0), IEEE80211_STA_CONNECTION_POLL = BIT(1), IEEE80211_STA_CONTROL_PORT = BIT(2), - IEEE80211_STA_WMM_ENABLED = BIT(3), IEEE80211_STA_DISABLE_11N = BIT(4), IEEE80211_STA_CSA_RECEIVED = BIT(5), IEEE80211_STA_MFP_ENABLED = BIT(6), }; -/* flags for MLME request */ -enum ieee80211_sta_request { - IEEE80211_STA_REQ_SCAN, -}; - struct ieee80211_if_managed { struct timer_list timer; struct timer_list conn_mon_timer; @@ -302,12 +318,10 @@ struct ieee80211_if_managed { struct mutex mtx; struct ieee80211_bss *associated; - struct list_head work_list; u8 bssid[ETH_ALEN]; u16 aid; - u16 capab; struct sk_buff_head skb_queue; @@ -316,8 +330,6 @@ struct ieee80211_if_managed { enum ieee80211_smps_mode req_smps, /* requested smps mode */ ap_smps; /* smps mode AP thinks we're in */ - unsigned long request; - unsigned int flags; u32 beacon_crc; @@ -583,6 +595,15 @@ struct ieee80211_local { const struct ieee80211_ops *ops; + /* + * work stuff, potentially off-channel (in the future) + */ + struct mutex work_mtx; + struct list_head work_list; + struct timer_list work_timer; + struct work_struct work_work; + struct sk_buff_head work_skb_queue; + /* * private workqueue to mac80211. mac80211 makes this accessible * via ieee80211_queue_work() @@ -1127,6 +1148,14 @@ int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata, void ieee80211_recalc_smps(struct ieee80211_local *local, struct ieee80211_sub_if_data *forsdata); +/* internal work items */ +void ieee80211_work_init(struct ieee80211_local *local); +void ieee80211_add_work(struct ieee80211_work *wk); +void free_work(struct ieee80211_work *wk); +void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata); +ieee80211_rx_result ieee80211_work_rx_mgmt(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); + #ifdef CONFIG_MAC80211_NOINLINE #define debug_noinline noinline #else diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 389dc8d880f3..7d410f15281a 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -361,6 +361,11 @@ static int ieee80211_stop(struct net_device *dev) */ netif_stop_queue(dev); + /* + * Purge work for this interface. + */ + ieee80211_work_purge(sdata); + /* * Now delete all active aggregation sessions. 
*/ @@ -928,6 +933,9 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) struct ieee80211_sub_if_data *sdata; int count = 0; + if (!list_empty(&local->work_list)) + return ieee80211_idle_off(local, "working"); + if (local->scanning) return ieee80211_idle_off(local, "scanning"); @@ -936,8 +944,7 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) continue; /* do not count disabled managed interfaces */ if (sdata->vif.type == NL80211_IFTYPE_STATION && - !sdata->u.mgd.associated && - list_empty(&sdata->u.mgd.work_list)) + !sdata->u.mgd.associated) continue; /* do not count unused IBSS interfaces */ if (sdata->vif.type == NL80211_IFTYPE_ADHOC && diff --git a/net/mac80211/main.c b/net/mac80211/main.c index e93bc558d785..d35023ce7fa1 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -395,6 +395,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, INIT_DELAYED_WORK(&local->scan_work, ieee80211_scan_work); + ieee80211_work_init(local); + INIT_WORK(&local->restart_work, ieee80211_restart_work); INIT_WORK(&local->reconfig_filter, ieee80211_reconfig_filter); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c65225f29bb6..7c1f91bcc834 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -77,12 +77,6 @@ enum rx_mgmt_action { /* caller must tell cfg80211 about internal error */ RX_MGMT_CFG80211_ASSOC_ERROR, - - /* caller must call cfg80211_auth_timeout() & free work */ - RX_MGMT_CFG80211_AUTH_TO, - - /* caller must call cfg80211_assoc_timeout() & free work */ - RX_MGMT_CFG80211_ASSOC_TO, }; /* utils */ @@ -125,27 +119,6 @@ static int ecw2cw(int ecw) return (1 << ecw) - 1; } -static int ieee80211_compatible_rates(const u8 *supp_rates, int supp_rates_len, - struct ieee80211_supported_band *sband, - u32 *rates) -{ - int i, j, count; - *rates = 0; - count = 0; - for (i = 0; i < supp_rates_len; i++) { - int rate = (supp_rates[i] & 0x7F) * 5; - - for (j = 0; j < sband->n_bitrates; j++) - if (sband->bitrates[j].bitrate == rate) { - *rates |= BIT(j); - count++; - break; - } - } - - return count; -} - /* * ieee80211_enable_ht should be called only after the operating band * has been determined as ht configuration depends on the hw's @@ -231,266 +204,6 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, /* frame sending functions */ -static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, - struct ieee80211_work *wk) -{ - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct ieee80211_local *local = sdata->local; - struct sk_buff *skb; - struct ieee80211_mgmt *mgmt; - u8 *pos; - const u8 *ies, *ht_ie; - int i, len, count, rates_len, supp_rates_len; - u16 capab; - int wmm = 0; - struct ieee80211_supported_band *sband; - u32 rates = 0; - - skb = dev_alloc_skb(local->hw.extra_tx_headroom + - sizeof(*mgmt) + 200 + wk->ie_len + - wk->assoc.ssid_len); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for assoc " - "frame\n", sdata->name); - return; - } - skb_reserve(skb, local->hw.extra_tx_headroom); - - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - - capab = ifmgd->capab; - - if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ) { - if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE)) - capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME; - if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE)) - capab |= WLAN_CAPABILITY_SHORT_PREAMBLE; - } - - if (wk->assoc.capability & WLAN_CAPABILITY_PRIVACY) - capab |= WLAN_CAPABILITY_PRIVACY; - if (wk->assoc.wmm_used) - wmm = 
1; - - /* get all rates supported by the device and the AP as - * some APs don't like getting a superset of their rates - * in the association request (e.g. D-Link DAP 1353 in - * b-only mode) */ - rates_len = ieee80211_compatible_rates(wk->assoc.supp_rates, - wk->assoc.supp_rates_len, - sband, &rates); - - if ((wk->assoc.capability & WLAN_CAPABILITY_SPECTRUM_MGMT) && - (local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT)) - capab |= WLAN_CAPABILITY_SPECTRUM_MGMT; - - mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); - memset(mgmt, 0, 24); - memcpy(mgmt->da, wk->assoc.bssid, ETH_ALEN); - memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); - memcpy(mgmt->bssid, wk->assoc.bssid, ETH_ALEN); - - if (!is_zero_ether_addr(wk->assoc.prev_bssid)) { - skb_put(skb, 10); - mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | - IEEE80211_STYPE_REASSOC_REQ); - mgmt->u.reassoc_req.capab_info = cpu_to_le16(capab); - mgmt->u.reassoc_req.listen_interval = - cpu_to_le16(local->hw.conf.listen_interval); - memcpy(mgmt->u.reassoc_req.current_ap, wk->assoc.prev_bssid, - ETH_ALEN); - } else { - skb_put(skb, 4); - mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | - IEEE80211_STYPE_ASSOC_REQ); - mgmt->u.assoc_req.capab_info = cpu_to_le16(capab); - mgmt->u.assoc_req.listen_interval = - cpu_to_le16(local->hw.conf.listen_interval); - } - - /* SSID */ - ies = pos = skb_put(skb, 2 + wk->assoc.ssid_len); - *pos++ = WLAN_EID_SSID; - *pos++ = wk->assoc.ssid_len; - memcpy(pos, wk->assoc.ssid, wk->assoc.ssid_len); - - /* add all rates which were marked to be used above */ - supp_rates_len = rates_len; - if (supp_rates_len > 8) - supp_rates_len = 8; - - len = sband->n_bitrates; - pos = skb_put(skb, supp_rates_len + 2); - *pos++ = WLAN_EID_SUPP_RATES; - *pos++ = supp_rates_len; - - count = 0; - for (i = 0; i < sband->n_bitrates; i++) { - if (BIT(i) & rates) { - int rate = sband->bitrates[i].bitrate; - *pos++ = (u8) (rate / 5); - if (++count == 8) - break; - } - } - - if (rates_len > count) { - pos = skb_put(skb, rates_len - count + 2); - *pos++ = WLAN_EID_EXT_SUPP_RATES; - *pos++ = rates_len - count; - - for (i++; i < sband->n_bitrates; i++) { - if (BIT(i) & rates) { - int rate = sband->bitrates[i].bitrate; - *pos++ = (u8) (rate / 5); - } - } - } - - if (capab & WLAN_CAPABILITY_SPECTRUM_MGMT) { - /* 1. power capabilities */ - pos = skb_put(skb, 4); - *pos++ = WLAN_EID_PWR_CAPABILITY; - *pos++ = 2; - *pos++ = 0; /* min tx power */ - *pos++ = local->hw.conf.channel->max_power; /* max tx power */ - - /* 2. supported channels */ - /* TODO: get this in reg domain format */ - pos = skb_put(skb, 2 * sband->n_channels + 2); - *pos++ = WLAN_EID_SUPPORTED_CHANNELS; - *pos++ = 2 * sband->n_channels; - for (i = 0; i < sband->n_channels; i++) { - *pos++ = ieee80211_frequency_to_channel( - sband->channels[i].center_freq); - *pos++ = 1; /* one channel in the subband*/ - } - } - - if (wk->ie_len && wk->ie) { - pos = skb_put(skb, wk->ie_len); - memcpy(pos, wk->ie, wk->ie_len); - } - - if (wmm && (ifmgd->flags & IEEE80211_STA_WMM_ENABLED)) { - pos = skb_put(skb, 9); - *pos++ = WLAN_EID_VENDOR_SPECIFIC; - *pos++ = 7; /* len */ - *pos++ = 0x00; /* Microsoft OUI 00:50:F2 */ - *pos++ = 0x50; - *pos++ = 0xf2; - *pos++ = 2; /* WME */ - *pos++ = 0; /* WME info */ - *pos++ = 1; /* WME ver */ - *pos++ = 0; - } - - /* wmm support is a must to HT */ - /* - * IEEE802.11n does not allow TKIP/WEP as pairwise - * ciphers in HT mode. We still associate in non-ht - * mode (11a/b/g) if any one of these ciphers is - * configured as pairwise. 
- */ - if (wmm && (ifmgd->flags & IEEE80211_STA_WMM_ENABLED) && - sband->ht_cap.ht_supported && - (ht_ie = wk->assoc.ht_information_ie) && - ht_ie[1] >= sizeof(struct ieee80211_ht_info) && - (!(ifmgd->flags & IEEE80211_STA_DISABLE_11N))) { - struct ieee80211_ht_info *ht_info = - (struct ieee80211_ht_info *)(ht_ie + 2); - u16 cap = sband->ht_cap.cap; - __le16 tmp; - u32 flags = local->hw.conf.channel->flags; - - /* determine capability flags */ - - if (ieee80211_disable_40mhz_24ghz && - sband->band == IEEE80211_BAND_2GHZ) { - cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; - cap &= ~IEEE80211_HT_CAP_SGI_40; - } - - switch (ht_info->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { - case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: - if (flags & IEEE80211_CHAN_NO_HT40PLUS) { - cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; - cap &= ~IEEE80211_HT_CAP_SGI_40; - } - break; - case IEEE80211_HT_PARAM_CHA_SEC_BELOW: - if (flags & IEEE80211_CHAN_NO_HT40MINUS) { - cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; - cap &= ~IEEE80211_HT_CAP_SGI_40; - } - break; - } - - /* set SM PS mode properly */ - cap &= ~IEEE80211_HT_CAP_SM_PS; - /* new association always uses requested smps mode */ - if (ifmgd->req_smps == IEEE80211_SMPS_AUTOMATIC) { - if (ifmgd->powersave) - ifmgd->ap_smps = IEEE80211_SMPS_DYNAMIC; - else - ifmgd->ap_smps = IEEE80211_SMPS_OFF; - } else - ifmgd->ap_smps = ifmgd->req_smps; - - switch (ifmgd->ap_smps) { - case IEEE80211_SMPS_AUTOMATIC: - case IEEE80211_SMPS_NUM_MODES: - WARN_ON(1); - case IEEE80211_SMPS_OFF: - cap |= WLAN_HT_CAP_SM_PS_DISABLED << - IEEE80211_HT_CAP_SM_PS_SHIFT; - break; - case IEEE80211_SMPS_STATIC: - cap |= WLAN_HT_CAP_SM_PS_STATIC << - IEEE80211_HT_CAP_SM_PS_SHIFT; - break; - case IEEE80211_SMPS_DYNAMIC: - cap |= WLAN_HT_CAP_SM_PS_DYNAMIC << - IEEE80211_HT_CAP_SM_PS_SHIFT; - break; - } - - /* reserve and fill IE */ - - pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2); - *pos++ = WLAN_EID_HT_CAPABILITY; - *pos++ = sizeof(struct ieee80211_ht_cap); - memset(pos, 0, sizeof(struct ieee80211_ht_cap)); - - /* capability flags */ - tmp = cpu_to_le16(cap); - memcpy(pos, &tmp, sizeof(u16)); - pos += sizeof(u16); - - /* AMPDU parameters */ - *pos++ = sband->ht_cap.ampdu_factor | - (sband->ht_cap.ampdu_density << - IEEE80211_HT_AMPDU_PARM_DENSITY_SHIFT); - - /* MCS set */ - memcpy(pos, &sband->ht_cap.mcs, sizeof(sband->ht_cap.mcs)); - pos += sizeof(sband->ht_cap.mcs); - - /* extended capabilities */ - pos += sizeof(__le16); - - /* BF capabilities */ - pos += sizeof(__le32); - - /* antenna selection */ - pos += sizeof(u8); - } - - IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; - ieee80211_tx_skb(sdata, skb); -} - - static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, const u8 *bssid, u16 stype, u16 reason, void *cookie) @@ -751,6 +464,11 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) return; } + if (!list_empty(&local->work_list)) { + local->ps_sdata = NULL; + goto change; + } + list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) continue; @@ -761,7 +479,7 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) } if (count == 1 && found->u.mgd.powersave && - found->u.mgd.associated && list_empty(&found->u.mgd.work_list) && + found->u.mgd.associated && !(found->u.mgd.flags & (IEEE80211_STA_BEACON_POLL | IEEE80211_STA_CONNECTION_POLL))) { s32 beaconint_us; @@ -789,6 +507,7 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) local->ps_sdata = NULL; } + 
change: ieee80211_change_ps(local); } @@ -848,7 +567,7 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, int count; u8 *pos; - if (!(ifmgd->flags & IEEE80211_STA_WMM_ENABLED)) + if (local->hw.queues < 4) return; if (!wmm_param) @@ -1005,110 +724,6 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, netif_carrier_on(sdata->dev); } -static void ieee80211_remove_auth_bss(struct ieee80211_local *local, - struct ieee80211_work *wk) -{ - struct cfg80211_bss *cbss; - u16 capa_val = WLAN_CAPABILITY_ESS; - - if (wk->auth.privacy) - capa_val |= WLAN_CAPABILITY_PRIVACY; - - cbss = cfg80211_get_bss(local->hw.wiphy, wk->chan, wk->auth.bssid, - wk->auth.ssid, wk->auth.ssid_len, - WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_PRIVACY, - capa_val); - if (!cbss) - return; - - cfg80211_unlink_bss(local->hw.wiphy, cbss); - cfg80211_put_bss(cbss); -} - -static enum rx_mgmt_action __must_check -ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata, - struct ieee80211_work *wk) -{ - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct ieee80211_local *local = sdata->local; - - wk->auth.tries++; - if (wk->auth.tries > IEEE80211_AUTH_MAX_TRIES) { - printk(KERN_DEBUG "%s: direct probe to AP %pM timed out\n", - sdata->name, wk->auth.bssid); - - /* - * Most likely AP is not in the range so remove the - * bss struct for that AP. - */ - ieee80211_remove_auth_bss(local, wk); - - /* - * We might have a pending scan which had no chance to run yet - * due to work needing to be done. Hence, queue the STAs work - * again for that. - */ - ieee80211_queue_work(&local->hw, &ifmgd->work); - return RX_MGMT_CFG80211_AUTH_TO; - } - - printk(KERN_DEBUG "%s: direct probe to AP %pM (try %d)\n", - sdata->name, wk->auth.bssid, wk->auth.tries); - - /* - * Direct probe is sent to broadcast address as some APs - * will not answer to direct packet in unassociated state. - */ - ieee80211_send_probe_req(sdata, NULL, wk->auth.ssid, wk->auth.ssid_len, - NULL, 0); - - wk->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; - run_again(ifmgd, wk->timeout); - - return RX_MGMT_NONE; -} - - -static enum rx_mgmt_action __must_check -ieee80211_authenticate(struct ieee80211_sub_if_data *sdata, - struct ieee80211_work *wk) -{ - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct ieee80211_local *local = sdata->local; - - wk->auth.tries++; - if (wk->auth.tries > IEEE80211_AUTH_MAX_TRIES) { - printk(KERN_DEBUG "%s: authentication with AP %pM" - " timed out\n", sdata->name, wk->auth.bssid); - - /* - * Most likely AP is not in the range so remove the - * bss struct for that AP. - */ - ieee80211_remove_auth_bss(local, wk); - - /* - * We might have a pending scan which had no chance to run yet - * due to work needing to be done. Hence, queue the STAs work - * again for that. 
- */ - ieee80211_queue_work(&local->hw, &ifmgd->work); - return RX_MGMT_CFG80211_AUTH_TO; - } - - printk(KERN_DEBUG "%s: authenticate with AP %pM (try %d)\n", - sdata->name, wk->auth.bssid, wk->auth.tries); - - ieee80211_send_auth(sdata, 1, wk->auth.algorithm, wk->ie, wk->ie_len, - wk->auth.bssid, NULL, 0, 0); - wk->auth.transaction = 2; - - wk->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; - run_again(ifmgd, wk->timeout); - - return RX_MGMT_NONE; -} - static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; @@ -1195,44 +810,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata) sta_info_destroy(sta); } -static enum rx_mgmt_action __must_check -ieee80211_associate(struct ieee80211_sub_if_data *sdata, - struct ieee80211_work *wk) -{ - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct ieee80211_local *local = sdata->local; - - wk->assoc.tries++; - if (wk->assoc.tries > IEEE80211_ASSOC_MAX_TRIES) { - printk(KERN_DEBUG "%s: association with AP %pM" - " timed out\n", - sdata->name, wk->assoc.bssid); - - /* - * Most likely AP is not in the range so remove the - * bss struct for that AP. - */ - cfg80211_unlink_bss(local->hw.wiphy, &wk->assoc.bss->cbss); - - /* - * We might have a pending scan which had no chance to run yet - * due to work needing to be done. Hence, queue the STAs work - * again for that. - */ - ieee80211_queue_work(&local->hw, &ifmgd->work); - return RX_MGMT_CFG80211_ASSOC_TO; - } - - printk(KERN_DEBUG "%s: associate with AP %pM (try %d)\n", - sdata->name, wk->assoc.bssid, wk->assoc.tries); - ieee80211_send_assoc(sdata, wk); - - wk->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT; - run_again(ifmgd, wk->timeout); - - return RX_MGMT_NONE; -} - void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr) { @@ -1338,86 +915,6 @@ void ieee80211_beacon_loss(struct ieee80211_vif *vif) } EXPORT_SYMBOL(ieee80211_beacon_loss); -static void ieee80211_auth_completed(struct ieee80211_sub_if_data *sdata, - struct ieee80211_work *wk) -{ - list_del(&wk->list); - kfree(wk); - printk(KERN_DEBUG "%s: authenticated\n", sdata->name); -} - - -static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, - struct ieee80211_work *wk, - struct ieee80211_mgmt *mgmt, - size_t len) -{ - u8 *pos; - struct ieee802_11_elems elems; - - pos = mgmt->u.auth.variable; - ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); - if (!elems.challenge) - return; - ieee80211_send_auth(sdata, 3, wk->auth.algorithm, - elems.challenge - 2, elems.challenge_len + 2, - wk->auth.bssid, wk->auth.key, wk->auth.key_len, - wk->auth.key_idx); - wk->auth.transaction = 4; -} - -static enum rx_mgmt_action __must_check -ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, - struct ieee80211_work *wk, - struct ieee80211_mgmt *mgmt, size_t len) -{ - u16 auth_alg, auth_transaction, status_code; - - if (wk->type != IEEE80211_WORK_AUTH) - return RX_MGMT_NONE; - - if (len < 24 + 6) - return RX_MGMT_NONE; - - if (memcmp(wk->auth.bssid, mgmt->sa, ETH_ALEN) != 0) - return RX_MGMT_NONE; - - if (memcmp(wk->auth.bssid, mgmt->bssid, ETH_ALEN) != 0) - return RX_MGMT_NONE; - - auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg); - auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); - status_code = le16_to_cpu(mgmt->u.auth.status_code); - - if (auth_alg != wk->auth.algorithm || - auth_transaction != wk->auth.transaction) - return RX_MGMT_NONE; - - if (status_code != WLAN_STATUS_SUCCESS) { - 
list_del(&wk->list); - kfree(wk); - return RX_MGMT_CFG80211_AUTH; - } - - switch (wk->auth.algorithm) { - case WLAN_AUTH_OPEN: - case WLAN_AUTH_LEAP: - case WLAN_AUTH_FT: - ieee80211_auth_completed(sdata, wk); - return RX_MGMT_CFG80211_AUTH; - case WLAN_AUTH_SHARED_KEY: - if (wk->auth.transaction == 4) { - ieee80211_auth_completed(sdata, wk); - return RX_MGMT_CFG80211_AUTH; - } else - ieee80211_auth_challenge(sdata, wk, mgmt, len); - break; - } - - return RX_MGMT_NONE; -} - - static enum rx_mgmt_action __must_check ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len) @@ -1474,98 +971,51 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, } -static enum rx_mgmt_action __must_check -ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, - struct ieee80211_work *wk, - struct ieee80211_mgmt *mgmt, size_t len, - bool reassoc) +static bool ieee80211_assoc_success(struct ieee80211_work *wk, + struct ieee80211_mgmt *mgmt, size_t len) { + struct ieee80211_sub_if_data *sdata = wk->sdata; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; struct sta_info *sta; struct ieee80211_bss *bss = wk->assoc.bss; + u8 *pos; u32 rates, basic_rates; - u16 capab_info, status_code, aid; + u16 capab_info, aid; struct ieee802_11_elems elems; struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; - u8 *pos; u32 changed = 0; int i, j, err; bool have_higher_than_11mbit = false; u16 ap_ht_cap_flags; - /* - * AssocResp and ReassocResp have identical structure, so process both - * of them in this function. - */ - - if (len < 24 + 6) - return RX_MGMT_NONE; + /* AssocResp and ReassocResp have identical structure */ - if (memcmp(bss->cbss.bssid, mgmt->sa, ETH_ALEN) != 0) - return RX_MGMT_NONE; - - capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info); - status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code); aid = le16_to_cpu(mgmt->u.assoc_resp.aid); - - printk(KERN_DEBUG "%s: RX %sssocResp from %pM (capab=0x%x " - "status=%d aid=%d)\n", - sdata->name, reassoc ? "Rea" : "A", mgmt->sa, - capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14)))); - - pos = mgmt->u.assoc_resp.variable; - ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); - - if (status_code == WLAN_STATUS_ASSOC_REJECTED_TEMPORARILY && - elems.timeout_int && elems.timeout_int_len == 5 && - elems.timeout_int[0] == WLAN_TIMEOUT_ASSOC_COMEBACK) { - u32 tu, ms; - tu = get_unaligned_le32(elems.timeout_int + 1); - ms = tu * 1024 / 1000; - printk(KERN_DEBUG "%s: AP rejected association temporarily; " - "comeback duration %u TU (%u ms)\n", - sdata->name, tu, ms); - wk->timeout = jiffies + msecs_to_jiffies(ms); - if (ms > IEEE80211_ASSOC_TIMEOUT) - run_again(ifmgd, jiffies + msecs_to_jiffies(ms)); - return RX_MGMT_NONE; - } - - /* - * Here the association was either successful or not. 
- */ - - /* delete work item -- must be before set_associated for PS */ - list_del(&wk->list); - kfree(wk); - - if (status_code != WLAN_STATUS_SUCCESS) { - printk(KERN_DEBUG "%s: AP denied association (code=%d)\n", - sdata->name, status_code); - return RX_MGMT_CFG80211_ASSOC; - } + capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info); if ((aid & (BIT(15) | BIT(14))) != (BIT(15) | BIT(14))) printk(KERN_DEBUG "%s: invalid aid value %d; bits 15:14 not " "set\n", sdata->name, aid); aid &= ~(BIT(15) | BIT(14)); + pos = mgmt->u.assoc_resp.variable; + ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); + if (!elems.supp_rates) { printk(KERN_DEBUG "%s: no SuppRates element in AssocResp\n", sdata->name); - return RX_MGMT_NONE; + return false; } - printk(KERN_DEBUG "%s: associated\n", sdata->name); ifmgd->aid = aid; sta = sta_info_alloc(sdata, bss->cbss.bssid, GFP_KERNEL); if (!sta) { printk(KERN_DEBUG "%s: failed to alloc STA entry for" " the AP\n", sdata->name); - return RX_MGMT_CFG80211_ASSOC_ERROR; + return false; } set_sta_flags(sta, WLAN_STA_AUTH | WLAN_STA_ASSOC | @@ -1650,7 +1100,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, ieee80211_set_wmm_default(sdata); if (elems.ht_info_elem && elems.wmm_param && - (ifmgd->flags & IEEE80211_STA_WMM_ENABLED) && + (sdata->local->hw.queues >= 4) && !(ifmgd->flags & IEEE80211_STA_DISABLE_11N)) changed |= ieee80211_enable_ht(sdata, elems.ht_info_elem, bss->cbss.bssid, @@ -1669,7 +1119,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, ieee80211_sta_rx_notify(sdata, (struct ieee80211_hdr *)mgmt); mod_beacon_timer(sdata); - return RX_MGMT_CFG80211_ASSOC; + return true; } @@ -1714,12 +1164,12 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, - struct ieee80211_work *wk, - struct ieee80211_mgmt *mgmt, size_t len, - struct ieee80211_rx_status *rx_status) + struct sk_buff *skb) { + struct ieee80211_mgmt *mgmt = (void *)skb->data; struct ieee80211_if_managed *ifmgd; - size_t baselen; + struct ieee80211_rx_status *rx_status = (void *) skb->cb; + size_t baselen, len = skb->len; struct ieee802_11_elems elems; ifmgd = &sdata->u.mgd; @@ -1738,15 +1188,6 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, false); - /* direct probe may be part of the association flow */ - if (wk && wk->type == IEEE80211_WORK_AUTH_PROBE) { - printk(KERN_DEBUG "%s: direct probe responded\n", - sdata->name); - wk->auth.tries = 0; - wk->type = IEEE80211_WORK_AUTH; - WARN_ON(ieee80211_authenticate(sdata, wk) != RX_MGMT_NONE); - } - if (ifmgd->associated && memcmp(mgmt->bssid, ifmgd->associated->cbss.bssid, ETH_ALEN) == 0 && ifmgd->flags & (IEEE80211_STA_BEACON_POLL | @@ -1960,9 +1401,6 @@ ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, switch (fc & IEEE80211_FCTL_STYPE) { case IEEE80211_STYPE_PROBE_RESP: case IEEE80211_STYPE_BEACON: - case IEEE80211_STYPE_AUTH: - case IEEE80211_STYPE_ASSOC_RESP: - case IEEE80211_STYPE_REASSOC_RESP: case IEEE80211_STYPE_DEAUTH: case IEEE80211_STYPE_DISASSOC: case IEEE80211_STYPE_ACTION: @@ -1980,7 +1418,6 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_rx_status *rx_status; struct ieee80211_mgmt *mgmt; - struct ieee80211_work *wk; enum rx_mgmt_action rma = RX_MGMT_NONE; u16 fc; @@ 
-1999,8 +1436,7 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, rx_status); break; case IEEE80211_STYPE_PROBE_RESP: - ieee80211_rx_mgmt_probe_resp(sdata, NULL, mgmt, - skb->len, rx_status); + ieee80211_rx_mgmt_probe_resp(sdata, skb); break; case IEEE80211_STYPE_DEAUTH: rma = ieee80211_rx_mgmt_deauth(sdata, mgmt, skb->len); @@ -2033,88 +1469,11 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, goto out; } - list_for_each_entry(wk, &ifmgd->work_list, list) { - const u8 *bssid = NULL; - - switch (wk->type) { - case IEEE80211_WORK_AUTH_PROBE: - case IEEE80211_WORK_AUTH: - bssid = wk->auth.bssid; - break; - case IEEE80211_WORK_ASSOC: - bssid = wk->assoc.bssid; - break; - default: - continue; - } - if (memcmp(bssid, mgmt->bssid, ETH_ALEN) != 0) - continue; - - switch (fc & IEEE80211_FCTL_STYPE) { - case IEEE80211_STYPE_PROBE_RESP: - ieee80211_rx_mgmt_probe_resp(sdata, wk, mgmt, skb->len, - rx_status); - break; - case IEEE80211_STYPE_AUTH: - rma = ieee80211_rx_mgmt_auth(sdata, wk, mgmt, skb->len); - break; - case IEEE80211_STYPE_ASSOC_RESP: - rma = ieee80211_rx_mgmt_assoc_resp(sdata, wk, mgmt, - skb->len, false); - break; - case IEEE80211_STYPE_REASSOC_RESP: - rma = ieee80211_rx_mgmt_assoc_resp(sdata, wk, mgmt, - skb->len, true); - break; - case IEEE80211_STYPE_DEAUTH: - if (skb->len >= 24 + 2 /* mgmt + deauth reason */) { - /* - * We get here if we get deauth while - * trying to auth/assoc. Telling cfg80211 - * is handled below, unconditionally. - */ - list_del(&wk->list); - kfree(wk); - } - break; - } - /* - * We've processed this frame for that work, so it can't - * belong to another work struct. - * NB: this is also required for correctness because the - * called functions can free 'wk', and for 'rma'! - */ - break; - } - mutex_unlock(&ifmgd->mtx); if (skb->len >= 24 + 2 /* mgmt + deauth reason */ && - (fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_DEAUTH) { - WARN_ON(rma != RX_MGMT_NONE); - rma = RX_MGMT_CFG80211_DEAUTH; - } - - switch (rma) { - case RX_MGMT_NONE: - /* no action */ - break; - case RX_MGMT_CFG80211_AUTH: - cfg80211_send_rx_auth(sdata->dev, (u8 *) mgmt, skb->len); - break; - case RX_MGMT_CFG80211_ASSOC: - cfg80211_send_rx_assoc(sdata->dev, (u8 *) mgmt, skb->len); - break; - case RX_MGMT_CFG80211_DEAUTH: + (fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_DEAUTH) cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); - break; - case RX_MGMT_CFG80211_ASSOC_ERROR: - /* an internal error -- pretend timeout for now */ - cfg80211_send_assoc_timeout(sdata->dev, mgmt->bssid); - break; - default: - WARN(1, "unexpected: %d", rma); - } out: kfree_skb(skb); @@ -2142,9 +1501,6 @@ static void ieee80211_sta_work(struct work_struct *work) struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd; struct sk_buff *skb; - struct ieee80211_work *wk, *tmp; - LIST_HEAD(free_work); - enum rx_mgmt_action rma; if (!ieee80211_sdata_running(sdata)) return; @@ -2214,84 +1570,7 @@ static void ieee80211_sta_work(struct work_struct *work) } } - - ieee80211_recalc_idle(local); - - list_for_each_entry_safe(wk, tmp, &ifmgd->work_list, list) { - if (time_is_after_jiffies(wk->timeout)) { - /* - * This work item isn't supposed to be worked on - * right now, but take care to adjust the timer - * properly. 
- */ - run_again(ifmgd, wk->timeout); - continue; - } - - switch (wk->type) { - default: - WARN_ON(1); - /* nothing */ - rma = RX_MGMT_NONE; - break; - case IEEE80211_WORK_AUTH_PROBE: - rma = ieee80211_direct_probe(sdata, wk); - break; - case IEEE80211_WORK_AUTH: - rma = ieee80211_authenticate(sdata, wk); - break; - case IEEE80211_WORK_ASSOC: - rma = ieee80211_associate(sdata, wk); - break; - } - - switch (rma) { - case RX_MGMT_NONE: - /* no action required */ - break; - case RX_MGMT_CFG80211_AUTH_TO: - case RX_MGMT_CFG80211_ASSOC_TO: - list_del(&wk->list); - list_add(&wk->list, &free_work); - /* - * small abuse but only local -- keep the - * action type in wk->timeout while the item - * is on the cleanup list - */ - wk->timeout = rma; - break; - default: - WARN(1, "unexpected: %d", rma); - } - } - - if (list_empty(&ifmgd->work_list) && - test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifmgd->request)) - ieee80211_queue_delayed_work(&local->hw, - &local->scan_work, - round_jiffies_relative(0)); - mutex_unlock(&ifmgd->mtx); - - list_for_each_entry_safe(wk, tmp, &free_work, list) { - /* see above how we're using wk->timeout */ - switch (wk->timeout) { - case RX_MGMT_CFG80211_AUTH_TO: - cfg80211_send_auth_timeout(sdata->dev, wk->auth.bssid); - break; - case RX_MGMT_CFG80211_ASSOC_TO: - cfg80211_send_assoc_timeout(sdata->dev, - wk->assoc.bssid); - break; - default: - WARN(1, "unexpected: %lu", wk->timeout); - } - - list_del(&wk->list); - kfree(wk); - } - - ieee80211_recalc_idle(local); } static void ieee80211_sta_bcn_mon_timer(unsigned long data) @@ -2400,12 +1679,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) (unsigned long) sdata); skb_queue_head_init(&ifmgd->skb_queue); - INIT_LIST_HEAD(&ifmgd->work_list); - - ifmgd->capab = WLAN_CAPABILITY_ESS; ifmgd->flags = 0; - if (sdata->local->hw.queues >= 4) - ifmgd->flags |= IEEE80211_STA_WMM_ENABLED; mutex_init(&ifmgd->mtx); @@ -2443,10 +1717,32 @@ int ieee80211_max_network_latency(struct notifier_block *nb, } /* config hooks */ +static enum work_done_result +ieee80211_probe_auth_done(struct ieee80211_work *wk, + struct sk_buff *skb) +{ + if (!skb) { + cfg80211_send_auth_timeout(wk->sdata->dev, wk->filter_ta); + return WORK_DONE_DESTROY; + } + + if (wk->type == IEEE80211_WORK_AUTH) { + cfg80211_send_rx_auth(wk->sdata->dev, skb->data, skb->len); + return WORK_DONE_DESTROY; + } + + mutex_lock(&wk->sdata->u.mgd.mtx); + ieee80211_rx_mgmt_probe_resp(wk->sdata, skb); + mutex_unlock(&wk->sdata->u.mgd.mtx); + + wk->type = IEEE80211_WORK_AUTH; + wk->probe_auth.tries = 0; + return WORK_DONE_REQUEUE; +} + int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, struct cfg80211_auth_request *req) { - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; const u8 *ssid; struct ieee80211_work *wk; u16 auth_alg; @@ -2472,7 +1768,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, if (!wk) return -ENOMEM; - memcpy(wk->auth.bssid, req->bss->bssid, ETH_ALEN);; + memcpy(wk->filter_ta, req->bss->bssid, ETH_ALEN);; if (req->ie && req->ie_len) { memcpy(wk->ie, req->ie, req->ie_len); @@ -2480,21 +1776,22 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, } if (req->key && req->key_len) { - wk->auth.key_len = req->key_len; - wk->auth.key_idx = req->key_idx; - memcpy(wk->auth.key, req->key, req->key_len); + wk->probe_auth.key_len = req->key_len; + wk->probe_auth.key_idx = req->key_idx; + memcpy(wk->probe_auth.key, req->key, req->key_len); } ssid = ieee80211_bss_get_ie(req->bss, WLAN_EID_SSID); - memcpy(wk->auth.ssid, ssid + 
2, ssid[1]); - wk->auth.ssid_len = ssid[1]; + memcpy(wk->probe_auth.ssid, ssid + 2, ssid[1]); + wk->probe_auth.ssid_len = ssid[1]; - wk->auth.algorithm = auth_alg; - wk->auth.privacy = req->bss->capability & WLAN_CAPABILITY_PRIVACY; + wk->probe_auth.algorithm = auth_alg; + wk->probe_auth.privacy = req->bss->capability & WLAN_CAPABILITY_PRIVACY; - wk->type = IEEE80211_WORK_AUTH_PROBE; - wk->timeout = jiffies; /* run right away */ + wk->type = IEEE80211_WORK_DIRECT_PROBE; wk->chan = req->bss->channel; + wk->sdata = sdata; + wk->done = ieee80211_probe_auth_done; /* * XXX: if still associated need to tell AP that we're going @@ -2505,29 +1802,58 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, sdata->local->oper_channel = req->bss->channel; ieee80211_hw_config(sdata->local, 0); - mutex_lock(&ifmgd->mtx); - list_add(&wk->list, &sdata->u.mgd.work_list); - mutex_unlock(&ifmgd->mtx); - - ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.work); + ieee80211_add_work(wk); return 0; } +static enum work_done_result ieee80211_assoc_done(struct ieee80211_work *wk, + struct sk_buff *skb) +{ + struct ieee80211_mgmt *mgmt; + u16 status; + + if (!skb) { + cfg80211_send_assoc_timeout(wk->sdata->dev, wk->filter_ta); + return WORK_DONE_DESTROY; + } + + mgmt = (void *)skb->data; + status = le16_to_cpu(mgmt->u.assoc_resp.status_code); + + if (status == WLAN_STATUS_SUCCESS) { + mutex_lock(&wk->sdata->u.mgd.mtx); + if (!ieee80211_assoc_success(wk, mgmt, skb->len)) { + mutex_unlock(&wk->sdata->u.mgd.mtx); + /* oops -- internal error -- send timeout for now */ + cfg80211_send_assoc_timeout(wk->sdata->dev, + wk->filter_ta); + return WORK_DONE_DESTROY; + } + mutex_unlock(&wk->sdata->u.mgd.mtx); + } + + cfg80211_send_rx_assoc(wk->sdata->dev, skb->data, skb->len); + return WORK_DONE_DESTROY; +} + int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, struct cfg80211_assoc_request *req) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_work *wk; const u8 *ssid; - int i, err; + int i; mutex_lock(&ifmgd->mtx); + if (ifmgd->associated) { + mutex_unlock(&ifmgd->mtx); + return -EALREADY; + } + mutex_unlock(&ifmgd->mtx); wk = kzalloc(sizeof(*wk) + req->ie_len, GFP_KERNEL); - if (!wk) { - err = -ENOMEM; - goto out; - } + if (!wk) + return -ENOMEM; ifmgd->flags &= ~IEEE80211_STA_DISABLE_11N; @@ -2546,8 +1872,19 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, wk->assoc.bss = (void *)req->bss; - memcpy(wk->assoc.bssid, req->bss->bssid, ETH_ALEN); + memcpy(wk->filter_ta, req->bss->bssid, ETH_ALEN); + /* new association always uses requested smps mode */ + if (ifmgd->req_smps == IEEE80211_SMPS_AUTOMATIC) { + if (ifmgd->powersave) + ifmgd->ap_smps = IEEE80211_SMPS_DYNAMIC; + else + ifmgd->ap_smps = IEEE80211_SMPS_OFF; + } else + ifmgd->ap_smps = ifmgd->req_smps; + + wk->assoc.smps = ifmgd->ap_smps; + wk->assoc.use_11n = !(ifmgd->flags & IEEE80211_STA_DISABLE_11N); wk->assoc.capability = req->bss->capability; wk->assoc.wmm_used = wk->assoc.bss->wmm_used; wk->assoc.supp_rates = wk->assoc.bss->supp_rates; @@ -2563,8 +1900,9 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, memcpy(wk->assoc.prev_bssid, req->prev_bssid, ETH_ALEN); wk->type = IEEE80211_WORK_ASSOC; - wk->timeout = jiffies; /* run right away */ wk->chan = req->bss->channel; + wk->sdata = sdata; + wk->done = ieee80211_assoc_done; if (req->use_mfp) { ifmgd->mfp = IEEE80211_MFP_REQUIRED; @@ -2582,56 +1920,56 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, sdata->local->oper_channel = 
req->bss->channel; ieee80211_hw_config(sdata->local, 0); - list_add(&wk->list, &ifmgd->work_list); - ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.work); - - err = 0; - - out: - mutex_unlock(&ifmgd->mtx); - return err; + ieee80211_add_work(wk); + return 0; } int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, struct cfg80211_deauth_request *req, void *cookie) { + struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_work *wk; const u8 *bssid = req->bss->bssid; - bool not_auth_yet = false; mutex_lock(&ifmgd->mtx); if (ifmgd->associated && &ifmgd->associated->cbss == req->bss) { bssid = req->bss->bssid; ieee80211_set_disassoc(sdata); - } else list_for_each_entry(wk, &ifmgd->work_list, list) { - if (wk->type != IEEE80211_WORK_AUTH_PROBE) - continue; - if (memcmp(req->bss->bssid, wk->auth.bssid, ETH_ALEN)) - continue; - not_auth_yet = true; - list_del(&wk->list); - kfree(wk); - break; - } + mutex_unlock(&ifmgd->mtx); + } else { + bool not_auth_yet = false; - /* - * If somebody requests authentication and we haven't - * sent out an auth frame yet there's no need to send - * out a deauth frame either. If the state was PROBE, - * then this is the case. If it's AUTH we have sent a - * frame, and if it's IDLE we have completed the auth - * process already. - */ - if (not_auth_yet) { mutex_unlock(&ifmgd->mtx); - __cfg80211_auth_canceled(sdata->dev, bssid); - return 0; - } - mutex_unlock(&ifmgd->mtx); + mutex_lock(&local->work_mtx); + list_for_each_entry(wk, &local->work_list, list) { + if (wk->type != IEEE80211_WORK_DIRECT_PROBE) + continue; + if (memcmp(req->bss->bssid, wk->filter_ta, ETH_ALEN)) + continue; + not_auth_yet = true; + list_del(&wk->list); + free_work(wk); + break; + } + mutex_unlock(&local->work_mtx); + + /* + * If somebody requests authentication and we haven't + * sent out an auth frame yet there's no need to send + * out a deauth frame either. If the state was PROBE, + * then this is the case. If it's AUTH we have sent a + * frame, and if it's IDLE we have completed the auth + * process already. 
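/*
 * A minimal sketch of the work-item pattern the config hooks above now
 * follow (the "example_*" names and the callback body are illustrative
 * assumptions, not code from this series): allocate the item together
 * with any extra IEs, fill in the type, channel, sdata, peer address
 * filter and a completion callback, then hand it to ieee80211_add_work(),
 * which queues local->work_work so the state machine in work.c drives the
 * retries and eventually invokes ->done().
 */
static enum work_done_result
example_done(struct ieee80211_work *wk, struct sk_buff *skb)
{
	if (!skb)	/* the work timed out without a matching frame */
		return WORK_DONE_DESTROY;

	/* the frame that completed the work is in skb->data / skb->len */
	return WORK_DONE_DESTROY;
}

static int example_start_probe_auth(struct ieee80211_sub_if_data *sdata,
				    struct cfg80211_bss *bss,
				    const u8 *ie, size_t ie_len)
{
	struct ieee80211_work *wk;

	wk = kzalloc(sizeof(*wk) + ie_len, GFP_KERNEL);
	if (!wk)
		return -ENOMEM;

	memcpy(wk->filter_ta, bss->bssid, ETH_ALEN);

	if (ie && ie_len) {
		memcpy(wk->ie, ie, ie_len);
		wk->ie_len = ie_len;
	}

	/* a real caller also fills wk->probe_auth.ssid/ssid_len etc.,
	 * as ieee80211_mgd_auth() does above */
	wk->type = IEEE80211_WORK_DIRECT_PROBE;
	wk->chan = bss->channel;
	wk->sdata = sdata;
	wk->done = example_done;

	ieee80211_add_work(wk);
	return 0;
}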
+ */ + if (not_auth_yet) { + __cfg80211_auth_canceled(sdata->dev, bssid); + return 0; + } + } printk(KERN_DEBUG "%s: deauthenticating from %pM by local choice (reason=%d)\n", sdata->name, bssid, req->reason_code); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index f60dfca52196..bfcf09eb64b4 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1945,6 +1945,7 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) { struct ieee80211_sub_if_data *sdata = rx->sdata; struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; + ieee80211_rx_result rxs; if (!(rx->flags & IEEE80211_RX_RA_MATCH)) return RX_DROP_MONITOR; @@ -1952,6 +1953,10 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) if (ieee80211_drop_unencrypted(rx, mgmt->frame_control)) return RX_DROP_MONITOR; + rxs = ieee80211_work_rx_mgmt(rx->sdata, rx->skb); + if (rxs != RX_CONTINUE) + return rxs; + if (ieee80211_vif_is_mesh(&sdata->vif)) return ieee80211_mesh_rx_mgmt(sdata, rx->skb); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index d98c45e5528b..fb89e4c0fbfd 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -434,7 +434,6 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, struct cfg80211_scan_request *req) { struct ieee80211_local *local = sdata->local; - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; int rc; if (local->scan_req) @@ -464,11 +463,8 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, local->scan_req = req; local->scan_sdata = sdata; - if (req != local->int_scan_req && - sdata->vif.type == NL80211_IFTYPE_STATION && - !list_empty(&ifmgd->work_list)) { - /* actually wait for the work it's doing to finish/time out */ - set_bit(IEEE80211_STA_REQ_SCAN, &ifmgd->request); + if (!list_empty(&local->work_list)) { + /* wait for the work to finish/time out */ return 0; } diff --git a/net/mac80211/work.c b/net/mac80211/work.c new file mode 100644 index 000000000000..8b8961d806ab --- /dev/null +++ b/net/mac80211/work.c @@ -0,0 +1,902 @@ +/* + * mac80211 work implementation + * + * Copyright 2003-2008, Jouni Malinen + * Copyright 2004, Instant802 Networks, Inc. + * Copyright 2005, Devicescape Software, Inc. + * Copyright 2006-2007 Jiri Benc + * Copyright 2007, Michael Wu + * Copyright 2009, Johannes Berg + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ieee80211_i.h" +#include "rate.h" + +#define IEEE80211_AUTH_TIMEOUT (HZ / 5) +#define IEEE80211_AUTH_MAX_TRIES 3 +#define IEEE80211_ASSOC_TIMEOUT (HZ / 5) +#define IEEE80211_ASSOC_MAX_TRIES 3 +#define IEEE80211_MAX_PROBE_TRIES 5 + +enum work_action { + WORK_ACT_NONE, + WORK_ACT_TIMEOUT, + WORK_ACT_DONE, +}; + + +/* utils */ +static inline void ASSERT_WORK_MTX(struct ieee80211_local *local) +{ + WARN_ON(!mutex_is_locked(&local->work_mtx)); +} + +/* + * We can have multiple work items (and connection probing) + * scheduling this timer, but we need to take care to only + * reschedule it when it should fire _earlier_ than it was + * asked for before, or if it's not pending right now. This + * function ensures that. Note that it then is required to + * run this function for all timeouts after the first one + * has happened -- the work that runs from this timer will + * do that. 
+ */ +static void run_again(struct ieee80211_local *local, + unsigned long timeout) +{ + ASSERT_WORK_MTX(local); + + if (!timer_pending(&local->work_timer) || + time_before(timeout, local->work_timer.expires)) + mod_timer(&local->work_timer, timeout); +} + +static void work_free_rcu(struct rcu_head *head) +{ + struct ieee80211_work *wk = + container_of(head, struct ieee80211_work, rcu_head); + + kfree(wk); +} + +void free_work(struct ieee80211_work *wk) +{ + call_rcu(&wk->rcu_head, work_free_rcu); +} + +static int ieee80211_compatible_rates(const u8 *supp_rates, int supp_rates_len, + struct ieee80211_supported_band *sband, + u32 *rates) +{ + int i, j, count; + *rates = 0; + count = 0; + for (i = 0; i < supp_rates_len; i++) { + int rate = (supp_rates[i] & 0x7F) * 5; + + for (j = 0; j < sband->n_bitrates; j++) + if (sband->bitrates[j].bitrate == rate) { + *rates |= BIT(j); + count++; + break; + } + } + + return count; +} + +/* frame sending functions */ + +static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, + struct ieee80211_work *wk) +{ + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb; + struct ieee80211_mgmt *mgmt; + u8 *pos; + const u8 *ies, *ht_ie; + int i, len, count, rates_len, supp_rates_len; + u16 capab; + struct ieee80211_supported_band *sband; + u32 rates = 0; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + + sizeof(*mgmt) + 200 + wk->ie_len + + wk->assoc.ssid_len); + if (!skb) { + printk(KERN_DEBUG "%s: failed to allocate buffer for assoc " + "frame\n", sdata->name); + return; + } + skb_reserve(skb, local->hw.extra_tx_headroom); + + sband = local->hw.wiphy->bands[wk->chan->band]; + + capab = WLAN_CAPABILITY_ESS; + + if (sband->band == IEEE80211_BAND_2GHZ) { + if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE)) + capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME; + if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE)) + capab |= WLAN_CAPABILITY_SHORT_PREAMBLE; + } + + if (wk->assoc.capability & WLAN_CAPABILITY_PRIVACY) + capab |= WLAN_CAPABILITY_PRIVACY; + + /* + * Get all rates supported by the device and the AP as + * some APs don't like getting a superset of their rates + * in the association request (e.g. D-Link DAP 1353 in + * b-only mode)... 
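/*
 * Worked example for the rate matching used here (the numbers are an
 * assumption for illustration, not taken from this series): a Supported
 * Rates IE carries rates in units of 500 kb/s with bit 7 flagging basic
 * rates, so a b-only AP advertising 1/2/5.5/11 Mb/s sends the octets
 * 0x82 0x84 0x8b 0x96.  Masking with 0x7F and multiplying by 5 yields
 * 10/20/55/110 in the 100 kb/s units of sband->bitrates[], so only the
 * CCK entries match and the association request never lists OFDM rates
 * the AP did not announce.
 */
static int example_cck_only_rates(struct ieee80211_supported_band *sband)
{
	/* 1, 2, 5.5 and 11 Mb/s, all marked as basic rates */
	static const u8 ap_rates[] = { 0x82, 0x84, 0x8b, 0x96 };
	u32 rates;

	/* on a 2.4 GHz band this returns 4 and sets one bit per match */
	return ieee80211_compatible_rates(ap_rates, ARRAY_SIZE(ap_rates),
					  sband, &rates);
}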
+ */ + rates_len = ieee80211_compatible_rates(wk->assoc.supp_rates, + wk->assoc.supp_rates_len, + sband, &rates); + + if ((wk->assoc.capability & WLAN_CAPABILITY_SPECTRUM_MGMT) && + (local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT)) + capab |= WLAN_CAPABILITY_SPECTRUM_MGMT; + + mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); + memset(mgmt, 0, 24); + memcpy(mgmt->da, wk->filter_ta, ETH_ALEN); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); + memcpy(mgmt->bssid, wk->filter_ta, ETH_ALEN); + + if (!is_zero_ether_addr(wk->assoc.prev_bssid)) { + skb_put(skb, 10); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_REASSOC_REQ); + mgmt->u.reassoc_req.capab_info = cpu_to_le16(capab); + mgmt->u.reassoc_req.listen_interval = + cpu_to_le16(local->hw.conf.listen_interval); + memcpy(mgmt->u.reassoc_req.current_ap, wk->assoc.prev_bssid, + ETH_ALEN); + } else { + skb_put(skb, 4); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ASSOC_REQ); + mgmt->u.assoc_req.capab_info = cpu_to_le16(capab); + mgmt->u.assoc_req.listen_interval = + cpu_to_le16(local->hw.conf.listen_interval); + } + + /* SSID */ + ies = pos = skb_put(skb, 2 + wk->assoc.ssid_len); + *pos++ = WLAN_EID_SSID; + *pos++ = wk->assoc.ssid_len; + memcpy(pos, wk->assoc.ssid, wk->assoc.ssid_len); + + /* add all rates which were marked to be used above */ + supp_rates_len = rates_len; + if (supp_rates_len > 8) + supp_rates_len = 8; + + len = sband->n_bitrates; + pos = skb_put(skb, supp_rates_len + 2); + *pos++ = WLAN_EID_SUPP_RATES; + *pos++ = supp_rates_len; + + count = 0; + for (i = 0; i < sband->n_bitrates; i++) { + if (BIT(i) & rates) { + int rate = sband->bitrates[i].bitrate; + *pos++ = (u8) (rate / 5); + if (++count == 8) + break; + } + } + + if (rates_len > count) { + pos = skb_put(skb, rates_len - count + 2); + *pos++ = WLAN_EID_EXT_SUPP_RATES; + *pos++ = rates_len - count; + + for (i++; i < sband->n_bitrates; i++) { + if (BIT(i) & rates) { + int rate = sband->bitrates[i].bitrate; + *pos++ = (u8) (rate / 5); + } + } + } + + if (capab & WLAN_CAPABILITY_SPECTRUM_MGMT) { + /* 1. power capabilities */ + pos = skb_put(skb, 4); + *pos++ = WLAN_EID_PWR_CAPABILITY; + *pos++ = 2; + *pos++ = 0; /* min tx power */ + *pos++ = local->hw.conf.channel->max_power; /* max tx power */ + + /* 2. supported channels */ + /* TODO: get this in reg domain format */ + pos = skb_put(skb, 2 * sband->n_channels + 2); + *pos++ = WLAN_EID_SUPPORTED_CHANNELS; + *pos++ = 2 * sband->n_channels; + for (i = 0; i < sband->n_channels; i++) { + *pos++ = ieee80211_frequency_to_channel( + sband->channels[i].center_freq); + *pos++ = 1; /* one channel in the subband*/ + } + } + + if (wk->ie_len && wk->ie) { + pos = skb_put(skb, wk->ie_len); + memcpy(pos, wk->ie, wk->ie_len); + } + + if (wk->assoc.wmm_used && local->hw.queues >= 4) { + pos = skb_put(skb, 9); + *pos++ = WLAN_EID_VENDOR_SPECIFIC; + *pos++ = 7; /* len */ + *pos++ = 0x00; /* Microsoft OUI 00:50:F2 */ + *pos++ = 0x50; + *pos++ = 0xf2; + *pos++ = 2; /* WME */ + *pos++ = 0; /* WME info */ + *pos++ = 1; /* WME ver */ + *pos++ = 0; + } + + /* wmm support is a must to HT */ + /* + * IEEE802.11n does not allow TKIP/WEP as pairwise + * ciphers in HT mode. We still associate in non-ht + * mode (11a/b/g) if any one of these ciphers is + * configured as pairwise. 
+ */ + if (wk->assoc.use_11n && wk->assoc.wmm_used && + (local->hw.queues >= 4) && + sband->ht_cap.ht_supported && + (ht_ie = wk->assoc.ht_information_ie) && + ht_ie[1] >= sizeof(struct ieee80211_ht_info)) { + struct ieee80211_ht_info *ht_info = + (struct ieee80211_ht_info *)(ht_ie + 2); + u16 cap = sband->ht_cap.cap; + __le16 tmp; + u32 flags = local->hw.conf.channel->flags; + + /* determine capability flags */ + + if (ieee80211_disable_40mhz_24ghz && + sband->band == IEEE80211_BAND_2GHZ) { + cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; + cap &= ~IEEE80211_HT_CAP_SGI_40; + } + + switch (ht_info->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { + case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: + if (flags & IEEE80211_CHAN_NO_HT40PLUS) { + cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; + cap &= ~IEEE80211_HT_CAP_SGI_40; + } + break; + case IEEE80211_HT_PARAM_CHA_SEC_BELOW: + if (flags & IEEE80211_CHAN_NO_HT40MINUS) { + cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; + cap &= ~IEEE80211_HT_CAP_SGI_40; + } + break; + } + + /* set SM PS mode properly */ + cap &= ~IEEE80211_HT_CAP_SM_PS; + switch (wk->assoc.smps) { + case IEEE80211_SMPS_AUTOMATIC: + case IEEE80211_SMPS_NUM_MODES: + WARN_ON(1); + case IEEE80211_SMPS_OFF: + cap |= WLAN_HT_CAP_SM_PS_DISABLED << + IEEE80211_HT_CAP_SM_PS_SHIFT; + break; + case IEEE80211_SMPS_STATIC: + cap |= WLAN_HT_CAP_SM_PS_STATIC << + IEEE80211_HT_CAP_SM_PS_SHIFT; + break; + case IEEE80211_SMPS_DYNAMIC: + cap |= WLAN_HT_CAP_SM_PS_DYNAMIC << + IEEE80211_HT_CAP_SM_PS_SHIFT; + break; + } + + /* reserve and fill IE */ + + pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2); + *pos++ = WLAN_EID_HT_CAPABILITY; + *pos++ = sizeof(struct ieee80211_ht_cap); + memset(pos, 0, sizeof(struct ieee80211_ht_cap)); + + /* capability flags */ + tmp = cpu_to_le16(cap); + memcpy(pos, &tmp, sizeof(u16)); + pos += sizeof(u16); + + /* AMPDU parameters */ + *pos++ = sband->ht_cap.ampdu_factor | + (sband->ht_cap.ampdu_density << + IEEE80211_HT_AMPDU_PARM_DENSITY_SHIFT); + + /* MCS set */ + memcpy(pos, &sband->ht_cap.mcs, sizeof(sband->ht_cap.mcs)); + pos += sizeof(sband->ht_cap.mcs); + + /* extended capabilities */ + pos += sizeof(__le16); + + /* BF capabilities */ + pos += sizeof(__le32); + + /* antenna selection */ + pos += sizeof(u8); + } + + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + ieee80211_tx_skb(sdata, skb); +} + +static void ieee80211_remove_auth_bss(struct ieee80211_local *local, + struct ieee80211_work *wk) +{ + struct cfg80211_bss *cbss; + u16 capa_val = WLAN_CAPABILITY_ESS; + + if (wk->probe_auth.privacy) + capa_val |= WLAN_CAPABILITY_PRIVACY; + + cbss = cfg80211_get_bss(local->hw.wiphy, wk->chan, wk->filter_ta, + wk->probe_auth.ssid, wk->probe_auth.ssid_len, + WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_PRIVACY, + capa_val); + if (!cbss) + return; + + cfg80211_unlink_bss(local->hw.wiphy, cbss); + cfg80211_put_bss(cbss); +} + +static enum work_action __must_check +ieee80211_direct_probe(struct ieee80211_work *wk) +{ + struct ieee80211_sub_if_data *sdata = wk->sdata; + struct ieee80211_local *local = sdata->local; + + wk->probe_auth.tries++; + if (wk->probe_auth.tries > IEEE80211_AUTH_MAX_TRIES) { + printk(KERN_DEBUG "%s: direct probe to AP %pM timed out\n", + sdata->name, wk->filter_ta); + + /* + * Most likely AP is not in the range so remove the + * bss struct for that AP. + */ + ieee80211_remove_auth_bss(local, wk); + + /* + * We might have a pending scan which had no chance to run yet + * due to work needing to be done. Hence, queue the STAs work + * again for that. 
+ */ + ieee80211_queue_work(&local->hw, &local->work_work); + return WORK_ACT_TIMEOUT; + } + + printk(KERN_DEBUG "%s: direct probe to AP %pM (try %d)\n", + sdata->name, wk->filter_ta, wk->probe_auth.tries); + + /* + * Direct probe is sent to broadcast address as some APs + * will not answer to direct packet in unassociated state. + */ + ieee80211_send_probe_req(sdata, NULL, wk->probe_auth.ssid, + wk->probe_auth.ssid_len, NULL, 0); + + wk->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; + run_again(local, wk->timeout); + + return WORK_ACT_NONE; +} + + +static enum work_action __must_check +ieee80211_authenticate(struct ieee80211_work *wk) +{ + struct ieee80211_sub_if_data *sdata = wk->sdata; + struct ieee80211_local *local = sdata->local; + + wk->probe_auth.tries++; + if (wk->probe_auth.tries > IEEE80211_AUTH_MAX_TRIES) { + printk(KERN_DEBUG "%s: authentication with AP %pM" + " timed out\n", sdata->name, wk->filter_ta); + + /* + * Most likely AP is not in the range so remove the + * bss struct for that AP. + */ + ieee80211_remove_auth_bss(local, wk); + + /* + * We might have a pending scan which had no chance to run yet + * due to work needing to be done. Hence, queue the STAs work + * again for that. + */ + ieee80211_queue_work(&local->hw, &local->work_work); + return WORK_ACT_TIMEOUT; + } + + printk(KERN_DEBUG "%s: authenticate with AP %pM (try %d)\n", + sdata->name, wk->filter_ta, wk->probe_auth.tries); + + ieee80211_send_auth(sdata, 1, wk->probe_auth.algorithm, wk->ie, + wk->ie_len, wk->filter_ta, NULL, 0, 0); + wk->probe_auth.transaction = 2; + + wk->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; + run_again(local, wk->timeout); + + return WORK_ACT_NONE; +} + +static enum work_action __must_check +ieee80211_associate(struct ieee80211_work *wk) +{ + struct ieee80211_sub_if_data *sdata = wk->sdata; + struct ieee80211_local *local = sdata->local; + + wk->assoc.tries++; + if (wk->assoc.tries > IEEE80211_ASSOC_MAX_TRIES) { + printk(KERN_DEBUG "%s: association with AP %pM" + " timed out\n", + sdata->name, wk->filter_ta); + + /* + * Most likely AP is not in the range so remove the + * bss struct for that AP. + */ + if (wk->assoc.bss) + cfg80211_unlink_bss(local->hw.wiphy, + &wk->assoc.bss->cbss); + + /* + * We might have a pending scan which had no chance to run yet + * due to work needing to be done. Hence, queue the STAs work + * again for that. 
+ */ + ieee80211_queue_work(&local->hw, &local->work_work); + return WORK_ACT_TIMEOUT; + } + + printk(KERN_DEBUG "%s: associate with AP %pM (try %d)\n", + sdata->name, wk->filter_ta, wk->assoc.tries); + ieee80211_send_assoc(sdata, wk); + + wk->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT; + run_again(local, wk->timeout); + + return WORK_ACT_NONE; +} + +static void ieee80211_auth_challenge(struct ieee80211_work *wk, + struct ieee80211_mgmt *mgmt, + size_t len) +{ + struct ieee80211_sub_if_data *sdata = wk->sdata; + u8 *pos; + struct ieee802_11_elems elems; + + pos = mgmt->u.auth.variable; + ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); + if (!elems.challenge) + return; + ieee80211_send_auth(sdata, 3, wk->probe_auth.algorithm, + elems.challenge - 2, elems.challenge_len + 2, + wk->filter_ta, wk->probe_auth.key, + wk->probe_auth.key_len, wk->probe_auth.key_idx); + wk->probe_auth.transaction = 4; +} + +static enum work_action __must_check +ieee80211_rx_mgmt_auth(struct ieee80211_work *wk, + struct ieee80211_mgmt *mgmt, size_t len) +{ + u16 auth_alg, auth_transaction, status_code; + + if (wk->type != IEEE80211_WORK_AUTH) + return WORK_ACT_NONE; + + if (len < 24 + 6) + return WORK_ACT_NONE; + + auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg); + auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); + status_code = le16_to_cpu(mgmt->u.auth.status_code); + + if (auth_alg != wk->probe_auth.algorithm || + auth_transaction != wk->probe_auth.transaction) + return WORK_ACT_NONE; + + if (status_code != WLAN_STATUS_SUCCESS) { + printk(KERN_DEBUG "%s: %pM denied authentication (status %d)\n", + wk->sdata->name, mgmt->sa, status_code); + return WORK_ACT_DONE; + } + + switch (wk->probe_auth.algorithm) { + case WLAN_AUTH_OPEN: + case WLAN_AUTH_LEAP: + case WLAN_AUTH_FT: + break; + case WLAN_AUTH_SHARED_KEY: + if (wk->probe_auth.transaction != 4) { + ieee80211_auth_challenge(wk, mgmt, len); + /* need another frame */ + return WORK_ACT_NONE; + } + break; + default: + WARN_ON(1); + return WORK_ACT_NONE; + } + + printk(KERN_DEBUG "%s: authenticated\n", wk->sdata->name); + return WORK_ACT_DONE; +} + +static enum work_action __must_check +ieee80211_rx_mgmt_assoc_resp(struct ieee80211_work *wk, + struct ieee80211_mgmt *mgmt, size_t len, + bool reassoc) +{ + struct ieee80211_sub_if_data *sdata = wk->sdata; + struct ieee80211_local *local = sdata->local; + u16 capab_info, status_code, aid; + struct ieee802_11_elems elems; + u8 *pos; + + /* + * AssocResp and ReassocResp have identical structure, so process both + * of them in this function. + */ + + if (len < 24 + 6) + return WORK_ACT_NONE; + + capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info); + status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code); + aid = le16_to_cpu(mgmt->u.assoc_resp.aid); + + printk(KERN_DEBUG "%s: RX %sssocResp from %pM (capab=0x%x " + "status=%d aid=%d)\n", + sdata->name, reassoc ? 
"Rea" : "A", mgmt->sa, + capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14)))); + + pos = mgmt->u.assoc_resp.variable; + ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); + + if (status_code == WLAN_STATUS_ASSOC_REJECTED_TEMPORARILY && + elems.timeout_int && elems.timeout_int_len == 5 && + elems.timeout_int[0] == WLAN_TIMEOUT_ASSOC_COMEBACK) { + u32 tu, ms; + tu = get_unaligned_le32(elems.timeout_int + 1); + ms = tu * 1024 / 1000; + printk(KERN_DEBUG "%s: AP rejected association temporarily; " + "comeback duration %u TU (%u ms)\n", + sdata->name, tu, ms); + wk->timeout = jiffies + msecs_to_jiffies(ms); + if (ms > IEEE80211_ASSOC_TIMEOUT) + run_again(local, wk->timeout); + return WORK_ACT_NONE; + } + + if (status_code != WLAN_STATUS_SUCCESS) + printk(KERN_DEBUG "%s: AP denied association (code=%d)\n", + sdata->name, status_code); + else + printk(KERN_DEBUG "%s: associated\n", sdata->name); + + return WORK_ACT_DONE; +} + +static enum work_action __must_check +ieee80211_rx_mgmt_probe_resp(struct ieee80211_work *wk, + struct ieee80211_mgmt *mgmt, size_t len, + struct ieee80211_rx_status *rx_status) +{ + struct ieee80211_sub_if_data *sdata = wk->sdata; + struct ieee80211_local *local = sdata->local; + size_t baselen; + + ASSERT_WORK_MTX(local); + + baselen = (u8 *) mgmt->u.probe_resp.variable - (u8 *) mgmt; + if (baselen > len) + return WORK_ACT_NONE; + + printk(KERN_DEBUG "%s: direct probe responded\n", sdata->name); + return WORK_ACT_DONE; +} + +static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local, + struct sk_buff *skb) +{ + struct ieee80211_rx_status *rx_status; + struct ieee80211_mgmt *mgmt; + struct ieee80211_work *wk; + enum work_action rma = WORK_ACT_NONE; + u16 fc; + + rx_status = (struct ieee80211_rx_status *) skb->cb; + mgmt = (struct ieee80211_mgmt *) skb->data; + fc = le16_to_cpu(mgmt->frame_control); + + mutex_lock(&local->work_mtx); + + list_for_each_entry(wk, &local->work_list, list) { + const u8 *bssid = NULL; + + switch (wk->type) { + case IEEE80211_WORK_DIRECT_PROBE: + case IEEE80211_WORK_AUTH: + case IEEE80211_WORK_ASSOC: + bssid = wk->filter_ta; + break; + default: + continue; + } + + /* + * Before queuing, we already verified mgmt->sa, + * so this is needed just for matching. + */ + if (compare_ether_addr(bssid, mgmt->bssid)) + continue; + + switch (fc & IEEE80211_FCTL_STYPE) { + case IEEE80211_STYPE_PROBE_RESP: + rma = ieee80211_rx_mgmt_probe_resp(wk, mgmt, skb->len, + rx_status); + break; + case IEEE80211_STYPE_AUTH: + rma = ieee80211_rx_mgmt_auth(wk, mgmt, skb->len); + break; + case IEEE80211_STYPE_ASSOC_RESP: + rma = ieee80211_rx_mgmt_assoc_resp(wk, mgmt, + skb->len, false); + break; + case IEEE80211_STYPE_REASSOC_RESP: + rma = ieee80211_rx_mgmt_assoc_resp(wk, mgmt, + skb->len, true); + break; + default: + WARN_ON(1); + } + /* + * We've processed this frame for that work, so it can't + * belong to another work struct. + * NB: this is also required for correctness for 'rma'! 
+ */ + break; + } + + switch (rma) { + case WORK_ACT_NONE: + break; + case WORK_ACT_DONE: + list_del_rcu(&wk->list); + break; + default: + WARN(1, "unexpected: %d", rma); + } + + mutex_unlock(&local->work_mtx); + + if (rma != WORK_ACT_DONE) + goto out; + + switch (wk->done(wk, skb)) { + case WORK_DONE_DESTROY: + free_work(wk); + break; + case WORK_DONE_REQUEUE: + synchronize_rcu(); + wk->timeout = jiffies; /* run again directly */ + mutex_lock(&local->work_mtx); + list_add_tail(&wk->list, &local->work_list); + mutex_unlock(&local->work_mtx); + } + + out: + kfree_skb(skb); +} + +static void ieee80211_work_timer(unsigned long data) +{ + struct ieee80211_local *local = (void *) data; + + if (local->quiescing) + return; + + ieee80211_queue_work(&local->hw, &local->work_work); +} + +static void ieee80211_work_work(struct work_struct *work) +{ + struct ieee80211_local *local = + container_of(work, struct ieee80211_local, work_work); + struct sk_buff *skb; + struct ieee80211_work *wk, *tmp; + LIST_HEAD(free_work); + enum work_action rma; + + if (local->scanning) + return; + + /* + * ieee80211_queue_work() should have picked up most cases, + * here we'll pick the the rest. + */ + if (WARN(local->suspended, "work scheduled while going to suspend\n")) + return; + + /* first process frames to avoid timing out while a frame is pending */ + while ((skb = skb_dequeue(&local->work_skb_queue))) + ieee80211_work_rx_queued_mgmt(local, skb); + + ieee80211_recalc_idle(local); + + mutex_lock(&local->work_mtx); + + list_for_each_entry_safe(wk, tmp, &local->work_list, list) { + if (time_is_after_jiffies(wk->timeout)) { + /* + * This work item isn't supposed to be worked on + * right now, but take care to adjust the timer + * properly. + */ + run_again(local, wk->timeout); + continue; + } + + switch (wk->type) { + default: + WARN_ON(1); + /* nothing */ + rma = WORK_ACT_NONE; + break; + case IEEE80211_WORK_DIRECT_PROBE: + rma = ieee80211_direct_probe(wk); + break; + case IEEE80211_WORK_AUTH: + rma = ieee80211_authenticate(wk); + break; + case IEEE80211_WORK_ASSOC: + rma = ieee80211_associate(wk); + break; + } + + switch (rma) { + case WORK_ACT_NONE: + /* no action required */ + break; + case WORK_ACT_TIMEOUT: + list_del_rcu(&wk->list); + synchronize_rcu(); + list_add(&wk->list, &free_work); + break; + default: + WARN(1, "unexpected: %d", rma); + } + } + + if (list_empty(&local->work_list) && local->scan_req) + ieee80211_queue_delayed_work(&local->hw, + &local->scan_work, + round_jiffies_relative(0)); + + mutex_unlock(&local->work_mtx); + + list_for_each_entry_safe(wk, tmp, &free_work, list) { + wk->done(wk, NULL); + list_del(&wk->list); + kfree(wk); + } +} + +void ieee80211_add_work(struct ieee80211_work *wk) +{ + struct ieee80211_local *local; + + if (WARN_ON(!wk->chan)) + return; + + if (WARN_ON(!wk->sdata)) + return; + + if (WARN_ON(!wk->done)) + return; + + wk->timeout = jiffies; + + local = wk->sdata->local; + mutex_lock(&local->work_mtx); + list_add_tail(&wk->list, &local->work_list); + mutex_unlock(&local->work_mtx); + + ieee80211_queue_work(&local->hw, &local->work_work); +} + +void ieee80211_work_init(struct ieee80211_local *local) +{ + mutex_init(&local->work_mtx); + INIT_LIST_HEAD(&local->work_list); + setup_timer(&local->work_timer, ieee80211_work_timer, + (unsigned long)local); + INIT_WORK(&local->work_work, ieee80211_work_work); + skb_queue_head_init(&local->work_skb_queue); +} + +void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + 
struct ieee80211_work *wk, *tmp; + + mutex_lock(&local->work_mtx); + list_for_each_entry_safe(wk, tmp, &local->work_list, list) { + if (wk->sdata != sdata) + continue; + list_del(&wk->list); + free_work(wk); + } + mutex_unlock(&local->work_mtx); +} + +ieee80211_rx_result ieee80211_work_rx_mgmt(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_mgmt *mgmt; + struct ieee80211_work *wk; + u16 fc; + + if (skb->len < 24) + return RX_DROP_MONITOR; + + mgmt = (struct ieee80211_mgmt *) skb->data; + fc = le16_to_cpu(mgmt->frame_control); + + list_for_each_entry_rcu(wk, &local->work_list, list) { + if (sdata != wk->sdata) + continue; + if (compare_ether_addr(wk->filter_ta, mgmt->sa)) + continue; + if (compare_ether_addr(wk->filter_ta, mgmt->bssid)) + continue; + + switch (fc & IEEE80211_FCTL_STYPE) { + case IEEE80211_STYPE_AUTH: + case IEEE80211_STYPE_PROBE_RESP: + case IEEE80211_STYPE_ASSOC_RESP: + case IEEE80211_STYPE_REASSOC_RESP: + case IEEE80211_STYPE_DEAUTH: + case IEEE80211_STYPE_DISASSOC: + skb_queue_tail(&local->work_skb_queue, skb); + ieee80211_queue_work(&local->hw, &local->work_work); + return RX_QUEUED; + } + } + + return RX_CONTINUE; +} -- cgit v1.2.2 From 7d3a1c3b03c3a571a2c8c393b75558a5f4a7532a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 23 Dec 2009 13:15:36 +0100 Subject: mac80211: rewrite a few work messages The station we're authenticating/associating with may not always be an AP in the sense that word is mostly understood, so print only the MAC address of the peer instead. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/work.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mac80211/work.c b/net/mac80211/work.c index 8b8961d806ab..874345918e83 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -376,7 +376,7 @@ ieee80211_direct_probe(struct ieee80211_work *wk) wk->probe_auth.tries++; if (wk->probe_auth.tries > IEEE80211_AUTH_MAX_TRIES) { - printk(KERN_DEBUG "%s: direct probe to AP %pM timed out\n", + printk(KERN_DEBUG "%s: direct probe to %pM timed out\n", sdata->name, wk->filter_ta); /* @@ -394,7 +394,7 @@ ieee80211_direct_probe(struct ieee80211_work *wk) return WORK_ACT_TIMEOUT; } - printk(KERN_DEBUG "%s: direct probe to AP %pM (try %d)\n", + printk(KERN_DEBUG "%s: direct probe to %pM (try %d)\n", sdata->name, wk->filter_ta, wk->probe_auth.tries); /* @@ -419,7 +419,7 @@ ieee80211_authenticate(struct ieee80211_work *wk) wk->probe_auth.tries++; if (wk->probe_auth.tries > IEEE80211_AUTH_MAX_TRIES) { - printk(KERN_DEBUG "%s: authentication with AP %pM" + printk(KERN_DEBUG "%s: authentication with %pM" " timed out\n", sdata->name, wk->filter_ta); /* @@ -437,7 +437,7 @@ ieee80211_authenticate(struct ieee80211_work *wk) return WORK_ACT_TIMEOUT; } - printk(KERN_DEBUG "%s: authenticate with AP %pM (try %d)\n", + printk(KERN_DEBUG "%s: authenticate with %pM (try %d)\n", sdata->name, wk->filter_ta, wk->probe_auth.tries); ieee80211_send_auth(sdata, 1, wk->probe_auth.algorithm, wk->ie, @@ -458,7 +458,7 @@ ieee80211_associate(struct ieee80211_work *wk) wk->assoc.tries++; if (wk->assoc.tries > IEEE80211_ASSOC_MAX_TRIES) { - printk(KERN_DEBUG "%s: association with AP %pM" + printk(KERN_DEBUG "%s: association with %pM" " timed out\n", sdata->name, wk->filter_ta); @@ -479,7 +479,7 @@ ieee80211_associate(struct ieee80211_work *wk) return WORK_ACT_TIMEOUT; } - printk(KERN_DEBUG "%s: associate with AP %pM 
(try %d)\n", + printk(KERN_DEBUG "%s: associate with %pM (try %d)\n", sdata->name, wk->filter_ta, wk->assoc.tries); ieee80211_send_assoc(sdata, wk); @@ -592,9 +592,9 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_work *wk, u32 tu, ms; tu = get_unaligned_le32(elems.timeout_int + 1); ms = tu * 1024 / 1000; - printk(KERN_DEBUG "%s: AP rejected association temporarily; " + printk(KERN_DEBUG "%s: %pM rejected association temporarily; " "comeback duration %u TU (%u ms)\n", - sdata->name, tu, ms); + sdata->name, mgmt->sa, tu, ms); wk->timeout = jiffies + msecs_to_jiffies(ms); if (ms > IEEE80211_ASSOC_TIMEOUT) run_again(local, wk->timeout); @@ -602,8 +602,8 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_work *wk, } if (status_code != WLAN_STATUS_SUCCESS) - printk(KERN_DEBUG "%s: AP denied association (code=%d)\n", - sdata->name, status_code); + printk(KERN_DEBUG "%s: %pM denied association (code=%d)\n", + sdata->name, mgmt->sa, status_code); else printk(KERN_DEBUG "%s: associated\n", sdata->name); -- cgit v1.2.2 From 77c8144ad3ee7fae834e13cb7e83f5b7c8c5329e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 23 Dec 2009 13:15:37 +0100 Subject: mac80211: refactor association Refactor the code to reserve an skb of the right size (instead of hoping 200 bytes are enough forever), and also put HT IE generation into an own function. Additionally, put the HT IE before the vendor-specific WMM IE. This still leaves things not quite ordered correctly, due to user-specified IEs, add a note about that for now. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 7 ++ net/mac80211/work.c | 239 ++++++++++++++++++++++++++++------------------------ 2 files changed, 137 insertions(+), 109 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 7c1f91bcc834..ea434b5a779e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1884,6 +1884,13 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, ifmgd->ap_smps = ifmgd->req_smps; wk->assoc.smps = ifmgd->ap_smps; + /* + * IEEE802.11n does not allow TKIP/WEP as pairwise ciphers in HT mode. + * We still associate in non-HT mode (11a/b/g) if any one of these + * ciphers is configured as pairwise. + * We can set this to true for non-11n hardware, that'll be checked + * separately along with the peer capabilities. 
+ */ wk->assoc.use_11n = !(ifmgd->flags & IEEE80211_STA_DISABLE_11N); wk->assoc.capability = req->bss->capability; wk->assoc.wmm_used = wk->assoc.bss->wmm_used; diff --git a/net/mac80211/work.c b/net/mac80211/work.c index 874345918e83..c03c22d5bca3 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -100,6 +100,102 @@ static int ieee80211_compatible_rates(const u8 *supp_rates, int supp_rates_len, /* frame sending functions */ +static void ieee80211_add_ht_ie(struct sk_buff *skb, const u8 *ht_info_ie, + struct ieee80211_supported_band *sband, + struct ieee80211_channel *channel, + enum ieee80211_smps_mode smps) +{ + struct ieee80211_ht_info *ht_info; + u8 *pos; + u32 flags = channel->flags; + u16 cap = sband->ht_cap.cap; + __le16 tmp; + + if (!sband->ht_cap.ht_supported) + return; + + if (!ht_info_ie) + return; + + if (ht_info_ie[1] < sizeof(struct ieee80211_ht_info)) + return; + + ht_info = (struct ieee80211_ht_info *)(ht_info_ie + 2); + + /* determine capability flags */ + + if (ieee80211_disable_40mhz_24ghz && + sband->band == IEEE80211_BAND_2GHZ) { + cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; + cap &= ~IEEE80211_HT_CAP_SGI_40; + } + + switch (ht_info->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { + case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: + if (flags & IEEE80211_CHAN_NO_HT40PLUS) { + cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; + cap &= ~IEEE80211_HT_CAP_SGI_40; + } + break; + case IEEE80211_HT_PARAM_CHA_SEC_BELOW: + if (flags & IEEE80211_CHAN_NO_HT40MINUS) { + cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; + cap &= ~IEEE80211_HT_CAP_SGI_40; + } + break; + } + + /* set SM PS mode properly */ + cap &= ~IEEE80211_HT_CAP_SM_PS; + switch (smps) { + case IEEE80211_SMPS_AUTOMATIC: + case IEEE80211_SMPS_NUM_MODES: + WARN_ON(1); + case IEEE80211_SMPS_OFF: + cap |= WLAN_HT_CAP_SM_PS_DISABLED << + IEEE80211_HT_CAP_SM_PS_SHIFT; + break; + case IEEE80211_SMPS_STATIC: + cap |= WLAN_HT_CAP_SM_PS_STATIC << + IEEE80211_HT_CAP_SM_PS_SHIFT; + break; + case IEEE80211_SMPS_DYNAMIC: + cap |= WLAN_HT_CAP_SM_PS_DYNAMIC << + IEEE80211_HT_CAP_SM_PS_SHIFT; + break; + } + + /* reserve and fill IE */ + + pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2); + *pos++ = WLAN_EID_HT_CAPABILITY; + *pos++ = sizeof(struct ieee80211_ht_cap); + memset(pos, 0, sizeof(struct ieee80211_ht_cap)); + + /* capability flags */ + tmp = cpu_to_le16(cap); + memcpy(pos, &tmp, sizeof(u16)); + pos += sizeof(u16); + + /* AMPDU parameters */ + *pos++ = sband->ht_cap.ampdu_factor | + (sband->ht_cap.ampdu_density << + IEEE80211_HT_AMPDU_PARM_DENSITY_SHIFT); + + /* MCS set */ + memcpy(pos, &sband->ht_cap.mcs, sizeof(sband->ht_cap.mcs)); + pos += sizeof(sband->ht_cap.mcs); + + /* extended capabilities */ + pos += sizeof(__le16); + + /* BF capabilities */ + pos += sizeof(__le32); + + /* antenna selection */ + pos += sizeof(u8); +} + static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_work *wk) { @@ -107,15 +203,34 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb; struct ieee80211_mgmt *mgmt; u8 *pos; - const u8 *ies, *ht_ie; + const u8 *ies; int i, len, count, rates_len, supp_rates_len; u16 capab; struct ieee80211_supported_band *sband; u32 rates = 0; - skb = dev_alloc_skb(local->hw.extra_tx_headroom + - sizeof(*mgmt) + 200 + wk->ie_len + - wk->assoc.ssid_len); + sband = local->hw.wiphy->bands[wk->chan->band]; + + /* + * Get all rates supported by the device and the AP as + * some APs don't like getting a superset of their rates + * in the association request 
(e.g. D-Link DAP 1353 in + * b-only mode)... + */ + rates_len = ieee80211_compatible_rates(wk->assoc.supp_rates, + wk->assoc.supp_rates_len, + sband, &rates); + + skb = alloc_skb(local->hw.extra_tx_headroom + + sizeof(*mgmt) + /* bit too much but doesn't matter */ + 2 + wk->assoc.ssid_len + /* SSID */ + 4 + rates_len + /* (extended) rates */ + 4 + /* power capability */ + 2 + 2 * sband->n_channels + /* supported channels */ + 2 + sizeof(struct ieee80211_ht_cap) + /* HT */ + wk->ie_len + /* extra IEs */ + 9, /* WMM */ + GFP_KERNEL); if (!skb) { printk(KERN_DEBUG "%s: failed to allocate buffer for assoc " "frame\n", sdata->name); @@ -123,8 +238,6 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, } skb_reserve(skb, local->hw.extra_tx_headroom); - sband = local->hw.wiphy->bands[wk->chan->band]; - capab = WLAN_CAPABILITY_ESS; if (sband->band == IEEE80211_BAND_2GHZ) { @@ -137,16 +250,6 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, if (wk->assoc.capability & WLAN_CAPABILITY_PRIVACY) capab |= WLAN_CAPABILITY_PRIVACY; - /* - * Get all rates supported by the device and the AP as - * some APs don't like getting a superset of their rates - * in the association request (e.g. D-Link DAP 1353 in - * b-only mode)... - */ - rates_len = ieee80211_compatible_rates(wk->assoc.supp_rates, - wk->assoc.supp_rates_len, - sband, &rates); - if ((wk->assoc.capability & WLAN_CAPABILITY_SPECTRUM_MGMT) && (local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT)) capab |= WLAN_CAPABILITY_SPECTRUM_MGMT; @@ -220,7 +323,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, *pos++ = WLAN_EID_PWR_CAPABILITY; *pos++ = 2; *pos++ = 0; /* min tx power */ - *pos++ = local->hw.conf.channel->max_power; /* max tx power */ + *pos++ = wk->chan->max_power; /* max tx power */ /* 2. supported channels */ /* TODO: get this in reg domain format */ @@ -234,11 +337,21 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, } } + /* + * XXX: These IEs could contain (vendor-specified) + * IEs that belong after HT -- the buffer may + * need to be split up. + */ if (wk->ie_len && wk->ie) { pos = skb_put(skb, wk->ie_len); memcpy(pos, wk->ie, wk->ie_len); } + if (wk->assoc.use_11n && wk->assoc.wmm_used && + local->hw.queues >= 4) + ieee80211_add_ht_ie(skb, wk->assoc.ht_information_ie, + sband, wk->chan, wk->assoc.smps); + if (wk->assoc.wmm_used && local->hw.queues >= 4) { pos = skb_put(skb, 9); *pos++ = WLAN_EID_VENDOR_SPECIFIC; @@ -252,98 +365,6 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, *pos++ = 0; } - /* wmm support is a must to HT */ - /* - * IEEE802.11n does not allow TKIP/WEP as pairwise - * ciphers in HT mode. We still associate in non-ht - * mode (11a/b/g) if any one of these ciphers is - * configured as pairwise. 
- */ - if (wk->assoc.use_11n && wk->assoc.wmm_used && - (local->hw.queues >= 4) && - sband->ht_cap.ht_supported && - (ht_ie = wk->assoc.ht_information_ie) && - ht_ie[1] >= sizeof(struct ieee80211_ht_info)) { - struct ieee80211_ht_info *ht_info = - (struct ieee80211_ht_info *)(ht_ie + 2); - u16 cap = sband->ht_cap.cap; - __le16 tmp; - u32 flags = local->hw.conf.channel->flags; - - /* determine capability flags */ - - if (ieee80211_disable_40mhz_24ghz && - sband->band == IEEE80211_BAND_2GHZ) { - cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; - cap &= ~IEEE80211_HT_CAP_SGI_40; - } - - switch (ht_info->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { - case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: - if (flags & IEEE80211_CHAN_NO_HT40PLUS) { - cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; - cap &= ~IEEE80211_HT_CAP_SGI_40; - } - break; - case IEEE80211_HT_PARAM_CHA_SEC_BELOW: - if (flags & IEEE80211_CHAN_NO_HT40MINUS) { - cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; - cap &= ~IEEE80211_HT_CAP_SGI_40; - } - break; - } - - /* set SM PS mode properly */ - cap &= ~IEEE80211_HT_CAP_SM_PS; - switch (wk->assoc.smps) { - case IEEE80211_SMPS_AUTOMATIC: - case IEEE80211_SMPS_NUM_MODES: - WARN_ON(1); - case IEEE80211_SMPS_OFF: - cap |= WLAN_HT_CAP_SM_PS_DISABLED << - IEEE80211_HT_CAP_SM_PS_SHIFT; - break; - case IEEE80211_SMPS_STATIC: - cap |= WLAN_HT_CAP_SM_PS_STATIC << - IEEE80211_HT_CAP_SM_PS_SHIFT; - break; - case IEEE80211_SMPS_DYNAMIC: - cap |= WLAN_HT_CAP_SM_PS_DYNAMIC << - IEEE80211_HT_CAP_SM_PS_SHIFT; - break; - } - - /* reserve and fill IE */ - - pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2); - *pos++ = WLAN_EID_HT_CAPABILITY; - *pos++ = sizeof(struct ieee80211_ht_cap); - memset(pos, 0, sizeof(struct ieee80211_ht_cap)); - - /* capability flags */ - tmp = cpu_to_le16(cap); - memcpy(pos, &tmp, sizeof(u16)); - pos += sizeof(u16); - - /* AMPDU parameters */ - *pos++ = sband->ht_cap.ampdu_factor | - (sband->ht_cap.ampdu_density << - IEEE80211_HT_AMPDU_PARM_DENSITY_SHIFT); - - /* MCS set */ - memcpy(pos, &sband->ht_cap.mcs, sizeof(sband->ht_cap.mcs)); - pos += sizeof(sband->ht_cap.mcs); - - /* extended capabilities */ - pos += sizeof(__le16); - - /* BF capabilities */ - pos += sizeof(__le32); - - /* antenna selection */ - pos += sizeof(u8); - } - IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; ieee80211_tx_skb(sdata, skb); } -- cgit v1.2.2 From 8e664fb3fd2b04e3ac5fad7f046000ba54e0e275 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 23 Dec 2009 13:15:38 +0100 Subject: mac80211: split up and insert custom IEs correctly Currently, we insert all user-specified IEs before the HT IE for association, and after the HT IE for probe requests. For association, that's correct only if the user-specified IEs are RSN only, incorrect in all other cases including WPA. Change this to split apart the user-specified IEs in two places for association: before the HT IE (e.g. RSN), after the HT IE (generally empty right now I think?) and after WMM (all other vendor-specific IEs). For probes, split the IEs in different places to be correct according to the spec. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/ieee80211_i.h | 4 ++ net/mac80211/util.c | 134 ++++++++++++++++++++++++++++++++++++++------- net/mac80211/work.c | 43 ++++++++++++--- 3 files changed, 154 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 97b6076b492e..6ea4ffbf84d8 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1148,6 +1148,10 @@ int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata, void ieee80211_recalc_smps(struct ieee80211_local *local, struct ieee80211_sub_if_data *forsdata); +size_t ieee80211_ie_split(const u8 *ies, size_t ielen, + const u8 *ids, int n_ids, size_t offset); +size_t ieee80211_ie_split_vendor(const u8 *ies, size_t ielen, size_t offset); + /* internal work items */ void ieee80211_work_init(struct ieee80211_local *local); void ieee80211_add_work(struct ieee80211_work *wk); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 5ffe9e831b66..1fdb80ff9241 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -881,30 +881,66 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, enum ieee80211_band band) { struct ieee80211_supported_band *sband; - u8 *pos, *supp_rates_len, *esupp_rates_len = NULL; - int i; + u8 *pos; + size_t offset = 0, noffset; + int supp_rates_len, i; sband = local->hw.wiphy->bands[band]; pos = buffer; + supp_rates_len = min_t(int, sband->n_bitrates, 8); + *pos++ = WLAN_EID_SUPP_RATES; - supp_rates_len = pos; - *pos++ = 0; - - for (i = 0; i < sband->n_bitrates; i++) { - struct ieee80211_rate *rate = &sband->bitrates[i]; - - if (esupp_rates_len) { - *esupp_rates_len += 1; - } else if (*supp_rates_len == 8) { - *pos++ = WLAN_EID_EXT_SUPP_RATES; - esupp_rates_len = pos; - *pos++ = 1; - } else - *supp_rates_len += 1; + *pos++ = supp_rates_len; - *pos++ = rate->bitrate / 5; + for (i = 0; i < supp_rates_len; i++) { + int rate = sband->bitrates[i].bitrate; + *pos++ = (u8) (rate / 5); + } + + /* insert "request information" if in custom IEs */ + if (ie && ie_len) { + static const u8 before_extrates[] = { + WLAN_EID_SSID, + WLAN_EID_SUPP_RATES, + WLAN_EID_REQUEST, + }; + noffset = ieee80211_ie_split(ie, ie_len, + before_extrates, + ARRAY_SIZE(before_extrates), + offset); + memcpy(pos, ie + offset, noffset - offset); + pos += noffset - offset; + offset = noffset; + } + + if (sband->n_bitrates > i) { + *pos++ = WLAN_EID_EXT_SUPP_RATES; + *pos++ = sband->n_bitrates - i; + + for (; i < sband->n_bitrates; i++) { + int rate = sband->bitrates[i].bitrate; + *pos++ = (u8) (rate / 5); + } + } + + /* insert custom IEs that go before HT */ + if (ie && ie_len) { + static const u8 before_ht[] = { + WLAN_EID_SSID, + WLAN_EID_SUPP_RATES, + WLAN_EID_REQUEST, + WLAN_EID_EXT_SUPP_RATES, + WLAN_EID_DS_PARAMS, + WLAN_EID_SUPPORTED_REGULATORY_CLASSES, + }; + noffset = ieee80211_ie_split(ie, ie_len, + before_ht, ARRAY_SIZE(before_ht), + offset); + memcpy(pos, ie + offset, noffset - offset); + pos += noffset - offset; + offset = noffset; } if (sband->ht_cap.ht_supported) { @@ -936,9 +972,11 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, * that calculates local->scan_ies_len. 
*/ - if (ie) { - memcpy(pos, ie, ie_len); - pos += ie_len; + /* add any remaining custom IEs */ + if (ie && ie_len) { + noffset = ie_len; + memcpy(pos, ie + offset, noffset - offset); + pos += noffset - offset; } return pos - buffer; @@ -1252,3 +1290,59 @@ void ieee80211_recalc_smps(struct ieee80211_local *local, /* changed flag is auto-detected for this */ ieee80211_hw_config(local, 0); } + +static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id) +{ + int i; + + for (i = 0; i < n_ids; i++) + if (ids[i] == id) + return true; + return false; +} + +/** + * ieee80211_ie_split - split an IE buffer according to ordering + * + * @ies: the IE buffer + * @ielen: the length of the IE buffer + * @ids: an array with element IDs that are allowed before + * the split + * @n_ids: the size of the element ID array + * @offset: offset where to start splitting in the buffer + * + * This function splits an IE buffer by updating the @offset + * variable to point to the location where the buffer should be + * split. + * + * It assumes that the given IE buffer is well-formed, this + * has to be guaranteed by the caller! + * + * It also assumes that the IEs in the buffer are ordered + * correctly, if not the result of using this function will not + * be ordered correctly either, i.e. it does no reordering. + * + * The function returns the offset where the next part of the + * buffer starts, which may be @ielen if the entire (remainder) + * of the buffer should be used. + */ +size_t ieee80211_ie_split(const u8 *ies, size_t ielen, + const u8 *ids, int n_ids, size_t offset) +{ + size_t pos = offset; + + while (pos < ielen && ieee80211_id_in_list(ids, n_ids, ies[pos])) + pos += 2 + ies[pos + 1]; + + return pos; +} + +size_t ieee80211_ie_split_vendor(const u8 *ies, size_t ielen, size_t offset) +{ + size_t pos = offset; + + while (pos < ielen && ies[pos] != WLAN_EID_VENDOR_SPECIFIC) + pos += 2 + ies[pos + 1]; + + return pos; +} diff --git a/net/mac80211/work.c b/net/mac80211/work.c index c03c22d5bca3..affdd10b67ad 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -204,6 +204,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt; u8 *pos; const u8 *ies; + size_t offset = 0, noffset; int i, len, count, rates_len, supp_rates_len; u16 capab; struct ieee80211_supported_band *sband; @@ -337,14 +338,26 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, } } - /* - * XXX: These IEs could contain (vendor-specified) - * IEs that belong after HT -- the buffer may - * need to be split up. 
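
For illustration, here is roughly how the two new helpers are meant to be chained by a caller that interleaves its own IEs with a locally generated HT IE. This is a minimal editorial sketch, not code from the patch: example_split_ies(), my_ies/my_ies_len and the commented-out put_ht_ie() are hypothetical placeholders, and it assumes the mac80211-internal context (ieee80211_i.h) where the helpers are declared.

/*
 * Editorial sketch (not part of the patch): split a caller-supplied IE
 * buffer around a locally generated HT IE using the new helpers.
 */
static void example_split_ies(struct sk_buff *skb,
                              const u8 *my_ies, size_t my_ies_len)
{
        static const u8 before_ht[] = {
                WLAN_EID_SSID,
                WLAN_EID_SUPP_RATES,
                WLAN_EID_EXT_SUPP_RATES,
        };
        size_t offset = 0, noffset;
        u8 *pos;

        /* 1) everything that is allowed to come before the HT IE */
        noffset = ieee80211_ie_split(my_ies, my_ies_len,
                                     before_ht, ARRAY_SIZE(before_ht),
                                     offset);
        pos = skb_put(skb, noffset - offset);
        memcpy(pos, my_ies + offset, noffset - offset);
        offset = noffset;

        /* 2) the locally generated HT IE goes here (hypothetical hook) */
        /* put_ht_ie(skb); */

        /* 3) non-vendor IEs that come after HT */
        noffset = ieee80211_ie_split_vendor(my_ies, my_ies_len, offset);
        pos = skb_put(skb, noffset - offset);
        memcpy(pos, my_ies + offset, noffset - offset);
        offset = noffset;

        /* 4) whatever is left is vendor-specific and goes last */
        pos = skb_put(skb, my_ies_len - offset);
        memcpy(pos, my_ies + offset, my_ies_len - offset);
}

The association and probe-request paths changed by this patch follow the same pattern, only with longer lists of element IDs allowed before the split points.
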
- */ + /* if present, add any custom IEs that go before HT */ if (wk->ie_len && wk->ie) { - pos = skb_put(skb, wk->ie_len); - memcpy(pos, wk->ie, wk->ie_len); + static const u8 before_ht[] = { + WLAN_EID_SSID, + WLAN_EID_SUPP_RATES, + WLAN_EID_EXT_SUPP_RATES, + WLAN_EID_PWR_CAPABILITY, + WLAN_EID_SUPPORTED_CHANNELS, + WLAN_EID_RSN, + WLAN_EID_QOS_CAPA, + WLAN_EID_RRM_ENABLED_CAPABILITIES, + WLAN_EID_MOBILITY_DOMAIN, + WLAN_EID_SUPPORTED_REGULATORY_CLASSES, + }; + noffset = ieee80211_ie_split(wk->ie, wk->ie_len, + before_ht, ARRAY_SIZE(before_ht), + offset); + pos = skb_put(skb, noffset - offset); + memcpy(pos, wk->ie + offset, noffset - offset); + offset = noffset; } if (wk->assoc.use_11n && wk->assoc.wmm_used && @@ -352,6 +365,15 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, ieee80211_add_ht_ie(skb, wk->assoc.ht_information_ie, sband, wk->chan, wk->assoc.smps); + /* if present, add any custom non-vendor IEs that go after HT */ + if (wk->ie_len && wk->ie) { + noffset = ieee80211_ie_split_vendor(wk->ie, wk->ie_len, + offset); + pos = skb_put(skb, noffset - offset); + memcpy(pos, wk->ie + offset, noffset - offset); + offset = noffset; + } + if (wk->assoc.wmm_used && local->hw.queues >= 4) { pos = skb_put(skb, 9); *pos++ = WLAN_EID_VENDOR_SPECIFIC; @@ -365,6 +387,13 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, *pos++ = 0; } + /* add any remaining custom (i.e. vendor specific here) IEs */ + if (wk->ie_len && wk->ie) { + noffset = wk->ie_len; + pos = skb_put(skb, noffset - offset); + memcpy(pos, wk->ie + offset, noffset - offset); + } + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; ieee80211_tx_skb(sdata, skb); } -- cgit v1.2.2 From 0c1ad2cac1cb54db38fd4cc1822965071ee83f6e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 23 Dec 2009 13:15:39 +0100 Subject: mac80211: proper bss private data handling cfg80211 offers private data for each BSS struct, which mac80211 uses. However, mac80211 uses internal and external (cfg80211) BSS pointers interchangeably and has a hack to put the cfg80211 bss struct into the private struct. Remove this hack, properly converting between the pointers wherever necessary. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/cfg.c | 2 +- net/mac80211/ibss.c | 45 ++++++++++++++++++-------------- net/mac80211/ieee80211_i.h | 7 ++--- net/mac80211/main.c | 4 +-- net/mac80211/mlme.c | 64 ++++++++++++++++++++++++---------------------- net/mac80211/scan.c | 29 +++++++++++++-------- net/mac80211/work.c | 3 +-- 7 files changed, 82 insertions(+), 72 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index fdac1bcbfcc0..ea862dfc08ed 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1343,7 +1343,7 @@ int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata, return 0; } - ap = sdata->u.mgd.associated->cbss.bssid; + ap = sdata->u.mgd.associated->bssid; if (smps_mode == IEEE80211_SMPS_AUTOMATIC) { if (sdata->u.mgd.powersave) diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 3a61f3ba85c9..621a54c0573a 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -187,15 +187,17 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, struct ieee80211_bss *bss) { + struct cfg80211_bss *cbss = + container_of((void *)bss, struct cfg80211_bss, priv); struct ieee80211_supported_band *sband; u32 basic_rates; int i, j; - u16 beacon_int = bss->cbss.beacon_interval; + u16 beacon_int = cbss->beacon_interval; if (beacon_int < 10) beacon_int = 10; - sband = sdata->local->hw.wiphy->bands[bss->cbss.channel->band]; + sband = sdata->local->hw.wiphy->bands[cbss->channel->band]; basic_rates = 0; @@ -212,12 +214,12 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, } } - __ieee80211_sta_join_ibss(sdata, bss->cbss.bssid, + __ieee80211_sta_join_ibss(sdata, cbss->bssid, beacon_int, - bss->cbss.channel, + cbss->channel, basic_rates, - bss->cbss.capability, - bss->cbss.tsf); + cbss->capability, + cbss->tsf); } static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, @@ -229,6 +231,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; int freq; + struct cfg80211_bss *cbss; struct ieee80211_bss *bss; struct sta_info *sta; struct ieee80211_channel *channel; @@ -283,8 +286,10 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, if (!bss) return; + cbss = container_of((void *)bss, struct cfg80211_bss, priv); + /* was just updated in ieee80211_bss_info_update */ - beacon_timestamp = bss->cbss.tsf; + beacon_timestamp = cbss->tsf; /* check if we need to merge IBSS */ @@ -297,11 +302,11 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, goto put_bss; /* not an IBSS */ - if (!(bss->cbss.capability & WLAN_CAPABILITY_IBSS)) + if (!(cbss->capability & WLAN_CAPABILITY_IBSS)) goto put_bss; /* different channel */ - if (bss->cbss.channel != local->oper_channel) + if (cbss->channel != local->oper_channel) goto put_bss; /* different SSID */ @@ -311,7 +316,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, goto put_bss; /* same BSSID */ - if (memcmp(bss->cbss.bssid, sdata->u.ibss.bssid, ETH_ALEN) == 0) + if (memcmp(cbss->bssid, sdata->u.ibss.bssid, ETH_ALEN) == 0) goto put_bss; if (rx_status->flag & RX_FLAG_TSFT) { @@ -514,7 +519,7 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_local *local = sdata->local; - struct ieee80211_bss *bss; + struct cfg80211_bss *cbss; struct ieee80211_channel *chan = NULL; const u8 
*bssid = NULL; int active_ibss; @@ -538,21 +543,23 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) chan = ifibss->channel; if (!is_zero_ether_addr(ifibss->bssid)) bssid = ifibss->bssid; - bss = (void *)cfg80211_get_bss(local->hw.wiphy, chan, bssid, - ifibss->ssid, ifibss->ssid_len, - WLAN_CAPABILITY_IBSS | - WLAN_CAPABILITY_PRIVACY, - capability); + cbss = cfg80211_get_bss(local->hw.wiphy, chan, bssid, + ifibss->ssid, ifibss->ssid_len, + WLAN_CAPABILITY_IBSS | WLAN_CAPABILITY_PRIVACY, + capability); + + if (cbss) { + struct ieee80211_bss *bss; - if (bss) { + bss = (void *)cbss->priv; #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG " sta_find_ibss: selected %pM current " - "%pM\n", bss->cbss.bssid, ifibss->bssid); + "%pM\n", cbss->bssid, ifibss->bssid); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ printk(KERN_DEBUG "%s: Selected IBSS BSSID %pM" " based on configured SSID\n", - sdata->name, bss->cbss.bssid); + sdata->name, cbss->bssid); ieee80211_sta_join_ibss(sdata, bss); ieee80211_rx_bss_put(local, bss); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 6ea4ffbf84d8..de068ad6223b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -71,9 +71,6 @@ struct ieee80211_fragment_entry { struct ieee80211_bss { - /* Yes, this is a hack */ - struct cfg80211_bss cbss; - /* don't want to look up all the time */ size_t ssid_len; u8 ssid[IEEE80211_MAX_SSID_LEN]; @@ -274,7 +271,7 @@ struct ieee80211_work { bool privacy; } probe_auth; struct { - struct ieee80211_bss *bss; + struct cfg80211_bss *bss; const u8 *supp_rates; const u8 *ht_information_ie; enum ieee80211_smps_mode smps; @@ -317,7 +314,7 @@ struct ieee80211_if_managed { int probe_send_count; struct mutex mtx; - struct ieee80211_bss *associated; + struct cfg80211_bss *associated; u8 bssid[ETH_ALEN]; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index d35023ce7fa1..5fcd3548417e 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -359,9 +359,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, WIPHY_FLAG_4ADDR_STATION; wiphy->privid = mac80211_wiphy_privid; - /* Yes, putting cfg80211_bss into ieee80211_bss is a hack */ - wiphy->bss_priv_size = sizeof(struct ieee80211_bss) - - sizeof(struct cfg80211_bss); + wiphy->bss_priv_size = sizeof(struct ieee80211_bss); local = wiphy_priv(wiphy); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ea434b5a779e..e44f1ed0b0da 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -330,7 +330,7 @@ static void ieee80211_chswitch_work(struct work_struct *work) ieee80211_hw_config(sdata->local, IEEE80211_CONF_CHANGE_CHANNEL); /* XXX: shouldn't really modify cfg80211-owned data! 
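
The conversions between cfg80211's public BSS struct and mac80211's private data, which this patch now open-codes at each call site, amount to the following pair of helpers. This is an editorial sketch for clarity only; the patch does not add these functions, and the names are hypothetical.

/*
 * Editorial sketch (not part of the patch): the two pointer
 * conversions used throughout this change.
 */
static inline struct cfg80211_bss *bss_to_cbss(struct ieee80211_bss *bss)
{
        /* the private data sits at cbss->priv, so step back to the container */
        return container_of((void *)bss, struct cfg80211_bss, priv);
}

static inline struct ieee80211_bss *cbss_to_bss(struct cfg80211_bss *cbss)
{
        /* cfg80211 reserved wiphy->bss_priv_size bytes for mac80211 here */
        return (void *)cbss->priv;
}

Because cfg80211 allocates wiphy->bss_priv_size extra bytes for each BSS entry, the private struct always lives directly at cbss->priv, and container_of() recovers the outer struct from it without the old embedding hack.
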
*/ - ifmgd->associated->cbss.channel = sdata->local->oper_channel; + ifmgd->associated->channel = sdata->local->oper_channel; ieee80211_wake_queues_by_reason(&sdata->local->hw, IEEE80211_QUEUE_STOP_REASON_CSA); @@ -357,6 +357,8 @@ void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel_sw_ie *sw_elem, struct ieee80211_bss *bss) { + struct cfg80211_bss *cbss = + container_of((void *)bss, struct cfg80211_bss, priv); struct ieee80211_channel *new_ch; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; int new_freq = ieee80211_channel_to_frequency(sw_elem->new_ch_num); @@ -390,7 +392,7 @@ void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, mod_timer(&ifmgd->chswitch_timer, jiffies + msecs_to_jiffies(sw_elem->count * - bss->cbss.beacon_interval)); + cbss->beacon_interval)); } } @@ -670,23 +672,24 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, } static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, - struct ieee80211_bss *bss, + struct cfg80211_bss *cbss, u32 bss_info_changed) { + struct ieee80211_bss *bss = (void *)cbss->priv; struct ieee80211_local *local = sdata->local; bss_info_changed |= BSS_CHANGED_ASSOC; /* set timing information */ - sdata->vif.bss_conf.beacon_int = bss->cbss.beacon_interval; - sdata->vif.bss_conf.timestamp = bss->cbss.tsf; + sdata->vif.bss_conf.beacon_int = cbss->beacon_interval; + sdata->vif.bss_conf.timestamp = cbss->tsf; sdata->vif.bss_conf.dtim_period = bss->dtim_period; bss_info_changed |= BSS_CHANGED_BEACON_INT; bss_info_changed |= ieee80211_handle_bss_capability(sdata, - bss->cbss.capability, bss->has_erp_value, bss->erp_value); + cbss->capability, bss->has_erp_value, bss->erp_value); - sdata->u.mgd.associated = bss; - memcpy(sdata->u.mgd.bssid, bss->cbss.bssid, ETH_ALEN); + sdata->u.mgd.associated = cbss; + memcpy(sdata->u.mgd.bssid, cbss->bssid, ETH_ALEN); /* just to be sure */ sdata->u.mgd.flags &= ~(IEEE80211_STA_CONNECTION_POLL | @@ -737,7 +740,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata) if (WARN_ON(!ifmgd->associated)) return; - memcpy(bssid, ifmgd->associated->cbss.bssid, ETH_ALEN); + memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN); ifmgd->associated = NULL; memset(ifmgd->bssid, 0, ETH_ALEN); @@ -833,8 +836,8 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; const u8 *ssid; - ssid = ieee80211_bss_get_ie(&ifmgd->associated->cbss, WLAN_EID_SSID); - ieee80211_send_probe_req(sdata, ifmgd->associated->cbss.bssid, + ssid = ieee80211_bss_get_ie(ifmgd->associated, WLAN_EID_SSID); + ieee80211_send_probe_req(sdata, ifmgd->associated->bssid, ssid + 2, ssid[1], NULL, 0); ifmgd->probe_send_count++; @@ -928,7 +931,7 @@ ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, ASSERT_MGD_MTX(ifmgd); - bssid = ifmgd->associated->cbss.bssid; + bssid = ifmgd->associated->bssid; reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); @@ -957,7 +960,7 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, if (WARN_ON(!ifmgd->associated)) return RX_MGMT_NONE; - if (WARN_ON(memcmp(ifmgd->associated->cbss.bssid, mgmt->sa, ETH_ALEN))) + if (WARN_ON(memcmp(ifmgd->associated->bssid, mgmt->sa, ETH_ALEN))) return RX_MGMT_NONE; reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); @@ -979,7 +982,7 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk, struct ieee80211_local *local = sdata->local; struct 
ieee80211_supported_band *sband; struct sta_info *sta; - struct ieee80211_bss *bss = wk->assoc.bss; + struct cfg80211_bss *cbss = wk->assoc.bss; u8 *pos; u32 rates, basic_rates; u16 capab_info, aid; @@ -1011,7 +1014,7 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk, ifmgd->aid = aid; - sta = sta_info_alloc(sdata, bss->cbss.bssid, GFP_KERNEL); + sta = sta_info_alloc(sdata, cbss->bssid, GFP_KERNEL); if (!sta) { printk(KERN_DEBUG "%s: failed to alloc STA entry for" " the AP\n", sdata->name); @@ -1103,14 +1106,13 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk, (sdata->local->hw.queues >= 4) && !(ifmgd->flags & IEEE80211_STA_DISABLE_11N)) changed |= ieee80211_enable_ht(sdata, elems.ht_info_elem, - bss->cbss.bssid, - ap_ht_cap_flags); + cbss->bssid, ap_ht_cap_flags); /* set AID and assoc capability, * ieee80211_set_associated() will tell the driver */ bss_conf->aid = aid; bss_conf->assoc_capability = capab_info; - ieee80211_set_associated(sdata, bss, changed); + ieee80211_set_associated(sdata, cbss, changed); /* * Start timer to probe the connection to the AP now. @@ -1154,7 +1156,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, return; if (elems->ch_switch_elem && (elems->ch_switch_elem_len == 3) && - (memcmp(mgmt->bssid, sdata->u.mgd.associated->cbss.bssid, + (memcmp(mgmt->bssid, sdata->u.mgd.associated->bssid, ETH_ALEN) == 0)) { struct ieee80211_channel_sw_ie *sw_elem = (struct ieee80211_channel_sw_ie *)elems->ch_switch_elem; @@ -1189,7 +1191,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, false); if (ifmgd->associated && - memcmp(mgmt->bssid, ifmgd->associated->cbss.bssid, ETH_ALEN) == 0 && + memcmp(mgmt->bssid, ifmgd->associated->bssid, ETH_ALEN) == 0 && ifmgd->flags & (IEEE80211_STA_BEACON_POLL | IEEE80211_STA_CONNECTION_POLL)) { ifmgd->flags &= ~(IEEE80211_STA_CONNECTION_POLL | @@ -1262,7 +1264,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (!ifmgd->associated) return; - bssid = ifmgd->associated->cbss.bssid; + bssid = ifmgd->associated->bssid; /* * And in theory even frames from a different AP we were just @@ -1428,8 +1430,7 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, mutex_lock(&ifmgd->mtx); if (ifmgd->associated && - memcmp(ifmgd->associated->cbss.bssid, mgmt->bssid, - ETH_ALEN) == 0) { + memcmp(ifmgd->associated->bssid, mgmt->bssid, ETH_ALEN) == 0) { switch (fc & IEEE80211_FCTL_STYPE) { case IEEE80211_STYPE_BEACON: ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, @@ -1448,7 +1449,7 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, /* XXX: differentiate, can only happen for CSA now! 
*/ ieee80211_sta_process_chanswitch(sdata, &mgmt->u.action.u.chan_switch.sw_elem, - ifmgd->associated); + (void *)ifmgd->associated->priv); break; } mutex_unlock(&ifmgd->mtx); @@ -1533,7 +1534,7 @@ static void ieee80211_sta_work(struct work_struct *work) ifmgd->associated) { u8 bssid[ETH_ALEN]; - memcpy(bssid, ifmgd->associated->cbss.bssid, ETH_ALEN); + memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN); if (time_is_after_jiffies(ifmgd->probe_timeout)) run_again(ifmgd, ifmgd->probe_timeout); @@ -1840,6 +1841,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, struct cfg80211_assoc_request *req) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + struct ieee80211_bss *bss = (void *)req->bss->priv; struct ieee80211_work *wk; const u8 *ssid; int i; @@ -1870,7 +1872,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, } else wk->ie_len = 0; - wk->assoc.bss = (void *)req->bss; + wk->assoc.bss = req->bss; memcpy(wk->filter_ta, req->bss->bssid, ETH_ALEN); @@ -1893,9 +1895,9 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, */ wk->assoc.use_11n = !(ifmgd->flags & IEEE80211_STA_DISABLE_11N); wk->assoc.capability = req->bss->capability; - wk->assoc.wmm_used = wk->assoc.bss->wmm_used; - wk->assoc.supp_rates = wk->assoc.bss->supp_rates; - wk->assoc.supp_rates_len = wk->assoc.bss->supp_rates_len; + wk->assoc.wmm_used = bss->wmm_used; + wk->assoc.supp_rates = bss->supp_rates; + wk->assoc.supp_rates_len = bss->supp_rates_len; wk->assoc.ht_information_ie = ieee80211_bss_get_ie(req->bss, WLAN_EID_HT_INFORMATION); @@ -1942,7 +1944,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, mutex_lock(&ifmgd->mtx); - if (ifmgd->associated && &ifmgd->associated->cbss == req->bss) { + if (ifmgd->associated == req->bss) { bssid = req->bss->bssid; ieee80211_set_disassoc(sdata); mutex_unlock(&ifmgd->mtx); @@ -2004,7 +2006,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, * to cfg80211 while that's in a locked section already * trying to tell us that the user wants to disconnect. 
*/ - if (&ifmgd->associated->cbss != req->bss) { + if (ifmgd->associated != req->bss) { mutex_unlock(&ifmgd->mtx); return -ENOLINK; } diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index fb89e4c0fbfd..2a2d7f6005af 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -29,16 +29,19 @@ struct ieee80211_bss * ieee80211_rx_bss_get(struct ieee80211_local *local, u8 *bssid, int freq, u8 *ssid, u8 ssid_len) { - return (void *)cfg80211_get_bss(local->hw.wiphy, - ieee80211_get_channel(local->hw.wiphy, - freq), - bssid, ssid, ssid_len, - 0, 0); + struct cfg80211_bss *cbss; + + cbss = cfg80211_get_bss(local->hw.wiphy, + ieee80211_get_channel(local->hw.wiphy, freq), + bssid, ssid, ssid_len, 0, 0); + if (!cbss) + return NULL; + return (void *)cbss->priv; } static void ieee80211_rx_bss_free(struct cfg80211_bss *cbss) { - struct ieee80211_bss *bss = (void *)cbss; + struct ieee80211_bss *bss = (void *)cbss->priv; kfree(bss_mesh_id(bss)); kfree(bss_mesh_cfg(bss)); @@ -47,7 +50,9 @@ static void ieee80211_rx_bss_free(struct cfg80211_bss *cbss) void ieee80211_rx_bss_put(struct ieee80211_local *local, struct ieee80211_bss *bss) { - cfg80211_put_bss((struct cfg80211_bss *)bss); + if (!bss) + return; + cfg80211_put_bss(container_of((void *)bss, struct cfg80211_bss, priv)); } struct ieee80211_bss * @@ -59,6 +64,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local, struct ieee80211_channel *channel, bool beacon) { + struct cfg80211_bss *cbss; struct ieee80211_bss *bss; int clen; s32 signal = 0; @@ -68,13 +74,14 @@ ieee80211_bss_info_update(struct ieee80211_local *local, else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) signal = (rx_status->signal * 100) / local->hw.max_signal; - bss = (void *)cfg80211_inform_bss_frame(local->hw.wiphy, channel, - mgmt, len, signal, GFP_ATOMIC); + cbss = cfg80211_inform_bss_frame(local->hw.wiphy, channel, + mgmt, len, signal, GFP_ATOMIC); - if (!bss) + if (!cbss) return NULL; - bss->cbss.free_priv = ieee80211_rx_bss_free; + cbss->free_priv = ieee80211_rx_bss_free; + bss = (void *)cbss->priv; /* save the ERP value so that it is available at association time */ if (elems->erp_info && elems->erp_info_len >= 1) { diff --git a/net/mac80211/work.c b/net/mac80211/work.c index affdd10b67ad..0b8c31c600aa 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -517,8 +517,7 @@ ieee80211_associate(struct ieee80211_work *wk) * bss struct for that AP. */ if (wk->assoc.bss) - cfg80211_unlink_bss(local->hw.wiphy, - &wk->assoc.bss->cbss); + cfg80211_unlink_bss(local->hw.wiphy, wk->assoc.bss); /* * We might have a pending scan which had no chance to run yet -- cgit v1.2.2 From b203ffc3a447eb8d9e6120b783ddee081b143061 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 23 Dec 2009 13:15:40 +0100 Subject: mac80211: Generalize off-channel operation helpers from scan code The off-channel operations for going into power save mode (station mode) or stop beaconing (AP/IBSS) are not limited to scanning. Move these into a separate file and allow them to be used for other purposes, too. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/Makefile | 2 +- net/mac80211/ieee80211_i.h | 8 ++- net/mac80211/offchannel.c | 164 +++++++++++++++++++++++++++++++++++++++++++++ net/mac80211/scan.c | 156 ++---------------------------------------- 4 files changed, 178 insertions(+), 152 deletions(-) create mode 100644 net/mac80211/offchannel.c (limited to 'net') diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 5a1f57df7cd6..04420291e7ad 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -6,7 +6,7 @@ mac80211-y := \ sta_info.o \ wep.o \ wpa.o \ - scan.o \ + scan.o offchannel.o \ ht.o agg-tx.o agg-rx.o \ ibss.o \ mlme.o work.o \ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index de068ad6223b..fd912eb5309e 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -786,7 +786,7 @@ struct ieee80211_local { unsigned int wmm_acm; /* bit field of ACM bits (BIT(802.1D tag)) */ bool pspolling; - bool scan_ps_enabled; + bool offchannel_ps_enabled; /* * PS can only be enabled when we have exactly one managed * interface (and monitors) in PS, this then points there. @@ -981,6 +981,12 @@ ieee80211_rx_bss_get(struct ieee80211_local *local, u8 *bssid, int freq, void ieee80211_rx_bss_put(struct ieee80211_local *local, struct ieee80211_bss *bss); +/* off-channel helpers */ +void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local); +void ieee80211_offchannel_stop_station(struct ieee80211_local *local); +void ieee80211_offchannel_return(struct ieee80211_local *local, + bool enable_beaconing); + /* interface handling */ int ieee80211_iface_init(void); void ieee80211_iface_exit(void); diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c new file mode 100644 index 000000000000..2cd880e444d1 --- /dev/null +++ b/net/mac80211/offchannel.c @@ -0,0 +1,164 @@ +/* + * Off-channel operation helpers + * + * Copyright 2003, Jouni Malinen + * Copyright 2004, Instant802 Networks, Inc. + * Copyright 2005, Devicescape Software, Inc. + * Copyright 2006-2007 Jiri Benc + * Copyright 2007, Michael Wu + * Copyright 2009 Johannes Berg + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include "ieee80211_i.h" + +/* + * inform AP that we will go to sleep so that it will buffer the frames + * while we scan + */ +static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + + local->offchannel_ps_enabled = false; + + /* FIXME: what to do when local->pspolling is true? */ + + del_timer_sync(&local->dynamic_ps_timer); + cancel_work_sync(&local->dynamic_ps_enable_work); + + if (local->hw.conf.flags & IEEE80211_CONF_PS) { + local->offchannel_ps_enabled = true; + local->hw.conf.flags &= ~IEEE80211_CONF_PS; + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); + } + + if (!(local->offchannel_ps_enabled) || + !(local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)) + /* + * If power save was enabled, no need to send a nullfunc + * frame because AP knows that we are sleeping. But if the + * hardware is creating the nullfunc frame for power save + * status (ie. IEEE80211_HW_PS_NULLFUNC_STACK is not + * enabled) and power save was enabled, the firmware just + * sent a null frame with power save disabled. So we need + * to send a new nullfunc frame to inform the AP that we + * are again sleeping. 
+ */ + ieee80211_send_nullfunc(local, sdata, 1); +} + +/* inform AP that we are awake again, unless power save is enabled */ +static void ieee80211_offchannel_ps_disable(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + + if (!local->ps_sdata) + ieee80211_send_nullfunc(local, sdata, 0); + else if (local->offchannel_ps_enabled) { + /* + * In !IEEE80211_HW_PS_NULLFUNC_STACK case the hardware + * will send a nullfunc frame with the powersave bit set + * even though the AP already knows that we are sleeping. + * This could be avoided by sending a null frame with power + * save bit disabled before enabling the power save, but + * this doesn't gain anything. + * + * When IEEE80211_HW_PS_NULLFUNC_STACK is enabled, no need + * to send a nullfunc frame because AP already knows that + * we are sleeping, let's just enable power save mode in + * hardware. + */ + local->hw.conf.flags |= IEEE80211_CONF_PS; + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); + } else if (local->hw.conf.dynamic_ps_timeout > 0) { + /* + * If IEEE80211_CONF_PS was not set and the dynamic_ps_timer + * had been running before leaving the operating channel, + * restart the timer now and send a nullfunc frame to inform + * the AP that we are awake. + */ + ieee80211_send_nullfunc(local, sdata, 0); + mod_timer(&local->dynamic_ps_timer, jiffies + + msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); + } +} + +void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local) +{ + struct ieee80211_sub_if_data *sdata; + + mutex_lock(&local->iflist_mtx); + list_for_each_entry(sdata, &local->interfaces, list) { + if (!ieee80211_sdata_running(sdata)) + continue; + + /* disable beaconing */ + if (sdata->vif.type == NL80211_IFTYPE_AP || + sdata->vif.type == NL80211_IFTYPE_ADHOC || + sdata->vif.type == NL80211_IFTYPE_MESH_POINT) + ieee80211_bss_info_change_notify( + sdata, BSS_CHANGED_BEACON_ENABLED); + + /* + * only handle non-STA interfaces here, STA interfaces + * are handled in ieee80211_offchannel_stop_station(), + * e.g., from the background scan state machine + */ + if (sdata->vif.type != NL80211_IFTYPE_STATION) + netif_stop_queue(sdata->dev); + } + mutex_unlock(&local->iflist_mtx); +} + +void ieee80211_offchannel_stop_station(struct ieee80211_local *local) +{ + struct ieee80211_sub_if_data *sdata; + + /* + * notify the AP about us leaving the channel and stop all STA interfaces + */ + mutex_lock(&local->iflist_mtx); + list_for_each_entry(sdata, &local->interfaces, list) { + if (!ieee80211_sdata_running(sdata)) + continue; + + if (sdata->vif.type == NL80211_IFTYPE_STATION) { + netif_stop_queue(sdata->dev); + if (sdata->u.mgd.associated) + ieee80211_offchannel_ps_enable(sdata); + } + } + mutex_unlock(&local->iflist_mtx); +} + +void ieee80211_offchannel_return(struct ieee80211_local *local, + bool enable_beaconing) +{ + struct ieee80211_sub_if_data *sdata; + + mutex_lock(&local->iflist_mtx); + list_for_each_entry(sdata, &local->interfaces, list) { + if (!ieee80211_sdata_running(sdata)) + continue; + + /* Tell AP we're back */ + if (sdata->vif.type == NL80211_IFTYPE_STATION) { + if (sdata->u.mgd.associated) + ieee80211_offchannel_ps_disable(sdata); + netif_wake_queue(sdata->dev); + } + + /* re-enable beaconing */ + if (enable_beaconing && + (sdata->vif.type == NL80211_IFTYPE_AP || + sdata->vif.type == NL80211_IFTYPE_ADHOC || + sdata->vif.type == NL80211_IFTYPE_MESH_POINT)) + ieee80211_bss_info_change_notify( + sdata, BSS_CHANGED_BEACON_ENABLED); + } + mutex_unlock(&local->iflist_mtx); 
+} diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 2a2d7f6005af..365f40975511 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -227,82 +227,9 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) return true; } -/* - * inform AP that we will go to sleep so that it will buffer the frames - * while we scan - */ -static void ieee80211_scan_ps_enable(struct ieee80211_sub_if_data *sdata) -{ - struct ieee80211_local *local = sdata->local; - - local->scan_ps_enabled = false; - - /* FIXME: what to do when local->pspolling is true? */ - - del_timer_sync(&local->dynamic_ps_timer); - cancel_work_sync(&local->dynamic_ps_enable_work); - - if (local->hw.conf.flags & IEEE80211_CONF_PS) { - local->scan_ps_enabled = true; - local->hw.conf.flags &= ~IEEE80211_CONF_PS; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); - } - - if (!(local->scan_ps_enabled) || - !(local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)) - /* - * If power save was enabled, no need to send a nullfunc - * frame because AP knows that we are sleeping. But if the - * hardware is creating the nullfunc frame for power save - * status (ie. IEEE80211_HW_PS_NULLFUNC_STACK is not - * enabled) and power save was enabled, the firmware just - * sent a null frame with power save disabled. So we need - * to send a new nullfunc frame to inform the AP that we - * are again sleeping. - */ - ieee80211_send_nullfunc(local, sdata, 1); -} - -/* inform AP that we are awake again, unless power save is enabled */ -static void ieee80211_scan_ps_disable(struct ieee80211_sub_if_data *sdata) -{ - struct ieee80211_local *local = sdata->local; - - if (!local->ps_sdata) - ieee80211_send_nullfunc(local, sdata, 0); - else if (local->scan_ps_enabled) { - /* - * In !IEEE80211_HW_PS_NULLFUNC_STACK case the hardware - * will send a nullfunc frame with the powersave bit set - * even though the AP already knows that we are sleeping. - * This could be avoided by sending a null frame with power - * save bit disabled before enabling the power save, but - * this doesn't gain anything. - * - * When IEEE80211_HW_PS_NULLFUNC_STACK is enabled, no need - * to send a nullfunc frame because AP already knows that - * we are sleeping, let's just enable power save mode in - * hardware. - */ - local->hw.conf.flags |= IEEE80211_CONF_PS; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); - } else if (local->hw.conf.dynamic_ps_timeout > 0) { - /* - * If IEEE80211_CONF_PS was not set and the dynamic_ps_timer - * had been running before leaving the operating channel, - * restart the timer now and send a nullfunc frame to inform - * the AP that we are awake. 
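
Taken together, the new helpers give any off-channel user a simple calling contract; the scan path converted below, and the remain-on-channel work added later in this series, both follow it. The function below is a hypothetical illustration of that contract under the assumption of a single off-channel episode, not code added by the series.

/*
 * Editorial sketch (not part of the patch): expected sequencing of the
 * generalized off-channel helpers.
 */
static void example_offchannel_operation(struct ieee80211_local *local)
{
        /* before leaving the operating channel */
        ieee80211_offchannel_stop_beaconing(local);     /* AP/IBSS/mesh vifs */
        ieee80211_offchannel_stop_station(local);       /* STA vifs -> powersave */

        /* ... switch channel and perform the off-channel work ... */

        /* back on the operating channel */
        ieee80211_offchannel_return(local, true);       /* also re-enable beaconing */
}
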
- */ - ieee80211_send_nullfunc(local, sdata, 0); - mod_timer(&local->dynamic_ps_timer, jiffies + - msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); - } -} - void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) { struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_sub_if_data *sdata; bool was_hw_scan; mutex_lock(&local->scan_mtx); @@ -351,28 +278,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) drv_sw_scan_complete(local); - mutex_lock(&local->iflist_mtx); - list_for_each_entry(sdata, &local->interfaces, list) { - if (!ieee80211_sdata_running(sdata)) - continue; - - /* Tell AP we're back */ - if (sdata->vif.type == NL80211_IFTYPE_STATION) { - if (sdata->u.mgd.associated) { - ieee80211_scan_ps_disable(sdata); - netif_wake_queue(sdata->dev); - } - } else - netif_wake_queue(sdata->dev); - - /* re-enable beaconing */ - if (sdata->vif.type == NL80211_IFTYPE_AP || - sdata->vif.type == NL80211_IFTYPE_ADHOC || - sdata->vif.type == NL80211_IFTYPE_MESH_POINT) - ieee80211_bss_info_change_notify( - sdata, BSS_CHANGED_BEACON_ENABLED); - } - mutex_unlock(&local->iflist_mtx); + ieee80211_offchannel_return(local, true); done: ieee80211_recalc_idle(local); @@ -384,8 +290,6 @@ EXPORT_SYMBOL(ieee80211_scan_completed); static int ieee80211_start_sw_scan(struct ieee80211_local *local) { - struct ieee80211_sub_if_data *sdata; - /* * Hardware/driver doesn't support hw_scan, so use software * scanning instead. First send a nullfunc frame with power save @@ -401,26 +305,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) */ drv_sw_scan_start(local); - mutex_lock(&local->iflist_mtx); - list_for_each_entry(sdata, &local->interfaces, list) { - if (!ieee80211_sdata_running(sdata)) - continue; - - /* disable beaconing */ - if (sdata->vif.type == NL80211_IFTYPE_AP || - sdata->vif.type == NL80211_IFTYPE_ADHOC || - sdata->vif.type == NL80211_IFTYPE_MESH_POINT) - ieee80211_bss_info_change_notify( - sdata, BSS_CHANGED_BEACON_ENABLED); - - /* - * only handle non-STA interfaces here, STA interfaces - * are handled in the scan state machine - */ - if (sdata->vif.type != NL80211_IFTYPE_STATION) - netif_stop_queue(sdata->dev); - } - mutex_unlock(&local->iflist_mtx); + ieee80211_offchannel_stop_beaconing(local); local->next_scan_state = SCAN_DECISION; local->scan_channel_idx = 0; @@ -568,23 +453,7 @@ static int ieee80211_scan_state_decision(struct ieee80211_local *local, static void ieee80211_scan_state_leave_oper_channel(struct ieee80211_local *local, unsigned long *next_delay) { - struct ieee80211_sub_if_data *sdata; - - /* - * notify the AP about us leaving the channel and stop all STA interfaces - */ - mutex_lock(&local->iflist_mtx); - list_for_each_entry(sdata, &local->interfaces, list) { - if (!ieee80211_sdata_running(sdata)) - continue; - - if (sdata->vif.type == NL80211_IFTYPE_STATION) { - netif_stop_queue(sdata->dev); - if (sdata->u.mgd.associated) - ieee80211_scan_ps_enable(sdata); - } - } - mutex_unlock(&local->iflist_mtx); + ieee80211_offchannel_stop_station(local); __set_bit(SCAN_OFF_CHANNEL, &local->scanning); @@ -604,28 +473,15 @@ static void ieee80211_scan_state_leave_oper_channel(struct ieee80211_local *loca static void ieee80211_scan_state_enter_oper_channel(struct ieee80211_local *local, unsigned long *next_delay) { - struct ieee80211_sub_if_data *sdata = local->scan_sdata; - /* switch back to the operating channel */ local->scan_channel = NULL; ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); /* - * notify the AP about us 
being back and restart all STA interfaces + * Only re-enable station mode interface now; beaconing will be + * re-enabled once the full scan has been completed. */ - mutex_lock(&local->iflist_mtx); - list_for_each_entry(sdata, &local->interfaces, list) { - if (!ieee80211_sdata_running(sdata)) - continue; - - /* Tell AP we're back */ - if (sdata->vif.type == NL80211_IFTYPE_STATION) { - if (sdata->u.mgd.associated) - ieee80211_scan_ps_disable(sdata); - netif_wake_queue(sdata->dev); - } - } - mutex_unlock(&local->iflist_mtx); + ieee80211_offchannel_return(local, false); __clear_bit(SCAN_OFF_CHANNEL, &local->scanning); -- cgit v1.2.2 From 9588bbd5529461a3dacd435bf239c84c3508f569 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 23 Dec 2009 13:15:41 +0100 Subject: cfg80211: add remain-on-channel command Add new commands for requesting the driver to remain awake on a specified channel for the specified amount of time (and another command to cancel such an operation). This can be used to implement userspace-controlled off-channel operations, like Public Action frame exchange on another channel than the operation channel. The off-channel operation should behave similarly to scan, i.e. the local station (if associated) moves into power save mode to request the AP to buffer frames for it and then moves to the other channel to allow the off-channel operation to be completed. The duration parameter can be used to request enough time to receive a response from the target station. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/chan.c | 41 ++++++---- net/wireless/core.h | 3 + net/wireless/mlme.c | 27 ++++++ net/wireless/nl80211.c | 218 ++++++++++++++++++++++++++++++++++++++++++++++++- net/wireless/nl80211.h | 11 +++ 5 files changed, 285 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/wireless/chan.c b/net/wireless/chan.c index a46ac6c9b365..bf1737fc9a7e 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -41,44 +41,57 @@ rdev_fixed_channel(struct cfg80211_registered_device *rdev, return result; } -int rdev_set_freq(struct cfg80211_registered_device *rdev, - struct wireless_dev *for_wdev, +struct ieee80211_channel * +rdev_freq_to_chan(struct cfg80211_registered_device *rdev, int freq, enum nl80211_channel_type channel_type) { struct ieee80211_channel *chan; struct ieee80211_sta_ht_cap *ht_cap; - int result; - - if (rdev_fixed_channel(rdev, for_wdev)) - return -EBUSY; - - if (!rdev->ops->set_channel) - return -EOPNOTSUPP; chan = ieee80211_get_channel(&rdev->wiphy, freq); /* Primary channel not allowed */ if (!chan || chan->flags & IEEE80211_CHAN_DISABLED) - return -EINVAL; + return NULL; if (channel_type == NL80211_CHAN_HT40MINUS && chan->flags & IEEE80211_CHAN_NO_HT40MINUS) - return -EINVAL; + return NULL; else if (channel_type == NL80211_CHAN_HT40PLUS && chan->flags & IEEE80211_CHAN_NO_HT40PLUS) - return -EINVAL; + return NULL; ht_cap = &rdev->wiphy.bands[chan->band]->ht_cap; if (channel_type != NL80211_CHAN_NO_HT) { if (!ht_cap->ht_supported) - return -EINVAL; + return NULL; if (!(ht_cap->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) || ht_cap->cap & IEEE80211_HT_CAP_40MHZ_INTOLERANT) - return -EINVAL; + return NULL; } + return chan; +} + +int rdev_set_freq(struct cfg80211_registered_device *rdev, + struct wireless_dev *for_wdev, + int freq, enum nl80211_channel_type channel_type) +{ + struct ieee80211_channel *chan; + int result; + + if (rdev_fixed_channel(rdev, for_wdev)) + return -EBUSY; + + if 
(!rdev->ops->set_channel) + return -EOPNOTSUPP; + + chan = rdev_freq_to_chan(rdev, freq, channel_type); + if (!chan) + return -EINVAL; + result = rdev->ops->set_channel(&rdev->wiphy, chan, channel_type); if (result) return result; diff --git a/net/wireless/core.h b/net/wireless/core.h index 35b712127143..30ec95f05b52 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -374,6 +374,9 @@ void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev); struct ieee80211_channel * rdev_fixed_channel(struct cfg80211_registered_device *rdev, struct wireless_dev *for_wdev); +struct ieee80211_channel * +rdev_freq_to_chan(struct cfg80211_registered_device *rdev, + int freq, enum nl80211_channel_type channel_type); int rdev_set_freq(struct cfg80211_registered_device *rdev, struct wireless_dev *for_wdev, int freq, enum nl80211_channel_type channel_type); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index acaeaa784d68..11f6469b3f98 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -680,3 +680,30 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, } } } + +void cfg80211_ready_on_channel(struct net_device *dev, u64 cookie, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + unsigned int duration, gfp_t gfp) +{ + struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + nl80211_send_remain_on_channel(rdev, dev, cookie, chan, channel_type, + duration, gfp); +} +EXPORT_SYMBOL(cfg80211_ready_on_channel); + +void cfg80211_remain_on_channel_expired(struct net_device *dev, + u64 cookie, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + gfp_t gfp) +{ + struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + nl80211_send_remain_on_channel_cancel(rdev, dev, cookie, chan, + channel_type, gfp); +} +EXPORT_SYMBOL(cfg80211_remain_on_channel_expired); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 60f854377f90..ff857f10cb85 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -141,6 +141,8 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_4ADDR] = { .type = NLA_U8 }, [NL80211_ATTR_PMKID] = { .type = NLA_BINARY, .len = WLAN_PMKID_LEN }, + [NL80211_ATTR_DURATION] = { .type = NLA_U32 }, + [NL80211_ATTR_COOKIE] = { .type = NLA_U64 }, }; /* policy for the attributes */ @@ -569,6 +571,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(set_pmksa, SET_PMKSA); CMD(del_pmksa, DEL_PMKSA); CMD(flush_pmksa, FLUSH_PMKSA); + CMD(remain_on_channel, REMAIN_ON_CHANNEL); if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { i++; NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS); @@ -4283,6 +4286,143 @@ static int nl80211_flush_pmksa(struct sk_buff *skb, struct genl_info *info) } +static int nl80211_remain_on_channel(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev; + struct net_device *dev; + struct ieee80211_channel *chan; + struct sk_buff *msg; + void *hdr; + u64 cookie; + enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; + u32 freq, duration; + int err; + + if (!info->attrs[NL80211_ATTR_WIPHY_FREQ] || + !info->attrs[NL80211_ATTR_DURATION]) + return -EINVAL; + + duration = nla_get_u32(info->attrs[NL80211_ATTR_DURATION]); + + /* + * We should be on that channel for at least one jiffie, + * and more than 5 seconds seems excessive. 
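
From a driver's point of view, the new API comes down to implementing the remain_on_channel/cancel_remain_on_channel ops and reporting progress with the two exported cfg80211 calls above. The following is a minimal sketch, not a reference implementation: my_dev_tune() and my_dev_schedule_timeout() are hypothetical driver hooks, and the cookie value is a placeholder.

/*
 * Editorial sketch (not part of the patch): shape of a driver-side
 * .remain_on_channel implementation against the new cfg80211 API.
 */
static int my_remain_on_channel(struct wiphy *wiphy, struct net_device *dev,
                                struct ieee80211_channel *chan,
                                enum nl80211_channel_type channel_type,
                                unsigned int duration, u64 *cookie)
{
        /* 1) leave the operating channel (driver specific, hypothetical) */
        /* my_dev_tune(chan, channel_type); */

        /* 2) hand userspace a cookie it can later use to cancel */
        *cookie = (unsigned long)chan; /* placeholder; use a per-request id */

        /* 3) tell cfg80211/userspace we are actually on the channel now */
        cfg80211_ready_on_channel(dev, *cookie, chan, channel_type,
                                  duration, GFP_KERNEL);

        /* 4) after 'duration' ms, report expiry (hypothetical timer hook):
         *    cfg80211_remain_on_channel_expired(dev, *cookie, chan,
         *                                       channel_type, GFP_KERNEL);
         */
        /* my_dev_schedule_timeout(duration); */
        return 0;
}

The cookie is opaque to cfg80211; it only needs to let the driver match a later cancel request. The mac80211 implementation in a following patch simply uses the address of its work item for this purpose.
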
+ */ + if (!duration || !msecs_to_jiffies(duration) || duration > 5000) + return -EINVAL; + + rtnl_lock(); + + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); + if (err) + goto unlock_rtnl; + + if (!rdev->ops->remain_on_channel) { + err = -EOPNOTSUPP; + goto out; + } + + if (!netif_running(dev)) { + err = -ENETDOWN; + goto out; + } + + if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) { + channel_type = nla_get_u32( + info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]); + if (channel_type != NL80211_CHAN_NO_HT && + channel_type != NL80211_CHAN_HT20 && + channel_type != NL80211_CHAN_HT40PLUS && + channel_type != NL80211_CHAN_HT40MINUS) + err = -EINVAL; + goto out; + } + + freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); + chan = rdev_freq_to_chan(rdev, freq, channel_type); + if (chan == NULL) { + err = -EINVAL; + goto out; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + err = -ENOMEM; + goto out; + } + + hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + NL80211_CMD_REMAIN_ON_CHANNEL); + + if (IS_ERR(hdr)) { + err = PTR_ERR(hdr); + goto free_msg; + } + + err = rdev->ops->remain_on_channel(&rdev->wiphy, dev, chan, + channel_type, duration, &cookie); + + if (err) + goto free_msg; + + NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie); + + genlmsg_end(msg, hdr); + err = genlmsg_reply(msg, info); + goto out; + + nla_put_failure: + err = -ENOBUFS; + free_msg: + nlmsg_free(msg); + out: + cfg80211_unlock_rdev(rdev); + dev_put(dev); + unlock_rtnl: + rtnl_unlock(); + return err; +} + +static int nl80211_cancel_remain_on_channel(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev; + struct net_device *dev; + u64 cookie; + int err; + + if (!info->attrs[NL80211_ATTR_COOKIE]) + return -EINVAL; + + rtnl_lock(); + + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); + if (err) + goto unlock_rtnl; + + if (!rdev->ops->cancel_remain_on_channel) { + err = -EOPNOTSUPP; + goto out; + } + + if (!netif_running(dev)) { + err = -ENETDOWN; + goto out; + } + + cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]); + + err = rdev->ops->cancel_remain_on_channel(&rdev->wiphy, dev, cookie); + + out: + cfg80211_unlock_rdev(rdev); + dev_put(dev); + unlock_rtnl: + rtnl_unlock(); + return err; +} + static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -4545,8 +4685,20 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, - + { + .cmd = NL80211_CMD_REMAIN_ON_CHANNEL, + .doit = nl80211_remain_on_channel, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, + .doit = nl80211_cancel_remain_on_channel, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, }; + static struct genl_multicast_group nl80211_mlme_mcgrp = { .name = "mlme", }; @@ -5134,6 +5286,70 @@ nla_put_failure: nlmsg_free(msg); } +static void nl80211_send_remain_on_chan_event( + int cmd, struct cfg80211_registered_device *rdev, + struct net_device *netdev, u64 cookie, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + unsigned int duration, gfp_t gfp) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, cmd); + if (!hdr) { + nlmsg_free(msg); + return; + } + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_FREQ, chan->center_freq); 
+ NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE, channel_type); + NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie); + + if (cmd == NL80211_CMD_REMAIN_ON_CHANNEL) + NLA_PUT_U32(msg, NL80211_ATTR_DURATION, duration); + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + +void nl80211_send_remain_on_channel(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u64 cookie, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + unsigned int duration, gfp_t gfp) +{ + nl80211_send_remain_on_chan_event(NL80211_CMD_REMAIN_ON_CHANNEL, + rdev, netdev, cookie, chan, + channel_type, duration, gfp); +} + +void nl80211_send_remain_on_channel_cancel( + struct cfg80211_registered_device *rdev, struct net_device *netdev, + u64 cookie, struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, gfp_t gfp) +{ + nl80211_send_remain_on_chan_event(NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, + rdev, netdev, cookie, chan, + channel_type, 0, gfp); +} + /* initialisation/exit functions */ int nl80211_init(void) diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 44cc2a76a1b0..a5e2de419b7a 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -59,4 +59,15 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *bssid, gfp_t gfp); +void nl80211_send_remain_on_channel(struct cfg80211_registered_device *rdev, + struct net_device *netdev, + u64 cookie, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + unsigned int duration, gfp_t gfp); +void nl80211_send_remain_on_channel_cancel( + struct cfg80211_registered_device *rdev, struct net_device *netdev, + u64 cookie, struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, gfp_t gfp); + #endif /* __NET_WIRELESS_NL80211_H */ -- cgit v1.2.2 From b8bc4b0aa9bfba755c64b11b8f60e6cfab25dc9d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 23 Dec 2009 13:15:42 +0100 Subject: mac80211: support remain-on-channel command This implements the new remain-on-channel cfg80211 command in mac80211, extending the work interface. Also change the work purge code to be able to clean up events properly (pretending they timed out.) Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/cfg.c | 24 ++++++++ net/mac80211/ieee80211_i.h | 15 +++++ net/mac80211/main.c | 3 + net/mac80211/mlme.c | 3 + net/mac80211/offchannel.c | 8 ++- net/mac80211/work.c | 134 +++++++++++++++++++++++++++++++++++++++++++-- 6 files changed, 181 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index ea862dfc08ed..2e5e841e9b7b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1441,6 +1441,28 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, return -EINVAL; } +static int ieee80211_remain_on_channel(struct wiphy *wiphy, + struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + unsigned int duration, + u64 *cookie) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + return ieee80211_wk_remain_on_channel(sdata, chan, channel_type, + duration, cookie); +} + +static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, + struct net_device *dev, + u64 cookie) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + return ieee80211_wk_cancel_remain_on_channel(sdata, cookie); +} + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -1487,4 +1509,6 @@ struct cfg80211_ops mac80211_config_ops = { CFG80211_TESTMODE_CMD(ieee80211_testmode_cmd) .set_power_mgmt = ieee80211_set_power_mgmt, .set_bitrate_mask = ieee80211_set_bitrate_mask, + .remain_on_channel = ieee80211_remain_on_channel, + .cancel_remain_on_channel = ieee80211_cancel_remain_on_channel, }; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index fd912eb5309e..23547ebacf3d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -225,9 +225,11 @@ struct mesh_preq_queue { }; enum ieee80211_work_type { + IEEE80211_WORK_ABORT, IEEE80211_WORK_DIRECT_PROBE, IEEE80211_WORK_AUTH, IEEE80211_WORK_ASSOC, + IEEE80211_WORK_REMAIN_ON_CHANNEL, }; /** @@ -283,6 +285,9 @@ struct ieee80211_work { u8 supp_rates_len; bool wmm_used, use_11n; } assoc; + struct { + unsigned long timeout; + } remain; }; int ie_len; @@ -729,6 +734,10 @@ struct ieee80211_local { enum nl80211_channel_type oper_channel_type; struct ieee80211_channel *oper_channel, *csa_channel; + /* Temporary remain-on-channel for off-channel operations */ + struct ieee80211_channel *tmp_channel; + enum nl80211_channel_type tmp_channel_type; + /* SNMP counters */ /* dot11CountersTable */ u32 dot11TransmittedFragmentCount; @@ -1162,6 +1171,12 @@ void free_work(struct ieee80211_work *wk); void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata); ieee80211_rx_result ieee80211_work_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); +int ieee80211_wk_remain_on_channel(struct ieee80211_sub_if_data *sdata, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + unsigned int duration, u64 *cookie); +int ieee80211_wk_cancel_remain_on_channel( + struct ieee80211_sub_if_data *sdata, u64 cookie); #ifdef CONFIG_MAC80211_NOINLINE #define debug_noinline noinline diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 5fcd3548417e..d0a14d953f08 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -107,6 +107,9 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) if (scan_chan) { chan = scan_chan; channel_type = NL80211_CHAN_NO_HT; + } else if (local->tmp_channel) { + chan = scan_chan = local->tmp_channel; + channel_type = 
local->tmp_channel_type; } else { chan = local->oper_channel; channel_type = local->oper_channel_type; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e44f1ed0b0da..32d6e6614f9f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -857,6 +857,9 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, if (sdata->local->scanning) return; + if (sdata->local->tmp_channel) + return; + mutex_lock(&ifmgd->mtx); if (!ifmgd->associated) diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 2cd880e444d1..a7bbfc40a648 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -106,9 +106,13 @@ void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local) /* * only handle non-STA interfaces here, STA interfaces * are handled in ieee80211_offchannel_stop_station(), - * e.g., from the background scan state machine + * e.g., from the background scan state machine. + * + * In addition, do not stop monitor interface to allow it to be + * used from user space controlled off-channel operations. */ - if (sdata->vif.type != NL80211_IFTYPE_STATION) + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_MONITOR) netif_stop_queue(sdata->dev); } mutex_unlock(&local->iflist_mtx); diff --git a/net/mac80211/work.c b/net/mac80211/work.c index 0b8c31c600aa..0acea7cf714a 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -538,6 +538,44 @@ ieee80211_associate(struct ieee80211_work *wk) return WORK_ACT_NONE; } +static enum work_action __must_check +ieee80211_remain_on_channel_timeout(struct ieee80211_work *wk) +{ + struct ieee80211_sub_if_data *sdata = wk->sdata; + struct ieee80211_local *local = sdata->local; + + /* + * First time we run, do nothing -- the generic code will + * have switched to the right channel etc. + */ + if (wk->timeout != wk->remain.timeout) { + wk->timeout = wk->remain.timeout; + return WORK_ACT_NONE; + } + + /* + * We are done serving the remain-on-channel command; kill the work + * item to allow idle state to be entered again. In addition, clear the + * temporary channel information to allow operational channel to be + * used. 
+ */ + list_del(&wk->list); + free_work(wk); + + if (local->tmp_channel) { + cfg80211_remain_on_channel_expired(sdata->dev, (u64)wk, + local->tmp_channel, + local->tmp_channel_type, + GFP_KERNEL); + + local->tmp_channel = NULL; + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); + ieee80211_offchannel_return(local, true); + } + + return WORK_ACT_NONE; +} + static void ieee80211_auth_challenge(struct ieee80211_work *wk, struct ieee80211_mgmt *mgmt, size_t len) @@ -825,6 +863,8 @@ static void ieee80211_work_work(struct work_struct *work) /* nothing */ rma = WORK_ACT_NONE; break; + case IEEE80211_WORK_ABORT: + rma = WORK_ACT_TIMEOUT; case IEEE80211_WORK_DIRECT_PROBE: rma = ieee80211_direct_probe(wk); break; @@ -834,6 +874,9 @@ static void ieee80211_work_work(struct work_struct *work) case IEEE80211_WORK_ASSOC: rma = ieee80211_associate(wk); break; + case IEEE80211_WORK_REMAIN_ON_CHANNEL: + rma = ieee80211_remain_on_channel_timeout(wk); + break; } switch (rma) { @@ -900,14 +943,25 @@ void ieee80211_work_init(struct ieee80211_local *local) void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; - struct ieee80211_work *wk, *tmp; + struct ieee80211_work *wk; mutex_lock(&local->work_mtx); - list_for_each_entry_safe(wk, tmp, &local->work_list, list) { + list_for_each_entry(wk, &local->work_list, list) { if (wk->sdata != sdata) continue; - list_del(&wk->list); - free_work(wk); + wk->type = IEEE80211_WORK_ABORT; + } + mutex_unlock(&local->work_mtx); + + /* run cleanups etc. */ + ieee80211_work_work(&local->work_work); + + mutex_lock(&local->work_mtx); + list_for_each_entry(wk, &local->work_list, list) { + if (wk->sdata != sdata) + continue; + WARN_ON(1); + break; } mutex_unlock(&local->work_mtx); } @@ -949,3 +1003,75 @@ ieee80211_rx_result ieee80211_work_rx_mgmt(struct ieee80211_sub_if_data *sdata, return RX_CONTINUE; } + +int ieee80211_wk_remain_on_channel(struct ieee80211_sub_if_data *sdata, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + unsigned int duration, u64 *cookie) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_work *wk; + + wk = kzalloc(sizeof(*wk), GFP_KERNEL); + if (!wk) + return -ENOMEM; + + wk->type = IEEE80211_WORK_REMAIN_ON_CHANNEL; + wk->chan = chan; + wk->sdata = sdata; + + wk->remain.timeout = jiffies + msecs_to_jiffies(duration); + + *cookie = (u64)wk; + + ieee80211_add_work(wk); + + /* + * TODO: could optimize this by leaving the station vifs in awake mode + * if they happen to be on the same channel as the requested channel + */ + ieee80211_offchannel_stop_beaconing(local); + ieee80211_offchannel_stop_station(local); + + sdata->local->tmp_channel = chan; + sdata->local->tmp_channel_type = channel_type; + ieee80211_hw_config(sdata->local, IEEE80211_CONF_CHANGE_CHANNEL); + + cfg80211_ready_on_channel(sdata->dev, (u64)wk, chan, channel_type, + duration, GFP_KERNEL); + + return 0; +} + +int ieee80211_wk_cancel_remain_on_channel(struct ieee80211_sub_if_data *sdata, + u64 cookie) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_work *wk, *tmp; + bool found = false; + + mutex_lock(&local->work_mtx); + list_for_each_entry_safe(wk, tmp, &local->work_list, list) { + if ((u64)wk == cookie) { + found = true; + list_del(&wk->list); + free_work(wk); + break; + } + } + mutex_unlock(&local->work_mtx); + + if (!found) + return -ENOENT; + + if (sdata->local->tmp_channel) { + sdata->local->tmp_channel = NULL; + ieee80211_hw_config(sdata->local, + 
IEEE80211_CONF_CHANGE_CHANNEL); + ieee80211_offchannel_return(sdata->local, true); + } + + ieee80211_recalc_idle(local); + + return 0; +} -- cgit v1.2.2 From e4da8c37af626001ff704fb29ea14eb58f5f7208 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 23 Dec 2009 13:15:43 +0100 Subject: mac80211: make off-channel work generic This changes mac80211 to allow being off-channel for any type of work, not just the 'remain-on-channel' work. This also helps fast transition to a BSS on a different channel. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 7 ++- net/mac80211/mlme.c | 14 +---- net/mac80211/work.c | 135 +++++++++++++++++++++++++++------------------ 3 files changed, 88 insertions(+), 68 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 23547ebacf3d..a27921ee6e63 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -255,13 +255,15 @@ struct ieee80211_work { struct sk_buff *skb); struct ieee80211_channel *chan; - /* XXX: chan type? -- right now not really needed */ + enum nl80211_channel_type chan_type; unsigned long timeout; enum ieee80211_work_type type; u8 filter_ta[ETH_ALEN]; + bool started; + union { struct { int tries; @@ -286,7 +288,8 @@ struct ieee80211_work { bool wmm_used, use_11n; } assoc; struct { - unsigned long timeout; + u32 duration; + bool started; } remain; }; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 32d6e6614f9f..72920ee07885 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1105,6 +1105,8 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk, else ieee80211_set_wmm_default(sdata); + local->oper_channel = wk->chan; + if (elems.ht_info_elem && elems.wmm_param && (sdata->local->hw.queues >= 4) && !(ifmgd->flags & IEEE80211_STA_DISABLE_11N)) @@ -1797,15 +1799,6 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, wk->sdata = sdata; wk->done = ieee80211_probe_auth_done; - /* - * XXX: if still associated need to tell AP that we're going - * to sleep and then change channel etc. - * For now switch channel here, later will be handled - * by submitting this as an off-channel work item. - */ - sdata->local->oper_channel = req->bss->channel; - ieee80211_hw_config(sdata->local, 0); - ieee80211_add_work(wk); return 0; } @@ -1929,9 +1922,6 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, else ifmgd->flags &= ~IEEE80211_STA_CONTROL_PORT; - sdata->local->oper_channel = req->bss->channel; - ieee80211_hw_config(sdata->local, 0); - ieee80211_add_work(wk); return 0; } diff --git a/net/mac80211/work.c b/net/mac80211/work.c index 0acea7cf714a..0bffb6a42534 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -541,39 +541,22 @@ ieee80211_associate(struct ieee80211_work *wk) static enum work_action __must_check ieee80211_remain_on_channel_timeout(struct ieee80211_work *wk) { - struct ieee80211_sub_if_data *sdata = wk->sdata; - struct ieee80211_local *local = sdata->local; - /* * First time we run, do nothing -- the generic code will * have switched to the right channel etc. */ - if (wk->timeout != wk->remain.timeout) { - wk->timeout = wk->remain.timeout; - return WORK_ACT_NONE; - } + if (!wk->remain.started) { + wk->remain.started = true; + wk->timeout = jiffies + msecs_to_jiffies(wk->remain.duration); - /* - * We are done serving the remain-on-channel command; kill the work - * item to allow idle state to be entered again. 
In addition, clear the - * temporary channel information to allow operational channel to be - * used. - */ - list_del(&wk->list); - free_work(wk); + cfg80211_ready_on_channel(wk->sdata->dev, (u64)wk, wk->chan, + wk->chan_type, wk->remain.duration, + GFP_KERNEL); - if (local->tmp_channel) { - cfg80211_remain_on_channel_expired(sdata->dev, (u64)wk, - local->tmp_channel, - local->tmp_channel_type, - GFP_KERNEL); - - local->tmp_channel = NULL; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); - ieee80211_offchannel_return(local, true); + return WORK_ACT_NONE; } - return WORK_ACT_NONE; + return WORK_ACT_TIMEOUT; } static void ieee80211_auth_challenge(struct ieee80211_work *wk, @@ -799,7 +782,7 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local, break; case WORK_DONE_REQUEUE: synchronize_rcu(); - wk->timeout = jiffies; /* run again directly */ + wk->started = false; /* restart */ mutex_lock(&local->work_mtx); list_add_tail(&wk->list, &local->work_list); mutex_unlock(&local->work_mtx); @@ -827,6 +810,7 @@ static void ieee80211_work_work(struct work_struct *work) struct ieee80211_work *wk, *tmp; LIST_HEAD(free_work); enum work_action rma; + bool remain_off_channel = false; if (local->scanning) return; @@ -847,6 +831,34 @@ static void ieee80211_work_work(struct work_struct *work) mutex_lock(&local->work_mtx); list_for_each_entry_safe(wk, tmp, &local->work_list, list) { + /* mark work as started if it's on the current off-channel */ + if (!wk->started && local->tmp_channel && + wk->chan == local->tmp_channel && + wk->chan_type == local->tmp_channel_type) { + wk->started = true; + } + + if (!wk->started && !local->tmp_channel) { + /* + * TODO: could optimize this by leaving the + * station vifs in awake mode if they + * happen to be on the same channel as + * the requested channel + */ + ieee80211_offchannel_stop_beaconing(local); + ieee80211_offchannel_stop_station(local); + + local->tmp_channel = wk->chan; + local->tmp_channel_type = wk->chan_type; + ieee80211_hw_config(local, 0); + wk->started = true; + wk->timeout = jiffies; + } + + /* don't try to work with items that aren't started */ + if (!wk->started) + continue; + if (time_is_after_jiffies(wk->timeout)) { /* * This work item isn't supposed to be worked on @@ -881,7 +893,8 @@ static void ieee80211_work_work(struct work_struct *work) switch (rma) { case WORK_ACT_NONE: - /* no action required */ + /* might have changed the timeout */ + run_again(local, wk->timeout); break; case WORK_ACT_TIMEOUT: list_del_rcu(&wk->list); @@ -893,6 +906,24 @@ static void ieee80211_work_work(struct work_struct *work) } } + list_for_each_entry(wk, &local->work_list, list) { + if (!wk->started) + continue; + if (wk->chan != local->tmp_channel) + continue; + if (wk->chan_type != local->tmp_channel_type) + continue; + remain_off_channel = true; + } + + if (!remain_off_channel && local->tmp_channel) { + local->tmp_channel = NULL; + ieee80211_hw_config(local, 0); + ieee80211_offchannel_return(local, true); + /* give connection some time to breathe */ + run_again(local, jiffies + HZ/2); + } + if (list_empty(&local->work_list) && local->scan_req) ieee80211_queue_delayed_work(&local->hw, &local->scan_work, @@ -900,6 +931,8 @@ static void ieee80211_work_work(struct work_struct *work) mutex_unlock(&local->work_mtx); + ieee80211_recalc_idle(local); + list_for_each_entry_safe(wk, tmp, &free_work, list) { wk->done(wk, NULL); list_del(&wk->list); @@ -920,7 +953,7 @@ void ieee80211_add_work(struct ieee80211_work *wk) if (WARN_ON(!wk->done)) 
return; - wk->timeout = jiffies; + wk->started = false; local = wk->sdata->local; mutex_lock(&local->work_mtx); @@ -950,6 +983,8 @@ void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata) if (wk->sdata != sdata) continue; wk->type = IEEE80211_WORK_ABORT; + wk->started = true; + wk->timeout = jiffies; } mutex_unlock(&local->work_mtx); @@ -1004,12 +1039,24 @@ ieee80211_rx_result ieee80211_work_rx_mgmt(struct ieee80211_sub_if_data *sdata, return RX_CONTINUE; } +static enum work_done_result ieee80211_remain_done(struct ieee80211_work *wk, + struct sk_buff *skb) +{ + /* + * We are done serving the remain-on-channel command. + */ + cfg80211_remain_on_channel_expired(wk->sdata->dev, (u64)wk, + wk->chan, wk->chan_type, + GFP_KERNEL); + + return WORK_DONE_DESTROY; +} + int ieee80211_wk_remain_on_channel(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, unsigned int duration, u64 *cookie) { - struct ieee80211_local *local = sdata->local; struct ieee80211_work *wk; wk = kzalloc(sizeof(*wk), GFP_KERNEL); @@ -1018,28 +1065,16 @@ int ieee80211_wk_remain_on_channel(struct ieee80211_sub_if_data *sdata, wk->type = IEEE80211_WORK_REMAIN_ON_CHANNEL; wk->chan = chan; + wk->chan_type = channel_type; wk->sdata = sdata; + wk->done = ieee80211_remain_done; - wk->remain.timeout = jiffies + msecs_to_jiffies(duration); + wk->remain.duration = duration; *cookie = (u64)wk; ieee80211_add_work(wk); - /* - * TODO: could optimize this by leaving the station vifs in awake mode - * if they happen to be on the same channel as the requested channel - */ - ieee80211_offchannel_stop_beaconing(local); - ieee80211_offchannel_stop_station(local); - - sdata->local->tmp_channel = chan; - sdata->local->tmp_channel_type = channel_type; - ieee80211_hw_config(sdata->local, IEEE80211_CONF_CHANGE_CHANNEL); - - cfg80211_ready_on_channel(sdata->dev, (u64)wk, chan, channel_type, - duration, GFP_KERNEL); - return 0; } @@ -1053,9 +1088,8 @@ int ieee80211_wk_cancel_remain_on_channel(struct ieee80211_sub_if_data *sdata, mutex_lock(&local->work_mtx); list_for_each_entry_safe(wk, tmp, &local->work_list, list) { if ((u64)wk == cookie) { + wk->timeout = jiffies; found = true; - list_del(&wk->list); - free_work(wk); break; } } @@ -1064,14 +1098,7 @@ int ieee80211_wk_cancel_remain_on_channel(struct ieee80211_sub_if_data *sdata, if (!found) return -ENOENT; - if (sdata->local->tmp_channel) { - sdata->local->tmp_channel = NULL; - ieee80211_hw_config(sdata->local, - IEEE80211_CONF_CHANGE_CHANNEL); - ieee80211_offchannel_return(sdata->local, true); - } - - ieee80211_recalc_idle(local); + ieee80211_queue_work(&local->hw, &local->work_work); return 0; } -- cgit v1.2.2 From 98b6218388e345064c3f2d3c161383a18274c638 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 23 Dec 2009 13:15:44 +0100 Subject: mac80211/cfg80211: add station events When, for instance, a new IBSS peer is found, userspace wants to be notified. Add events for all new stations that mac80211 learns about. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/sta_info.c | 5 +++++ net/wireless/mlme.c | 10 ++++++++++ net/wireless/nl80211.c | 21 ++++++++++++++++++++- net/wireless/nl80211.h | 4 ++++ 4 files changed, 39 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index f039e761aec1..47da552ce8a6 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -359,6 +359,7 @@ int sta_info_insert(struct sta_info *sta) { struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; + struct station_info sinfo; unsigned long flags; int err = 0; @@ -408,6 +409,10 @@ int sta_info_insert(struct sta_info *sta) spin_unlock_irqrestore(&local->sta_lock, flags); + sinfo.filled = 0; + sinfo.generation = local->sta_generation; + cfg80211_new_sta(sdata->dev, sta->sta.addr, &sinfo, GFP_ATOMIC); + #ifdef CONFIG_MAC80211_DEBUGFS /* * Debugfs entry adding might sleep, so schedule process diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 11f6469b3f98..3dba19e17271 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -707,3 +707,13 @@ void cfg80211_remain_on_channel_expired(struct net_device *dev, channel_type, gfp); } EXPORT_SYMBOL(cfg80211_remain_on_channel_expired); + +void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, + struct station_info *sinfo, gfp_t gfp) +{ + struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + nl80211_send_sta_event(rdev, dev, mac_addr, sinfo, gfp); +} +EXPORT_SYMBOL(cfg80211_new_sta); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ff857f10cb85..e3bee3cecdfa 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1642,7 +1642,7 @@ static int parse_station_flags(struct genl_info *info, static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct net_device *dev, - u8 *mac_addr, struct station_info *sinfo) + const u8 *mac_addr, struct station_info *sinfo) { void *hdr; struct nlattr *sinfoattr, *txrate; @@ -5350,6 +5350,25 @@ void nl80211_send_remain_on_channel_cancel( channel_type, 0, gfp); } +void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *mac_addr, + struct station_info *sinfo, gfp_t gfp) +{ + struct sk_buff *msg; + + msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + if (!msg) + return; + + if (nl80211_send_station(msg, 0, 0, 0, dev, mac_addr, sinfo) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); +} + /* initialisation/exit functions */ int nl80211_init(void) diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index a5e2de419b7a..14855b8fb430 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -70,4 +70,8 @@ void nl80211_send_remain_on_channel_cancel( u64 cookie, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, gfp_t gfp); +void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *mac_addr, + struct station_info *sinfo, gfp_t gfp); + #endif /* __NET_WIRELESS_NL80211_H */ -- cgit v1.2.2 From 1ed32e4fc8cfc9656cc1101e7f9617d485fcbe7b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 23 Dec 2009 13:15:45 +0100 Subject: mac80211: remove struct ieee80211_if_init_conf All its members (vif, mac_addr, type) are now available in the vif struct directly, so we can pass that instead of the conf struct. 
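For illustration, the driver-side effect of the conversion looks roughly like this (hypothetical foo_ names and helper, not taken from any in-tree driver; the "old" variant naturally only compiles against pre-patch headers):

#include <net/mac80211.h>

/* hypothetical driver helper, defined only to keep the fragment complete */
static int foo_vif_setup(void *priv, struct ieee80211_vif *vif,
			 enum nl80211_iftype type, const void *addr)
{
	/* would program the hardware with the new interface */
	return 0;
}

/* before: the data arrives wrapped in the init-conf struct */
static int foo_add_interface_old(struct ieee80211_hw *hw,
				 struct ieee80211_if_init_conf *conf)
{
	return foo_vif_setup(hw->priv, conf->vif, conf->type, conf->mac_addr);
}

/* after: the same fields are read straight from the vif */
static int foo_add_interface(struct ieee80211_hw *hw,
			     struct ieee80211_vif *vif)
{
	return foo_vif_setup(hw->priv, vif, vif->type, vif->addr);
}
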
I generated this patch (except the mac80211 and header file changes) with this semantic patch: @@ identifier conf, fn, hw; type tp; @@ tp fn(struct ieee80211_hw *hw, -struct ieee80211_if_init_conf *conf) +struct ieee80211_vif *vif) { <... ( -conf->type +vif->type | -conf->mac_addr +vif->addr | -conf->vif +vif ) ...> } Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/driver-ops.h | 12 ++++++------ net/mac80211/iface.c | 14 +++----------- net/mac80211/pm.c | 6 +----- net/mac80211/util.c | 9 ++------- 4 files changed, 12 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index cbe133bcdf34..bc7c8f55487a 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -36,18 +36,18 @@ static inline void drv_stop(struct ieee80211_local *local) } static inline int drv_add_interface(struct ieee80211_local *local, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { - int ret = local->ops->add_interface(&local->hw, conf); - trace_drv_add_interface(local, vif_to_sdata(conf->vif), ret); + int ret = local->ops->add_interface(&local->hw, vif); + trace_drv_add_interface(local, vif_to_sdata(vif), ret); return ret; } static inline void drv_remove_interface(struct ieee80211_local *local, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { - local->ops->remove_interface(&local->hw, conf); - trace_drv_remove_interface(local, vif_to_sdata(conf->vif)); + local->ops->remove_interface(&local->hw, vif); + trace_drv_remove_interface(local, vif_to_sdata(vif)); } static inline int drv_config(struct ieee80211_local *local, u32 changed) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 7d410f15281a..00a1f4ccdaf1 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -96,7 +96,6 @@ static int ieee80211_open(struct net_device *dev) struct ieee80211_sub_if_data *nsdata; struct ieee80211_local *local = sdata->local; struct sta_info *sta; - struct ieee80211_if_init_conf conf; u32 changed = 0; int res; u32 hw_reconf_flags = 0; @@ -248,10 +247,7 @@ static int ieee80211_open(struct net_device *dev) ieee80211_configure_filter(local); break; default: - conf.vif = &sdata->vif; - conf.type = sdata->vif.type; - conf.mac_addr = sdata->vif.addr; - res = drv_add_interface(local, &conf); + res = drv_add_interface(local, &sdata->vif); if (res) goto err_stop; @@ -334,7 +330,7 @@ static int ieee80211_open(struct net_device *dev) return 0; err_del_interface: - drv_remove_interface(local, &conf); + drv_remove_interface(local, &sdata->vif); err_stop: if (!local->open_count) drv_stop(local); @@ -349,7 +345,6 @@ static int ieee80211_stop(struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; - struct ieee80211_if_init_conf conf; struct sta_info *sta; unsigned long flags; struct sk_buff *skb, *tmp; @@ -533,12 +528,9 @@ static int ieee80211_stop(struct net_device *dev) BSS_CHANGED_BEACON_ENABLED); } - conf.vif = &sdata->vif; - conf.type = sdata->vif.type; - conf.mac_addr = sdata->vif.addr; /* disable all keys for as long as this netdev is down */ ieee80211_disable_keys(sdata); - drv_remove_interface(local, &conf); + drv_remove_interface(local, &sdata->vif); } sdata->bss = NULL; diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 913dc7e3b29e..47f818959ad7 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -10,7 +10,6 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) { struct 
ieee80211_local *local = hw_to_local(hw); struct ieee80211_sub_if_data *sdata; - struct ieee80211_if_init_conf conf; struct sta_info *sta; unsigned long flags; @@ -100,10 +99,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); - conf.vif = &sdata->vif; - conf.type = sdata->vif.type; - conf.mac_addr = sdata->vif.addr; - drv_remove_interface(local, &conf); + drv_remove_interface(local, &sdata->vif); } /* stop hardware - this must stop RX */ diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 1fdb80ff9241..4b930308b1fb 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1075,7 +1075,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) { struct ieee80211_hw *hw = &local->hw; struct ieee80211_sub_if_data *sdata; - struct ieee80211_if_init_conf conf; struct sta_info *sta; unsigned long flags; int res; @@ -1094,12 +1093,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) list_for_each_entry(sdata, &local->interfaces, list) { if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN && sdata->vif.type != NL80211_IFTYPE_MONITOR && - ieee80211_sdata_running(sdata)) { - conf.vif = &sdata->vif; - conf.type = sdata->vif.type; - conf.mac_addr = sdata->vif.addr; - res = drv_add_interface(local, &conf); - } + ieee80211_sdata_running(sdata)) + res = drv_add_interface(local, &sdata->vif); } /* add STAs back */ -- cgit v1.2.2 From 095d5ef608b58ece49f4131925700d27314ecdd8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 23 Dec 2009 13:15:46 +0100 Subject: mac80211: remove requeue from work There's no need to be requeueing the work struct since we check for the scan after removing items due to possible timeouts. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/work.c | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'net') diff --git a/net/mac80211/work.c b/net/mac80211/work.c index 0bffb6a42534..ea89ed70734d 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -435,12 +435,6 @@ ieee80211_direct_probe(struct ieee80211_work *wk) */ ieee80211_remove_auth_bss(local, wk); - /* - * We might have a pending scan which had no chance to run yet - * due to work needing to be done. Hence, queue the STAs work - * again for that. - */ - ieee80211_queue_work(&local->hw, &local->work_work); return WORK_ACT_TIMEOUT; } @@ -478,12 +472,6 @@ ieee80211_authenticate(struct ieee80211_work *wk) */ ieee80211_remove_auth_bss(local, wk); - /* - * We might have a pending scan which had no chance to run yet - * due to work needing to be done. Hence, queue the STAs work - * again for that. - */ - ieee80211_queue_work(&local->hw, &local->work_work); return WORK_ACT_TIMEOUT; } @@ -519,12 +507,6 @@ ieee80211_associate(struct ieee80211_work *wk) if (wk->assoc.bss) cfg80211_unlink_bss(local->hw.wiphy, wk->assoc.bss); - /* - * We might have a pending scan which had no chance to run yet - * due to work needing to be done. Hence, queue the STAs work - * again for that. - */ - ieee80211_queue_work(&local->hw, &local->work_work); return WORK_ACT_TIMEOUT; } -- cgit v1.2.2 From e1781ed33a8809c58ad6c3b6d432d656446efa43 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Wed, 23 Dec 2009 13:15:47 +0100 Subject: mac80211: annotate sleeping driver ops To make it easier to notice cases of calling sleeping ops in atomic context, annotate driver-ops.h with appropiate might_sleep() calls. At the same time, also document in mac80211.h the op functions with missing contexts. 
mac80211 doesn't seem to use get_tx_stats anywhere currently. Just to be on the safe side, I documented it to be atomic, but hopefully the op can be removed in the future. Compile-tested only. Signed-off-by: Kalle Valo Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/driver-ops.h | 60 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index bc7c8f55487a..8757ea73d544 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -14,6 +14,8 @@ static inline int drv_start(struct ieee80211_local *local) { int ret; + might_sleep(); + local->started = true; smp_mb(); ret = local->ops->start(&local->hw); @@ -23,6 +25,8 @@ static inline int drv_start(struct ieee80211_local *local) static inline void drv_stop(struct ieee80211_local *local) { + might_sleep(); + local->ops->stop(&local->hw); trace_drv_stop(local); @@ -38,7 +42,11 @@ static inline void drv_stop(struct ieee80211_local *local) static inline int drv_add_interface(struct ieee80211_local *local, struct ieee80211_vif *vif) { - int ret = local->ops->add_interface(&local->hw, vif); + int ret; + + might_sleep(); + + ret = local->ops->add_interface(&local->hw, vif); trace_drv_add_interface(local, vif_to_sdata(vif), ret); return ret; } @@ -46,13 +54,19 @@ static inline int drv_add_interface(struct ieee80211_local *local, static inline void drv_remove_interface(struct ieee80211_local *local, struct ieee80211_vif *vif) { + might_sleep(); + local->ops->remove_interface(&local->hw, vif); trace_drv_remove_interface(local, vif_to_sdata(vif)); } static inline int drv_config(struct ieee80211_local *local, u32 changed) { - int ret = local->ops->config(&local->hw, changed); + int ret; + + might_sleep(); + + ret = local->ops->config(&local->hw, changed); trace_drv_config(local, changed, ret); return ret; } @@ -62,6 +76,8 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local, struct ieee80211_bss_conf *info, u32 changed) { + might_sleep(); + if (local->ops->bss_info_changed) local->ops->bss_info_changed(&local->hw, &sdata->vif, info, changed); trace_drv_bss_info_changed(local, sdata, info, changed); @@ -111,7 +127,11 @@ static inline int drv_set_key(struct ieee80211_local *local, struct ieee80211_sta *sta, struct ieee80211_key_conf *key) { - int ret = local->ops->set_key(&local->hw, cmd, &sdata->vif, sta, key); + int ret; + + might_sleep(); + + ret = local->ops->set_key(&local->hw, cmd, &sdata->vif, sta, key); trace_drv_set_key(local, cmd, sdata, sta, key, ret); return ret; } @@ -121,6 +141,8 @@ static inline void drv_update_tkip_key(struct ieee80211_local *local, const u8 *address, u32 iv32, u16 *phase1key) { + might_sleep(); + if (local->ops->update_tkip_key) local->ops->update_tkip_key(&local->hw, conf, address, iv32, phase1key); @@ -130,13 +152,19 @@ static inline void drv_update_tkip_key(struct ieee80211_local *local, static inline int drv_hw_scan(struct ieee80211_local *local, struct cfg80211_scan_request *req) { - int ret = local->ops->hw_scan(&local->hw, req); + int ret; + + might_sleep(); + + ret = local->ops->hw_scan(&local->hw, req); trace_drv_hw_scan(local, req, ret); return ret; } static inline void drv_sw_scan_start(struct ieee80211_local *local) { + might_sleep(); + if (local->ops->sw_scan_start) local->ops->sw_scan_start(&local->hw); trace_drv_sw_scan_start(local); @@ -144,6 +172,8 @@ static inline void drv_sw_scan_start(struct ieee80211_local *local) 
static inline void drv_sw_scan_complete(struct ieee80211_local *local) { + might_sleep(); + if (local->ops->sw_scan_complete) local->ops->sw_scan_complete(&local->hw); trace_drv_sw_scan_complete(local); @@ -154,6 +184,8 @@ static inline int drv_get_stats(struct ieee80211_local *local, { int ret = -EOPNOTSUPP; + might_sleep(); + if (local->ops->get_stats) ret = local->ops->get_stats(&local->hw, stats); trace_drv_get_stats(local, stats, ret); @@ -173,6 +205,9 @@ static inline int drv_set_rts_threshold(struct ieee80211_local *local, u32 value) { int ret = 0; + + might_sleep(); + if (local->ops->set_rts_threshold) ret = local->ops->set_rts_threshold(&local->hw, value); trace_drv_set_rts_threshold(local, value, ret); @@ -193,6 +228,9 @@ static inline int drv_conf_tx(struct ieee80211_local *local, u16 queue, const struct ieee80211_tx_queue_params *params) { int ret = -EOPNOTSUPP; + + might_sleep(); + if (local->ops->conf_tx) ret = local->ops->conf_tx(&local->hw, queue, params); trace_drv_conf_tx(local, queue, params, ret); @@ -210,6 +248,9 @@ static inline int drv_get_tx_stats(struct ieee80211_local *local, static inline u64 drv_get_tsf(struct ieee80211_local *local) { u64 ret = -1ULL; + + might_sleep(); + if (local->ops->get_tsf) ret = local->ops->get_tsf(&local->hw); trace_drv_get_tsf(local, ret); @@ -218,6 +259,8 @@ static inline u64 drv_get_tsf(struct ieee80211_local *local) static inline void drv_set_tsf(struct ieee80211_local *local, u64 tsf) { + might_sleep(); + if (local->ops->set_tsf) local->ops->set_tsf(&local->hw, tsf); trace_drv_set_tsf(local, tsf); @@ -225,6 +268,8 @@ static inline void drv_set_tsf(struct ieee80211_local *local, u64 tsf) static inline void drv_reset_tsf(struct ieee80211_local *local) { + might_sleep(); + if (local->ops->reset_tsf) local->ops->reset_tsf(&local->hw); trace_drv_reset_tsf(local); @@ -233,6 +278,9 @@ static inline void drv_reset_tsf(struct ieee80211_local *local) static inline int drv_tx_last_beacon(struct ieee80211_local *local) { int ret = 1; + + might_sleep(); + if (local->ops->tx_last_beacon) ret = local->ops->tx_last_beacon(&local->hw); trace_drv_tx_last_beacon(local, ret); @@ -256,12 +304,16 @@ static inline int drv_ampdu_action(struct ieee80211_local *local, static inline void drv_rfkill_poll(struct ieee80211_local *local) { + might_sleep(); + if (local->ops->rfkill_poll) local->ops->rfkill_poll(&local->hw); } static inline void drv_flush(struct ieee80211_local *local, bool drop) { + might_sleep(); + trace_drv_flush(local, drop); if (local->ops->flush) local->ops->flush(&local->hw, drop); -- cgit v1.2.2 From 1f3c8804acba841b5573b953f5560d2683d2db0d Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Mon, 14 Dec 2009 10:48:58 +0000 Subject: bonding: allow arp_ip_targets on separate vlans to use arp validation This allows a bond device to specify an arp_ip_target as a host that is not on the same vlan as the base bond device and still use arp validation. 
A configuration like this, now works: BONDING_OPTS="mode=active-backup arp_interval=1000 arp_ip_target=10.0.100.1 arp_validate=3" 1: lo: mtu 16436 qdisc noqueue link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 inet 127.0.0.1/8 scope host lo inet6 ::1/128 scope host valid_lft forever preferred_lft forever 2: eth1: mtu 1500 qdisc pfifo_fast master bond0 qlen 1000 link/ether 00:13:21:be:33:e9 brd ff:ff:ff:ff:ff:ff 3: eth0: mtu 1500 qdisc pfifo_fast master bond0 qlen 1000 link/ether 00:13:21:be:33:e9 brd ff:ff:ff:ff:ff:ff 8: bond0: mtu 1500 qdisc noqueue link/ether 00:13:21:be:33:e9 brd ff:ff:ff:ff:ff:ff inet6 fe80::213:21ff:febe:33e9/64 scope link valid_lft forever preferred_lft forever 9: bond0.100@bond0: mtu 1500 qdisc noqueue link/ether 00:13:21:be:33:e9 brd ff:ff:ff:ff:ff:ff inet 10.0.100.2/24 brd 10.0.100.255 scope global bond0.100 inet6 fe80::213:21ff:febe:33e9/64 scope link valid_lft forever preferred_lft forever Ethernet Channel Bonding Driver: v3.6.0 (September 26, 2009) Bonding Mode: fault-tolerance (active-backup) Primary Slave: None Currently Active Slave: eth1 MII Status: up MII Polling Interval (ms): 0 Up Delay (ms): 0 Down Delay (ms): 0 ARP Polling Interval (ms): 1000 ARP IP target/s (n.n.n.n form): 10.0.100.1 Slave Interface: eth1 MII Status: up Link Failure Count: 1 Permanent HW addr: 00:40:05:30:ff:30 Slave Interface: eth0 MII Status: up Link Failure Count: 0 Permanent HW addr: 00:13:21:be:33:e9 Signed-off-by: Andy Gospodarek Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller --- net/8021q/vlan_core.c | 2 ++ net/core/dev.c | 20 +++++++++++++++++--- 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index e75a2f3b10af..c0316e0ca6e8 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -14,6 +14,7 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, if (skb_bond_should_drop(skb)) goto drop; + skb->skb_iif = skb->dev->ifindex; __vlan_hwaccel_put_tag(skb, vlan_tci); skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK); @@ -85,6 +86,7 @@ vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp, if (skb_bond_should_drop(skb)) goto drop; + skb->skb_iif = skb->dev->ifindex; __vlan_hwaccel_put_tag(skb, vlan_tci); skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK); diff --git a/net/core/dev.c b/net/core/dev.c index a8d68cdedbbe..f9aa699ab6cb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2495,12 +2495,26 @@ ncls: if (!skb) goto out; + /* + * Make sure frames received on VLAN interfaces stacked on + * bonding interfaces still make their way to any base bonding + * device that may have registered for a specific ptype. The + * handler may have to adjust skb->dev and orig_dev. + * + * null_or_orig can be overloaded since it will not be set when + * using VLANs on top of bonding. Putting it here prevents + * disturbing the ptype_all handlers above. 
+ */ + if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) && + (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) { + null_or_orig = vlan_dev_real_dev(skb->dev); + } + type = skb->protocol; list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { - if (ptype->type == type && - (ptype->dev == null_or_orig || ptype->dev == skb->dev || - ptype->dev == orig_dev)) { + if (ptype->type == type && (ptype->dev == null_or_orig || + ptype->dev == skb->dev || ptype->dev == orig_dev)) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; -- cgit v1.2.2 From 20dd3850bcf860561496827b711fa10fecf6e787 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 25 Dec 2009 06:47:47 +0000 Subject: can: Speed up CAN frame receiption by using ml_priv this patch removes the hlist that contains the CAN receiver filter lists. It uses the 'midlayer private' pointer ml_priv and links the filters directly to the CAN netdevice, which allows to omit the walk through the complete CAN devices hlist for each received CAN frame. This patch is tested and does not remove any locking. Signed-off-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/can/af_can.c | 118 ++++++++++++++++--------------------------------------- net/can/af_can.h | 4 +- net/can/proc.c | 93 ++++++++++++++++++++++++++++--------------- 3 files changed, 96 insertions(+), 119 deletions(-) (limited to 'net') diff --git a/net/can/af_can.c b/net/can/af_can.c index 51adc4c2b860..bc18b084ffdb 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -77,8 +77,8 @@ static int stats_timer __read_mostly = 1; module_param(stats_timer, int, S_IRUGO); MODULE_PARM_DESC(stats_timer, "enable timer for statistics (default:on)"); -HLIST_HEAD(can_rx_dev_list); -static struct dev_rcv_lists can_rx_alldev_list; +/* receive filters subscribed for 'all' CAN devices */ +struct dev_rcv_lists can_rx_alldev_list; static DEFINE_SPINLOCK(can_rcvlists_lock); static struct kmem_cache *rcv_cache __read_mostly; @@ -292,28 +292,10 @@ EXPORT_SYMBOL(can_send); static struct dev_rcv_lists *find_dev_rcv_lists(struct net_device *dev) { - struct dev_rcv_lists *d = NULL; - struct hlist_node *n; - - /* - * find receive list for this device - * - * The hlist_for_each_entry*() macros curse through the list - * using the pointer variable n and set d to the containing - * struct in each list iteration. Therefore, after list - * iteration, d is unmodified when the list is empty, and it - * points to last list element, when the list is non-empty - * but no match in the loop body is found. I.e. d is *not* - * NULL when no match is found. We can, however, use the - * cursor variable n to decide if a match was found. - */ - - hlist_for_each_entry_rcu(d, n, &can_rx_dev_list, list) { - if (d->dev == dev) - break; - } - - return n ? 
d : NULL; + if (!dev) + return &can_rx_alldev_list; + else + return (struct dev_rcv_lists *)dev->ml_priv; } /** @@ -467,16 +449,6 @@ int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, } EXPORT_SYMBOL(can_rx_register); -/* - * can_rx_delete_device - rcu callback for dev_rcv_lists structure removal - */ -static void can_rx_delete_device(struct rcu_head *rp) -{ - struct dev_rcv_lists *d = container_of(rp, struct dev_rcv_lists, rcu); - - kfree(d); -} - /* * can_rx_delete_receiver - rcu callback for single receiver entry removal */ @@ -541,7 +513,6 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, "dev %s, id %03X, mask %03X\n", DNAME(dev), can_id, mask); r = NULL; - d = NULL; goto out; } @@ -552,10 +523,10 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, can_pstats.rcv_entries--; /* remove device structure requested by NETDEV_UNREGISTER */ - if (d->remove_on_zero_entries && !d->entries) - hlist_del_rcu(&d->list); - else - d = NULL; + if (d->remove_on_zero_entries && !d->entries) { + kfree(d); + dev->ml_priv = NULL; + } out: spin_unlock(&can_rcvlists_lock); @@ -563,10 +534,6 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, /* schedule the receiver item for deletion */ if (r) call_rcu(&r->rcu, can_rx_delete_receiver); - - /* schedule the device structure for deletion */ - if (d) - call_rcu(&d->rcu, can_rx_delete_device); } EXPORT_SYMBOL(can_rx_unregister); @@ -780,48 +747,35 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg, case NETDEV_REGISTER: - /* - * create new dev_rcv_lists for this device - * - * N.B. zeroing the struct is the correct initialization - * for the embedded hlist_head structs. - * Another list type, e.g. list_head, would require - * explicit initialization. - */ - + /* create new dev_rcv_lists for this device */ d = kzalloc(sizeof(*d), GFP_KERNEL); if (!d) { printk(KERN_ERR "can: allocation of receive list failed\n"); return NOTIFY_DONE; } - d->dev = dev; - - spin_lock(&can_rcvlists_lock); - hlist_add_head_rcu(&d->list, &can_rx_dev_list); - spin_unlock(&can_rcvlists_lock); + BUG_ON(dev->ml_priv); + dev->ml_priv = d; break; case NETDEV_UNREGISTER: spin_lock(&can_rcvlists_lock); - d = find_dev_rcv_lists(dev); + d = dev->ml_priv; if (d) { - if (d->entries) { + if (d->entries) d->remove_on_zero_entries = 1; - d = NULL; - } else - hlist_del_rcu(&d->list); + else { + kfree(d); + dev->ml_priv = NULL; + } } else printk(KERN_ERR "can: notifier: receive list not " "found for dev %s\n", dev->name); spin_unlock(&can_rcvlists_lock); - if (d) - call_rcu(&d->rcu, can_rx_delete_device); - break; } @@ -853,21 +807,13 @@ static __init int can_init(void) { printk(banner); + memset(&can_rx_alldev_list, 0, sizeof(can_rx_alldev_list)); + rcv_cache = kmem_cache_create("can_receiver", sizeof(struct receiver), 0, 0, NULL); if (!rcv_cache) return -ENOMEM; - /* - * Insert can_rx_alldev_list for reception on all devices. - * This struct is zero initialized which is correct for the - * embedded hlist heads, the dev pointer, and the entries counter. 
- */ - - spin_lock(&can_rcvlists_lock); - hlist_add_head_rcu(&can_rx_alldev_list.list, &can_rx_dev_list); - spin_unlock(&can_rcvlists_lock); - if (stats_timer) { /* the statistics are updated every second (timer triggered) */ setup_timer(&can_stattimer, can_stat_update, 0); @@ -887,8 +833,7 @@ static __init int can_init(void) static __exit void can_exit(void) { - struct dev_rcv_lists *d; - struct hlist_node *n, *next; + struct net_device *dev; if (stats_timer) del_timer(&can_stattimer); @@ -900,14 +845,19 @@ static __exit void can_exit(void) unregister_netdevice_notifier(&can_netdev_notifier); sock_unregister(PF_CAN); - /* remove can_rx_dev_list */ - spin_lock(&can_rcvlists_lock); - hlist_del(&can_rx_alldev_list.list); - hlist_for_each_entry_safe(d, n, next, &can_rx_dev_list, list) { - hlist_del(&d->list); - kfree(d); + /* remove created dev_rcv_lists from still registered CAN devices */ + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { + if (dev->type == ARPHRD_CAN && dev->ml_priv){ + + struct dev_rcv_lists *d = dev->ml_priv; + + BUG_ON(d->entries); + kfree(d); + dev->ml_priv = NULL; + } } - spin_unlock(&can_rcvlists_lock); + rcu_read_unlock(); rcu_barrier(); /* Wait for completion of call_rcu()'s */ diff --git a/net/can/af_can.h b/net/can/af_can.h index 18f91e37cc30..34253b84e30f 100644 --- a/net/can/af_can.h +++ b/net/can/af_can.h @@ -63,10 +63,8 @@ struct receiver { enum { RX_ERR, RX_ALL, RX_FIL, RX_INV, RX_EFF, RX_MAX }; +/* per device receive filters linked at dev->ml_priv */ struct dev_rcv_lists { - struct hlist_node list; - struct rcu_head rcu; - struct net_device *dev; struct hlist_head rx[RX_MAX]; struct hlist_head rx_sff[0x800]; int remove_on_zero_entries; diff --git a/net/can/proc.c b/net/can/proc.c index 9b9ad29be567..f4265cc9c3fb 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include "af_can.h" @@ -84,6 +85,9 @@ static const char rx_list_name[][8] = { [RX_EFF] = "rx_eff", }; +/* receive filters subscribed for 'all' CAN devices */ +extern struct dev_rcv_lists can_rx_alldev_list; + /* * af_can statistics stuff */ @@ -190,10 +194,6 @@ void can_stat_update(unsigned long data) /* * proc read functions - * - * From known use-cases we expect about 10 entries in a receive list to be - * printed in the proc_fs. So PAGE_SIZE is definitely enough space here. - * */ static void can_print_rcvlist(struct seq_file *m, struct hlist_head *rx_list, @@ -202,7 +202,6 @@ static void can_print_rcvlist(struct seq_file *m, struct hlist_head *rx_list, struct receiver *r; struct hlist_node *n; - rcu_read_lock(); hlist_for_each_entry_rcu(r, n, rx_list, list) { char *fmt = (r->can_id & CAN_EFF_FLAG)? 
" %-5s %08X %08x %08x %08x %8ld %s\n" : @@ -212,7 +211,6 @@ static void can_print_rcvlist(struct seq_file *m, struct hlist_head *rx_list, (unsigned long)r->func, (unsigned long)r->data, r->matches, r->ident); } - rcu_read_unlock(); } static void can_print_recv_banner(struct seq_file *m) @@ -346,24 +344,39 @@ static const struct file_operations can_version_proc_fops = { .release = single_release, }; +static inline void can_rcvlist_proc_show_one(struct seq_file *m, int idx, + struct net_device *dev, + struct dev_rcv_lists *d) +{ + if (!hlist_empty(&d->rx[idx])) { + can_print_recv_banner(m); + can_print_rcvlist(m, &d->rx[idx], dev); + } else + seq_printf(m, " (%s: no entry)\n", DNAME(dev)); + +} + static int can_rcvlist_proc_show(struct seq_file *m, void *v) { /* double cast to prevent GCC warning */ int idx = (int)(long)m->private; + struct net_device *dev; struct dev_rcv_lists *d; - struct hlist_node *n; seq_printf(m, "\nreceive list '%s':\n", rx_list_name[idx]); rcu_read_lock(); - hlist_for_each_entry_rcu(d, n, &can_rx_dev_list, list) { - if (!hlist_empty(&d->rx[idx])) { - can_print_recv_banner(m); - can_print_rcvlist(m, &d->rx[idx], d->dev); - } else - seq_printf(m, " (%s: no entry)\n", DNAME(d->dev)); + /* receive list for 'all' CAN devices (dev == NULL) */ + d = &can_rx_alldev_list; + can_rcvlist_proc_show_one(m, idx, NULL, d); + + /* receive list for registered CAN devices */ + for_each_netdev_rcu(&init_net, dev) { + if (dev->type == ARPHRD_CAN && dev->ml_priv) + can_rcvlist_proc_show_one(m, idx, dev, dev->ml_priv); } + rcu_read_unlock(); seq_putc(m, '\n'); @@ -383,34 +396,50 @@ static const struct file_operations can_rcvlist_proc_fops = { .release = single_release, }; +static inline void can_rcvlist_sff_proc_show_one(struct seq_file *m, + struct net_device *dev, + struct dev_rcv_lists *d) +{ + int i; + int all_empty = 1; + + /* check wether at least one list is non-empty */ + for (i = 0; i < 0x800; i++) + if (!hlist_empty(&d->rx_sff[i])) { + all_empty = 0; + break; + } + + if (!all_empty) { + can_print_recv_banner(m); + for (i = 0; i < 0x800; i++) { + if (!hlist_empty(&d->rx_sff[i])) + can_print_rcvlist(m, &d->rx_sff[i], dev); + } + } else + seq_printf(m, " (%s: no entry)\n", DNAME(dev)); +} + static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v) { + struct net_device *dev; struct dev_rcv_lists *d; - struct hlist_node *n; /* RX_SFF */ seq_puts(m, "\nreceive list 'rx_sff':\n"); rcu_read_lock(); - hlist_for_each_entry_rcu(d, n, &can_rx_dev_list, list) { - int i, all_empty = 1; - /* check wether at least one list is non-empty */ - for (i = 0; i < 0x800; i++) - if (!hlist_empty(&d->rx_sff[i])) { - all_empty = 0; - break; - } - - if (!all_empty) { - can_print_recv_banner(m); - for (i = 0; i < 0x800; i++) { - if (!hlist_empty(&d->rx_sff[i])) - can_print_rcvlist(m, &d->rx_sff[i], - d->dev); - } - } else - seq_printf(m, " (%s: no entry)\n", DNAME(d->dev)); + + /* sff receive list for 'all' CAN devices (dev == NULL) */ + d = &can_rx_alldev_list; + can_rcvlist_sff_proc_show_one(m, NULL, d); + + /* sff receive list for registered CAN devices */ + for_each_netdev_rcu(&init_net, dev) { + if (dev->type == ARPHRD_CAN && dev->ml_priv) + can_rcvlist_sff_proc_show_one(m, dev, dev->ml_priv); } + rcu_read_unlock(); seq_putc(m, '\n'); -- cgit v1.2.2 From ee983ac76865797a5553597a9412c835c2710f51 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Thu, 24 Dec 2009 17:26:48 +0000 Subject: tipc: use kconfig to limit numeric ranges We can rely on kconfig to limit these numbers, no need to limit them 
at compile time/run time. Users who modify these numbers manually should be responsible for themself. :) Signed-off-by: WANG Cong Cc: Per Liden Cc: Jon Maloy Cc: Allan Stephens Cc: David S. Miller Signed-off-by: David S. Miller --- net/tipc/Kconfig | 8 ++++++-- net/tipc/core.c | 10 +++++----- 2 files changed, 11 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig index 3b30d1130b61..dafbd533067c 100644 --- a/net/tipc/Kconfig +++ b/net/tipc/Kconfig @@ -33,6 +33,7 @@ config TIPC_ADVANCED config TIPC_ZONES int "Maximum number of zones in network" depends on TIPC_ADVANCED + range 1 255 default "3" help Max number of zones inside TIPC network. Max supported value @@ -44,10 +45,10 @@ config TIPC_ZONES config TIPC_CLUSTERS int "Maximum number of clusters in a zone" depends on TIPC_ADVANCED + range 1 1 default "1" help - ***Only 1 (one cluster in a zone) is supported by current code. - Any value set here will be overridden.*** + ***Only 1 (one cluster in a zone) is supported by current code.*** (Max number of clusters inside TIPC zone. Max supported value is 4095 clusters, minimum is 1. @@ -59,6 +60,7 @@ config TIPC_CLUSTERS config TIPC_NODES int "Maximum number of nodes in cluster" depends on TIPC_ADVANCED + range 8 2047 default "255" help Maximum number of nodes inside a TIPC cluster. Maximum @@ -70,6 +72,7 @@ config TIPC_NODES config TIPC_SLAVE_NODES int "Maximum number of slave nodes in cluster" depends on TIPC_ADVANCED + range 0 2047 default "0" help ***This capability is not supported by current code.*** @@ -83,6 +86,7 @@ config TIPC_SLAVE_NODES config TIPC_PORTS int "Maximum number of ports in a node" depends on TIPC_ADVANCED + range 217 65536 default "8191" help Maximum number of ports within a node. Maximum diff --git a/net/tipc/core.c b/net/tipc/core.c index 3256bd7d398f..52c571fedbe0 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -189,11 +189,11 @@ static int __init tipc_init(void) tipc_remote_management = 1; tipc_max_publications = 10000; tipc_max_subscriptions = 2000; - tipc_max_ports = delimit(CONFIG_TIPC_PORTS, 127, 65536); - tipc_max_zones = delimit(CONFIG_TIPC_ZONES, 1, 255); - tipc_max_clusters = delimit(CONFIG_TIPC_CLUSTERS, 1, 1); - tipc_max_nodes = delimit(CONFIG_TIPC_NODES, 8, 2047); - tipc_max_slaves = delimit(CONFIG_TIPC_SLAVE_NODES, 0, 2047); + tipc_max_ports = CONFIG_TIPC_PORTS; + tipc_max_zones = CONFIG_TIPC_ZONES; + tipc_max_clusters = CONFIG_TIPC_CLUSTERS; + tipc_max_nodes = CONFIG_TIPC_NODES; + tipc_max_slaves = CONFIG_TIPC_SLAVE_NODES; tipc_net_id = 4711; if ((res = tipc_core_start())) -- cgit v1.2.2 From 71c3ebfdb27b50dcaef38b6f70da82b9142c5fb6 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 4 Jan 2010 15:21:31 +0100 Subject: netfilter: SNMP NAT: correct the size argument to kzalloc obj has type struct snmp_object **, not struct snmp_object *. But indeed it is not even clear why kmalloc is needed. The memory is freed by the end of the function, so the local variable of pointer type should be sufficient. 
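The idiom that the semantic patch below enforces is easiest to see in a stand-alone userspace example (invented struct name, plain malloc instead of kzalloc): sizing the allocation from the pointer itself keeps size and type in sync, while spelling out a type name lets the two drift apart unnoticed, which is how the mismatch fixed here crept in.

#include <stdlib.h>

struct snmp_obj {
	unsigned long *id;
	unsigned int type;
};

int main(void)
{
	struct snmp_obj **objp;

	/* size spelled as a type name: nothing ties it to objp, so it
	 * silently allocates a whole object where objp only ever needs
	 * room for a single pointer */
	objp = malloc(sizeof(struct snmp_obj));
	free(objp);

	/* the rewritten form: the size follows whatever objp points at
	 * (a struct snmp_obj pointer) and tracks any later type change */
	objp = malloc(sizeof(*objp));
	free(objp);

	return 0;
}
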
The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @disable sizeof_type_expr@ type T; T **x; @@ x = <+...sizeof( - T + *x )...+> // Signed-off-by: Julia Lawall Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_nat_snmp_basic.c | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index d9521f6f9ed0..0b9c7ce3d6c5 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -1038,7 +1038,7 @@ static int snmp_parse_mangle(unsigned char *msg, unsigned int cls, con, tag, vers, pdutype; struct asn1_ctx ctx; struct asn1_octstr comm; - struct snmp_object **obj; + struct snmp_object *obj; if (debug > 1) hex_dump(msg, len); @@ -1148,43 +1148,34 @@ static int snmp_parse_mangle(unsigned char *msg, if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ) return 0; - obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC); - if (obj == NULL) { - if (net_ratelimit()) - printk(KERN_WARNING "OOM in bsalg(%d)\n", __LINE__); - return 0; - } - while (!asn1_eoc_decode(&ctx, eoc)) { unsigned int i; - if (!snmp_object_decode(&ctx, obj)) { - if (*obj) { - kfree((*obj)->id); - kfree(*obj); + if (!snmp_object_decode(&ctx, &obj)) { + if (obj) { + kfree(obj->id); + kfree(obj); } - kfree(obj); return 0; } if (debug > 1) { printk(KERN_DEBUG "bsalg: object: "); - for (i = 0; i < (*obj)->id_len; i++) { + for (i = 0; i < obj->id_len; i++) { if (i > 0) printk("."); - printk("%lu", (*obj)->id[i]); + printk("%lu", obj->id[i]); } - printk(": type=%u\n", (*obj)->type); + printk(": type=%u\n", obj->type); } - if ((*obj)->type == SNMP_IPADDR) + if (obj->type == SNMP_IPADDR) mangle_address(ctx.begin, ctx.pointer - 4 , map, check); - kfree((*obj)->id); - kfree(*obj); + kfree(obj->id); + kfree(obj); } - kfree(obj); if (!asn1_eoc_decode(&ctx, eoc)) return 0; -- cgit v1.2.2 From 89bc7a0f64de7bed2e0bc68a23d75699a610cd37 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 4 Jan 2010 16:26:03 +0100 Subject: netfilter: xt_recent: save 8 bytes per htable Moving rnd_inited into the hole after the uint8 lets go of the uint32 rnd_inited was using, plus the padding that would follow the int group. 
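Where the 8 bytes go can be reproduced with a small userspace sketch (simplified stand-in fields, not the real xt_hashlimit_htable, assuming a common 64-bit ABI): the int flag costs its own 4-byte slot plus the padding needed before the next pointer-aligned member, while the bool slips into the hole that already follows the u8.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* flag kept as its own int near the end: it occupies a 4-byte slot and
 * leaves the run of 4-byte fields needing extra padding before the next
 * pointer-aligned member */
struct htable_before {
	void *node;		/* 8-byte aligned list linkage */
	uint8_t family;		/* 3-byte hole follows */
	uint32_t cfg;		/* stands in for the embedded config */
	uint32_t rnd;
	int rnd_initialized;	/* the slot the patch gives back */
	uint32_t count;
	void *timer;		/* 8-byte aligned again */
};

/* flag folded into the existing hole right after the u8 */
struct htable_after {
	void *node;
	uint8_t family;
	bool rnd_initialized;	/* reuses padding, costs no new slot */
	uint32_t cfg;
	uint32_t rnd;
	uint32_t count;
	void *timer;
};

int main(void)
{
	printf("before: %zu bytes\n", sizeof(struct htable_before));
	printf("after:  %zu bytes\n", sizeof(struct htable_after));
	return 0;
}

On a typical x86_64 build this prints 40 and 32 bytes, i.e. the 8-byte saving described above.
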
Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/netfilter/xt_hashlimit.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index dd16e404424f..5bdc1fbf3ad7 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -81,13 +81,13 @@ struct xt_hashlimit_htable { struct hlist_node node; /* global list of all htables */ atomic_t use; u_int8_t family; + bool rnd_initialized; struct hashlimit_cfg1 cfg; /* config */ /* used internally */ spinlock_t lock; /* lock for list_head */ u_int32_t rnd; /* random seed for hash */ - int rnd_initialized; unsigned int count; /* number entries in table */ struct timer_list timer; /* timer for gc */ @@ -150,7 +150,7 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht, * the first hashtable entry */ if (!ht->rnd_initialized) { get_random_bytes(&ht->rnd, sizeof(ht->rnd)); - ht->rnd_initialized = 1; + ht->rnd_initialized = true; } if (ht->cfg.max && ht->count >= ht->cfg.max) { @@ -235,7 +235,7 @@ static int htable_create_v0(struct xt_hashlimit_info *minfo, u_int8_t family) atomic_set(&hinfo->use, 1); hinfo->count = 0; hinfo->family = family; - hinfo->rnd_initialized = 0; + hinfo->rnd_initialized = false; spin_lock_init(&hinfo->lock); hinfo->pde = proc_create_data(minfo->name, 0, (family == NFPROTO_IPV4) ? @@ -296,7 +296,7 @@ static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, u_int8_t family) atomic_set(&hinfo->use, 1); hinfo->count = 0; hinfo->family = family; - hinfo->rnd_initialized = 0; + hinfo->rnd_initialized = false; spin_lock_init(&hinfo->lock); hinfo->pde = proc_create_data(minfo->name, 0, -- cgit v1.2.2 From 5191d50192ec1281e51cbcb5248cb2667ff4d896 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 4 Jan 2010 16:27:25 +0100 Subject: netfilter: xtables: do not grab random bytes at __init "It is deliberately not done in the init function, since we might not have sufficient random while booting." 
Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/netfilter/xt_NFQUEUE.c | 6 +++++- net/netfilter/xt_RATEEST.c | 7 ++++++- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index f28f6a5fc02d..12dcd7007c3e 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -28,6 +28,7 @@ MODULE_ALIAS("ip6t_NFQUEUE"); MODULE_ALIAS("arpt_NFQUEUE"); static u32 jhash_initval __read_mostly; +static bool rnd_inited __read_mostly; static unsigned int nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par) @@ -90,6 +91,10 @@ static bool nfqueue_tg_v1_check(const struct xt_tgchk_param *par) const struct xt_NFQ_info_v1 *info = par->targinfo; u32 maxid; + if (unlikely(!rnd_inited)) { + get_random_bytes(&jhash_initval, sizeof(jhash_initval)); + rnd_inited = true; + } if (info->queues_total == 0) { pr_err("NFQUEUE: number of total queues is 0\n"); return false; @@ -135,7 +140,6 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = { static int __init nfqueue_tg_init(void) { - get_random_bytes(&jhash_initval, sizeof(jhash_initval)); return xt_register_targets(nfqueue_tg_reg, ARRAY_SIZE(nfqueue_tg_reg)); } diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index d80b8192e0d4..87ae97e5516f 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -23,6 +23,7 @@ static DEFINE_MUTEX(xt_rateest_mutex); #define RATEEST_HSIZE 16 static struct hlist_head rateest_hash[RATEEST_HSIZE] __read_mostly; static unsigned int jhash_rnd __read_mostly; +static bool rnd_inited __read_mostly; static unsigned int xt_rateest_hash(const char *name) { @@ -93,6 +94,11 @@ static bool xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) struct gnet_estimator est; } cfg; + if (unlikely(!rnd_inited)) { + get_random_bytes(&jhash_rnd, sizeof(jhash_rnd)); + rnd_inited = true; + } + est = xt_rateest_lookup(info->name); if (est) { /* @@ -164,7 +170,6 @@ static int __init xt_rateest_tg_init(void) for (i = 0; i < ARRAY_SIZE(rateest_hash); i++) INIT_HLIST_HEAD(&rateest_hash[i]); - get_random_bytes(&jhash_rnd, sizeof(jhash_rnd)); return xt_register_target(&xt_rateest_tg_reg); } -- cgit v1.2.2 From 294188ae32f984a072c64c959354b2f6f52f80a7 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 4 Jan 2010 16:28:38 +0100 Subject: netfilter: xtables: obtain random bytes earlier, in checkentry We can initialize the random hash bytes on checkentry. This is preferable since it is outside the hot path. 
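Condensed, the pattern this patch applies looks like the following kernel-style fragment (hypothetical example_ names, not a buildable module): the lazy seeding moves out of the per-packet hash functions and into the checkentry hook, so the fast path no longer re-tests the flag for every packet.

#include <linux/types.h>
#include <linux/compiler.h>
#include <linux/cache.h>
#include <linux/random.h>
#include <linux/jhash.h>

static u32 example_rnd __read_mostly;
static bool example_rnd_inited __read_mostly;

/* before: every hashed packet pays for the "is the seed ready?" test */
static u32 example_hash_old(u32 key)
{
	if (unlikely(!example_rnd_inited)) {
		get_random_bytes(&example_rnd, sizeof(example_rnd));
		example_rnd_inited = true;
	}
	return jhash_1word(key, example_rnd);
}

/* after: seed once when a rule is installed, outside the hot path */
static bool example_mt_check(void)
{
	if (unlikely(!example_rnd_inited)) {
		get_random_bytes(&example_rnd, sizeof(example_rnd));
		example_rnd_inited = true;
	}
	return true;
}

static u32 example_hash(u32 key)
{
	return jhash_1word(key, example_rnd);
}
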
Reference: http://bugzilla.netfilter.org/show_bug.cgi?id=621 Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/netfilter/xt_connlimit.c | 17 ++++++----------- net/netfilter/xt_recent.c | 20 ++++++++------------ 2 files changed, 14 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 38f03f75a636..8103bef78e44 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -40,15 +40,11 @@ struct xt_connlimit_data { spinlock_t lock; }; -static u_int32_t connlimit_rnd; -static bool connlimit_rnd_inited; +static u_int32_t connlimit_rnd __read_mostly; +static bool connlimit_rnd_inited __read_mostly; static inline unsigned int connlimit_iphash(__be32 addr) { - if (unlikely(!connlimit_rnd_inited)) { - get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd)); - connlimit_rnd_inited = true; - } return jhash_1word((__force __u32)addr, connlimit_rnd) & 0xFF; } @@ -59,11 +55,6 @@ connlimit_iphash6(const union nf_inet_addr *addr, union nf_inet_addr res; unsigned int i; - if (unlikely(!connlimit_rnd_inited)) { - get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd)); - connlimit_rnd_inited = true; - } - for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) res.ip6[i] = addr->ip6[i] & mask->ip6[i]; @@ -226,6 +217,10 @@ static bool connlimit_mt_check(const struct xt_mtchk_param *par) struct xt_connlimit_info *info = par->matchinfo; unsigned int i; + if (unlikely(!connlimit_rnd_inited)) { + get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd)); + connlimit_rnd_inited = true; + } if (nf_ct_l3proto_try_module_get(par->family) < 0) { printk(KERN_WARNING "cannot load conntrack support for " "address family %u\n", par->family); diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index fc70a49c0afd..768d01ff1fea 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -90,25 +90,17 @@ static struct proc_dir_entry *recent_proc_dir; static const struct file_operations recent_old_fops, recent_mt_fops; #endif -static u_int32_t hash_rnd; -static bool hash_rnd_initted; +static u_int32_t hash_rnd __read_mostly; +static bool hash_rnd_inited __read_mostly; -static unsigned int recent_entry_hash4(const union nf_inet_addr *addr) +static inline unsigned int recent_entry_hash4(const union nf_inet_addr *addr) { - if (!hash_rnd_initted) { - get_random_bytes(&hash_rnd, sizeof(hash_rnd)); - hash_rnd_initted = true; - } return jhash_1word((__force u32)addr->ip, hash_rnd) & (ip_list_hash_size - 1); } -static unsigned int recent_entry_hash6(const union nf_inet_addr *addr) +static inline unsigned int recent_entry_hash6(const union nf_inet_addr *addr) { - if (!hash_rnd_initted) { - get_random_bytes(&hash_rnd, sizeof(hash_rnd)); - hash_rnd_initted = true; - } return jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6), hash_rnd) & (ip_list_hash_size - 1); } @@ -287,6 +279,10 @@ static bool recent_mt_check(const struct xt_mtchk_param *par) unsigned i; bool ret = false; + if (unlikely(!hash_rnd_inited)) { + get_random_bytes(&hash_rnd, sizeof(hash_rnd)); + hash_rnd_inited = true; + } if (hweight8(info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE | XT_RECENT_CHECK | XT_RECENT_UPDATE)) != 1) -- cgit v1.2.2 From e5eb8bd9429ebd04cf906156d1fe40b52f88e82f Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Mon, 4 Jan 2010 15:58:03 -0500 Subject: mac80211: fix typo added by "mac80211: fix propagation of failed..." 'Typo: it's "Hardware", not "Harware". 
Hmm, sometimes it's hairware :-)"' -- Holger Schurig Reported-by: Holger Schurig Signed-off-by: John W. Linville --- net/mac80211/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 7e38858a9280..29e1acca207c 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1094,7 +1094,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) if (res) { WARN(local->suspended, "Harware became unavailable " "upon resume. This is could be a software issue" - "prior to suspend or a harware issue\n"); + "prior to suspend or a hardware issue\n"); return res; } -- cgit v1.2.2 From 6f7edb4881bf51300060e89915926e070ace8c4d Mon Sep 17 00:00:00 2001 From: "Catalin(ux) M. BOIE" Date: Tue, 5 Jan 2010 05:50:24 +0100 Subject: IPVS: Allow boot time change of hash size I was very frustrated about the fact that I have to recompile the kernel to change the hash size. So, I created this patch. If IPVS is built-in you can append ip_vs.conn_tab_bits=?? to kernel command line, or, if you built IPVS as modules, you can add options ip_vs conn_tab_bits=??. To keep everything backward compatible, you still can select the size at compile time, and that will be used as default. It has been about a year since this patch was originally posted and subsequently dropped on the basis of insufficient test data. Mark Bergsma has provided the following test results which seem to strongly support the need for larger hash table sizes: We do however run into the same problem with the default setting (2^12 = 4096 entries), as most of our LVS balancers handle around a million connections/SLAB entries at any point in time (around 100-150 kpps load). With only 4096 hash table entries this implies that each entry consists of a linked list of 256 connections *on average*. To provide some statistics, I did an oprofile run on an 2.6.31 kernel, with both the default 4096 table size, and the same kernel recompiled with IP_VS_CONN_TAB_BITS set to 18 (2^18 = 262144 entries). I built a quick test setup with a part of Wikimedia/Wikipedia's live traffic mirrored by the switch to the test host. With the default setting, at ~ 120 kpps packet load we saw a typical %si CPU usage of around 30-35%, and oprofile reported a hot spot in ip_vs_conn_in_get:
samples   %        image name   app name   symbol name
1719761   42.3741  ip_vs.ko     ip_vs.ko   ip_vs_conn_in_get
 302577    7.4554  bnx2         bnx2       /bnx2
 181984    4.4840  vmlinux      vmlinux    __ticket_spin_lock
 128636    3.1695  vmlinux      vmlinux    ip_route_input
  74345    1.8318  ip_vs.ko     ip_vs.ko   ip_vs_conn_out_get
  68482    1.6874  vmlinux      vmlinux    mwait_idle
After loading the recompiled kernel with 2^18 entries, %si CPU usage dropped in half to around 12-18%, and oprofile looks much healthier, with only 7% spent in ip_vs_conn_in_get:
samples   %        image name   app name   symbol name
 265641   14.4616  bnx2         bnx2       /bnx2
 143251    7.7986  vmlinux      vmlinux    __ticket_spin_lock
 140661    7.6576  ip_vs.ko     ip_vs.ko   ip_vs_conn_in_get
  94364    5.1372  vmlinux      vmlinux    mwait_idle
  86267    4.6964  vmlinux      vmlinux    ip_route_input
[ horms@verge.net.au: trivial up-port and minor style fixes ] Signed-off-by: Catalin(ux) M.
BOIE Cc: Mark Bergsma Signed-off-by: Simon Horman Signed-off-by: Patrick McHardy --- net/netfilter/ipvs/Kconfig | 4 ++++ net/netfilter/ipvs/ip_vs_conn.c | 42 ++++++++++++++++++++++++++++++----------- net/netfilter/ipvs/ip_vs_ctl.c | 8 ++++---- 3 files changed, 39 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 79a698052218..c71e543d2549 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -68,6 +68,10 @@ config IP_VS_TAB_BITS each hash entry uses 8 bytes, so you can estimate how much memory is needed for your box. + You can overwrite this number setting conn_tab_bits module parameter + or by appending ip_vs.conn_tab_bits=? to the kernel command line + if IP VS was compiled built-in. + comment "IPVS transport protocol load balancing support" config IP_VS_PROTO_TCP diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 27c30cf933da..60bb41a8d8d4 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -40,6 +40,21 @@ #include +#ifndef CONFIG_IP_VS_TAB_BITS +#define CONFIG_IP_VS_TAB_BITS 12 +#endif + +/* + * Connection hash size. Default is what was selected at compile time. +*/ +int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS; +module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444); +MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size"); + +/* size and mask values */ +int ip_vs_conn_tab_size; +int ip_vs_conn_tab_mask; + /* * Connection hash table: for input and output packets lookups of IPVS */ @@ -125,11 +140,11 @@ static unsigned int ip_vs_conn_hashkey(int af, unsigned proto, if (af == AF_INET6) return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd), (__force u32)port, proto, ip_vs_conn_rnd) - & IP_VS_CONN_TAB_MASK; + & ip_vs_conn_tab_mask; #endif return jhash_3words((__force u32)addr->ip, (__force u32)port, proto, ip_vs_conn_rnd) - & IP_VS_CONN_TAB_MASK; + & ip_vs_conn_tab_mask; } @@ -760,7 +775,7 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) int idx; struct ip_vs_conn *cp; - for(idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) { + for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { ct_read_lock_bh(idx); list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { if (pos-- == 0) { @@ -797,7 +812,7 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) idx = l - ip_vs_conn_tab; ct_read_unlock_bh(idx); - while (++idx < IP_VS_CONN_TAB_SIZE) { + while (++idx < ip_vs_conn_tab_size) { ct_read_lock_bh(idx); list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { seq->private = &ip_vs_conn_tab[idx]; @@ -976,8 +991,8 @@ void ip_vs_random_dropentry(void) /* * Randomly scan 1/32 of the whole table every second */ - for (idx = 0; idx < (IP_VS_CONN_TAB_SIZE>>5); idx++) { - unsigned hash = net_random() & IP_VS_CONN_TAB_MASK; + for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) { + unsigned hash = net_random() & ip_vs_conn_tab_mask; /* * Lock is actually needed in this loop. @@ -1029,7 +1044,7 @@ static void ip_vs_conn_flush(void) struct ip_vs_conn *cp; flush_again: - for (idx=0; idx Date: Tue, 29 Dec 2009 12:43:58 +0200 Subject: mac80211: No need to include WEXT headers here Remove the forgotten linux/wireless.h inclusion from mac80211. Signed-off-by: Jouni Malinen Signed-off-by: John W. 
Linville --- net/mac80211/main.c | 1 - net/mac80211/scan.c | 1 - net/mac80211/util.c | 1 - 3 files changed, 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index d0a14d953f08..468829143991 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 365f40975511..a4c63d4e6845 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -12,7 +12,6 @@ * published by the Free Software Foundation. */ -#include #include #include #include diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 29e1acca207c..4635d4e5af9e 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.2 From 1990ca6113399be9249433d5ab377a2a444f1dd8 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Wed, 30 Dec 2009 14:42:20 +0200 Subject: mac80211: fix a warning related to pointer conversion to u64 cookie On a 32 bit system (in this case an omap 3430 system) gcc warned about pointer conversion: net/mac80211/work.c: In function 'ieee80211_remain_on_channel_timeout': net/mac80211/work.c:534: warning: cast from pointer to integer of different size net/mac80211/work.c: In function 'ieee80211_remain_done': net/mac80211/work.c:1030: warning: cast from pointer to integer of different size net/mac80211/work.c: In function 'ieee80211_wk_remain_on_channel': net/mac80211/work.c:1056: warning: cast from pointer to integer of different size net/mac80211/work.c: In function 'ieee80211_wk_cancel_remain_on_channel': net/mac80211/work.c:1072: warning: cast from pointer to integer of different size Fix it by casting the pointers to unsigned long instead. This makes the compiler happy again. Compile-tested only. Cc: Johannes Berg Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- net/mac80211/work.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/work.c b/net/mac80211/work.c index ea89ed70734d..5ba75990c9fd 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -531,9 +531,9 @@ ieee80211_remain_on_channel_timeout(struct ieee80211_work *wk) wk->remain.started = true; wk->timeout = jiffies + msecs_to_jiffies(wk->remain.duration); - cfg80211_ready_on_channel(wk->sdata->dev, (u64)wk, wk->chan, - wk->chan_type, wk->remain.duration, - GFP_KERNEL); + cfg80211_ready_on_channel(wk->sdata->dev, (unsigned long) wk, + wk->chan, wk->chan_type, + wk->remain.duration, GFP_KERNEL); return WORK_ACT_NONE; } @@ -1027,7 +1027,7 @@ static enum work_done_result ieee80211_remain_done(struct ieee80211_work *wk, /* * We are done serving the remain-on-channel command. 
*/ - cfg80211_remain_on_channel_expired(wk->sdata->dev, (u64)wk, + cfg80211_remain_on_channel_expired(wk->sdata->dev, (unsigned long) wk, wk->chan, wk->chan_type, GFP_KERNEL); @@ -1053,7 +1053,7 @@ int ieee80211_wk_remain_on_channel(struct ieee80211_sub_if_data *sdata, wk->remain.duration = duration; - *cookie = (u64)wk; + *cookie = (unsigned long) wk; ieee80211_add_work(wk); @@ -1069,7 +1069,7 @@ int ieee80211_wk_cancel_remain_on_channel(struct ieee80211_sub_if_data *sdata, mutex_lock(&local->work_mtx); list_for_each_entry_safe(wk, tmp, &local->work_list, list) { - if ((u64)wk == cookie) { + if ((unsigned long) wk == cookie) { wk->timeout = jiffies; found = true; break; -- cgit v1.2.2 From fc5f75773c0b3c5b44785e4efcc54c5f496211a9 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Wed, 30 Dec 2009 15:54:03 +0200 Subject: mac80211: fix ieee80211_change_mac() to use struct sockaddr Setting the mac address from user space was buggy. For example, when executing this command: ip link set wlan0 address 00:1f:df:88:cd:55 mac80211 used the address 01:00:00:1f:df:88 instead. It was shifted two bytes. The reason was that the addr (type of void *) provided to ieee80211_change_mac() is actually of type struct sockaddr, not just the mac address array. Also the call to eth_mac_addr() expects the address to be struct sockaddr. Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- net/mac80211/iface.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 00a1f4ccdaf1..72189661fc49 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -63,15 +63,16 @@ static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) static int ieee80211_change_mac(struct net_device *dev, void *addr) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct sockaddr *sa = addr; int ret; if (ieee80211_sdata_running(sdata)) return -EBUSY; - ret = eth_mac_addr(dev, addr); + ret = eth_mac_addr(dev, sa); if (ret == 0) - memcpy(sdata->vif.addr, addr, ETH_ALEN); + memcpy(sdata->vif.addr, sa->sa_data, ETH_ALEN); return ret; } -- cgit v1.2.2 From 4113f75187bfebccc54bf13c0ed09593023b53ca Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 4 Jan 2010 11:50:11 -0500 Subject: cfg80211: add a regulatory debug print Instead of sprinkling code with ifdef's define REG_DBG_PRINT() instead. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 87ea60d84c3c..ab29a6135d22 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -43,6 +43,15 @@ #include "regdb.h" #include "nl80211.h" +#ifdef CONFIG_CFG80211_REG_DEBUG +#define REG_DBG_PRINT(args...) 
\ + do { \ + printk(KERN_DEBUG args); \ + } while (0) +#else +#define REG_DBG_PRINT(args) +#endif + /* Receipt of information from last regulatory request */ static struct regulatory_request *last_request; @@ -972,25 +981,21 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, if (r == -ERANGE && last_request->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) { -#ifdef CONFIG_CFG80211_REG_DEBUG - printk(KERN_DEBUG "cfg80211: Leaving channel %d MHz " + REG_DBG_PRINT("cfg80211: Leaving channel %d MHz " "intact on %s - no rule found in band on " "Country IE\n", - chan->center_freq, wiphy_name(wiphy)); -#endif + chan->center_freq, wiphy_name(wiphy)); } else { /* * In this case we know the country IE has at least one reg rule * for the band so we respect its band definitions */ -#ifdef CONFIG_CFG80211_REG_DEBUG if (last_request->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) - printk(KERN_DEBUG "cfg80211: Disabling " + REG_DBG_PRINT("cfg80211: Disabling " "channel %d MHz on %s due to " "Country IE\n", chan->center_freq, wiphy_name(wiphy)); -#endif flags |= IEEE80211_CHAN_DISABLED; chan->flags = flags; } @@ -1870,13 +1875,12 @@ int regulatory_hint_found_beacon(struct wiphy *wiphy, if (!reg_beacon) return -ENOMEM; -#ifdef CONFIG_CFG80211_REG_DEBUG - printk(KERN_DEBUG "cfg80211: Found new beacon on " - "frequency: %d MHz (Ch %d) on %s\n", - beacon_chan->center_freq, - ieee80211_frequency_to_channel(beacon_chan->center_freq), - wiphy_name(wiphy)); -#endif + REG_DBG_PRINT("cfg80211: Found new beacon on " + "frequency: %d MHz (Ch %d) on %s\n", + beacon_chan->center_freq, + ieee80211_frequency_to_channel(beacon_chan->center_freq), + wiphy_name(wiphy)); + memcpy(®_beacon->chan, beacon_chan, sizeof(struct ieee80211_channel)); -- cgit v1.2.2 From f3f66b69c8ff08b46975d9e99c7ecb92a8b12eda Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Mon, 4 Jan 2010 00:52:56 +0100 Subject: mac80211: fix ampdu_action tx_start ssn The start_seq_num is taken from the station's tid_seq[tid]. This is fine, except tid_seq sequence counter is shifted by 4 bits to accommodate for frame fragmentation. Both (iwlagn & ath9k) were unaffected by this minor glitch, because they don't read the *ssn for the AMPDU_TX_START action. Signed-off-by: Christian Lamparter Signed-off-by: John W. Linville --- net/mac80211/agg-tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index ceda36618d3c..5aa8f4a3ed17 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -301,7 +301,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) * call back right away, it must see that the flow has begun */ *state |= HT_ADDBA_REQUESTED_MSK; - start_seq_num = sta->tid_seq[tid]; + start_seq_num = sta->tid_seq[tid] >> 4; ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START, pubsta, tid, &start_seq_num); -- cgit v1.2.2 From 8271195e38d01f3551bb10b1c7de856cd8507882 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Tue, 5 Jan 2010 17:57:20 -0500 Subject: wireless: fix build breakage when CONFIG_CFG80211_REG_DEBUG not set Bad macro definition in "cfg80211: add a regulatory debug print"... Signed-off-by: John W. 
Linville --- net/wireless/reg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 89c3e68a1cc6..f79d6613c5ff 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -44,12 +44,12 @@ #include "nl80211.h" #ifdef CONFIG_CFG80211_REG_DEBUG -#define REG_DBG_PRINT(args...) \ +#define REG_DBG_PRINT(format, args...) \ do { \ - printk(KERN_DEBUG args); \ + printk(KERN_DEBUG format , ## args); \ } while (0) #else -#define REG_DBG_PRINT(args) +#define REG_DBG_PRINT(args...) #endif /* Receipt of information from last regulatory request */ -- cgit v1.2.2 From fc6a110754476362f9f4fa3199a637f2331c5993 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Mon, 4 Jan 2010 02:02:47 +0000 Subject: Phonet: zero-copy aligned GPRS RX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Newer Nokia cellular modems can use aligned payload for their GPRS pipe. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/pep-gprs.c | 4 ++-- net/phonet/pep.c | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c index d183509d3fa6..d01208968c83 100644 --- a/net/phonet/pep-gprs.c +++ b/net/phonet/pep-gprs.c @@ -96,11 +96,11 @@ static int gprs_recv(struct gprs_dev *gp, struct sk_buff *skb) goto drop; } - if (likely(skb_headroom(skb) & 3)) { + if (skb_headroom(skb) & 3) { struct sk_buff *rskb, *fs; int flen = 0; - /* Phonet Pipe data header is misaligned (3 bytes), + /* Phonet Pipe data header may be misaligned (3 bytes), * so wrap the IP packet as a single fragment of an head-less * socket buffer. The network stack will pull what it needs, * but at least, the whole IP payload is not memcpy'd. */ diff --git a/net/phonet/pep.c b/net/phonet/pep.c index b6356f3832f6..e23e30907d34 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -354,6 +354,9 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) queue = &pn->ctrlreq_queue; goto queue; + case PNS_PIPE_ALIGNED_DATA: + __skb_pull(skb, 1); + /* fall through */ case PNS_PIPE_DATA: __skb_pull(skb, 3); /* Pipe data header */ if (!pn_flow_safe(pn->rx_fc)) { -- cgit v1.2.2 From fea93ecef619b5779ca6984568517b1685079b05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Mon, 4 Jan 2010 02:02:48 +0000 Subject: Phonet: zero-copy GPRS TX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Send aligned pipe payload if requested to do so. Then, the socket buffer needs not be fragmented anymore. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. 
Miller --- net/phonet/pep.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/phonet/pep.c b/net/phonet/pep.c index e23e30907d34..72db27e3fc06 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -444,6 +444,7 @@ static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) struct sockaddr_pn dst; u16 peer_type; u8 pipe_handle, enabled, n_sb; + u8 aligned = 0; if (!pskb_pull(skb, sizeof(*hdr) + 4)) return -EINVAL; @@ -482,6 +483,9 @@ static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) return -EINVAL; peer_type = (peer_type & 0xff00) | data[0]; break; + case PN_PIPE_SB_ALIGNED_DATA: + aligned = data[0] != 0; + break; } n_sb--; } @@ -513,6 +517,7 @@ static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) newpn->rx_credits = 0; newpn->rx_fc = newpn->tx_fc = PN_LEGACY_FLOW_CONTROL; newpn->init_enable = enabled; + newpn->aligned = aligned; BUG_ON(!skb_queue_empty(&newsk->sk_receive_queue)); skb_queue_head(&newsk->sk_receive_queue, skb); @@ -832,11 +837,15 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb) return -ENOBUFS; } - skb_push(skb, 3); + skb_push(skb, 3 + pn->aligned); skb_reset_transport_header(skb); ph = pnp_hdr(skb); ph->utid = 0; - ph->message_id = PNS_PIPE_DATA; + if (pn->aligned) { + ph->message_id = PNS_PIPE_ALIGNED_DATA; + ph->data[0] = 0; /* padding */ + } else + ph->message_id = PNS_PIPE_DATA; ph->pipe_handle = pn->pipe_handle; return pn_skb_send(sk, skb, &pipe_srv); @@ -930,6 +939,9 @@ int pep_write(struct sock *sk, struct sk_buff *skb) struct sk_buff *rskb, *fs; int flen = 0; + if (pep_sk(sk)->aligned) + return pipe_skb_send(sk, skb); + rskb = alloc_skb(MAX_PNPIPE_HEADER, GFP_ATOMIC); if (!rskb) { kfree_skb(skb); -- cgit v1.2.2 From 82ecbcb9c6b5257929968d5b0ed536772a9b4ac5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Mon, 4 Jan 2010 02:02:49 +0000 Subject: Phonet: reject unsupported sendmsg/recvmsg flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. 
Miller --- net/phonet/datagram.c | 6 ++++-- net/phonet/pep.c | 10 +++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index 67f072e94d00..387197b579b1 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -75,7 +75,8 @@ static int pn_sendmsg(struct kiocb *iocb, struct sock *sk, struct sk_buff *skb; int err; - if (msg->msg_flags & MSG_OOB) + if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_NOSIGNAL| + MSG_CMSG_COMPAT)) return -EOPNOTSUPP; if (msg->msg_name == NULL) @@ -119,7 +120,8 @@ static int pn_recvmsg(struct kiocb *iocb, struct sock *sk, int rval = -EOPNOTSUPP; int copylen; - if (flags & MSG_OOB) + if (flags & ~(MSG_PEEK|MSG_TRUNC|MSG_DONTWAIT|MSG_NOSIGNAL| + MSG_CMSG_COMPAT)) goto out_nofree; if (addr_len) diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 72db27e3fc06..360cf377693e 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -860,7 +860,9 @@ static int pep_sendmsg(struct kiocb *iocb, struct sock *sk, int flags = msg->msg_flags; int err, done; - if (msg->msg_flags & MSG_OOB || !(msg->msg_flags & MSG_EOR)) + if ((msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_NOSIGNAL| + MSG_CMSG_COMPAT)) || + !(msg->msg_flags & MSG_EOR)) return -EOPNOTSUPP; skb = sock_alloc_send_skb(sk, MAX_PNPIPE_HEADER + len, @@ -981,6 +983,10 @@ static int pep_recvmsg(struct kiocb *iocb, struct sock *sk, struct sk_buff *skb; int err; + if (flags & ~(MSG_OOB|MSG_PEEK|MSG_TRUNC|MSG_DONTWAIT|MSG_WAITALL| + MSG_NOSIGNAL|MSG_CMSG_COMPAT)) + return -EOPNOTSUPP; + if (unlikely(1 << sk->sk_state & (TCPF_LISTEN | TCPF_CLOSE))) return -ENOTCONN; @@ -988,6 +994,8 @@ static int pep_recvmsg(struct kiocb *iocb, struct sock *sk, /* Dequeue and acknowledge control request */ struct pep_sock *pn = pep_sk(sk); + if (flags & MSG_PEEK) + return -EOPNOTSUPP; skb = skb_dequeue(&pn->ctrlreq_queue); if (skb) { pep_ctrlreq_error(sk, skb, PN_PIPE_NO_ERROR, -- cgit v1.2.2 From ca8d9ea30bc79b2965a1d169dcb2f48f02af4d2d Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Wed, 6 Jan 2010 12:56:37 +0000 Subject: fix bonding: allow arp_ip_targets on separate vlans to use arp validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Wed, Jan 06, 2010 at 10:10:03PM +0100, Eric Dumazet wrote: > Le 06/01/2010 19:38, Eric Dumazet a écrit : > > > > (net-next-2.6 doesnt work well on my bond/vlan setup, I suspect I need a bisection) > > David, I had to revert 1f3c8804acba841b5573b953f5560d2683d2db0d > (bonding: allow arp_ip_targets on separate vlans to use arp validation) > > Or else, my vlan devices dont work (unfortunatly I dont have much time > these days to debug the thing) > > My config : > > +---------+ > vlan.103 -----+ bond0 +--- eth1 (bnx2) > | + > vlan.825 -----+ +--- eth2 (tg3) > +---------+ > > $ cat /proc/net/bonding/bond0 > Ethernet Channel Bonding Driver: v3.6.0 (September 26, 2009) > > Bonding Mode: fault-tolerance (active-backup) > Primary Slave: None > Currently Active Slave: eth2 > MII Status: up > MII Polling Interval (ms): 100 > Up Delay (ms): 0 > Down Delay (ms): 0 > > Slave Interface: eth1 (bnx2) > MII Status: down > Link Failure Count: 1 > Permanent HW addr: 00:1e:0b:ec:d3:d2 > > Slave Interface: eth2 (tg3) > MII Status: up > Link Failure Count: 0 > Permanent HW addr: 00:1e:0b:92:78:50 > This patch fixes up a problem with found with commit 1f3c8804acba841b5573b953f5560d2683d2db0d. 
The original change overloaded null_or_orig, but doing that prevented any packet handlers that were not tied to a specific device (i.e. ptype->dev == NULL) from ever receiving any frames. The null_or_orig variable cannot be overloaded, and must be kept as NULL to prevent the frame from being ignored by packet handlers designed to accept frames on any interface. Signed-off-by: Andy Gospodarek Signed-off-by: Jay Vosburgh Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index f9aa699ab6cb..d9ab9be0c323 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2430,6 +2430,7 @@ int netif_receive_skb(struct sk_buff *skb) struct packet_type *ptype, *pt_prev; struct net_device *orig_dev; struct net_device *null_or_orig; + struct net_device *null_or_bond; int ret = NET_RX_DROP; __be16 type; @@ -2500,21 +2501,19 @@ ncls: * bonding interfaces still make their way to any base bonding * device that may have registered for a specific ptype. The * handler may have to adjust skb->dev and orig_dev. - * - * null_or_orig can be overloaded since it will not be set when - * using VLANs on top of bonding. Putting it here prevents - * disturbing the ptype_all handlers above. */ + null_or_bond = NULL; if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) && (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) { - null_or_orig = vlan_dev_real_dev(skb->dev); + null_or_bond = vlan_dev_real_dev(skb->dev); } type = skb->protocol; list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { if (ptype->type == type && (ptype->dev == null_or_orig || - ptype->dev == skb->dev || ptype->dev == orig_dev)) { + ptype->dev == skb->dev || ptype->dev == orig_dev || + ptype->dev == null_or_bond)) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; -- cgit v1.2.2 From 65324144b50bc7022cc9b6ca8f4a536a957019e3 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 5 Jan 2010 05:50:47 +0000 Subject: net: RFC3069, private VLAN proxy arp support This is to be used together with switch technologies, like RFC3069, that where the individual ports are not allowed to communicate with each other, but they are allowed to talk to the upstream router. As described in RFC 3069, it is possible to allow these hosts to communicate through the upstream router by proxy_arp'ing. This patch basically allow proxy arp replies back to the same interface (from which the ARP request/solicitation was received). Tunable per device via proc "proxy_arp_pvlan": /proc/sys/net/ipv4/conf/*/proxy_arp_pvlan This switch technology is known by different vendor names: - In RFC 3069 it is called VLAN Aggregation. - Cisco and Allied Telesyn call it Private VLAN. - Hewlett-Packard call it Source-Port filtering or port-isolation. - Ericsson call it MAC-Forced Forwarding (RFC Draft). Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/ipv4/arp.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++---- net/ipv4/devinet.c | 1 + net/ipv4/route.c | 7 ++++++- 3 files changed, 55 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index c95cd93acf29..078709233bc4 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -70,6 +70,7 @@ * bonding can change the skb before * sending (e.g. insert 8021q tag). * Harald Welte : convert to make use of jenkins hash + * Jesper D. Brouer: Proxy ARP PVLAN RFC 3069 support. 
*/ #include @@ -524,12 +525,15 @@ int arp_bind_neighbour(struct dst_entry *dst) /* * Check if we can use proxy ARP for this path */ - -static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt) +static inline int arp_fwd_proxy(struct in_device *in_dev, + struct net_device *dev, struct rtable *rt) { struct in_device *out_dev; int imi, omi = -1; + if (rt->u.dst.dev == dev) + return 0; + if (!IN_DEV_PROXY_ARP(in_dev)) return 0; @@ -547,6 +551,43 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt) return (omi != imi && omi != -1); } +/* + * Check for RFC3069 proxy arp private VLAN (allow to send back to same dev) + * + * RFC3069 supports proxy arp replies back to the same interface. This + * is done to support (ethernet) switch features, like RFC 3069, where + * the individual ports are not allowed to communicate with each + * other, BUT they are allowed to talk to the upstream router. As + * described in RFC 3069, it is possible to allow these hosts to + * communicate through the upstream router, by proxy_arp'ing. + * + * RFC 3069: "VLAN Aggregation for Efficient IP Address Allocation" + * + * This technology is known by different names: + * In RFC 3069 it is called VLAN Aggregation. + * Cisco and Allied Telesyn call it Private VLAN. + * Hewlett-Packard call it Source-Port filtering or port-isolation. + * Ericsson call it MAC-Forced Forwarding (RFC Draft). + * + */ +static inline int arp_fwd_pvlan(struct in_device *in_dev, + struct net_device *dev, struct rtable *rt, + __be32 sip, __be32 tip) +{ + /* Private VLAN is only concerned about the same ethernet segment */ + if (rt->u.dst.dev != dev) + return 0; + + /* Don't reply on self probes (often done by windowz boxes)*/ + if (sip == tip) + return 0; + + if (IN_DEV_PROXY_ARP_PVLAN(in_dev)) + return 1; + else + return 0; +} + /* * Interface to link layer: send routine and receive handler. */ @@ -833,8 +874,11 @@ static int arp_process(struct sk_buff *skb) } goto out; } else if (IN_DEV_FORWARD(in_dev)) { - if (addr_type == RTN_UNICAST && rt->u.dst.dev != dev && - (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) { + if (addr_type == RTN_UNICAST && + (arp_fwd_proxy(in_dev, dev, rt) || + arp_fwd_pvlan(in_dev, dev, rt, sip, tip) || + pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) + { n = neigh_event_ns(&arp_tbl, sha, &sip, dev); if (n) neigh_release(n); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5cdbc102a418..0715f4cac391 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1407,6 +1407,7 @@ static struct devinet_sysctl_table { DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"), DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"), + DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"), DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e446496f564f..1cc339441e7d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1988,8 +1988,13 @@ static int __mkroute_input(struct sk_buff *skb, if (skb->protocol != htons(ETH_P_IP)) { /* Not IP (i.e. ARP). Do not create route, if it is * invalid for proxy arp. DNAT routes are always valid. + * + * Proxy arp feature have been extended to allow, ARP + * replies back to the same interface, to support + * Private VLAN switch technologies. See arp.c. 
*/ - if (out_dev == in_dev) { + if (out_dev == in_dev && + IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) { err = -EINVAL; goto cleanup; } -- cgit v1.2.2 From 2d13bafeba24f732e89b818b8c66b07893457570 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 5 Jan 2010 05:50:52 +0000 Subject: net: Make it easier to parse /proc/net/dev contents. The contents of /proc/net/dev is annoying to parse, because it changes whether there is a space after the "ethX:" or not. It depends upon the size of the "Receive bytes" counter, if the number is below 7 digits, then there is whitespaces else if the number is 8 digits or above there is no space between the ":" and the number. This patch changes the output to assure there is always a space between the ":" and the number. Given that all existing userspace application already need to handle the whitespaces, I see no breakage of existing tools. Signed-off-by: Jesper Dangaard Brouer Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index d9ab9be0c323..a008f6987a95 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3206,7 +3206,7 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) { const struct net_device_stats *stats = dev_get_stats(dev); - seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " + seq_printf(seq, "%6s: %7lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", dev->name, stats->rx_bytes, stats->rx_packets, stats->rx_errors, -- cgit v1.2.2 From 5856b606e69d3e4dc2d718b475e216eb30ee2006 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 8 Jan 2010 00:59:52 -0800 Subject: net/ipv6/tcp_ipv6.c: Use compressed IPv6 address Use "[compressed ipv6]:port" form suggested by: http://tools.ietf.org/id/draft-ietf-6man-text-addr-representation-03.txt Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index febfd595a40d..1c832bf198b3 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -876,7 +876,7 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb) if (genhash || memcmp(hash_location, newhash, 16) != 0) { if (net_ratelimit()) { - printk(KERN_INFO "MD5 Hash %s for (%pI6, %u)->(%pI6, %u)\n", + printk(KERN_INFO "MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n", genhash ? "failed" : "mismatch", &ip6h->saddr, ntohs(th->source), &ip6h->daddr, ntohs(th->dest)); -- cgit v1.2.2 From c3f6c21d6e60e39c81c27f18ebad9c7615eff545 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Sun, 10 Jan 2010 00:43:03 +0000 Subject: NET: ipv6, remove unnecessary check Stanse found a potential null dereference in snmp6_unregister_dev. There is a check for idev being NULL, but it is dereferenced earlier. But idev cannot be NULL when passed to snmp6_unregister_dev, so remove the test. Signed-off-by: Jiri Slaby Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: "Pekka Savola (ipv6)" Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Cc: netdev@vger.kernel.org Signed-off-by: David S. 
Miller --- net/ipv6/proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index c9605c3ad91f..7b197b7132e0 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -259,7 +259,7 @@ int snmp6_unregister_dev(struct inet6_dev *idev) struct net *net = dev_net(idev->dev); if (!net->mib.proc_net_devsnmp6) return -ENOENT; - if (!idev || !idev->stats.proc_dir_entry) + if (!idev->stats.proc_dir_entry) return -EINVAL; remove_proc_entry(idev->stats.proc_dir_entry->name, net->mib.proc_net_devsnmp6); -- cgit v1.2.2 From c026ca581f22497c42cbe7a2004fa0d5f1bd6c42 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Sun, 10 Jan 2010 01:17:34 +0000 Subject: NET: irda, remove unnecessary checks Stanse found a potential null dereference in ircomm_tty_close and ircomm_tty_hangup. There is a check for tty being NULL, but it is dereferenced earlier. But it is bogus, the tty cannot be NULL, so remove the !tty checks. Signed-off-by: Jiri Slaby Cc: Samuel Ortiz Cc: "David S. Miller" Cc: Alan Cox Cc: Greg Kroah-Hartman Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- net/irda/ircomm/ircomm_tty.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net') diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 811984d9324b..8b85d774e47f 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -496,9 +496,6 @@ static void ircomm_tty_close(struct tty_struct *tty, struct file *filp) IRDA_DEBUG(0, "%s()\n", __func__ ); - if (!tty) - return; - IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); @@ -1007,9 +1004,6 @@ static void ircomm_tty_hangup(struct tty_struct *tty) IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - if (!tty) - return; - /* ircomm_tty_flush_buffer(tty); */ ircomm_tty_shutdown(self); -- cgit v1.2.2 From c299bd53aa2616e6afc304b4f848186af3b3a881 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 11 Jan 2010 11:49:51 +0100 Subject: netfilter: nf_nat_ftp: remove (*mangle[]) array and functions, use %pI4 These functions merely exist to format a buffer and call nf_nat_mangle_tcp_packet. Format the buffer and perform the call in nf_nat_ftp instead. Use %pI4 for the IP address. Saves ~600 bytes of text old: $ size net/ipv4/netfilter/nf_nat_ftp.o text data bss dec hex filename 2187 160 408 2755 ac3 net/ipv4/netfilter/nf_nat_ftp.o new: $ size net/ipv4/netfilter/nf_nat_ftp.o text data bss dec hex filename 1532 112 288 1932 78c net/ipv4/netfilter/nf_nat_ftp.o Signed-off-by: Joe Perches Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_nat_ftp.c | 105 ++++++++++++++-------------------------- 1 file changed, 35 insertions(+), 70 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c index a1d5d58a58bf..86e0e84ff0a0 100644 --- a/net/ipv4/netfilter/nf_nat_ftp.c +++ b/net/ipv4/netfilter/nf_nat_ftp.c @@ -27,76 +27,29 @@ MODULE_ALIAS("ip_nat_ftp"); /* FIXME: Time out? 
--RR */ -static int -mangle_rfc959_packet(struct sk_buff *skb, - __be32 newip, - u_int16_t port, - unsigned int matchoff, - unsigned int matchlen, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo) +static int nf_nat_ftp_fmt_cmd(enum nf_ct_ftp_type type, + char *buffer, size_t buflen, + __be32 addr, u16 port) { - char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")]; - - sprintf(buffer, "%u,%u,%u,%u,%u,%u", - NIPQUAD(newip), port>>8, port&0xFF); - - pr_debug("calling nf_nat_mangle_tcp_packet\n"); - - return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, - matchlen, buffer, strlen(buffer)); -} - -/* |1|132.235.1.2|6275| */ -static int -mangle_eprt_packet(struct sk_buff *skb, - __be32 newip, - u_int16_t port, - unsigned int matchoff, - unsigned int matchlen, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo) -{ - char buffer[sizeof("|1|255.255.255.255|65535|")]; - - sprintf(buffer, "|1|%u.%u.%u.%u|%u|", NIPQUAD(newip), port); - - pr_debug("calling nf_nat_mangle_tcp_packet\n"); - - return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, - matchlen, buffer, strlen(buffer)); -} - -/* |1|132.235.1.2|6275| */ -static int -mangle_epsv_packet(struct sk_buff *skb, - __be32 newip, - u_int16_t port, - unsigned int matchoff, - unsigned int matchlen, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo) -{ - char buffer[sizeof("|||65535|")]; - - sprintf(buffer, "|||%u|", port); - - pr_debug("calling nf_nat_mangle_tcp_packet\n"); + switch (type) { + case NF_CT_FTP_PORT: + case NF_CT_FTP_PASV: + return snprintf(buffer, buflen, "%u,%u,%u,%u,%u,%u", + ((unsigned char *)&addr)[0], + ((unsigned char *)&addr)[1], + ((unsigned char *)&addr)[2], + ((unsigned char *)&addr)[3], + port >> 8, + port & 0xFF); + case NF_CT_FTP_EPRT: + return snprintf(buffer, buflen, "|1|%pI4|%u|", &addr, port); + case NF_CT_FTP_EPSV: + return snprintf(buffer, buflen, "|||%u|", port); + } - return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, - matchlen, buffer, strlen(buffer)); + return 0; } -static int (*mangle[])(struct sk_buff *, __be32, u_int16_t, - unsigned int, unsigned int, struct nf_conn *, - enum ip_conntrack_info) -= { - [NF_CT_FTP_PORT] = mangle_rfc959_packet, - [NF_CT_FTP_PASV] = mangle_rfc959_packet, - [NF_CT_FTP_EPRT] = mangle_eprt_packet, - [NF_CT_FTP_EPSV] = mangle_epsv_packet -}; - /* So, this packet has hit the connection tracking matching code. Mangle it, and change the expectation to match the new version. 
*/ static unsigned int nf_nat_ftp(struct sk_buff *skb, @@ -110,6 +63,8 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb, u_int16_t port; int dir = CTINFO2DIR(ctinfo); struct nf_conn *ct = exp->master; + char buffer[sizeof("|1|255.255.255.255|65535|")]; + unsigned int buflen; pr_debug("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen); @@ -132,11 +87,21 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb, if (port == 0) return NF_DROP; - if (!mangle[type](skb, newip, port, matchoff, matchlen, ct, ctinfo)) { - nf_ct_unexpect_related(exp); - return NF_DROP; - } + buflen = nf_nat_ftp_fmt_cmd(type, buffer, sizeof(buffer), newip, port); + if (!buflen) + goto out; + + pr_debug("calling nf_nat_mangle_tcp_packet\n"); + + if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, + matchlen, buffer, buflen)) + goto out; + return NF_ACCEPT; + +out: + nf_ct_unexpect_related(exp); + return NF_DROP; } static void __exit nf_nat_ftp_fini(void) -- cgit v1.2.2 From a79e7ac4ad77e1833e8f69e99113204d03018255 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 11 Jan 2010 11:53:31 +0100 Subject: ipvs: use standardized format in sprintf Use the same format string as net/ipv4/netfilter/nf_nat_ftp.c to encode an ipv4 address and port. Both uses should be a single common function. Signed-off-by: Joe Perches Acked-by: Simon Horman Signed-off-by: Patrick McHardy --- net/netfilter/ipvs/ip_vs_ftp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 33e2c799cba7..73f38ea98f25 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -208,7 +208,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, */ from.ip = n_cp->vaddr.ip; port = n_cp->vport; - sprintf(buf, "%d,%d,%d,%d,%d,%d", NIPQUAD(from.ip), + sprintf(buf, "%u,%u,%u,%u,%u,%u", NIPQUAD(from.ip), (ntohs(port)>>8)&255, ntohs(port)&255); buf_len = strlen(buf); -- cgit v1.2.2 From 7f635d0d1bf84ad7a0032cbce9d902b9384c48b7 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 11 Jan 2010 11:55:36 +0100 Subject: netfilter: xt_osf: change %pi4 to %pI4 commit 8a27f7c90ffcb791eed7574922b51fb60b08fc89 changed the output style of %pi4 to use fixed width leading zero IP addresses "001.002.003.004". It's useful when printing multiple lines of addresses, but was a change in output style for some existing uses. Using %pI4 restores the previous output style. 
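Concretely, the two extensions differ only in per-octet zero padding; a minimal sketch (not taken from the patch -- the helper name and the address value are arbitrary examples) would be:

#include <linux/kernel.h>
#include <linux/types.h>
#include <asm/byteorder.h>

static void example_show_ipv4_formats(void)	/* placeholder helper */
{
	__be32 addr = cpu_to_be32(0xc0a80001);	/* 192.168.0.1, arbitrary value */

	printk(KERN_DEBUG "fixed width: %pi4\n", &addr);	/* prints "192.168.000.001" */
	printk(KERN_DEBUG "plain:       %pI4\n", &addr);	/* prints "192.168.0.1" */
}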
Signed-off-by: Joe Perches Signed-off-by: Patrick McHardy --- net/netfilter/xt_osf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 4d1a41bbd5d7..4169e200588d 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -334,7 +334,7 @@ static bool xt_osf_match_packet(const struct sk_buff *skb, if (info->flags & XT_OSF_LOG) nf_log_packet(p->family, p->hooknum, skb, p->in, p->out, NULL, - "%s [%s:%s] : %pi4:%d -> %pi4:%d hops=%d\n", + "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n", f->genre, f->version, f->subtype, &ip->saddr, ntohs(tcp->source), &ip->daddr, ntohs(tcp->dest), @@ -349,7 +349,7 @@ static bool xt_osf_match_packet(const struct sk_buff *skb, if (!fcount && (info->flags & XT_OSF_LOG)) nf_log_packet(p->family, p->hooknum, skb, p->in, p->out, NULL, - "Remote OS is not known: %pi4:%u -> %pi4:%u\n", + "Remote OS is not known: %pI4:%u -> %pI4:%u\n", &ip->saddr, ntohs(tcp->source), &ip->daddr, ntohs(tcp->dest)); -- cgit v1.2.2 From d218d11133d888f9745802146a50255a4781d37a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 11 Jan 2010 16:28:01 -0800 Subject: tcp: Generalized TTL Security Mechanism This patch adds the kernel portions needed to implement RFC 5082 Generalized TTL Security Mechanism (GTSM). It is a lightweight security measure against forged packets causing DoS attacks (for BGP). This is already implemented the same way in BSD kernels. For the necessary Quagga patch http://www.gossamer-threads.com/lists/quagga/dev/17389 Description from Cisco http://www.cisco.com/en/US/docs/ios/12_3t/12_3t7/feature/guide/gt_btsh.html It does add one byte to each socket structure, but I did a little rearrangement to reuse a hole (on 64 bit), but it does grow the structure on 32 bit This should be documented on ip(4) man page and the Glibc in.h file also needs update. IPV6_MINHOPLIMIT should also be added (although BSD doesn't support that). Only TCP is supported, but could also be added to UDP, DCCP, SCTP if desired. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/ipv4/ip_sockglue.c | 14 +++++++++++++- net/ipv4/tcp_ipv4.c | 3 +++ 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index cafad9baff03..644dc43a55de 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -451,7 +451,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, (1<transparent = !!val; break; + case IP_MINTTL: + if (optlen < 1) + goto e_inval; + if (val < 0 || val > 255) + goto e_inval; + inet->min_ttl = val; + break; + default: err = -ENOPROTOOPT; break; @@ -1198,6 +1207,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_TRANSPARENT: val = inet->transparent; break; + case IP_MINTTL: + val = inet->min_ttl; + break; default: release_sock(sk); return -ENOPROTOOPT; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 65b8ebfd078a..382f667238ec 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1649,6 +1649,9 @@ int tcp_v4_rcv(struct sk_buff *skb) if (!sk) goto no_tcp_socket; + if (iph->ttl < inet_sk(sk)->min_ttl) + goto discard_and_relse; + process: if (sk->sk_state == TCP_TIME_WAIT) goto do_time_wait; -- cgit v1.2.2 From 81077e82c3f591578625805dd6464a27a9ff56ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Turek?= <8an@praha12.net> Date: Mon, 21 Dec 2009 22:50:47 +0100 Subject: nl80211: Add new WIPHY attribute COVERAGE_CLASS The new attribute NL80211_ATTR_WIPHY_COVERAGE_CLASS sets IEEE 802.11 Coverage Class, which depends on maximum distance of nodes in a wireless network. It's required for long distance links (more than a few hundred meters). The attribute is now ignored by two non-mac80211 drivers, rndis and iwmc3200wifi, together with WIPHY_PARAM_RETRY_SHORT and WIPHY_PARAM_RETRY_LONG. If it turns out to be a problem, we could split set_wiphy_params callback or add new capability bits. Signed-off-by: Lukas Turek <8an@praha12.net> Signed-off-by: John W. 
Linville --- net/wireless/core.c | 1 + net/wireless/nl80211.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index c2a2c563d21a..0a545bb6ed05 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -402,6 +402,7 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->wiphy.retry_long = 4; rdev->wiphy.frag_threshold = (u32) -1; rdev->wiphy.rts_threshold = (u32) -1; + rdev->wiphy.coverage_class = 0; return &rdev->wiphy; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e3bee3cecdfa..c09fbcd278fb 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -69,6 +69,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_WIPHY_RETRY_LONG] = { .type = NLA_U8 }, [NL80211_ATTR_WIPHY_FRAG_THRESHOLD] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_RTS_THRESHOLD] = { .type = NLA_U32 }, + [NL80211_ATTR_WIPHY_COVERAGE_CLASS] = { .type = NLA_U8 }, [NL80211_ATTR_IFTYPE] = { .type = NLA_U32 }, [NL80211_ATTR_IFINDEX] = { .type = NLA_U32 }, @@ -444,6 +445,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, dev->wiphy.frag_threshold); NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD, dev->wiphy.rts_threshold); + NLA_PUT_U8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS, + dev->wiphy.coverage_class); NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS, dev->wiphy.max_scan_ssids); @@ -684,6 +687,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) u32 changed; u8 retry_short = 0, retry_long = 0; u32 frag_threshold = 0, rts_threshold = 0; + u8 coverage_class = 0; rtnl_lock(); @@ -806,9 +810,16 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) changed |= WIPHY_PARAM_RTS_THRESHOLD; } + if (info->attrs[NL80211_ATTR_WIPHY_COVERAGE_CLASS]) { + coverage_class = nla_get_u8( + info->attrs[NL80211_ATTR_WIPHY_COVERAGE_CLASS]); + changed |= WIPHY_PARAM_COVERAGE_CLASS; + } + if (changed) { u8 old_retry_short, old_retry_long; u32 old_frag_threshold, old_rts_threshold; + u8 old_coverage_class; if (!rdev->ops->set_wiphy_params) { result = -EOPNOTSUPP; @@ -819,6 +830,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) old_retry_long = rdev->wiphy.retry_long; old_frag_threshold = rdev->wiphy.frag_threshold; old_rts_threshold = rdev->wiphy.rts_threshold; + old_coverage_class = rdev->wiphy.coverage_class; if (changed & WIPHY_PARAM_RETRY_SHORT) rdev->wiphy.retry_short = retry_short; @@ -828,6 +840,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) rdev->wiphy.frag_threshold = frag_threshold; if (changed & WIPHY_PARAM_RTS_THRESHOLD) rdev->wiphy.rts_threshold = rts_threshold; + if (changed & WIPHY_PARAM_COVERAGE_CLASS) + rdev->wiphy.coverage_class = coverage_class; result = rdev->ops->set_wiphy_params(&rdev->wiphy, changed); if (result) { @@ -835,6 +849,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) rdev->wiphy.retry_long = old_retry_long; rdev->wiphy.frag_threshold = old_frag_threshold; rdev->wiphy.rts_threshold = old_rts_threshold; + rdev->wiphy.coverage_class = old_coverage_class; } } -- cgit v1.2.2 From 310bc676e314e92c18257bfc916951879451ee32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Turek?= <8an@praha12.net> Date: Mon, 21 Dec 2009 22:50:48 +0100 Subject: mac80211: Add new callback set_coverage_class Mac80211 callback to driver set_coverage_class() sets slot time and ACK 
timeout for given IEEE 802.11 coverage class. The callback is optional, but it's essential for long distance links. Signed-off-by: Lukas Turek <8an@praha12.net> Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 7 +++++++ net/mac80211/driver-ops.h | 15 +++++++++++++++ net/mac80211/driver-trace.h | 23 +++++++++++++++++++++++ 3 files changed, 45 insertions(+) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 2e5e841e9b7b..976014c5e742 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1230,6 +1230,13 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed) struct ieee80211_local *local = wiphy_priv(wiphy); int err; + if (changed & WIPHY_PARAM_COVERAGE_CLASS) { + err = drv_set_coverage_class(local, wiphy->coverage_class); + + if (err) + return err; + } + if (changed & WIPHY_PARAM_RTS_THRESHOLD) { err = drv_set_rts_threshold(local, wiphy->rts_threshold); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 8757ea73d544..de91d39e0276 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -214,6 +214,21 @@ static inline int drv_set_rts_threshold(struct ieee80211_local *local, return ret; } +static inline int drv_set_coverage_class(struct ieee80211_local *local, + u8 value) +{ + int ret = 0; + might_sleep(); + + if (local->ops->set_coverage_class) + local->ops->set_coverage_class(&local->hw, value); + else + ret = -EOPNOTSUPP; + + trace_drv_set_coverage_class(local, value, ret); + return ret; +} + static inline void drv_sta_notify(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, enum sta_notify_cmd cmd, diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 977cc7528bc6..0ea258123b8e 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -491,6 +491,29 @@ TRACE_EVENT(drv_set_rts_threshold, ) ); +TRACE_EVENT(drv_set_coverage_class, + TP_PROTO(struct ieee80211_local *local, u8 value, int ret), + + TP_ARGS(local, value, ret), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(u8, value) + __field(int, ret) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->ret = ret; + __entry->value = value; + ), + + TP_printk( + LOCAL_PR_FMT " value:%d ret:%d", + LOCAL_PR_ARG, __entry->value, __entry->ret + ) +); + TRACE_EVENT(drv_sta_notify, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, -- cgit v1.2.2 From 3dc1de0bf23816ed557ac8addf680cd5ee57e805 Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Mon, 28 Dec 2009 16:57:15 +0800 Subject: mac80211: quit addba_resp_timer if Tx BA session is torn down Make addba_resp_timer aware the HT_AGG_STATE_REQ_STOP_BA_MSK mask so that when ___ieee80211_stop_tx_ba_session() is issued the timer will quit. Otherwise when suspend happens before the timer expired, the timer handler will be called immediately after resume and messes up driver status. Signed-off-by: Zhu Yi Acked-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/agg-tx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 5aa8f4a3ed17..718fbcff84d2 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -179,7 +179,8 @@ static void sta_addba_resp_timer_expired(unsigned long data) /* check if the TID waits for addBA response */ spin_lock_bh(&sta->lock); - if ((*state & (HT_ADDBA_REQUESTED_MSK | HT_ADDBA_RECEIVED_MSK)) != + if ((*state & (HT_ADDBA_REQUESTED_MSK | HT_ADDBA_RECEIVED_MSK | + HT_AGG_STATE_REQ_STOP_BA_MSK)) != HT_ADDBA_REQUESTED_MSK) { spin_unlock_bh(&sta->lock); *state = HT_AGG_STATE_IDLE; -- cgit v1.2.2 From e00cfce0cb2a397859607bf515c6de9ce064b64a Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Tue, 29 Dec 2009 12:59:19 +0200 Subject: mac80211: Select lowest rate based on basic rate set in AP mode If the basic rate set is configured to not include the lowest rate (e.g., basic rate set = 6, 12, 24 Mbps in IEEE 802.11g mode), the AP should not send out broadcast frames at 1 Mbps. This type of configuration can be used to optimize channel usage in cases where there is no need for backwards compatibility with IEEE 802.11b-only devices. In AP mode, mac80211 was unconditionally using the lowest rate for Beacon frames and similarly, with all rate control algorithms that use rate_control_send_low(), the lowest rate ended up being used for all broadcast frames (and all unicast frames that are sent before association). Change this to take into account the basic rate configuration in AP mode, i.e., use the lowest rate in the basic rate set instead of the lowest supported rate when selecting the rate. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- net/mac80211/rate.c | 25 +++++++++++++++++++++++++ net/mac80211/tx.c | 24 +++++++++++++----------- 2 files changed, 38 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index b9007f80cb92..6349e7f4dcae 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -207,6 +207,27 @@ static bool rc_no_data_or_no_ack(struct ieee80211_tx_rate_control *txrc) return ((info->flags & IEEE80211_TX_CTL_NO_ACK) || !ieee80211_is_data(fc)); } +static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, u8 max_rate_idx) +{ + u8 i; + + if (basic_rates == 0) + return; /* assume basic rates unknown and accept rate */ + if (*idx < 0) + return; + if (basic_rates & (1 << *idx)) + return; /* selected rate is a basic rate */ + + for (i = *idx + 1; i <= max_rate_idx; i++) { + if (basic_rates & (1 << i)) { + *idx = i; + return; + } + } + + /* could not find a basic rate; use original selection */ +} + bool rate_control_send_low(struct ieee80211_sta *sta, void *priv_sta, struct ieee80211_tx_rate_control *txrc) @@ -218,6 +239,10 @@ bool rate_control_send_low(struct ieee80211_sta *sta, info->control.rates[0].count = (info->flags & IEEE80211_TX_CTL_NO_ACK) ? 
1 : txrc->hw->max_rate_tries; + if (!sta && txrc->ap) + rc_send_low_broadcast(&info->control.rates[0].idx, + txrc->bss_conf->basic_rates, + txrc->sband->n_bitrates); return true; } return false; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 140da4a7f13d..4961168f5091 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -520,6 +520,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) txrc.skb = tx->skb; txrc.reported_rate.idx = -1; txrc.max_rate_idx = tx->sdata->max_ratectrl_rateidx; + txrc.ap = tx->sdata->vif.type == NL80211_IFTYPE_AP; /* set up RTS protection if desired */ if (len > tx->local->hw.wiphy->rts_threshold) { @@ -2060,6 +2061,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, struct beacon_data *beacon; struct ieee80211_supported_band *sband; enum ieee80211_band band = local->hw.conf.channel->band; + struct ieee80211_tx_rate_control txrc; sband = local->hw.wiphy->bands[band]; @@ -2167,21 +2169,21 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, info = IEEE80211_SKB_CB(skb); info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + info->flags |= IEEE80211_TX_CTL_NO_ACK; info->band = band; - /* - * XXX: For now, always use the lowest rate - */ - info->control.rates[0].idx = 0; - info->control.rates[0].count = 1; - info->control.rates[1].idx = -1; - info->control.rates[2].idx = -1; - info->control.rates[3].idx = -1; - info->control.rates[4].idx = -1; - BUILD_BUG_ON(IEEE80211_TX_MAX_RATES != 5); + + memset(&txrc, 0, sizeof(txrc)); + txrc.hw = hw; + txrc.sband = sband; + txrc.bss_conf = &sdata->vif.bss_conf; + txrc.skb = skb; + txrc.reported_rate.idx = -1; + txrc.max_rate_idx = sdata->max_ratectrl_rateidx; + txrc.ap = true; + rate_control_get_rate(sdata, NULL, &txrc); info->control.vif = vif; - info->flags |= IEEE80211_TX_CTL_NO_ACK; info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT; info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; out: -- cgit v1.2.2 From 37eb0b164cf9fa9f70c8500926f5cde7c652f48e Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 6 Jan 2010 13:09:08 +0200 Subject: cfg80211/mac80211: Use more generic bitrate mask for rate control Extend struct cfg80211_bitrate_mask to actually use a bitfield mask instead of just a single fixed or maximum rate index. This change itself does not modify the behavior (except for debugfs files), but it prepares cfg80211 and mac80211 for a new nl80211 command for setting which rates can be used in TX rate control. Since frames are now going through the rate control algorithm unconditionally, the internal IEEE80211_TX_INTFL_RCALGO flag can now be removed. The RC implementations can use the rate_idx_mask value to optimize their behavior if only a single rate is enabled. The old max_rate_idx in struct ieee80211_tx_rate_control is maintained (but commented as deprecated) for backwards compatibility with existing RC implementations. Once these implementations have been updated to use the more generic rate_idx_mask, the max_rate_idx value can be removed. Signed-off-by: Jouni Malinen Signed-off-by: John W. 
Linville --- net/mac80211/cfg.c | 32 +++------------------- net/mac80211/debugfs_netdev.c | 22 ++++++++------- net/mac80211/ieee80211_i.h | 4 +-- net/mac80211/iface.c | 8 ++++-- net/mac80211/rate.c | 63 +++++++++++++++++++++++++++++++++++-------- net/mac80211/rate.h | 5 +--- net/mac80211/tx.c | 12 +++++++-- net/wireless/wext-compat.c | 34 ++++++++++++++++++++--- 8 files changed, 116 insertions(+), 64 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 976014c5e742..e5dda6fb8dff 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1406,8 +1406,6 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); int i; - u32 target_rate; - struct ieee80211_supported_band *sband; /* * This _could_ be supported by providing a hook for @@ -1417,35 +1415,11 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, if (local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) return -EOPNOTSUPP; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - - /* - * target_rate = -1, rate->fixed = 0 means auto only, so use all rates - * target_rate = X, rate->fixed = 1 means only rate X - * target_rate = X, rate->fixed = 0 means all rates <= X - */ - sdata->max_ratectrl_rateidx = -1; - sdata->force_unicast_rateidx = -1; - if (mask->fixed) - target_rate = mask->fixed / 100; - else if (mask->maxrate) - target_rate = mask->maxrate / 100; - else - return 0; + for (i = 0; i < IEEE80211_NUM_BANDS; i++) + sdata->rc_rateidx_mask[i] = mask->control[i].legacy; - for (i = 0; i< sband->n_bitrates; i++) { - if (target_rate != sband->bitrates[i].bitrate) - continue; - - /* requested bitrate found */ - sdata->max_ratectrl_rateidx = i; - if (mask->fixed) - sdata->force_unicast_rateidx = i; - return 0; - } - - return -EINVAL; + return 0; } static int ieee80211_remain_on_channel(struct wiphy *wiphy, diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 59f6e3bcbd09..1481049f0f71 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -127,8 +127,10 @@ __IEEE80211_IF_FILE(name, ieee80211_if_write_##name) /* common attributes */ IEEE80211_IF_FILE(drop_unencrypted, drop_unencrypted, DEC); -IEEE80211_IF_FILE(force_unicast_rateidx, force_unicast_rateidx, DEC); -IEEE80211_IF_FILE(max_ratectrl_rateidx, max_ratectrl_rateidx, DEC); +IEEE80211_IF_FILE(rc_rateidx_mask_2ghz, rc_rateidx_mask[IEEE80211_BAND_2GHZ], + HEX); +IEEE80211_IF_FILE(rc_rateidx_mask_5ghz, rc_rateidx_mask[IEEE80211_BAND_5GHZ], + HEX); /* STA attributes */ IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC); @@ -264,8 +266,8 @@ IEEE80211_IF_FILE(dot11MeshHWMPRootMode, static void add_sta_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(drop_unencrypted, sta); - DEBUGFS_ADD(force_unicast_rateidx, sta); - DEBUGFS_ADD(max_ratectrl_rateidx, sta); + DEBUGFS_ADD(rc_rateidx_mask_2ghz, sta); + DEBUGFS_ADD(rc_rateidx_mask_5ghz, sta); DEBUGFS_ADD(bssid, sta); DEBUGFS_ADD(aid, sta); @@ -275,8 +277,8 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata) static void add_ap_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(drop_unencrypted, ap); - DEBUGFS_ADD(force_unicast_rateidx, ap); - DEBUGFS_ADD(max_ratectrl_rateidx, ap); + DEBUGFS_ADD(rc_rateidx_mask_2ghz, ap); + DEBUGFS_ADD(rc_rateidx_mask_5ghz, ap); DEBUGFS_ADD(num_sta_ps, ap); DEBUGFS_ADD(dtim_count, ap); @@ -286,8 +288,8 @@ static void add_ap_files(struct 
ieee80211_sub_if_data *sdata) static void add_wds_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(drop_unencrypted, wds); - DEBUGFS_ADD(force_unicast_rateidx, wds); - DEBUGFS_ADD(max_ratectrl_rateidx, wds); + DEBUGFS_ADD(rc_rateidx_mask_2ghz, wds); + DEBUGFS_ADD(rc_rateidx_mask_5ghz, wds); DEBUGFS_ADD(peer, wds); } @@ -295,8 +297,8 @@ static void add_wds_files(struct ieee80211_sub_if_data *sdata) static void add_vlan_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(drop_unencrypted, vlan); - DEBUGFS_ADD(force_unicast_rateidx, vlan); - DEBUGFS_ADD(max_ratectrl_rateidx, vlan); + DEBUGFS_ADD(rc_rateidx_mask_2ghz, vlan); + DEBUGFS_ADD(rc_rateidx_mask_5ghz, vlan); } static void add_monitor_files(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index a27921ee6e63..3e4ac3f30857 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -494,8 +494,8 @@ struct ieee80211_sub_if_data { */ struct ieee80211_if_ap *bss; - int force_unicast_rateidx; /* forced TX rateidx for unicast frames */ - int max_ratectrl_rateidx; /* max TX rateidx for rate control */ + /* bitmap of allowed (non-MCS) rate indexes for rate control */ + u32 rc_rateidx_mask[IEEE80211_NUM_BANDS]; union { struct ieee80211_if_ap ap; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 264a6c975f8b..fe140bf033f9 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -856,8 +856,12 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, INIT_LIST_HEAD(&sdata->key_list); - sdata->force_unicast_rateidx = -1; - sdata->max_ratectrl_rateidx = -1; + for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + struct ieee80211_supported_band *sband; + sband = local->hw.wiphy->bands[i]; + sdata->rc_rateidx_mask[i] = + sband ? (1 << sband->n_bitrates) - 1 : 0; + } /* setup type-dependent data */ ieee80211_setup_sdata(sdata, type); diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 6349e7f4dcae..c74b7c85403c 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -249,6 +249,38 @@ bool rate_control_send_low(struct ieee80211_sta *sta, } EXPORT_SYMBOL(rate_control_send_low); +static void rate_idx_match_mask(struct ieee80211_tx_rate *rate, + int n_bitrates, u32 mask) +{ + int j; + + /* See whether the selected rate or anything below it is allowed. */ + for (j = rate->idx; j >= 0; j--) { + if (mask & (1 << j)) { + /* Okay, found a suitable rate. Use it. */ + rate->idx = j; + return; + } + } + + /* Try to find a higher rate that would be allowed */ + for (j = rate->idx + 1; j < n_bitrates; j++) { + if (mask & (1 << j)) { + /* Okay, found a suitable rate. Use it. */ + rate->idx = j; + return; + } + } + + /* + * Uh.. No suitable rate exists. This should not really happen with + * sane TX rate mask configurations. However, should someone manage to + * configure supported rates and TX rate mask in incompatible way, + * allow the frame to be transmitted with whatever the rate control + * selected. 
+ */ +} + void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct ieee80211_tx_rate_control *txrc) @@ -258,6 +290,7 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *ista = NULL; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); int i; + u32 mask; if (sta) { ista = &sta->sta; @@ -270,23 +303,31 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, info->control.rates[i].count = 1; } - if (sta && sdata->force_unicast_rateidx > -1) { - info->control.rates[0].idx = sdata->force_unicast_rateidx; - } else { - ref->ops->get_rate(ref->priv, ista, priv_sta, txrc); - info->flags |= IEEE80211_TX_INTFL_RCALGO; - } + ref->ops->get_rate(ref->priv, ista, priv_sta, txrc); /* - * try to enforce the maximum rate the user wanted + * Try to enforce the rateidx mask the user wanted. skip this if the + * default mask (allow all rates) is used to save some processing for + * the common case. */ - if (sdata->max_ratectrl_rateidx > -1) + mask = sdata->rc_rateidx_mask[info->band]; + if (mask != (1 << txrc->sband->n_bitrates) - 1) { + if (sta) { + /* Filter out rates that the STA does not support */ + mask &= sta->sta.supp_rates[info->band]; + } + /* + * Make sure the rate index selected for each TX rate is + * included in the configured mask and change the rate indexes + * if needed. + */ for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { + /* Rate masking supports only legacy rates for now */ if (info->control.rates[i].flags & IEEE80211_TX_RC_MCS) continue; - info->control.rates[i].idx = - min_t(s8, info->control.rates[i].idx, - sdata->max_ratectrl_rateidx); + rate_idx_match_mask(&info->control.rates[i], + txrc->sband->n_bitrates, mask); + } } BUG_ON(info->control.rates[0].idx < 0); diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index cb9bd1f65e27..669dddd40521 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -44,10 +44,7 @@ static inline void rate_control_tx_status(struct ieee80211_local *local, struct rate_control_ref *ref = local->rate_ctrl; struct ieee80211_sta *ista = &sta->sta; void *priv_sta = sta->rate_ctrl_priv; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - - if (likely(info->flags & IEEE80211_TX_INTFL_RCALGO)) - ref->ops->tx_status(ref->priv, sband, ista, priv_sta, skb); + ref->ops->tx_status(ref->priv, sband, ista, priv_sta, skb); } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 4961168f5091..d3a44812f8bf 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -519,7 +519,11 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) txrc.bss_conf = &tx->sdata->vif.bss_conf; txrc.skb = tx->skb; txrc.reported_rate.idx = -1; - txrc.max_rate_idx = tx->sdata->max_ratectrl_rateidx; + txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[tx->channel->band]; + if (txrc.rate_idx_mask == (1 << sband->n_bitrates) - 1) + txrc.max_rate_idx = -1; + else + txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1; txrc.ap = tx->sdata->vif.type == NL80211_IFTYPE_AP; /* set up RTS protection if desired */ @@ -2178,7 +2182,11 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, txrc.bss_conf = &sdata->vif.bss_conf; txrc.skb = skb; txrc.reported_rate.idx = -1; - txrc.max_rate_idx = sdata->max_ratectrl_rateidx; + txrc.rate_idx_mask = sdata->rc_rateidx_mask[band]; + if (txrc.rate_idx_mask == (1 << sband->n_bitrates) - 1) + txrc.max_rate_idx = -1; + else + txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1; txrc.ap = true; rate_control_get_rate(sdata, NULL, &txrc); diff --git 
a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 4198243a3dff..966d2f01beac 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1204,21 +1204,47 @@ int cfg80211_wext_siwrate(struct net_device *dev, struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); struct cfg80211_bitrate_mask mask; + u32 fixed, maxrate; + struct ieee80211_supported_band *sband; + int band, ridx; + bool match = false; if (!rdev->ops->set_bitrate_mask) return -EOPNOTSUPP; - mask.fixed = 0; - mask.maxrate = 0; + memset(&mask, 0, sizeof(mask)); + fixed = 0; + maxrate = 0; if (rate->value < 0) { /* nothing */ } else if (rate->fixed) { - mask.fixed = rate->value / 1000; /* kbps */ + fixed = rate->value / 100000; } else { - mask.maxrate = rate->value / 1000; /* kbps */ + maxrate = rate->value / 100000; + } + + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + sband = wdev->wiphy->bands[band]; + if (sband == NULL) + continue; + for (ridx = 0; ridx < sband->n_bitrates; ridx++) { + struct ieee80211_rate *srate = &sband->bitrates[ridx]; + if (fixed == srate->bitrate) { + mask.control[band].legacy = 1 << ridx; + match = true; + break; + } + if (srate->bitrate <= maxrate) { + mask.control[band].legacy |= 1 << ridx; + match = true; + } + } } + if (!match) + return -EINVAL; + return rdev->ops->set_bitrate_mask(wdev->wiphy, dev, NULL, &mask); } EXPORT_SYMBOL_GPL(cfg80211_wext_siwrate); -- cgit v1.2.2 From 13ae75b103e07304a34ab40c9136e9f53e06475c Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Tue, 29 Dec 2009 12:59:45 +0200 Subject: nl80211: New command for setting TX rate mask for rate control Add a new NL80211_CMD_SET_TX_BITRATE_MASK command and related attributes to provide support for setting TX rate mask for rate control. This uses the existing cfg80211 set_bitrate_mask operation that was previously used only with WEXT compat code (SIOCSIWRATE). The nl80211 command allows more generic configuration of allowed rates as a mask instead of fixed/max rate. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/wireless/nl80211.c | 111 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c09fbcd278fb..b804062e0179 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -144,6 +144,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { .len = WLAN_PMKID_LEN }, [NL80211_ATTR_DURATION] = { .type = NLA_U32 }, [NL80211_ATTR_COOKIE] = { .type = NLA_U64 }, + [NL80211_ATTR_TX_RATES] = { .type = NLA_NESTED }, }; /* policy for the attributes */ @@ -575,6 +576,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(del_pmksa, DEL_PMKSA); CMD(flush_pmksa, FLUSH_PMKSA); CMD(remain_on_channel, REMAIN_ON_CHANNEL); + CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { i++; NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS); @@ -4438,6 +4440,109 @@ static int nl80211_cancel_remain_on_channel(struct sk_buff *skb, return err; } +static u32 rateset_to_mask(struct ieee80211_supported_band *sband, + u8 *rates, u8 rates_len) +{ + u8 i; + u32 mask = 0; + + for (i = 0; i < rates_len; i++) { + int rate = (rates[i] & 0x7f) * 5; + int ridx; + for (ridx = 0; ridx < sband->n_bitrates; ridx++) { + struct ieee80211_rate *srate = + &sband->bitrates[ridx]; + if (rate == srate->bitrate) { + mask |= 1 << ridx; + break; + } + } + if (ridx == sband->n_bitrates) + return 0; /* rate not found */ + } + + return mask; +} + +static struct nla_policy +nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] __read_mostly = { + [NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY, + .len = NL80211_MAX_SUPP_RATES }, +}; + +static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, + struct genl_info *info) +{ + struct nlattr *tb[NL80211_TXRATE_MAX + 1]; + struct cfg80211_registered_device *rdev; + struct cfg80211_bitrate_mask mask; + int err, rem, i; + struct net_device *dev; + struct nlattr *tx_rates; + struct ieee80211_supported_band *sband; + + if (info->attrs[NL80211_ATTR_TX_RATES] == NULL) + return -EINVAL; + + rtnl_lock(); + + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); + if (err) + goto unlock_rtnl; + + if (!rdev->ops->set_bitrate_mask) { + err = -EOPNOTSUPP; + goto unlock; + } + + memset(&mask, 0, sizeof(mask)); + /* Default to all rates enabled */ + for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + sband = rdev->wiphy.bands[i]; + mask.control[i].legacy = + sband ? (1 << sband->n_bitrates) - 1 : 0; + } + + /* + * The nested attribute uses enum nl80211_band as the index. This maps + * directly to the enum ieee80211_band values used in cfg80211. 
+ */ + nla_for_each_nested(tx_rates, info->attrs[NL80211_ATTR_TX_RATES], rem) + { + enum ieee80211_band band = nla_type(tx_rates); + if (band < 0 || band >= IEEE80211_NUM_BANDS) { + err = -EINVAL; + goto unlock; + } + sband = rdev->wiphy.bands[band]; + if (sband == NULL) { + err = -EINVAL; + goto unlock; + } + nla_parse(tb, NL80211_TXRATE_MAX, nla_data(tx_rates), + nla_len(tx_rates), nl80211_txattr_policy); + if (tb[NL80211_TXRATE_LEGACY]) { + mask.control[band].legacy = rateset_to_mask( + sband, + nla_data(tb[NL80211_TXRATE_LEGACY]), + nla_len(tb[NL80211_TXRATE_LEGACY])); + if (mask.control[band].legacy == 0) { + err = -EINVAL; + goto unlock; + } + } + } + + err = rdev->ops->set_bitrate_mask(&rdev->wiphy, dev, NULL, &mask); + + unlock: + dev_put(dev); + cfg80211_unlock_rdev(rdev); + unlock_rtnl: + rtnl_unlock(); + return err; +} + static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -4712,6 +4817,12 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, + { + .cmd = NL80211_CMD_SET_TX_BITRATE_MASK, + .doit = nl80211_set_tx_bitrate_mask, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { -- cgit v1.2.2 From 7044cc565b45a898c140fb185174a66f2d68a163 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Tue, 5 Jan 2010 20:16:19 +0200 Subject: mac80211: add functions to create PS Poll and Nullfunc templates Some hardware, for example wl1251 and wl1271, handle the transmission of power save related frames in hardware, but the driver is responsible for creating the templates. It's better to create the templates in mac80211, that way all drivers can benefit from this. Add two new functions, ieee80211_pspoll_get() and ieee80211_nullfunc_get() which drivers need to call to get the frame. Drivers are also responsible for updating the templates after each association. Also new struct ieee80211_hdr_3addr is added to ieee80211.h to make it easy to calculate length of the Nullfunc frame. Signed-off-by: Kalle Valo Signed-off-by: John W. 
Linville --- net/mac80211/tx.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index d3a44812f8bf..055b45b146d9 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2200,6 +2200,84 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_beacon_get_tim); +struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw, + struct ieee80211_vif *vif) +{ + struct ieee80211_sub_if_data *sdata; + struct ieee80211_if_managed *ifmgd; + struct ieee80211_pspoll *pspoll; + struct ieee80211_local *local; + struct sk_buff *skb; + + if (WARN_ON(vif->type != NL80211_IFTYPE_STATION)) + return NULL; + + sdata = vif_to_sdata(vif); + ifmgd = &sdata->u.mgd; + local = sdata->local; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*pspoll)); + if (!skb) { + printk(KERN_DEBUG "%s: failed to allocate buffer for " + "pspoll template\n", sdata->name); + return NULL; + } + skb_reserve(skb, local->hw.extra_tx_headroom); + + pspoll = (struct ieee80211_pspoll *) skb_put(skb, sizeof(*pspoll)); + memset(pspoll, 0, sizeof(*pspoll)); + pspoll->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL | + IEEE80211_STYPE_PSPOLL); + pspoll->aid = cpu_to_le16(ifmgd->aid); + + /* aid in PS-Poll has its two MSBs each set to 1 */ + pspoll->aid |= cpu_to_le16(1 << 15 | 1 << 14); + + memcpy(pspoll->bssid, ifmgd->bssid, ETH_ALEN); + memcpy(pspoll->ta, vif->addr, ETH_ALEN); + + return skb; +} +EXPORT_SYMBOL(ieee80211_pspoll_get); + +struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, + struct ieee80211_vif *vif) +{ + struct ieee80211_hdr_3addr *nullfunc; + struct ieee80211_sub_if_data *sdata; + struct ieee80211_if_managed *ifmgd; + struct ieee80211_local *local; + struct sk_buff *skb; + + if (WARN_ON(vif->type != NL80211_IFTYPE_STATION)) + return NULL; + + sdata = vif_to_sdata(vif); + ifmgd = &sdata->u.mgd; + local = sdata->local; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*nullfunc)); + if (!skb) { + printk(KERN_DEBUG "%s: failed to allocate buffer for nullfunc " + "template\n", sdata->name); + return NULL; + } + skb_reserve(skb, local->hw.extra_tx_headroom); + + nullfunc = (struct ieee80211_hdr_3addr *) skb_put(skb, + sizeof(*nullfunc)); + memset(nullfunc, 0, sizeof(*nullfunc)); + nullfunc->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA | + IEEE80211_STYPE_NULLFUNC | + IEEE80211_FCTL_TODS); + memcpy(nullfunc->addr1, ifmgd->bssid, ETH_ALEN); + memcpy(nullfunc->addr2, vif->addr, ETH_ALEN); + memcpy(nullfunc->addr3, ifmgd->bssid, ETH_ALEN); + + return skb; +} +EXPORT_SYMBOL(ieee80211_nullfunc_get); + void ieee80211_rts_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif, const void *frame, size_t frame_len, const struct ieee80211_tx_info *frame_txctl, -- cgit v1.2.2 From d8cd189e9b1e050629f545e76b21a321f62c29bf Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Tue, 5 Jan 2010 20:16:26 +0200 Subject: mac80211: use PS Poll and Nullfunc templates when sending such frames To avoid duplicate code, use ieee80211_[pspoll|nullfunc]_get() to get templates for PS Poll and Nullfunc frames in mlme.c. Compile-tested only. Signed-off-by: Kalle Valo Signed-off-by: John W. 
Linville --- net/mac80211/mlme.c | 48 +++++++++--------------------------------------- 1 file changed, 9 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 97bcf2278bdb..5484cf930a87 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -249,30 +249,15 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, void ieee80211_send_pspoll(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_pspoll *pspoll; struct sk_buff *skb; - u16 fc; - skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*pspoll)); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for " - "pspoll frame\n", sdata->name); + skb = ieee80211_pspoll_get(&local->hw, &sdata->vif); + if (!skb) return; - } - skb_reserve(skb, local->hw.extra_tx_headroom); - - pspoll = (struct ieee80211_pspoll *) skb_put(skb, sizeof(*pspoll)); - memset(pspoll, 0, sizeof(*pspoll)); - fc = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_PSPOLL | IEEE80211_FCTL_PM; - pspoll->frame_control = cpu_to_le16(fc); - pspoll->aid = cpu_to_le16(ifmgd->aid); - /* aid in PS-Poll has its two MSBs each set to 1 */ - pspoll->aid |= cpu_to_le16(1 << 15 | 1 << 14); - - memcpy(pspoll->bssid, ifmgd->bssid, ETH_ALEN); - memcpy(pspoll->ta, sdata->vif.addr, ETH_ALEN); + pspoll = (struct ieee80211_pspoll *) skb->data; + pspoll->frame_control |= cpu_to_le16(IEEE80211_FCTL_PM); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; ieee80211_tx_skb(sdata, skb); @@ -283,30 +268,15 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, int powersave) { struct sk_buff *skb; - struct ieee80211_hdr *nullfunc; - __le16 fc; + struct ieee80211_hdr_3addr *nullfunc; - if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION)) + skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif); + if (!skb) return; - skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for nullfunc " - "frame\n", sdata->name); - return; - } - skb_reserve(skb, local->hw.extra_tx_headroom); - - nullfunc = (struct ieee80211_hdr *) skb_put(skb, 24); - memset(nullfunc, 0, 24); - fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC | - IEEE80211_FCTL_TODS); + nullfunc = (struct ieee80211_hdr_3addr *) skb->data; if (powersave) - fc |= cpu_to_le16(IEEE80211_FCTL_PM); - nullfunc->frame_control = fc; - memcpy(nullfunc->addr1, sdata->u.mgd.bssid, ETH_ALEN); - memcpy(nullfunc->addr2, sdata->vif.addr, ETH_ALEN); - memcpy(nullfunc->addr3, sdata->u.mgd.bssid, ETH_ALEN); + nullfunc->frame_control |= cpu_to_le16(IEEE80211_FCTL_PM); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; ieee80211_tx_skb(sdata, skb); -- cgit v1.2.2 From 05e54ea6cce400ac34528d705179b45244f61074 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Tue, 5 Jan 2010 20:16:38 +0200 Subject: mac80211: create Probe Request template Certain type of hardware, for example wl1251 and wl1271, need a template for the Probe Request. Create a function ieee80211_probereq_get() which creates the template and drivers send it to hardware. Signed-off-by: Kalle Valo Signed-off-by: John W. 
Linville --- net/mac80211/tx.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 055b45b146d9..0661e696a1dd 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2278,6 +2278,56 @@ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_nullfunc_get); +struct sk_buff *ieee80211_probereq_get(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + const u8 *ssid, size_t ssid_len, + const u8 *ie, size_t ie_len) +{ + struct ieee80211_sub_if_data *sdata; + struct ieee80211_local *local; + struct ieee80211_hdr_3addr *hdr; + struct sk_buff *skb; + size_t ie_ssid_len; + u8 *pos; + + sdata = vif_to_sdata(vif); + local = sdata->local; + ie_ssid_len = 2 + ssid_len; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*hdr) + + ie_ssid_len + ie_len); + if (!skb) { + printk(KERN_DEBUG "%s: failed to allocate buffer for probe " + "request template\n", sdata->name); + return NULL; + } + + skb_reserve(skb, local->hw.extra_tx_headroom); + + hdr = (struct ieee80211_hdr_3addr *) skb_put(skb, sizeof(*hdr)); + memset(hdr, 0, sizeof(*hdr)); + hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_PROBE_REQ); + memset(hdr->addr1, 0xff, ETH_ALEN); + memcpy(hdr->addr2, vif->addr, ETH_ALEN); + memset(hdr->addr3, 0xff, ETH_ALEN); + + pos = skb_put(skb, ie_ssid_len); + *pos++ = WLAN_EID_SSID; + *pos++ = ssid_len; + if (ssid) + memcpy(pos, ssid, ssid_len); + pos += ssid_len; + + if (ie) { + pos = skb_put(skb, ie_len); + memcpy(pos, ie, ie_len); + } + + return skb; +} +EXPORT_SYMBOL(ieee80211_probereq_get); + void ieee80211_rts_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif, const void *frame, size_t frame_len, const struct ieee80211_tx_info *frame_txctl, -- cgit v1.2.2 From 7c12ce8b854df346388ea56d684784e3484012cf Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Tue, 5 Jan 2010 20:16:44 +0200 Subject: mac80211: use Probe Request template when sending a direct scan As mac80211 now has a separate function for creating Probe Request templates, better to use it when sending direct Probe Requests to an AP. Only the bssid needs to be updated in the template before sending it. Signed-off-by: Kalle Valo Signed-off-by: John W. 
Linville --- net/mac80211/util.c | 40 ++++++++++++++++------------------------ 1 file changed, 16 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/mac80211/util.c b/net/mac80211/util.c index bc73904d561c..72a98e844718 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1000,37 +1000,29 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; - u8 *pos; - - skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200 + - ie_len); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for probe " - "request\n", sdata->name); + size_t buf_len; + u8 *buf; + + /* FIXME: come up with a proper value */ + buf = kmalloc(200 + ie_len, GFP_KERNEL); + if (!buf) { + printk(KERN_DEBUG "%s: failed to allocate temporary IE " + "buffer\n", sdata->name); return; } - skb_reserve(skb, local->hw.extra_tx_headroom); - mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); - memset(mgmt, 0, 24); - mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | - IEEE80211_STYPE_PROBE_REQ); - memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); + buf_len = ieee80211_build_preq_ies(local, buf, ie, ie_len, + local->hw.conf.channel->band); + + skb = ieee80211_probereq_get(&local->hw, &sdata->vif, + ssid, ssid_len, + buf, buf_len); + if (dst) { + mgmt = (struct ieee80211_mgmt *) skb->data; memcpy(mgmt->da, dst, ETH_ALEN); memcpy(mgmt->bssid, dst, ETH_ALEN); - } else { - memset(mgmt->da, 0xff, ETH_ALEN); - memset(mgmt->bssid, 0xff, ETH_ALEN); } - pos = skb_put(skb, 2 + ssid_len); - *pos++ = WLAN_EID_SSID; - *pos++ = ssid_len; - memcpy(pos, ssid, ssid_len); - pos += ssid_len; - - skb_put(skb, ieee80211_build_preq_ies(local, pos, ie, ie_len, - local->hw.conf.channel->band)); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; ieee80211_tx_skb(sdata, skb); -- cgit v1.2.2 From 34a6eddbabd704b3c7dae9362234552267573be2 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 6 Jan 2010 16:19:24 +0200 Subject: cfg80211: Store IEs from both Beacon and Probe Response frames Store information elements from Beacon and Probe Response frames in separate buffers to allow both sets to be made available through nl80211. This allows user space applications to get access to IEs from Beacon frames even if we have received Probe Response frames from the BSS. Previously, the IEs from Probe Response frames would have overridden the IEs from Beacon frames. This feature is of somewhat limited use since most protocols include the same (or extended) information in Probe Response frames. However, there are couple of exceptions where the IEs from Beacon frames could be of some use: TIM IE is only included in Beacon frames (and it would be needed to figure out the DTIM period used in the BSS) and at least some implementations of Wireless Provisioning Services seem to include the full IE only in Beacon frames). The new BSS attribute for scan results is added to allow both the IE sets to be delivered. This is done in a way that maintains the previously used behavior for applications that are not aware of the new NL80211_BSS_BEACON_IES attribute. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/wireless/core.h | 3 +- net/wireless/nl80211.c | 4 ++ net/wireless/scan.c | 120 ++++++++++++++++++++++++++++++++++++------------- 3 files changed, 96 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/wireless/core.h b/net/wireless/core.h index 30ec95f05b52..2d6a6b9c0c43 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -111,7 +111,8 @@ struct cfg80211_internal_bss { unsigned long ts; struct kref ref; atomic_t hold; - bool ies_allocated; + bool beacon_ies_allocated; + bool proberesp_ies_allocated; /* must be last because of priv member */ struct cfg80211_bss pub; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b804062e0179..4af7991a9ec8 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3163,6 +3163,10 @@ static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT(msg, NL80211_BSS_INFORMATION_ELEMENTS, res->len_information_elements, res->information_elements); + if (res->beacon_ies && res->len_beacon_ies && + res->beacon_ies != res->information_elements) + NLA_PUT(msg, NL80211_BSS_BEACON_IES, + res->len_beacon_ies, res->beacon_ies); if (res->tsf) NLA_PUT_U64(msg, NL80211_BSS_TSF, res->tsf); if (res->beacon_interval) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 0c2cbbebca95..06b0231ee5e3 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -100,8 +100,10 @@ static void bss_release(struct kref *ref) if (bss->pub.free_priv) bss->pub.free_priv(&bss->pub); - if (bss->ies_allocated) - kfree(bss->pub.information_elements); + if (bss->beacon_ies_allocated) + kfree(bss->pub.beacon_ies); + if (bss->proberesp_ies_allocated) + kfree(bss->pub.proberesp_ies); BUG_ON(atomic_read(&bss->hold)); @@ -375,8 +377,7 @@ rb_find_bss(struct cfg80211_registered_device *dev, static struct cfg80211_internal_bss * cfg80211_bss_update(struct cfg80211_registered_device *dev, - struct cfg80211_internal_bss *res, - bool overwrite) + struct cfg80211_internal_bss *res) { struct cfg80211_internal_bss *found = NULL; const u8 *meshid, *meshcfg; @@ -418,28 +419,64 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, found->pub.capability = res->pub.capability; found->ts = res->ts; - /* overwrite IEs */ - if (overwrite) { + /* Update IEs */ + if (res->pub.proberesp_ies) { size_t used = dev->wiphy.bss_priv_size + sizeof(*res); - size_t ielen = res->pub.len_information_elements; + size_t ielen = res->pub.len_proberesp_ies; + + if (found->pub.proberesp_ies && + !found->proberesp_ies_allocated && + ksize(found) >= used + ielen) { + memcpy(found->pub.proberesp_ies, + res->pub.proberesp_ies, ielen); + found->pub.len_proberesp_ies = ielen; + } else { + u8 *ies = found->pub.proberesp_ies; + + if (found->proberesp_ies_allocated) + ies = krealloc(ies, ielen, GFP_ATOMIC); + else + ies = kmalloc(ielen, GFP_ATOMIC); + + if (ies) { + memcpy(ies, res->pub.proberesp_ies, + ielen); + found->proberesp_ies_allocated = true; + found->pub.proberesp_ies = ies; + found->pub.len_proberesp_ies = ielen; + } + } - if (!found->ies_allocated && ksize(found) >= used + ielen) { - memcpy(found->pub.information_elements, - res->pub.information_elements, ielen); - found->pub.len_information_elements = ielen; + /* Override possible earlier Beacon frame IEs */ + found->pub.information_elements = + found->pub.proberesp_ies; + found->pub.len_information_elements = + found->pub.len_proberesp_ies; + } + if (res->pub.beacon_ies) { + size_t used = dev->wiphy.bss_priv_size + sizeof(*res); + size_t ielen = res->pub.len_beacon_ies; + 
+ if (found->pub.beacon_ies && + !found->beacon_ies_allocated && + ksize(found) >= used + ielen) { + memcpy(found->pub.beacon_ies, + res->pub.beacon_ies, ielen); + found->pub.len_beacon_ies = ielen; } else { - u8 *ies = found->pub.information_elements; + u8 *ies = found->pub.beacon_ies; - if (found->ies_allocated) + if (found->beacon_ies_allocated) ies = krealloc(ies, ielen, GFP_ATOMIC); else ies = kmalloc(ielen, GFP_ATOMIC); if (ies) { - memcpy(ies, res->pub.information_elements, ielen); - found->ies_allocated = true; - found->pub.information_elements = ies; - found->pub.len_information_elements = ielen; + memcpy(ies, res->pub.beacon_ies, + ielen); + found->beacon_ies_allocated = true; + found->pub.beacon_ies = ies; + found->pub.len_beacon_ies = ielen; } } } @@ -489,14 +526,26 @@ cfg80211_inform_bss(struct wiphy *wiphy, res->pub.tsf = timestamp; res->pub.beacon_interval = beacon_interval; res->pub.capability = capability; - /* point to after the private area */ - res->pub.information_elements = (u8 *)res + sizeof(*res) + privsz; - memcpy(res->pub.information_elements, ie, ielen); - res->pub.len_information_elements = ielen; + /* + * Since we do not know here whether the IEs are from a Beacon or Probe + * Response frame, we need to pick one of the options and only use it + * with the driver that does not provide the full Beacon/Probe Response + * frame. Use Beacon frame pointer to avoid indicating that this should + * override the information_elements pointer should we have received an + * earlier indication of Probe Response data. + * + * The initial buffer for the IEs is allocated with the BSS entry and + * is located after the private area. + */ + res->pub.beacon_ies = (u8 *)res + sizeof(*res) + privsz; + memcpy(res->pub.beacon_ies, ie, ielen); + res->pub.len_beacon_ies = ielen; + res->pub.information_elements = res->pub.beacon_ies; + res->pub.len_information_elements = res->pub.len_beacon_ies; kref_init(&res->ref); - res = cfg80211_bss_update(wiphy_to_dev(wiphy), res, 0); + res = cfg80211_bss_update(wiphy_to_dev(wiphy), res); if (!res) return NULL; @@ -517,7 +566,6 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, struct cfg80211_internal_bss *res; size_t ielen = len - offsetof(struct ieee80211_mgmt, u.probe_resp.variable); - bool overwrite; size_t privsz = wiphy->bss_priv_size; if (WARN_ON(wiphy->signal_type == NL80211_BSS_SIGNAL_UNSPEC && @@ -538,16 +586,28 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, res->pub.tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp); res->pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int); res->pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info); - /* point to after the private area */ - res->pub.information_elements = (u8 *)res + sizeof(*res) + privsz; - memcpy(res->pub.information_elements, mgmt->u.probe_resp.variable, ielen); - res->pub.len_information_elements = ielen; + /* + * The initial buffer for the IEs is allocated with the BSS entry and + * is located after the private area. 
+ */ + if (ieee80211_is_probe_resp(mgmt->frame_control)) { + res->pub.proberesp_ies = (u8 *) res + sizeof(*res) + privsz; + memcpy(res->pub.proberesp_ies, mgmt->u.probe_resp.variable, + ielen); + res->pub.len_proberesp_ies = ielen; + res->pub.information_elements = res->pub.proberesp_ies; + res->pub.len_information_elements = res->pub.len_proberesp_ies; + } else { + res->pub.beacon_ies = (u8 *) res + sizeof(*res) + privsz; + memcpy(res->pub.beacon_ies, mgmt->u.beacon.variable, ielen); + res->pub.len_beacon_ies = ielen; + res->pub.information_elements = res->pub.beacon_ies; + res->pub.len_information_elements = res->pub.len_beacon_ies; + } kref_init(&res->ref); - overwrite = ieee80211_is_probe_resp(mgmt->frame_control); - - res = cfg80211_bss_update(wiphy_to_dev(wiphy), res, overwrite); + res = cfg80211_bss_update(wiphy_to_dev(wiphy), res); if (!res) return NULL; -- cgit v1.2.2 From 81ac3462d346ee7aaf037a35156b0a7a354e98cf Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 6 Jan 2010 15:30:58 +0100 Subject: mac80211: fix a few work bugs Kalle and Lennert reported problems with the new work code, and at least Kalle's problem I was able to trace to a missing jiffies initialisation. I also ran into a problem where occasionally I couldn't connect, which seems fixed with kicking the work items after scanning. Finally, also add some sanity checking code to verify that we're not adding work items while an interface is down -- that case could lead to something similar to what Lennert was seeing. There still seems to be a race condition that we're trying to figure out separately. Signed-off-by: Johannes Berg Tested-by: Lennert Buytenhek Tested-by: Kalle Valo Signed-off-by: John W. Linville --- net/mac80211/scan.c | 1 + net/mac80211/work.c | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'net') diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index a4c63d4e6845..30cb62bb45b3 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -284,6 +284,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) ieee80211_mlme_notify_scan_completed(local); ieee80211_ibss_notify_scan_completed(local); ieee80211_mesh_notify_scan_completed(local); + ieee80211_queue_work(&local->hw, &local->work_work); } EXPORT_SYMBOL(ieee80211_scan_completed); diff --git a/net/mac80211/work.c b/net/mac80211/work.c index 5ba75990c9fd..7c5d95b1bc04 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -818,6 +818,7 @@ static void ieee80211_work_work(struct work_struct *work) wk->chan == local->tmp_channel && wk->chan_type == local->tmp_channel_type) { wk->started = true; + wk->timeout = jiffies; } if (!wk->started && !local->tmp_channel) { @@ -935,6 +936,9 @@ void ieee80211_add_work(struct ieee80211_work *wk) if (WARN_ON(!wk->done)) return; + if (WARN_ON(!ieee80211_sdata_running(wk->sdata))) + return; + wk->started = false; local = wk->sdata->local; -- cgit v1.2.2 From b74d12e116528fadc35f305eb6e9525a3a2b62b1 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 7 Jan 2010 17:24:54 -0500 Subject: cfg80211: add debug print when we drop a bogus country IE Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. 
Linville --- net/wireless/reg.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index f79d6613c5ff..389247cb7059 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1806,8 +1806,10 @@ void regulatory_hint_11d(struct wiphy *wiphy, goto out; rd = country_ie_2_rd(country_ie, country_ie_len, &checksum); - if (!rd) + if (!rd) { + REG_DBG_PRINT("cfg80211: Ignoring bogus country IE\n"); goto out; + } /* * This will not happen right now but we leave it here for the -- cgit v1.2.2 From 08030db6e5275dda19ea1b3ab8a41c992799db4a Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 7 Jan 2010 17:24:56 -0500 Subject: cfg80211: process the max power on a country IE The max power from each country IE triplet was being ignored. This fix isn't critical as CRDA was always being used for the lower limit, but we should process it in case the AP still wants to decrease power output even more for whatever reason. Reported-by: Benoit PAPILLAULT Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 389247cb7059..77d0bb6f6e7a 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -680,7 +680,7 @@ static struct ieee80211_regdomain *country_ie_2_rd( */ freq_range->max_bandwidth_khz = MHZ_TO_KHZ(40); power_rule->max_antenna_gain = DBI_TO_MBI(100); - power_rule->max_eirp = DBM_TO_MBM(100); + power_rule->max_eirp = DBM_TO_MBM(triplet->chans.max_power); country_ie += 3; country_ie_len -= 3; -- cgit v1.2.2 From cc5d8a3772ee4e2ed29558ba548b4747959ba971 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 7 Jan 2010 17:24:57 -0500 Subject: cfg80211: Fix country IE parsing for single channel triplets This enhances the way we parse country IEs to minimize the number of regulatory rules that we create. It also fixes our current implementation, which treated country IE triplets with only one channel as one independent regulatory rule even though adjacent rules were also being provided. Without this patch, APs which send country IE information with a channel triplet for each individual channel will force cfg80211 to deny HT40 operation, as a regulatory rule would have been created independently for each channel and as such configured only for 20 MHz operation. Although 802.11n APs which send country IE triplets in this fashion are likely rare, Benoit reports this against the Ubiquity NanoStation M5, with Country "FR" and HT40 enabled. Since we now have a helper which parses the triplets in intermediate steps, we now take extra care to process padding. Reported-by: Benoit PAPILLAULT Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 225 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 221 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 77d0bb6f6e7a..a5c2d3a6cbb2 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -484,6 +484,178 @@ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range, #undef ONE_GHZ_IN_KHZ } +/* + * Some APs may send a country IE triplet for each channel they + * support and while this is completely overkill and silly we still + * need to support it. We avoid making a single rule for each channel + * though and to help us with this we use this helper to find the + * actual subband end channel. 
These type of country IE triplet + * scenerios are handled then, all yielding two regulaotry rules from + * parsing a country IE: + * + * [1] + * [2] + * [36] + * [40] + * + * [1] + * [2-4] + * [5-12] + * [36] + * [40-44] + * + * [1-4] + * [5-7] + * [36-44] + * [48-64] + * + * [36-36] + * [40-40] + * [44-44] + * [48-48] + * [52-52] + * [56-56] + * [60-60] + * [64-64] + * [100-100] + * [104-104] + * [108-108] + * [112-112] + * [116-116] + * [120-120] + * [124-124] + * [128-128] + * [132-132] + * [136-136] + * [140-140] + * + * Returns 0 if the IE has been found to be invalid in the middle + * somewhere. + */ +static int max_subband_chan(int orig_cur_chan, + int orig_end_channel, + s8 orig_max_power, + u8 **country_ie, + u8 *country_ie_len) +{ + u8 *triplets_start = *country_ie; + u8 len_at_triplet = *country_ie_len; + int end_subband_chan = orig_end_channel; + enum ieee80211_band band; + + /* + * We'll deal with padding for the caller unless + * its not immediate and we don't process any channels + */ + if (*country_ie_len == 1) { + *country_ie += 1; + *country_ie_len -= 1; + return orig_end_channel; + } + + /* Move to the next triplet and then start search */ + *country_ie += 3; + *country_ie_len -= 3; + + if (orig_cur_chan <= 14) + band = IEEE80211_BAND_2GHZ; + else + band = IEEE80211_BAND_5GHZ; + + while (*country_ie_len >= 3) { + int end_channel = 0; + struct ieee80211_country_ie_triplet *triplet = + (struct ieee80211_country_ie_triplet *) *country_ie; + int cur_channel = 0, next_expected_chan; + enum ieee80211_band next_band = IEEE80211_BAND_2GHZ; + + /* means last triplet is completely unrelated to this one */ + if (triplet->ext.reg_extension_id >= + IEEE80211_COUNTRY_EXTENSION_ID) { + *country_ie -= 3; + *country_ie_len += 3; + break; + } + + if (triplet->chans.first_channel == 0) { + *country_ie += 1; + *country_ie_len -= 1; + if (*country_ie_len != 0) + return 0; + break; + } + + /* Monitonically increasing channel order */ + if (triplet->chans.first_channel <= end_subband_chan) + return 0; + + /* 2 GHz */ + if (triplet->chans.first_channel <= 14) { + end_channel = triplet->chans.first_channel + + triplet->chans.num_channels - 1; + } + else { + end_channel = triplet->chans.first_channel + + (4 * (triplet->chans.num_channels - 1)); + next_band = IEEE80211_BAND_5GHZ; + } + + if (band != next_band) { + *country_ie -= 3; + *country_ie_len += 3; + break; + } + + if (orig_max_power != triplet->chans.max_power) { + *country_ie -= 3; + *country_ie_len += 3; + break; + } + + cur_channel = triplet->chans.first_channel; + + /* The key is finding the right next expected channel */ + if (band == IEEE80211_BAND_2GHZ) + next_expected_chan = end_subband_chan + 1; + else + next_expected_chan = end_subband_chan + 4; + + if (cur_channel != next_expected_chan) { + *country_ie -= 3; + *country_ie_len += 3; + break; + } + + end_subband_chan = end_channel; + + /* Move to the next one */ + *country_ie += 3; + *country_ie_len -= 3; + + /* + * Padding needs to be dealt with if we processed + * some channels. + */ + if (*country_ie_len == 1) { + *country_ie += 1; + *country_ie_len -= 1; + break; + } + + /* If seen, the IE is invalid */ + if (*country_ie_len == 2) + return 0; + } + + if (end_subband_chan == orig_end_channel) { + *country_ie = triplets_start; + *country_ie_len = len_at_triplet; + return orig_end_channel; + } + + return end_subband_chan; +} + /* * Converts a country IE to a regulatory domain. 
A regulatory domain * structure has a lot of information which the IE doesn't yet have, @@ -552,6 +724,19 @@ static struct ieee80211_regdomain *country_ie_2_rd( continue; } + /* + * APs can add padding to make length divisible + * by two, required by the spec. + */ + if (triplet->chans.first_channel == 0) { + country_ie++; + country_ie_len--; + /* This is expected to be at the very end only */ + if (country_ie_len != 0) + return NULL; + break; + } + /* 2 GHz */ if (triplet->chans.first_channel <= 14) end_channel = triplet->chans.first_channel + @@ -570,6 +755,20 @@ static struct ieee80211_regdomain *country_ie_2_rd( (4 * (triplet->chans.num_channels - 1)); cur_channel = triplet->chans.first_channel; + + /* + * Enhancement for APs that send a triplet for every channel + * or for whatever reason sends triplets with multiple channels + * separated when in fact they should be together. + */ + end_channel = max_subband_chan(cur_channel, + end_channel, + triplet->chans.max_power, + &country_ie, + &country_ie_len); + if (!end_channel) + return NULL; + cur_sub_max_channel = end_channel; /* Basic sanity check */ @@ -600,10 +799,13 @@ static struct ieee80211_regdomain *country_ie_2_rd( last_sub_max_channel = cur_sub_max_channel; - country_ie += 3; - country_ie_len -= 3; num_rules++; + if (country_ie_len >= 3) { + country_ie += 3; + country_ie_len -= 3; + } + /* * Note: this is not a IEEE requirement but * simply a memory requirement @@ -646,6 +848,12 @@ static struct ieee80211_regdomain *country_ie_2_rd( continue; } + if (triplet->chans.first_channel == 0) { + country_ie++; + country_ie_len--; + break; + } + reg_rule = &rd->reg_rules[i]; freq_range = ®_rule->freq_range; power_rule = ®_rule->power_rule; @@ -660,6 +868,12 @@ static struct ieee80211_regdomain *country_ie_2_rd( end_channel = triplet->chans.first_channel + (4 * (triplet->chans.num_channels - 1)); + end_channel = max_subband_chan(triplet->chans.first_channel, + end_channel, + triplet->chans.max_power, + &country_ie, + &country_ie_len); + /* * The +10 is since the regulatory domain expects * the actual band edge, not the center of freq for @@ -682,10 +896,13 @@ static struct ieee80211_regdomain *country_ie_2_rd( power_rule->max_antenna_gain = DBI_TO_MBI(100); power_rule->max_eirp = DBM_TO_MBM(triplet->chans.max_power); - country_ie += 3; - country_ie_len -= 3; i++; + if (country_ie_len >= 3) { + country_ie += 3; + country_ie_len -= 3; + } + BUG_ON(i > NL80211_MAX_SUPP_REG_RULES); } -- cgit v1.2.2 From d524215f6cad245249df8def19125ae6fd0bcc9b Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 8 Jan 2010 18:06:26 +0100 Subject: mac80211: use nullfunc frames for 4-addr sta detection To detect incoming 4-addr stations, hostapd needs to receive a 4-addr data frame from the remote station, so that it can create the AP VLAN for it. With this patch, the mlme code emits a 4-addr nullfunc frame immediately after assoc. On the AP side it also drops 4-addr nullfunc frames to the cooked monitor mode interface, if the interface hasn't been fully set up to receive 4-addr data frames yet. Signed-off-by: Felix Fietkau Signed-off-by: John W. 
Linville --- net/mac80211/mlme.c | 39 +++++++++++++++++++++++++++++++++++++++ net/mac80211/rx.c | 12 ++++++++++++ 2 files changed, 51 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5484cf930a87..0336dbb45ac5 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -282,6 +282,38 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, ieee80211_tx_skb(sdata, skb); } +static void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + struct sk_buff *skb; + struct ieee80211_hdr *nullfunc; + __le16 fc; + + if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION)) + return; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + 30); + if (!skb) { + printk(KERN_DEBUG "%s: failed to allocate buffer for 4addr " + "nullfunc frame\n", sdata->name); + return; + } + skb_reserve(skb, local->hw.extra_tx_headroom); + + nullfunc = (struct ieee80211_hdr *) skb_put(skb, 30); + memset(nullfunc, 0, 30); + fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC | + IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); + nullfunc->frame_control = fc; + memcpy(nullfunc->addr1, sdata->u.mgd.bssid, ETH_ALEN); + memcpy(nullfunc->addr2, sdata->vif.addr, ETH_ALEN); + memcpy(nullfunc->addr3, sdata->u.mgd.bssid, ETH_ALEN); + memcpy(nullfunc->addr4, sdata->vif.addr, ETH_ALEN); + + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + ieee80211_tx_skb(sdata, skb); +} + /* spectrum management related things */ static void ieee80211_chswitch_work(struct work_struct *work) { @@ -1089,6 +1121,13 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk, bss_conf->assoc_capability = capab_info; ieee80211_set_associated(sdata, cbss, changed); + /* + * If we're using 4-addr mode, let the AP know that we're + * doing so, so that it can create the STA VLAN on its side + */ + if (ifmgd->use_4addr) + ieee80211_send_4addr_nullfunc(local, sdata); + /* * Start timer to probe the connection to the AP now. * Also start the timer that will detect beacon loss. diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index e12f39a96460..efa6d3689c5e 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1111,6 +1111,18 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) if (ieee80211_is_nullfunc(hdr->frame_control) || ieee80211_is_qos_nullfunc(hdr->frame_control)) { I802_DEBUG_INC(rx->local->rx_handlers_drop_nullfunc); + + /* + * If we receive a 4-addr nullfunc frame from a STA + * that was not moved to a 4-addr STA vlan yet, drop + * the frame to the monitor interface, to make sure + * that hostapd sees it + */ + if (ieee80211_has_a4(hdr->frame_control) && + (rx->sdata->vif.type == NL80211_IFTYPE_AP || + (rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && + !rx->sdata->u.vlan.sta))) + return RX_DROP_MONITOR; /* * Update counter and free packet here to avoid * counting this as a dropped packed. -- cgit v1.2.2 From 0e5ded5a87c097760abd68521b86f1025dedc7d7 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 8 Jan 2010 18:10:58 +0100 Subject: mac80211: allow station updates on ap interfaces for vlan stations Since the per-vif station changes, sta_info_get on the ap sdata no longer returns entries for stations on ap vlans. This causes issues with hostapd, which currently always passes the ap interface name to nl80211 calls. This patch provides bug compatibility with the earlier versions until hostapd is fixed. Signed-off-by: Felix Fietkau Signed-off-by: John W. 
Linville --- net/mac80211/cfg.c | 12 ++++++------ net/mac80211/sta_info.c | 21 +++++++++++++++++++++ net/mac80211/sta_info.h | 3 +++ 3 files changed, 30 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index e5dda6fb8dff..dc12e9466ffd 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -148,7 +148,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, rcu_read_lock(); if (mac_addr) { - sta = sta_info_get(sdata, mac_addr); + sta = sta_info_get_bss(sdata, mac_addr); if (!sta) { ieee80211_key_free(key); err = -ENOENT; @@ -179,7 +179,7 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, if (mac_addr) { ret = -ENOENT; - sta = sta_info_get(sdata, mac_addr); + sta = sta_info_get_bss(sdata, mac_addr); if (!sta) goto out_unlock; @@ -226,7 +226,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, rcu_read_lock(); if (mac_addr) { - sta = sta_info_get(sdata, mac_addr); + sta = sta_info_get_bss(sdata, mac_addr); if (!sta) goto out; @@ -419,7 +419,7 @@ static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev, rcu_read_lock(); - sta = sta_info_get(sdata, mac); + sta = sta_info_get_bss(sdata, mac); if (sta) { ret = 0; sta_set_sinfo(sta, sinfo); @@ -775,7 +775,7 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, if (mac) { rcu_read_lock(); - sta = sta_info_get(sdata, mac); + sta = sta_info_get_bss(sdata, mac); if (!sta) { rcu_read_unlock(); return -ENOENT; @@ -803,7 +803,7 @@ static int ieee80211_change_station(struct wiphy *wiphy, rcu_read_lock(); - sta = sta_info_get(sdata, mac); + sta = sta_info_get_bss(sdata, mac); if (!sta) { rcu_read_unlock(); return -ENOENT; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 47da552ce8a6..f735826f055c 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -119,6 +119,27 @@ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, return sta; } +/* + * Get sta info either from the specified interface + * or from one of its vlans + */ +struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, + const u8 *addr) +{ + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + + sta = rcu_dereference(local->sta_hash[STA_HASH(addr)]); + while (sta) { + if ((sta->sdata == sdata || + sta->sdata->bss == sdata->bss) && + memcmp(sta->sta.addr, addr, ETH_ALEN) == 0) + break; + sta = rcu_dereference(sta->hnext); + } + return sta; +} + struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata, int idx) { diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index c8208236e896..6f79bba5706e 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -408,6 +408,9 @@ static inline u32 get_sta_flags(struct sta_info *sta) struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, const u8 *addr); +struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, + const u8 *addr); + static inline void for_each_sta_info_type_check(struct ieee80211_local *local, const u8 *addr, -- cgit v1.2.2 From 3f0e0b220f80075ce15483b20458192c0ac27426 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 8 Jan 2010 18:15:13 +0100 Subject: mac80211: do not transmit frames on unconfigured 4-addr vlan interfaces If frames are transmitted on 4-addr ap vlan interfaces with no station, they end up being transmitted unencrypted, even if the ap interface uses WPA. 
This patch adds some sanity checking to make sure that this does not happen. Signed-off-by: Felix Fietkau Cc: stable@kernel.org Signed-off-by: John W. Linville --- net/mac80211/tx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 0661e696a1dd..47ca59e52e71 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1056,8 +1056,11 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, hdr = (struct ieee80211_hdr *) skb->data; - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { tx->sta = rcu_dereference(sdata->u.vlan.sta); + if (!tx->sta && sdata->dev->ieee80211_ptr->use_4addr) + return TX_DROP; + } if (!tx->sta) tx->sta = sta_info_get(sdata, hdr->addr1); -- cgit v1.2.2 From 90be561b119a9e5439733f09cc70bd4ce9ec8022 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 8 Jan 2010 19:01:07 +0100 Subject: mac80211: fix return from ieee80211_assoc_success sparse pointed out that I made a mistake converting the return value of ieee80211_assoc_success to bool; this place should return false instead of one of the enum values (which would be true). Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0336dbb45ac5..8045fd63616b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1098,7 +1098,7 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk, if (err) { printk(KERN_DEBUG "%s: failed to insert STA entry for" " the AP (error %d)\n", sdata->name, err); - return RX_MGMT_CFG80211_ASSOC_ERROR; + return false; } if (elems.wmm_param) -- cgit v1.2.2 From 5e124bd5e00fcf54df555b368c2dafe6886f1df2 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 8 Jan 2010 22:33:38 -0800 Subject: net/mac80211/mlme.c: Remove unnecessary semicolons Signed-off-by: Joe Perches Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 8045fd63616b..86f025bc9456 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1783,7 +1783,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, if (!wk) return -ENOMEM; - memcpy(wk->filter_ta, req->bss->bssid, ETH_ALEN);; + memcpy(wk->filter_ta, req->bss->bssid, ETH_ALEN); if (req->ie && req->ie_len) { memcpy(wk->ie, req->ie, req->ie_len); -- cgit v1.2.2 From 678f415fdc534c0a806fce992e4c62df0eff19d2 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Sun, 10 Jan 2010 14:07:53 +0100 Subject: mac80211: flush workqueue before calling driver ->stop() method Since commit "mwl8k: handle station database update for AP's sta entry via ->sta_notify()", mwl8k every now and then gets a command timeout when ifconfig'ing a STA interface down. This turns out to be due to mwl8k_stop() being called while the work queue item that was scheduled by mwl8k_sta_notify() to remove the STA entry for the associated AP is still queued, and the former disables interrupts so that when the latter eventually runs, a command completion interrupt is never seen. Fix this by changing ieee80211_stop_device() so that the workqueue is flushed before drv_stop() is called, instead of doing it the other way around as is done now. (As ->stop() is allowed to sleep, there isn't any reason for drivers to queue work from within it.) 
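A minimal sketch of the resulting teardown order in ieee80211_stop_device(), reconstructed from the diff that follows (surrounding cleanup in the function is omitted):

	void ieee80211_stop_device(struct ieee80211_local *local)
	{
		ieee80211_led_radio(local, false);
		cancel_work_sync(&local->reconfig_filter);

		/* drain any work the driver queued while it was running... */
		flush_workqueue(local->workqueue);

		/* ...and only then let the driver shut the hardware down */
		drv_stop(local);
	}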
Signed-off-by: Lennert Buytenhek Signed-off-by: John W. Linville --- net/mac80211/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 72a98e844718..a2ba6e29bd9a 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1069,9 +1069,9 @@ void ieee80211_stop_device(struct ieee80211_local *local) ieee80211_led_radio(local, false); cancel_work_sync(&local->reconfig_filter); - drv_stop(local); flush_workqueue(local->workqueue); + drv_stop(local); } int ieee80211_reconfig(struct ieee80211_local *local) -- cgit v1.2.2 From 2d46d7c121436f1dafe91b0a8d9b99e534cfa5f8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 10 Jan 2010 17:12:41 +0100 Subject: mac80211: remove unused type argument The type argument to DEBUGFS_ADD() isn't used and can be removed, it's around from before the conversion to debugfs_remove_recursive(). Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/debugfs_netdev.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 1481049f0f71..9affe2cd185f 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -255,7 +255,7 @@ IEEE80211_IF_FILE(dot11MeshHWMPRootMode, #endif -#define DEBUGFS_ADD(name, type) \ +#define DEBUGFS_ADD(name) \ debugfs_create_file(#name, 0400, sdata->debugfs.dir, \ sdata, &name##_ops); @@ -265,40 +265,40 @@ IEEE80211_IF_FILE(dot11MeshHWMPRootMode, static void add_sta_files(struct ieee80211_sub_if_data *sdata) { - DEBUGFS_ADD(drop_unencrypted, sta); - DEBUGFS_ADD(rc_rateidx_mask_2ghz, sta); - DEBUGFS_ADD(rc_rateidx_mask_5ghz, sta); + DEBUGFS_ADD(drop_unencrypted); + DEBUGFS_ADD(rc_rateidx_mask_2ghz); + DEBUGFS_ADD(rc_rateidx_mask_5ghz); - DEBUGFS_ADD(bssid, sta); - DEBUGFS_ADD(aid, sta); + DEBUGFS_ADD(bssid); + DEBUGFS_ADD(aid); DEBUGFS_ADD_MODE(smps, 0600); } static void add_ap_files(struct ieee80211_sub_if_data *sdata) { - DEBUGFS_ADD(drop_unencrypted, ap); - DEBUGFS_ADD(rc_rateidx_mask_2ghz, ap); - DEBUGFS_ADD(rc_rateidx_mask_5ghz, ap); + DEBUGFS_ADD(drop_unencrypted); + DEBUGFS_ADD(rc_rateidx_mask_2ghz); + DEBUGFS_ADD(rc_rateidx_mask_5ghz); - DEBUGFS_ADD(num_sta_ps, ap); - DEBUGFS_ADD(dtim_count, ap); - DEBUGFS_ADD(num_buffered_multicast, ap); + DEBUGFS_ADD(num_sta_ps); + DEBUGFS_ADD(dtim_count); + DEBUGFS_ADD(num_buffered_multicast); } static void add_wds_files(struct ieee80211_sub_if_data *sdata) { - DEBUGFS_ADD(drop_unencrypted, wds); - DEBUGFS_ADD(rc_rateidx_mask_2ghz, wds); - DEBUGFS_ADD(rc_rateidx_mask_5ghz, wds); + DEBUGFS_ADD(drop_unencrypted); + DEBUGFS_ADD(rc_rateidx_mask_2ghz); + DEBUGFS_ADD(rc_rateidx_mask_5ghz); - DEBUGFS_ADD(peer, wds); + DEBUGFS_ADD(peer); } static void add_vlan_files(struct ieee80211_sub_if_data *sdata) { - DEBUGFS_ADD(drop_unencrypted, vlan); - DEBUGFS_ADD(rc_rateidx_mask_2ghz, vlan); - DEBUGFS_ADD(rc_rateidx_mask_5ghz, vlan); + DEBUGFS_ADD(drop_unencrypted); + DEBUGFS_ADD(rc_rateidx_mask_2ghz); + DEBUGFS_ADD(rc_rateidx_mask_5ghz); } static void add_monitor_files(struct ieee80211_sub_if_data *sdata) -- cgit v1.2.2 From ab13315af97919fae0e014748105fdc2e30afb2d Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Tue, 12 Jan 2010 10:42:31 +0200 Subject: mac80211: add U-APSD client support Add Unscheduled Automatic Power-Save Delivery (U-APSD) client support. 
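For orientation (a standalone sketch, not code from this patch): a U-APSD client tells the AP which access categories are trigger/delivery-enabled, and how many frames the AP may deliver per service period, via the QoS Info octet of its WMM IE. Assuming the standard bit layout (per-AC U-APSD flags in bits 0-3, Max SP Length in bits 5-6), the packing amounts to:

/*
 * Standalone sketch of the WMM QoS Info octet a U-APSD client sends.
 * Constant names are invented here and are not the mac80211 definitions.
 */
#include <stdint.h>
#include <stdio.h>

#define QOSINFO_AC_VO		0x01	/* bit 0 */
#define QOSINFO_AC_VI		0x02	/* bit 1 */
#define QOSINFO_AC_BK		0x04	/* bit 2 */
#define QOSINFO_AC_BE		0x08	/* bit 3 */
#define QOSINFO_SP_SHIFT	5	/* Max SP Length lives in bits 5-6 */

/* uapsd_acs: OR of the per-AC flags; max_sp_len: 2-bit WMM Max SP value */
static uint8_t build_qos_info(uint8_t uapsd_acs, uint8_t max_sp_len)
{
	return uapsd_acs | (uint8_t)(max_sp_len << QOSINFO_SP_SHIFT);
}

int main(void)
{
	/* all four ACs enabled; SP value 0 = AP may deliver all frames (per WMM) */
	uint8_t all = QOSINFO_AC_VO | QOSINFO_AC_VI |
		      QOSINFO_AC_BK | QOSINFO_AC_BE;

	printf("qos_info (all ACs) = 0x%02x\n", build_qos_info(all, 0));
	printf("qos_info (VO only) = 0x%02x\n", build_qos_info(QOSINFO_AC_VO, 0));
	return 0;
}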
The idea is that the data frames from the client trigger AP to send the buffered frames with ACs which have U-APSD enabled. This decreases latency and makes it possible to save even more power. Driver needs to use IEEE80211_HW_UAPSD to enable the feature. The current implementation assumes that firmware takes care of the wakeup and hardware needing IEEE80211_HW_PS_NULLFUNC_STACK is not yet supported. Tested with wl1251 on a Nokia N900 and Cisco Aironet 1231G AP and running various test traffic with ping. Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 7 +++++++ net/mac80211/ieee80211_i.h | 13 ++++++++++++- net/mac80211/main.c | 4 ++++ net/mac80211/mlme.c | 31 ++++++++++++++++++++++++++++--- net/mac80211/scan.c | 18 ++++++++++++++++++ net/mac80211/util.c | 2 ++ net/mac80211/work.c | 12 ++++++++++-- 7 files changed, 81 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index dc12e9466ffd..8286df5822d5 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1128,6 +1128,13 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy, p.cw_max = params->cwmax; p.cw_min = params->cwmin; p.txop = params->txop; + + /* + * Setting tx queue params disables u-apsd because it's only + * called in master mode. + */ + p.uapsd = false; + if (drv_conf_tx(local, params->queue, &p)) { printk(KERN_DEBUG "%s: failed to set TX queue " "parameters for queue %d\n", diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 3e4ac3f30857..3468e378509a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -58,6 +58,15 @@ struct ieee80211_local; #define TU_TO_EXP_TIME(x) (jiffies + usecs_to_jiffies((x) * 1024)) +#define IEEE80211_DEFAULT_UAPSD_QUEUES \ + (IEEE80211_WMM_IE_STA_QOSINFO_AC_BK | \ + IEEE80211_WMM_IE_STA_QOSINFO_AC_BE | \ + IEEE80211_WMM_IE_STA_QOSINFO_AC_VI | \ + IEEE80211_WMM_IE_STA_QOSINFO_AC_VO) + +#define IEEE80211_DEFAULT_MAX_SP_LEN \ + IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL + struct ieee80211_fragment_entry { unsigned long first_frag_time; unsigned int seq; @@ -78,6 +87,7 @@ struct ieee80211_bss { u8 dtim_period; bool wmm_used; + bool uapsd_supported; unsigned long last_probe_resp; @@ -285,7 +295,7 @@ struct ieee80211_work { u8 ssid[IEEE80211_MAX_SSID_LEN]; u8 ssid_len; u8 supp_rates_len; - bool wmm_used, use_11n; + bool wmm_used, use_11n, uapsd_used; } assoc; struct { u32 duration; @@ -306,6 +316,7 @@ enum ieee80211_sta_flags { IEEE80211_STA_DISABLE_11N = BIT(4), IEEE80211_STA_CSA_RECEIVED = BIT(5), IEEE80211_STA_MFP_ENABLED = BIT(6), + IEEE80211_STA_UAPSD_ENABLED = BIT(7), }; struct ieee80211_if_managed { diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 468829143991..0054bba08ce1 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -491,6 +491,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_UNSPEC; + WARN((local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD) + && (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK), + "U-APSD not supported with HW_PS_NULLFUNC_STACK\n"); + /* * Calculate scan IE length -- we need this to alloc * memory and to subtract from the driver limit. 
It diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 86f025bc9456..39c27d83a4f2 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -569,7 +569,7 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, struct ieee80211_tx_queue_params params; size_t left; int count; - u8 *pos; + u8 *pos, uapsd_queues = 0; if (local->hw.queues < 4) return; @@ -579,6 +579,10 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, if (wmm_param_len < 8 || wmm_param[5] /* version */ != 1) return; + + if (ifmgd->flags & IEEE80211_STA_UAPSD_ENABLED) + uapsd_queues = IEEE80211_DEFAULT_UAPSD_QUEUES; + count = wmm_param[6] & 0x0f; if (count == ifmgd->wmm_last_param_set) return; @@ -593,6 +597,7 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, for (; left >= 4; left -= 4, pos += 4) { int aci = (pos[0] >> 5) & 0x03; int acm = (pos[0] >> 4) & 0x01; + bool uapsd = false; int queue; switch (aci) { @@ -600,22 +605,30 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, queue = 3; if (acm) local->wmm_acm |= BIT(1) | BIT(2); /* BK/- */ + if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BK) + uapsd = true; break; case 2: /* AC_VI */ queue = 1; if (acm) local->wmm_acm |= BIT(4) | BIT(5); /* CL/VI */ + if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VI) + uapsd = true; break; case 3: /* AC_VO */ queue = 0; if (acm) local->wmm_acm |= BIT(6) | BIT(7); /* VO/NC */ + if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VO) + uapsd = true; break; case 0: /* AC_BE */ default: queue = 2; if (acm) local->wmm_acm |= BIT(0) | BIT(3); /* BE/EE */ + if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BE) + uapsd = true; break; } @@ -623,11 +636,14 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, params.cw_max = ecw2cw((pos[1] & 0xf0) >> 4); params.cw_min = ecw2cw(pos[1] & 0x0f); params.txop = get_unaligned_le16(pos + 2); + params.uapsd = uapsd; + #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: WMM queue=%d aci=%d acm=%d aifs=%d " - "cWmin=%d cWmax=%d txop=%d\n", + "cWmin=%d cWmax=%d txop=%d uapsd=%d\n", wiphy_name(local->hw.wiphy), queue, aci, acm, - params.aifs, params.cw_min, params.cw_max, params.txop); + params.aifs, params.cw_min, params.cw_max, params.txop, + params.uapsd); #endif if (drv_conf_tx(local, queue, ¶ms) && local->ops->conf_tx) printk(KERN_DEBUG "%s: failed to set TX queue " @@ -1906,6 +1922,15 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, wk->assoc.ht_information_ie = ieee80211_bss_get_ie(req->bss, WLAN_EID_HT_INFORMATION); + if (bss->wmm_used && bss->uapsd_supported && + (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD)) { + wk->assoc.uapsd_used = true; + ifmgd->flags |= IEEE80211_STA_UAPSD_ENABLED; + } else { + wk->assoc.uapsd_used = false; + ifmgd->flags &= ~IEEE80211_STA_UAPSD_ENABLED; + } + ssid = ieee80211_bss_get_ie(req->bss, WLAN_EID_SSID); memcpy(wk->assoc.ssid, ssid + 2, ssid[1]); wk->assoc.ssid_len = ssid[1]; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 30cb62bb45b3..9afe2f9885dc 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -54,6 +54,23 @@ void ieee80211_rx_bss_put(struct ieee80211_local *local, cfg80211_put_bss(container_of((void *)bss, struct cfg80211_bss, priv)); } +static bool is_uapsd_supported(struct ieee802_11_elems *elems) +{ + u8 qos_info; + + if (elems->wmm_info && elems->wmm_info_len == 7 + && elems->wmm_info[5] == 1) + qos_info = elems->wmm_info[6]; + else if (elems->wmm_param && elems->wmm_param_len == 24 + && 
elems->wmm_param[5] == 1) + qos_info = elems->wmm_param[6]; + else + /* no valid wmm information or parameter element found */ + return false; + + return qos_info & IEEE80211_WMM_IE_AP_QOSINFO_UAPSD; +} + struct ieee80211_bss * ieee80211_bss_info_update(struct ieee80211_local *local, struct ieee80211_rx_status *rx_status, @@ -117,6 +134,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local, } bss->wmm_used = elems->wmm_param || elems->wmm_info; + bss->uapsd_supported = is_uapsd_supported(elems); if (!beacon) bss->last_probe_resp = jiffies; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index a2ba6e29bd9a..e278f97c8305 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -792,6 +792,8 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata) break; } + qparam.uapsd = false; + drv_conf_tx(local, queue, &qparam); } } diff --git a/net/mac80211/work.c b/net/mac80211/work.c index 7c5d95b1bc04..a74fd6ee0083 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -202,7 +202,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; - u8 *pos; + u8 *pos, qos_info; const u8 *ies; size_t offset = 0, noffset; int i, len, count, rates_len, supp_rates_len; @@ -375,6 +375,14 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, } if (wk->assoc.wmm_used && local->hw.queues >= 4) { + if (wk->assoc.uapsd_used) { + qos_info = IEEE80211_DEFAULT_UAPSD_QUEUES; + qos_info |= (IEEE80211_DEFAULT_MAX_SP_LEN << + IEEE80211_WMM_IE_STA_QOSINFO_SP_SHIFT); + } else { + qos_info = 0; + } + pos = skb_put(skb, 9); *pos++ = WLAN_EID_VENDOR_SPECIFIC; *pos++ = 7; /* len */ @@ -384,7 +392,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, *pos++ = 2; /* WME */ *pos++ = 0; /* WME info */ *pos++ = 1; /* WME ver */ - *pos++ = 0; + *pos++ = qos_info; } /* add any remaining custom (i.e. vendor specific here) IEs */ -- cgit v1.2.2 From 50ae0cf15c3da2f6a8e4558de5010923e84736b2 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Tue, 12 Jan 2010 10:42:39 +0200 Subject: mac80211: add debugfs interface for U-APSD queue configuration Because it's not yet decided how to configure which queues are U-APSD enabled, add a debugfs interface for testing purposes. Signed-off-by: Kalle Valo Signed-off-by: John W. 
Linville --- net/mac80211/debugfs.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++ net/mac80211/ieee80211_i.h | 14 +++++++ net/mac80211/main.c | 2 + net/mac80211/mlme.c | 2 +- net/mac80211/work.c | 4 +- 5 files changed, 113 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index e4b54093d41b..b3bc32b62a5a 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -158,6 +158,98 @@ static const struct file_operations noack_ops = { .open = mac80211_open_file_generic }; +static ssize_t uapsd_queues_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct ieee80211_local *local = file->private_data; + int res; + char buf[10]; + + res = scnprintf(buf, sizeof(buf), "0x%x\n", local->uapsd_queues); + + return simple_read_from_buffer(user_buf, count, ppos, buf, res); +} + +static ssize_t uapsd_queues_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct ieee80211_local *local = file->private_data; + unsigned long val; + char buf[10]; + size_t len; + int ret; + + len = min(count, sizeof(buf) - 1); + if (copy_from_user(buf, user_buf, len)) + return -EFAULT; + buf[len] = '\0'; + + ret = strict_strtoul(buf, 0, &val); + + if (ret) + return -EINVAL; + + if (val & ~IEEE80211_WMM_IE_STA_QOSINFO_AC_MASK) + return -ERANGE; + + local->uapsd_queues = val; + + return count; +} + +static const struct file_operations uapsd_queues_ops = { + .read = uapsd_queues_read, + .write = uapsd_queues_write, + .open = mac80211_open_file_generic +}; + +static ssize_t uapsd_max_sp_len_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct ieee80211_local *local = file->private_data; + int res; + char buf[10]; + + res = scnprintf(buf, sizeof(buf), "0x%x\n", local->uapsd_max_sp_len); + + return simple_read_from_buffer(user_buf, count, ppos, buf, res); +} + +static ssize_t uapsd_max_sp_len_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct ieee80211_local *local = file->private_data; + unsigned long val; + char buf[10]; + size_t len; + int ret; + + len = min(count, sizeof(buf) - 1); + if (copy_from_user(buf, user_buf, len)) + return -EFAULT; + buf[len] = '\0'; + + ret = strict_strtoul(buf, 0, &val); + + if (ret) + return -EINVAL; + + if (val & ~IEEE80211_WMM_IE_STA_QOSINFO_SP_MASK) + return -ERANGE; + + local->uapsd_max_sp_len = val; + + return count; +} + +static const struct file_operations uapsd_max_sp_len_ops = { + .read = uapsd_max_sp_len_read, + .write = uapsd_max_sp_len_write, + .open = mac80211_open_file_generic +}; + static ssize_t queues_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { @@ -314,6 +406,8 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_ADD(queues); DEBUGFS_ADD_MODE(reset, 0200); DEBUGFS_ADD(noack); + DEBUGFS_ADD(uapsd_queues); + DEBUGFS_ADD(uapsd_max_sp_len); statsd = debugfs_create_dir("statistics", phyd); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 3468e378509a..c18f576f1848 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -808,6 +808,20 @@ struct ieee80211_local { int wifi_wme_noack_test; unsigned int wmm_acm; /* bit field of ACM bits (BIT(802.1D tag)) */ + /* + * Bitmask of enabled u-apsd queues, + * IEEE80211_WMM_IE_STA_QOSINFO_AC_BE & co. Needs a new association + * to take effect. 
+ */ + unsigned int uapsd_queues; + + /* + * Maximum number of buffered frames AP can deliver during a + * service period, IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL or similar. + * Needs a new association to take effect. + */ + unsigned int uapsd_max_sp_len; + bool pspolling; bool offchannel_ps_enabled; /* diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 0054bba08ce1..ec8f767ba95b 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -384,6 +384,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, local->hw.conf.long_frame_max_tx_count = wiphy->retry_long; local->hw.conf.short_frame_max_tx_count = wiphy->retry_short; local->user_power_level = -1; + local->uapsd_queues = IEEE80211_DEFAULT_UAPSD_QUEUES; + local->uapsd_max_sp_len = IEEE80211_DEFAULT_MAX_SP_LEN; INIT_LIST_HEAD(&local->interfaces); mutex_init(&local->iflist_mtx); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 39c27d83a4f2..2746391248d3 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -581,7 +581,7 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, return; if (ifmgd->flags & IEEE80211_STA_UAPSD_ENABLED) - uapsd_queues = IEEE80211_DEFAULT_UAPSD_QUEUES; + uapsd_queues = local->uapsd_queues; count = wmm_param[6] & 0x0f; if (count == ifmgd->wmm_last_param_set) diff --git a/net/mac80211/work.c b/net/mac80211/work.c index a74fd6ee0083..81bd5d592bb4 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -376,8 +376,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, if (wk->assoc.wmm_used && local->hw.queues >= 4) { if (wk->assoc.uapsd_used) { - qos_info = IEEE80211_DEFAULT_UAPSD_QUEUES; - qos_info |= (IEEE80211_DEFAULT_MAX_SP_LEN << + qos_info = local->uapsd_queues; + qos_info |= (local->uapsd_max_sp_len << IEEE80211_WMM_IE_STA_QOSINFO_SP_SHIFT); } else { qos_info = 0; -- cgit v1.2.2 From 5c1b98a52c3af1044c2d3842af8bae9a89502ca9 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Tue, 12 Jan 2010 10:42:46 +0200 Subject: mac80211: create tx handler for dynamic ps Currently dynamic ps check is in ieee80211_xmit(), but it's cleaner to have a separate tx handler for this. Also this is a prerequisite for U-APSD client mode which needs to know the queue frame is in. Also need_dynamic_ps() function is embedded to the tx handler. No functional changes expect that the code is run in a later phase than originally. Signed-off-by: Kalle Valo Signed-off-by: John W. 
Linville --- net/mac80211/tx.c | 81 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 41 insertions(+), 40 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 47ca59e52e71..6fcc85a2806a 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -180,6 +180,46 @@ static int inline is_ieee80211_device(struct ieee80211_local *local, } /* tx handlers */ +static ieee80211_tx_result debug_noinline +ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx) +{ + struct ieee80211_local *local = tx->local; + + /* driver doesn't support power save */ + if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS)) + return TX_CONTINUE; + + /* hardware does dynamic power save */ + if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) + return TX_CONTINUE; + + /* dynamic power save disabled */ + if (local->hw.conf.dynamic_ps_timeout <= 0) + return TX_CONTINUE; + + /* we are scanning, don't enable power save */ + if (local->scanning) + return TX_CONTINUE; + + if (!local->ps_sdata) + return TX_CONTINUE; + + /* No point if we're going to suspend */ + if (local->quiescing) + return TX_CONTINUE; + + if (local->hw.conf.flags & IEEE80211_CONF_PS) { + ieee80211_stop_queues_by_reason(&local->hw, + IEEE80211_QUEUE_STOP_REASON_PS); + ieee80211_queue_work(&local->hw, + &local->dynamic_ps_disable_work); + } + + mod_timer(&local->dynamic_ps_timer, jiffies + + msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); + + return TX_CONTINUE; +} static ieee80211_tx_result debug_noinline ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) @@ -1223,6 +1263,7 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) goto txh_done; \ } while (0) + CALL_TXH(ieee80211_tx_h_dynamic_ps); CALL_TXH(ieee80211_tx_h_check_assoc); CALL_TXH(ieee80211_tx_h_ps_buf); CALL_TXH(ieee80211_tx_h_select_key); @@ -1405,34 +1446,6 @@ static int ieee80211_skb_resize(struct ieee80211_local *local, return 0; } -static bool need_dynamic_ps(struct ieee80211_local *local) -{ - /* driver doesn't support power save */ - if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS)) - return false; - - /* hardware does dynamic power save */ - if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) - return false; - - /* dynamic power save disabled */ - if (local->hw.conf.dynamic_ps_timeout <= 0) - return false; - - /* we are scanning, don't enable power save */ - if (local->scanning) - return false; - - if (!local->ps_sdata) - return false; - - /* No point if we're going to suspend */ - if (local->quiescing) - return false; - - return true; -} - static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { @@ -1443,18 +1456,6 @@ static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, int headroom; bool may_encrypt; - if (need_dynamic_ps(local)) { - if (local->hw.conf.flags & IEEE80211_CONF_PS) { - ieee80211_stop_queues_by_reason(&local->hw, - IEEE80211_QUEUE_STOP_REASON_PS); - ieee80211_queue_work(&local->hw, - &local->dynamic_ps_disable_work); - } - - mod_timer(&local->dynamic_ps_timer, jiffies + - msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); - } - rcu_read_lock(); if (unlikely(sdata->vif.type == NL80211_IFTYPE_MONITOR)) { -- cgit v1.2.2 From 0c74211d19d83729c209ddcd4dc026c2aedeb29e Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Tue, 12 Jan 2010 10:42:53 +0200 Subject: mac80211: check uapsd state for dynamic power save To make U-APSD client mode effective, we must not wake up from dynamic power save when transmitting frames. 
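The decision this change introduces can be condensed into a small predicate (standalone sketch with invented names; the real logic is the ieee80211_tx_h_dynamic_ps() hunk in the diff that follows):

/*
 * Sketch of the dynamic-PS wakeup decision -- not the mac80211 code.
 * Queue 0 carries the voice AC in this version of mac80211.
 */
#include <stdbool.h>
#include <stdio.h>

#define QUEUE_VO 0

static bool must_wake_for_tx(bool uapsd_enabled, bool vo_uapsd_enabled,
			     int tx_queue)
{
	/*
	 * A frame on a U-APSD trigger-enabled voice queue is itself the
	 * trigger that makes the AP deliver buffered frames, so there is
	 * no need to leave power save for it.
	 */
	if (uapsd_enabled && vo_uapsd_enabled && tx_queue == QUEUE_VO)
		return false;
	return true;	/* everything else still wakes the radio */
}

int main(void)
{
	printf("VoIP frame, U-APSD on VO: wake=%d\n",
	       must_wake_for_tx(true, true, QUEUE_VO));
	printf("Best-effort frame:        wake=%d\n",
	       must_wake_for_tx(true, true, 2));
	return 0;
}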
So if dynamic power save is enabled, it needs check the queue the transmitted packet is in and decide if we need to wake up or not. In a perfect world, where all packets would have correct QoS tags, U-APSD enabled queues should not trigger wakeup from power save. But in the real world, where very few packets have correct QoS tags, this won't work. For example, if only voip class has U-APSD enabled and we send a packet in voip class, but the packets we receive are in best effort class, we would receive the packets with the legacy power save method. And that would increase latencies too much from a voip application point of view. The workaround is to enable U-APSD for all qeueus and still use dynamic ps wakeup for all other queues except voip. That way we can still save power with a voip application and not sacrifice latency. Normal traffic (in background, best effort or video class) would still trigger wakeup from dynamic power save. Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- net/mac80211/tx.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 6fcc85a2806a..daf81048c1f7 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -184,6 +184,7 @@ static ieee80211_tx_result debug_noinline ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx) { struct ieee80211_local *local = tx->local; + struct ieee80211_if_managed *ifmgd; /* driver doesn't support power save */ if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS)) @@ -208,6 +209,30 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx) if (local->quiescing) return TX_CONTINUE; + /* dynamic ps is supported only in managed mode */ + if (tx->sdata->vif.type != NL80211_IFTYPE_STATION) + return TX_CONTINUE; + + ifmgd = &tx->sdata->u.mgd; + + /* + * Don't wakeup from power save if u-apsd is enabled, voip ac has + * u-apsd enabled and the frame is in voip class. This effectively + * means that even if all access categories have u-apsd enabled, in + * practise u-apsd is only used with the voip ac. This is a + * workaround for the case when received voip class packets do not + * have correct qos tag for some reason, due the network or the + * peer application. + * + * Note: local->uapsd_queues access is racy here. If the value is + * changed via debugfs, user needs to reassociate manually to have + * everything in sync. + */ + if ((ifmgd->flags & IEEE80211_STA_UAPSD_ENABLED) + && (local->uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VO) + && skb_get_queue_mapping(tx->skb) == 0) + return TX_CONTINUE; + if (local->hw.conf.flags & IEEE80211_CONF_PS) { ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_QUEUE_STOP_REASON_PS); -- cgit v1.2.2 From cd8c20b650f49354722b8cc1f03320b004815a0a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 13 Jan 2010 16:02:14 +0100 Subject: netfilter: nfnetlink: netns support Make nfnl socket per-petns. 
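The shape of the conversion can be shown with a minimal userspace sketch (toy types, not kernel code): the subsystem socket stops being a single global and becomes per-namespace state, and every helper takes the namespace as an argument.

#include <stdio.h>

struct toy_sock { int id; };

struct toy_net {		/* stand-in for struct net */
	struct toy_sock nfnl;	/* per-namespace nfnetlink socket */
};

/* analogue of the pernet .init hook: create the socket with the netns */
static void toy_net_init(struct toy_net *net, int id)
{
	net->nfnl.id = id;
}

/* callers now receive the namespace instead of touching a global socket */
static void toy_nfnetlink_send(struct toy_net *net)
{
	printf("sending via per-netns socket %d\n", net->nfnl.id);
}

int main(void)
{
	struct toy_net init_net, other_net;

	toy_net_init(&init_net, 1);
	toy_net_init(&other_net, 2);

	toy_nfnetlink_send(&init_net);	/* socket 1 */
	toy_nfnetlink_send(&other_net);	/* socket 2, isolated from the first */
	return 0;
}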
Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_netlink.c | 13 ++++---- net/netfilter/nfnetlink.c | 65 +++++++++++++++++++++++------------- net/netfilter/nfnetlink_log.c | 3 +- net/netfilter/nfnetlink_queue.c | 2 +- 4 files changed, 52 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 59d8064eb522..d4c5d06677f9 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -482,7 +482,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) } else return 0; - if (!item->report && !nfnetlink_has_listeners(group)) + if (!item->report && !nfnetlink_has_listeners(&init_net, group)) return 0; skb = nlmsg_new(ctnetlink_nlmsg_size(ct), GFP_ATOMIC); @@ -559,7 +559,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) rcu_read_unlock(); nlmsg_end(skb, nlh); - err = nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC); + err = nfnetlink_send(skb, &init_net, item->pid, group, item->report, + GFP_ATOMIC); if (err == -ENOBUFS || err == -EAGAIN) return -ENOBUFS; @@ -571,7 +572,7 @@ nla_put_failure: nlmsg_failure: kfree_skb(skb); errout: - nfnetlink_set_err(0, group, -ENOBUFS); + nfnetlink_set_err(&init_net, 0, group, -ENOBUFS); return 0; } #endif /* CONFIG_NF_CONNTRACK_EVENTS */ @@ -1539,7 +1540,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) return 0; if (!item->report && - !nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW)) + !nfnetlink_has_listeners(&init_net, NFNLGRP_CONNTRACK_EXP_NEW)) return 0; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); @@ -1562,7 +1563,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) rcu_read_unlock(); nlmsg_end(skb, nlh); - nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, + nfnetlink_send(skb, &init_net, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, item->report, GFP_ATOMIC); return 0; @@ -1572,7 +1573,7 @@ nla_put_failure: nlmsg_failure: kfree_skb(skb); errout: - nfnetlink_set_err(0, 0, -ENOBUFS); + nfnetlink_set_err(&init_net, 0, 0, -ENOBUFS); return 0; } #endif diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index eedc0c1ac7a4..8eb0cc23ada3 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -40,7 +40,6 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); static char __initdata nfversion[] = "0.30"; -static struct sock *nfnl = NULL; static const struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT]; static DEFINE_MUTEX(nfnl_mutex); @@ -101,34 +100,35 @@ nfnetlink_find_client(u_int16_t type, const struct nfnetlink_subsystem *ss) return &ss->cb[cb_id]; } -int nfnetlink_has_listeners(unsigned int group) +int nfnetlink_has_listeners(struct net *net, unsigned int group) { - return netlink_has_listeners(nfnl, group); + return netlink_has_listeners(net->nfnl, group); } EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); -int nfnetlink_send(struct sk_buff *skb, u32 pid, +int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo, gfp_t flags) { - return nlmsg_notify(nfnl, skb, pid, group, echo, flags); + return nlmsg_notify(net->nfnl, skb, pid, group, echo, flags); } EXPORT_SYMBOL_GPL(nfnetlink_send); -void nfnetlink_set_err(u32 pid, u32 group, int error) +void nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error) { - netlink_set_err(nfnl, pid, group, error); + netlink_set_err(net->nfnl, pid, group, error); 
} EXPORT_SYMBOL_GPL(nfnetlink_set_err); -int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags) +int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags) { - return netlink_unicast(nfnl, skb, pid, flags); + return netlink_unicast(net->nfnl, skb, pid, flags); } EXPORT_SYMBOL_GPL(nfnetlink_unicast); /* Process one complete nfnetlink message. */ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { + struct net *net = sock_net(skb->sk); const struct nfnl_callback *nc; const struct nfnetlink_subsystem *ss; int type, err; @@ -170,7 +170,7 @@ replay: if (err < 0) return err; - err = nc->call(nfnl, skb, nlh, (const struct nlattr **)cda); + err = nc->call(net->nfnl, skb, nlh, (const struct nlattr **)cda); if (err == -EAGAIN) goto replay; return err; @@ -184,26 +184,45 @@ static void nfnetlink_rcv(struct sk_buff *skb) nfnl_unlock(); } -static void __exit nfnetlink_exit(void) +static int __net_init nfnetlink_net_init(struct net *net) { - printk("Removing netfilter NETLINK layer.\n"); - netlink_kernel_release(nfnl); - return; + struct sock *nfnl; + + nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, NFNLGRP_MAX, + nfnetlink_rcv, NULL, THIS_MODULE); + if (!nfnl) + return -ENOMEM; + net->nfnl_stash = nfnl; + rcu_assign_pointer(net->nfnl, nfnl); + return 0; } -static int __init nfnetlink_init(void) +static void __net_exit nfnetlink_net_exit_batch(struct list_head *net_exit_list) { - printk("Netfilter messages via NETLINK v%s.\n", nfversion); + struct net *net; - nfnl = netlink_kernel_create(&init_net, NETLINK_NETFILTER, NFNLGRP_MAX, - nfnetlink_rcv, NULL, THIS_MODULE); - if (!nfnl) { - printk(KERN_ERR "cannot initialize nfnetlink!\n"); - return -ENOMEM; - } + list_for_each_entry(net, net_exit_list, exit_list) + rcu_assign_pointer(net->nfnl, NULL); + synchronize_net(); + list_for_each_entry(net, net_exit_list, exit_list) + netlink_kernel_release(net->nfnl_stash); +} - return 0; +static struct pernet_operations nfnetlink_net_ops = { + .init = nfnetlink_net_init, + .exit_batch = nfnetlink_net_exit_batch, +}; + +static int __init nfnetlink_init(void) +{ + printk("Netfilter messages via NETLINK v%s.\n", nfversion); + return register_pernet_subsys(&nfnetlink_net_ops); } +static void __exit nfnetlink_exit(void) +{ + printk("Removing netfilter NETLINK layer.\n"); + unregister_pernet_subsys(&nfnetlink_net_ops); +} module_init(nfnetlink_init); module_exit(nfnetlink_exit); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 9de0470d557e..285e9029a9ff 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -323,7 +323,8 @@ __nfulnl_send(struct nfulnl_instance *inst) NLMSG_DONE, sizeof(struct nfgenmsg)); - status = nfnetlink_unicast(inst->skb, inst->peer_pid, MSG_DONTWAIT); + status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_pid, + MSG_DONTWAIT); inst->qlen = 0; inst->skb = NULL; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 7e3fa410641e..5c589b27d6eb 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -420,7 +420,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) } /* nfnetlink_unicast will either free the nskb or add it to a socket */ - err = nfnetlink_unicast(nskb, queue->peer_pid, MSG_DONTWAIT); + err = nfnetlink_unicast(nskb, &init_net, queue->peer_pid, MSG_DONTWAIT); if (err < 0) { queue->queue_user_dropped++; goto err_out_unlock; -- cgit v1.2.2 From 
9592a5c01e79dbc59eb56fa26b124e94ffcd0962 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 13 Jan 2010 16:04:18 +0100 Subject: netfilter: ctnetlink: netns support Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_netlink.c | 65 +++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d4c5d06677f9..79478dfba27e 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -456,6 +457,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) static int ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) { + struct net *net; struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; struct nlattr *nest_parms; @@ -482,7 +484,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) } else return 0; - if (!item->report && !nfnetlink_has_listeners(&init_net, group)) + net = nf_ct_net(ct); + if (!item->report && !nfnetlink_has_listeners(net, group)) return 0; skb = nlmsg_new(ctnetlink_nlmsg_size(ct), GFP_ATOMIC); @@ -559,7 +562,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) rcu_read_unlock(); nlmsg_end(skb, nlh); - err = nfnetlink_send(skb, &init_net, item->pid, group, item->report, + err = nfnetlink_send(skb, net, item->pid, group, item->report, GFP_ATOMIC); if (err == -ENOBUFS || err == -EAGAIN) return -ENOBUFS; @@ -572,7 +575,7 @@ nla_put_failure: nlmsg_failure: kfree_skb(skb); errout: - nfnetlink_set_err(&init_net, 0, group, -ENOBUFS); + nfnetlink_set_err(net, 0, group, -ENOBUFS); return 0; } #endif /* CONFIG_NF_CONNTRACK_EVENTS */ @@ -587,6 +590,7 @@ static int ctnetlink_done(struct netlink_callback *cb) static int ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); struct nf_conn *ct, *last; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; @@ -597,7 +601,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) last = (struct nf_conn *)cb->args[1]; for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { restart: - hlist_nulls_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]], + hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[cb->args[0]], hnnode) { if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; @@ -769,6 +773,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(ctnl); struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; struct nf_conn *ct; @@ -782,7 +787,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); else { /* Flush the whole table */ - nf_conntrack_flush_report(&init_net, + nf_conntrack_flush_report(net, NETLINK_CB(skb).pid, nlmsg_report(nlh)); return 0; @@ -791,7 +796,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - h = nf_conntrack_find_get(&init_net, &tuple); + h = nf_conntrack_find_get(net, &tuple); if (!h) return -ENOENT; @@ -829,6 +834,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(ctnl); struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple 
tuple; struct nf_conn *ct; @@ -851,7 +857,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - h = nf_conntrack_find_get(&init_net, &tuple); + h = nf_conntrack_find_get(net, &tuple); if (!h) return -ENOENT; @@ -1176,7 +1182,8 @@ ctnetlink_change_conntrack(struct nf_conn *ct, } static struct nf_conn * -ctnetlink_create_conntrack(const struct nlattr * const cda[], +ctnetlink_create_conntrack(struct net *net, + const struct nlattr * const cda[], struct nf_conntrack_tuple *otuple, struct nf_conntrack_tuple *rtuple, u8 u3) @@ -1185,7 +1192,7 @@ ctnetlink_create_conntrack(const struct nlattr * const cda[], int err = -EINVAL; struct nf_conntrack_helper *helper; - ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_ATOMIC); + ct = nf_conntrack_alloc(net, otuple, rtuple, GFP_ATOMIC); if (IS_ERR(ct)) return ERR_PTR(-ENOMEM); @@ -1286,7 +1293,7 @@ ctnetlink_create_conntrack(const struct nlattr * const cda[], if (err < 0) goto err2; - master_h = nf_conntrack_find_get(&init_net, &master); + master_h = nf_conntrack_find_get(net, &master); if (master_h == NULL) { err = -ENOENT; goto err2; @@ -1314,6 +1321,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(ctnl); struct nf_conntrack_tuple otuple, rtuple; struct nf_conntrack_tuple_hash *h = NULL; struct nfgenmsg *nfmsg = nlmsg_data(nlh); @@ -1334,9 +1342,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, spin_lock_bh(&nf_conntrack_lock); if (cda[CTA_TUPLE_ORIG]) - h = __nf_conntrack_find(&init_net, &otuple); + h = __nf_conntrack_find(net, &otuple); else if (cda[CTA_TUPLE_REPLY]) - h = __nf_conntrack_find(&init_net, &rtuple); + h = __nf_conntrack_find(net, &rtuple); if (h == NULL) { err = -ENOENT; @@ -1344,7 +1352,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, struct nf_conn *ct; enum ip_conntrack_events events; - ct = ctnetlink_create_conntrack(cda, &otuple, + ct = ctnetlink_create_conntrack(net, cda, &otuple, &rtuple, u3); if (IS_ERR(ct)) { err = PTR_ERR(ct); @@ -1526,9 +1534,10 @@ nla_put_failure: static int ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) { + struct nf_conntrack_expect *exp = item->exp; + struct net *net = nf_ct_exp_net(exp); struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - struct nf_conntrack_expect *exp = item->exp; struct sk_buff *skb; unsigned int type; int flags = 0; @@ -1540,7 +1549,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) return 0; if (!item->report && - !nfnetlink_has_listeners(&init_net, NFNLGRP_CONNTRACK_EXP_NEW)) + !nfnetlink_has_listeners(net, NFNLGRP_CONNTRACK_EXP_NEW)) return 0; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); @@ -1563,7 +1572,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) rcu_read_unlock(); nlmsg_end(skb, nlh); - nfnetlink_send(skb, &init_net, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, + nfnetlink_send(skb, net, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, item->report, GFP_ATOMIC); return 0; @@ -1573,7 +1582,7 @@ nla_put_failure: nlmsg_failure: kfree_skb(skb); errout: - nfnetlink_set_err(&init_net, 0, 0, -ENOBUFS); + nfnetlink_set_err(net, 0, 0, -ENOBUFS); return 0; } #endif @@ -1587,7 +1596,7 @@ static int ctnetlink_exp_done(struct netlink_callback *cb) static int ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = &init_net; + struct net *net = sock_net(skb->sk); struct nf_conntrack_expect 
*exp, *last; struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); struct hlist_node *n; @@ -1640,6 +1649,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(ctnl); struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; struct sk_buff *skb2; @@ -1661,7 +1671,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - exp = nf_ct_expect_find_get(&init_net, &tuple); + exp = nf_ct_expect_find_get(net, &tuple); if (!exp) return -ENOENT; @@ -1701,6 +1711,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(ctnl); struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; struct nf_conntrack_helper *h; @@ -1717,7 +1728,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, return err; /* bump usage count to 2 */ - exp = nf_ct_expect_find_get(&init_net, &tuple); + exp = nf_ct_expect_find_get(net, &tuple); if (!exp) return -ENOENT; @@ -1747,7 +1758,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, } for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, n, next, - &init_net.ct.expect_hash[i], + &net->ct.expect_hash[i], hnode) { m_help = nfct_help(exp->master); if (m_help->helper == h @@ -1763,7 +1774,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, spin_lock_bh(&nf_conntrack_lock); for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, n, next, - &init_net.ct.expect_hash[i], + &net->ct.expect_hash[i], hnode) { if (del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); @@ -1784,7 +1795,8 @@ ctnetlink_change_expect(struct nf_conntrack_expect *x, } static int -ctnetlink_create_expect(const struct nlattr * const cda[], u_int8_t u3, +ctnetlink_create_expect(struct net *net, const struct nlattr * const cda[], + u_int8_t u3, u32 pid, int report) { struct nf_conntrack_tuple tuple, mask, master_tuple; @@ -1806,7 +1818,7 @@ ctnetlink_create_expect(const struct nlattr * const cda[], u_int8_t u3, return err; /* Look for master conntrack of this expectation */ - h = nf_conntrack_find_get(&init_net, &master_tuple); + h = nf_conntrack_find_get(net, &master_tuple); if (!h) return -ENOENT; ct = nf_ct_tuplehash_to_ctrack(h); @@ -1846,6 +1858,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(ctnl); struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; struct nfgenmsg *nfmsg = nlmsg_data(nlh); @@ -1862,13 +1875,13 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, return err; spin_lock_bh(&nf_conntrack_lock); - exp = __nf_ct_expect_find(&init_net, &tuple); + exp = __nf_ct_expect_find(net, &tuple); if (!exp) { spin_unlock_bh(&nf_conntrack_lock); err = -ENOENT; if (nlh->nlmsg_flags & NLM_F_CREATE) { - err = ctnetlink_create_expect(cda, + err = ctnetlink_create_expect(net, cda, u3, NETLINK_CB(skb).pid, nlmsg_report(nlh)); -- cgit v1.2.2 From e1d5a010729a7a495a8a8de85727f3f0d62f06a0 Mon Sep 17 00:00:00 2001 From: Samir Bellabes Date: Thu, 7 Jan 2010 22:10:56 +0000 Subject: genetlink: optimize ctrl_dumpfamily() there is a unnecessary test which can be replaced by a good initialization in the 'for' statement Noticed by Serge E. Hallyn Signed-off-by: Samir Bellabes Signed-off-by: David S. 
Miller --- net/netlink/genetlink.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index d07ecda0a92d..a4b6e148c5de 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -681,9 +681,7 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) int chains_to_skip = cb->args[0]; int fams_to_skip = cb->args[1]; - for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { - if (i < chains_to_skip) - continue; + for (i = chains_to_skip; i < GENL_FAM_TAB_SIZE; i++) { n = 0; list_for_each_entry(rt, genl_family_chain(i), family_list) { if (!rt->netnsok && !net_eq(net, &init_net)) -- cgit v1.2.2 From 508e14b4a4fb1a824a14f2c5b8d7df67b313f8e4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 12 Jan 2010 14:27:30 +0000 Subject: netpoll: allow execution of multiple rx_hooks per interface Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/core/netpoll.c | 169 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 106 insertions(+), 63 deletions(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 0b4d0d35ef40..7aa697253765 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -407,11 +407,24 @@ static void arp_reply(struct sk_buff *skb) __be32 sip, tip; unsigned char *sha; struct sk_buff *send_skb; - struct netpoll *np = NULL; + struct netpoll *np, *tmp; + unsigned long flags; + int hits = 0; + + if (list_empty(&npinfo->rx_np)) + return; + + /* Before checking the packet, we do some early + inspection whether this is interesting at all */ + spin_lock_irqsave(&npinfo->rx_lock, flags); + list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { + if (np->dev == skb->dev) + hits++; + } + spin_unlock_irqrestore(&npinfo->rx_lock, flags); - if (npinfo->rx_np && npinfo->rx_np->dev == skb->dev) - np = npinfo->rx_np; - if (!np) + /* No netpoll struct is using this dev */ + if (!hits) return; /* No arp on this interface */ @@ -437,77 +450,91 @@ static void arp_reply(struct sk_buff *skb) arp_ptr += skb->dev->addr_len; memcpy(&sip, arp_ptr, 4); arp_ptr += 4; - /* if we actually cared about dst hw addr, it would get copied here */ + /* If we actually cared about dst hw addr, + it would get copied here */ arp_ptr += skb->dev->addr_len; memcpy(&tip, arp_ptr, 4); /* Should we ignore arp? */ - if (tip != np->local_ip || - ipv4_is_loopback(tip) || ipv4_is_multicast(tip)) + if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip)) return; size = arp_hdr_len(skb->dev); - send_skb = find_skb(np, size + LL_ALLOCATED_SPACE(np->dev), - LL_RESERVED_SPACE(np->dev)); - if (!send_skb) - return; - - skb_reset_network_header(send_skb); - arp = (struct arphdr *) skb_put(send_skb, size); - send_skb->dev = skb->dev; - send_skb->protocol = htons(ETH_P_ARP); + spin_lock_irqsave(&npinfo->rx_lock, flags); + list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { + if (tip != np->local_ip) + continue; - /* Fill the device header for the ARP frame */ - if (dev_hard_header(send_skb, skb->dev, ptype, - sha, np->dev->dev_addr, - send_skb->len) < 0) { - kfree_skb(send_skb); - return; - } + send_skb = find_skb(np, size + LL_ALLOCATED_SPACE(np->dev), + LL_RESERVED_SPACE(np->dev)); + if (!send_skb) + continue; - /* - * Fill out the arp protocol part. - * - * we only support ethernet device type, - * which (according to RFC 1390) should always equal 1 (Ethernet). 
- */ + skb_reset_network_header(send_skb); + arp = (struct arphdr *) skb_put(send_skb, size); + send_skb->dev = skb->dev; + send_skb->protocol = htons(ETH_P_ARP); - arp->ar_hrd = htons(np->dev->type); - arp->ar_pro = htons(ETH_P_IP); - arp->ar_hln = np->dev->addr_len; - arp->ar_pln = 4; - arp->ar_op = htons(type); + /* Fill the device header for the ARP frame */ + if (dev_hard_header(send_skb, skb->dev, ptype, + sha, np->dev->dev_addr, + send_skb->len) < 0) { + kfree_skb(send_skb); + continue; + } - arp_ptr=(unsigned char *)(arp + 1); - memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len); - arp_ptr += np->dev->addr_len; - memcpy(arp_ptr, &tip, 4); - arp_ptr += 4; - memcpy(arp_ptr, sha, np->dev->addr_len); - arp_ptr += np->dev->addr_len; - memcpy(arp_ptr, &sip, 4); + /* + * Fill out the arp protocol part. + * + * we only support ethernet device type, + * which (according to RFC 1390) should + * always equal 1 (Ethernet). + */ - netpoll_send_skb(np, send_skb); + arp->ar_hrd = htons(np->dev->type); + arp->ar_pro = htons(ETH_P_IP); + arp->ar_hln = np->dev->addr_len; + arp->ar_pln = 4; + arp->ar_op = htons(type); + + arp_ptr = (unsigned char *)(arp + 1); + memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len); + arp_ptr += np->dev->addr_len; + memcpy(arp_ptr, &tip, 4); + arp_ptr += 4; + memcpy(arp_ptr, sha, np->dev->addr_len); + arp_ptr += np->dev->addr_len; + memcpy(arp_ptr, &sip, 4); + + netpoll_send_skb(np, send_skb); + + /* If there are several rx_hooks for the same address, + we're fine by sending a single reply */ + break; + } + spin_unlock_irqrestore(&npinfo->rx_lock, flags); } int __netpoll_rx(struct sk_buff *skb) { int proto, len, ulen; + int hits = 0; struct iphdr *iph; struct udphdr *uh; - struct netpoll_info *npi = skb->dev->npinfo; - struct netpoll *np = npi->rx_np; + struct netpoll_info *npinfo = skb->dev->npinfo; + struct netpoll *np, *tmp; - if (!np) + if (list_empty(&npinfo->rx_np)) goto out; + if (skb->dev->type != ARPHRD_ETHER) goto out; /* check if netpoll clients need ARP */ if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) { - skb_queue_tail(&npi->arp_tx, skb); + skb_queue_tail(&npinfo->arp_tx, skb); return 1; } @@ -551,16 +578,23 @@ int __netpoll_rx(struct sk_buff *skb) goto out; if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr)) goto out; - if (np->local_ip && np->local_ip != iph->daddr) - goto out; - if (np->remote_ip && np->remote_ip != iph->saddr) - goto out; - if (np->local_port && np->local_port != ntohs(uh->dest)) - goto out; - np->rx_hook(np, ntohs(uh->source), - (char *)(uh+1), - ulen - sizeof(struct udphdr)); + list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { + if (np->local_ip && np->local_ip != iph->daddr) + continue; + if (np->remote_ip && np->remote_ip != iph->saddr) + continue; + if (np->local_port && np->local_port != ntohs(uh->dest)) + continue; + + np->rx_hook(np, ntohs(uh->source), + (char *)(uh+1), + ulen - sizeof(struct udphdr)); + hits++; + } + + if (!hits) + goto out; kfree_skb(skb); return 1; @@ -684,6 +718,7 @@ int netpoll_setup(struct netpoll *np) struct net_device *ndev = NULL; struct in_device *in_dev; struct netpoll_info *npinfo; + struct netpoll *npe, *tmp; unsigned long flags; int err; @@ -704,7 +739,7 @@ int netpoll_setup(struct netpoll *np) } npinfo->rx_flags = 0; - npinfo->rx_np = NULL; + INIT_LIST_HEAD(&npinfo->rx_np); spin_lock_init(&npinfo->rx_lock); skb_queue_head_init(&npinfo->arp_tx); @@ -785,7 +820,7 @@ int netpoll_setup(struct netpoll *np) if (np->rx_hook) { spin_lock_irqsave(&npinfo->rx_lock, flags); 
npinfo->rx_flags |= NETPOLL_RX_ENABLED; - npinfo->rx_np = np; + list_add_tail(&np->rx, &npinfo->rx_np); spin_unlock_irqrestore(&npinfo->rx_lock, flags); } @@ -801,9 +836,16 @@ int netpoll_setup(struct netpoll *np) return 0; release: - if (!ndev->npinfo) + if (!ndev->npinfo) { + spin_lock_irqsave(&npinfo->rx_lock, flags); + list_for_each_entry_safe(npe, tmp, &npinfo->rx_np, rx) { + npe->dev = NULL; + } + spin_unlock_irqrestore(&npinfo->rx_lock, flags); + kfree(npinfo); - np->dev = NULL; + } + dev_put(ndev); return err; } @@ -823,10 +865,11 @@ void netpoll_cleanup(struct netpoll *np) if (np->dev) { npinfo = np->dev->npinfo; if (npinfo) { - if (npinfo->rx_np == np) { + if (!list_empty(&npinfo->rx_np)) { spin_lock_irqsave(&npinfo->rx_lock, flags); - npinfo->rx_np = NULL; - npinfo->rx_flags &= ~NETPOLL_RX_ENABLED; + list_del(&np->rx); + if (list_empty(&npinfo->rx_np)) + npinfo->rx_flags &= ~NETPOLL_RX_ENABLED; spin_unlock_irqrestore(&npinfo->rx_lock, flags); } -- cgit v1.2.2 From 9a58a80a701bdb2d220cdab4914218df5b48d781 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 14 Jan 2010 03:10:54 -0800 Subject: proc_fops: convert drivers/isdn/ to seq_file Convert code away from ->read_proc/->write_proc interfaces. Switch to proc_create()/proc_create_data() which make addition of proc entries reliable wrt NULL ->proc_fops, NULL ->data and so on. Problem with ->read_proc et al is described here commit 786d7e1612f0b0adb6046f19b906609e4fe8b1ba "Fix rmmod/read/write races in /proc entries" [akpm@linux-foundation.org: CONFIG_PROC_FS=n build fix] Signed-off-by: Alexey Dobriyan Signed-off-by: Tilman Schmidt Signed-off-by: Karsten Keil Signed-off-by: David S. Miller --- net/bluetooth/cmtp/capi.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c index 97f8d68d574d..3487cfe74aec 100644 --- a/net/bluetooth/cmtp/capi.c +++ b/net/bluetooth/cmtp/capi.c @@ -21,7 +21,8 @@ */ #include - +#include +#include #include #include #include @@ -516,33 +517,37 @@ static char *cmtp_procinfo(struct capi_ctr *ctrl) return "CAPI Message Transport Protocol"; } -static int cmtp_ctr_read_proc(char *page, char **start, off_t off, int count, int *eof, struct capi_ctr *ctrl) +static int cmtp_proc_show(struct seq_file *m, void *v) { + struct capi_ctr *ctrl = m->private; struct cmtp_session *session = ctrl->driverdata; struct cmtp_application *app; struct list_head *p, *n; - int len = 0; - len += sprintf(page + len, "%s\n\n", cmtp_procinfo(ctrl)); - len += sprintf(page + len, "addr %s\n", session->name); - len += sprintf(page + len, "ctrl %d\n", session->num); + seq_printf(m, "%s\n\n", cmtp_procinfo(ctrl)); + seq_printf(m, "addr %s\n", session->name); + seq_printf(m, "ctrl %d\n", session->num); list_for_each_safe(p, n, &session->applications) { app = list_entry(p, struct cmtp_application, list); - len += sprintf(page + len, "appl %d -> %d\n", app->appl, app->mapping); + seq_printf(m, "appl %d -> %d\n", app->appl, app->mapping); } - if (off + count >= len) - *eof = 1; - - if (len < off) - return 0; - - *start = page + off; + return 0; +} - return ((count < len - off) ? 
count : len - off); +static int cmtp_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, cmtp_proc_show, PDE(inode)->data); } +static const struct file_operations cmtp_proc_fops = { + .owner = THIS_MODULE, + .open = cmtp_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; int cmtp_attach_device(struct cmtp_session *session) { @@ -582,7 +587,7 @@ int cmtp_attach_device(struct cmtp_session *session) session->ctrl.send_message = cmtp_send_message; session->ctrl.procinfo = cmtp_procinfo; - session->ctrl.ctr_read_proc = cmtp_ctr_read_proc; + session->ctrl.proc_fops = &cmtp_proc_fops; if (attach_capi_ctr(&session->ctrl) < 0) { BT_ERR("Can't attach new controller"); -- cgit v1.2.2 From e99c7cd57b2481ee36b2338040e3ce1cd17e0763 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 7 Jan 2010 17:24:55 -0500 Subject: cfg80211: fix 2 GHz subband calculation for country IEs Country IEs triplets are getting an extra channel with the current count. This does not affect regulatory because we always took the intersection between what the AP gave and what CRDA believed is correct. This however does fix processing some Country IEs with multiple sequential 2 GHz triplets. Since our parser and the spec mandates all channels be monitonically increasing we would drop the IE after noticing the second triplet begins on a channel already processed. APs that send these type of country IEs seems rare though. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index a5c2d3a6cbb2..f0859eada758 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -740,7 +740,7 @@ static struct ieee80211_regdomain *country_ie_2_rd( /* 2 GHz */ if (triplet->chans.first_channel <= 14) end_channel = triplet->chans.first_channel + - triplet->chans.num_channels; + triplet->chans.num_channels - 1; else /* * 5 GHz -- For example in country IEs if the first @@ -863,7 +863,7 @@ static struct ieee80211_regdomain *country_ie_2_rd( /* 2 GHz */ if (triplet->chans.first_channel <= 14) end_channel = triplet->chans.first_channel + - triplet->chans.num_channels; + triplet->chans.num_channels -1; else end_channel = triplet->chans.first_channel + (4 * (triplet->chans.num_channels - 1)); -- cgit v1.2.2 From a0f2e0fca1e72c1de07e834be05b61d33842253e Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 14 Jan 2010 13:27:46 -0500 Subject: cfg80211: Ingore country IEs with a zero set of number of channels Previous to this and the last patch, titled, "cfg80211: Fix 2 GHz subband calculation for country IEs" we would end up treating these IEs as single channel units. These are in fact just bogus IE triplets so ignore the entire IE if these are found. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. 
Linville --- net/wireless/reg.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index f0859eada758..f3b77f7b8e3d 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -585,6 +585,9 @@ static int max_subband_chan(int orig_cur_chan, break; } + if (triplet->chans.num_channels == 0) + return 0; + /* Monitonically increasing channel order */ if (triplet->chans.first_channel <= end_subband_chan) return 0; @@ -737,6 +740,9 @@ static struct ieee80211_regdomain *country_ie_2_rd( break; } + if (triplet->chans.num_channels == 0) + return NULL; + /* 2 GHz */ if (triplet->chans.first_channel <= 14) end_channel = triplet->chans.first_channel + -- cgit v1.2.2 From 4d0392be21d4710121f855c0caf57d32ffd1ced0 Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Fri, 15 Jan 2010 01:08:58 -0800 Subject: net/core/sock.c: quiet sparse noise In sock_getsockopt the symbol 'lv' is declared as an unsigned int type, probably due to sizeof returning a size_t which is really an unsigned int. This produces a sparse warning for SO_PEERNAME due to the sock->ops->getname() call: warning: incorrect type in argument 3 (different signedness) expected int *sockaddr_len got unsigned int * Quiet the warning by changing the type of 'lv' to an int. Signed-off-by: H Hartley Sweeten Signed-off-by: David S. Miller --- net/core/sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index e1f6f225f012..10b1d3243a72 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -741,7 +741,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, struct timeval tm; } v; - unsigned int lv = sizeof(int); + int lv = sizeof(int); int len; if (get_user(len, optlen)) -- cgit v1.2.2 From 71fceff0ea36d5a6cffecb272b8b3970535fe905 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 15 Jan 2010 01:16:40 -0800 Subject: ipv4: Use less conflicting local var name in change_nexthops() loop macro. As noticed by H Hartley Sweeten, since change_nexthops() uses 'nh' as it's iterator variable, it can conflict with other existing local vars. Use "nexthop_nh" to avoid the conflict and make it easier to figure out where this magic variable comes from. Signed-off-by: David S. 
Miller --- net/ipv4/fib_semantics.c | 76 +++++++++++++++++++++++++----------------------- 1 file changed, 40 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index ed19aa6919c2..96b21011a3e4 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -62,8 +62,8 @@ static DEFINE_SPINLOCK(fib_multipath_lock); #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \ for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) -#define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \ -for (nhsel=0, nh = (struct fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++) +#define change_nexthops(fi) { int nhsel; struct fib_nh *nexthop_nh; \ +for (nhsel=0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nexthop_nh++, nhsel++) #else /* CONFIG_IP_ROUTE_MULTIPATH */ @@ -72,7 +72,7 @@ for (nhsel=0, nh = (struct fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, #define for_nexthops(fi) { int nhsel = 0; const struct fib_nh * nh = (fi)->fib_nh; \ for (nhsel=0; nhsel < 1; nhsel++) -#define change_nexthops(fi) { int nhsel = 0; struct fib_nh * nh = (struct fib_nh *)((fi)->fib_nh); \ +#define change_nexthops(fi) { int nhsel = 0; struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ for (nhsel=0; nhsel < 1; nhsel++) #endif /* CONFIG_IP_ROUTE_MULTIPATH */ @@ -145,9 +145,9 @@ void free_fib_info(struct fib_info *fi) return; } change_nexthops(fi) { - if (nh->nh_dev) - dev_put(nh->nh_dev); - nh->nh_dev = NULL; + if (nexthop_nh->nh_dev) + dev_put(nexthop_nh->nh_dev); + nexthop_nh->nh_dev = NULL; } endfor_nexthops(fi); fib_info_cnt--; release_net(fi->fib_net); @@ -162,9 +162,9 @@ void fib_release_info(struct fib_info *fi) if (fi->fib_prefsrc) hlist_del(&fi->fib_lhash); change_nexthops(fi) { - if (!nh->nh_dev) + if (!nexthop_nh->nh_dev) continue; - hlist_del(&nh->nh_hash); + hlist_del(&nexthop_nh->nh_hash); } endfor_nexthops(fi) fi->fib_dead = 1; fib_info_put(fi); @@ -395,19 +395,20 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, if (!rtnh_ok(rtnh, remaining)) return -EINVAL; - nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; - nh->nh_oif = rtnh->rtnh_ifindex; - nh->nh_weight = rtnh->rtnh_hops + 1; + nexthop_nh->nh_flags = + (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; + nexthop_nh->nh_oif = rtnh->rtnh_ifindex; + nexthop_nh->nh_weight = rtnh->rtnh_hops + 1; attrlen = rtnh_attrlen(rtnh); if (attrlen > 0) { struct nlattr *nla, *attrs = rtnh_attrs(rtnh); nla = nla_find(attrs, attrlen, RTA_GATEWAY); - nh->nh_gw = nla ? nla_get_be32(nla) : 0; + nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0; #ifdef CONFIG_NET_CLS_ROUTE nla = nla_find(attrs, attrlen, RTA_FLOW); - nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; + nexthop_nh->nh_tclassid = nla ? 
nla_get_u32(nla) : 0; #endif } @@ -738,7 +739,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) fi->fib_nhs = nhs; change_nexthops(fi) { - nh->nh_parent = fi; + nexthop_nh->nh_parent = fi; } endfor_nexthops(fi) if (cfg->fc_mx) { @@ -808,7 +809,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) goto failure; } else { change_nexthops(fi) { - if ((err = fib_check_nh(cfg, fi, nh)) != 0) + if ((err = fib_check_nh(cfg, fi, nexthop_nh)) != 0) goto failure; } endfor_nexthops(fi) } @@ -843,11 +844,11 @@ link_it: struct hlist_head *head; unsigned int hash; - if (!nh->nh_dev) + if (!nexthop_nh->nh_dev) continue; - hash = fib_devindex_hashfn(nh->nh_dev->ifindex); + hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex); head = &fib_info_devhash[hash]; - hlist_add_head(&nh->nh_hash, head); + hlist_add_head(&nexthop_nh->nh_hash, head); } endfor_nexthops(fi) spin_unlock_bh(&fib_info_lock); return fi; @@ -1080,21 +1081,21 @@ int fib_sync_down_dev(struct net_device *dev, int force) prev_fi = fi; dead = 0; change_nexthops(fi) { - if (nh->nh_flags&RTNH_F_DEAD) + if (nexthop_nh->nh_flags&RTNH_F_DEAD) dead++; - else if (nh->nh_dev == dev && - nh->nh_scope != scope) { - nh->nh_flags |= RTNH_F_DEAD; + else if (nexthop_nh->nh_dev == dev && + nexthop_nh->nh_scope != scope) { + nexthop_nh->nh_flags |= RTNH_F_DEAD; #ifdef CONFIG_IP_ROUTE_MULTIPATH spin_lock_bh(&fib_multipath_lock); - fi->fib_power -= nh->nh_power; - nh->nh_power = 0; + fi->fib_power -= nexthop_nh->nh_power; + nexthop_nh->nh_power = 0; spin_unlock_bh(&fib_multipath_lock); #endif dead++; } #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (force > 1 && nh->nh_dev == dev) { + if (force > 1 && nexthop_nh->nh_dev == dev) { dead = fi->fib_nhs; break; } @@ -1144,18 +1145,20 @@ int fib_sync_up(struct net_device *dev) prev_fi = fi; alive = 0; change_nexthops(fi) { - if (!(nh->nh_flags&RTNH_F_DEAD)) { + if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) { alive++; continue; } - if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP)) + if (nexthop_nh->nh_dev == NULL || + !(nexthop_nh->nh_dev->flags&IFF_UP)) continue; - if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev)) + if (nexthop_nh->nh_dev != dev || + !__in_dev_get_rtnl(dev)) continue; alive++; spin_lock_bh(&fib_multipath_lock); - nh->nh_power = 0; - nh->nh_flags &= ~RTNH_F_DEAD; + nexthop_nh->nh_power = 0; + nexthop_nh->nh_flags &= ~RTNH_F_DEAD; spin_unlock_bh(&fib_multipath_lock); } endfor_nexthops(fi) @@ -1182,9 +1185,9 @@ void fib_select_multipath(const struct flowi *flp, struct fib_result *res) if (fi->fib_power <= 0) { int power = 0; change_nexthops(fi) { - if (!(nh->nh_flags&RTNH_F_DEAD)) { - power += nh->nh_weight; - nh->nh_power = nh->nh_weight; + if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) { + power += nexthop_nh->nh_weight; + nexthop_nh->nh_power = nexthop_nh->nh_weight; } } endfor_nexthops(fi); fi->fib_power = power; @@ -1204,9 +1207,10 @@ void fib_select_multipath(const struct flowi *flp, struct fib_result *res) w = jiffies % fi->fib_power; change_nexthops(fi) { - if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) { - if ((w -= nh->nh_power) <= 0) { - nh->nh_power--; + if (!(nexthop_nh->nh_flags&RTNH_F_DEAD) && + nexthop_nh->nh_power) { + if ((w -= nexthop_nh->nh_power) <= 0) { + nexthop_nh->nh_power--; fi->fib_power--; res->nh_sel = nhsel; spin_unlock_bh(&fib_multipath_lock); -- cgit v1.2.2 From 93895757df4ebe22c98b9128b98ebf8cec972c60 Mon Sep 17 00:00:00 2001 From: Benoit Papillault Date: Thu, 14 Jan 2010 23:20:31 +0100 Subject: mac80211: Fixed netif_tx_wake_all_queues in IBSS mode When 
ieee80211_offchannel_return is called, it needs to re-enable TX queues that have been stopped in ieee80211_offchannel_stop_beaconing or ieee80211_offchannel_stop_station. This happens if we are doing a scan with an IBSS interface; in this case, the interface has stopped transmitting. Signed-off-by: Benoit Papillault Signed-off-by: John W. Linville --- net/mac80211/offchannel.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 1facfeb1f79b..c36b1911987a 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -153,9 +153,11 @@ void ieee80211_offchannel_return(struct ieee80211_local *local, if (sdata->vif.type == NL80211_IFTYPE_STATION) { if (sdata->u.mgd.associated) ieee80211_offchannel_ps_disable(sdata); - netif_tx_wake_all_queues(sdata->dev); } + if (sdata->vif.type != NL80211_IFTYPE_MONITOR) + netif_tx_wake_all_queues(sdata->dev); + /* re-enable beaconing */ if (enable_beaconing && (sdata->vif.type == NL80211_IFTYPE_AP || -- cgit v1.2.2 From 84920e3e47f654a22b540606fb8c1ab90b870942 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 14 Jan 2010 20:08:20 -0500 Subject: cfg80211: make regulatory_hint_11d() band specific In practice APs do not send country IE channel triplets for channels the AP is not operating on, and if they were to do so they would have to use the regulatory extension, which we currently do not process. No AP that does this has been seen in practice, though, so just drop those country IEs. Additionally, it has been noted that the first series of country IE channel triplets is specific to the band the AP sends on. Propagate the band on which the country IE was found and reject the country IE if the triplets are ever outside of that band. Although we now won't process country IE information with multiple-band information, we leave the intersection work as is, since it is technically possible for someone to eventually want to process these types of country IEs with regulatory extensions. Cc: Jouni Malinen Cc: Johannes Berg Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 74 +++++++++++++++++++++++++++++++++--------------- net/wireless/reg.h | 11 ++++++++ net/wireless/sme.c | 1 + 3 files changed, 66 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index f3b77f7b8e3d..5f8071de7950 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -484,6 +484,34 @@ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range, #undef ONE_GHZ_IN_KHZ } +/* + * This is a work around for sanity checking ieee80211_channel_to_frequency()'s + * work. ieee80211_channel_to_frequency() can for example currently provide a + * 2 GHz channel when in fact a 5 GHz channel was desired. An example would be + * an AP providing channel 8 on a country IE triplet when it sent this on the + * 5 GHz band, that channel is designed to be channel 8 on 5 GHz, not a 2 GHz + * channel. + * + * This can be removed once ieee80211_channel_to_frequency() takes in a band.
+ */ +static bool chan_in_band(int chan, enum ieee80211_band band) +{ + int center_freq = ieee80211_channel_to_frequency(chan); + + switch (band) { + case IEEE80211_BAND_2GHZ: + if (center_freq <= 2484) + return true; + return false; + case IEEE80211_BAND_5GHZ: + if (center_freq >= 5005) + return true; + return false; + default: + return false; + } +} + /* * Some APs may send a country IE triplet for each channel they * support and while this is completely overkill and silly we still @@ -532,7 +560,8 @@ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range, * Returns 0 if the IE has been found to be invalid in the middle * somewhere. */ -static int max_subband_chan(int orig_cur_chan, +static int max_subband_chan(enum ieee80211_band band, + int orig_cur_chan, int orig_end_channel, s8 orig_max_power, u8 **country_ie, @@ -541,7 +570,6 @@ static int max_subband_chan(int orig_cur_chan, u8 *triplets_start = *country_ie; u8 len_at_triplet = *country_ie_len; int end_subband_chan = orig_end_channel; - enum ieee80211_band band; /* * We'll deal with padding for the caller unless @@ -557,17 +585,14 @@ static int max_subband_chan(int orig_cur_chan, *country_ie += 3; *country_ie_len -= 3; - if (orig_cur_chan <= 14) - band = IEEE80211_BAND_2GHZ; - else - band = IEEE80211_BAND_5GHZ; + if (!chan_in_band(orig_cur_chan, band)) + return 0; while (*country_ie_len >= 3) { int end_channel = 0; struct ieee80211_country_ie_triplet *triplet = (struct ieee80211_country_ie_triplet *) *country_ie; int cur_channel = 0, next_expected_chan; - enum ieee80211_band next_band = IEEE80211_BAND_2GHZ; /* means last triplet is completely unrelated to this one */ if (triplet->ext.reg_extension_id >= @@ -592,6 +617,9 @@ static int max_subband_chan(int orig_cur_chan, if (triplet->chans.first_channel <= end_subband_chan) return 0; + if (!chan_in_band(triplet->chans.first_channel, band)) + return 0; + /* 2 GHz */ if (triplet->chans.first_channel <= 14) { end_channel = triplet->chans.first_channel + @@ -600,14 +628,10 @@ static int max_subband_chan(int orig_cur_chan, else { end_channel = triplet->chans.first_channel + (4 * (triplet->chans.num_channels - 1)); - next_band = IEEE80211_BAND_5GHZ; } - if (band != next_band) { - *country_ie -= 3; - *country_ie_len += 3; - break; - } + if (!chan_in_band(end_channel, band)) + return 0; if (orig_max_power != triplet->chans.max_power) { *country_ie -= 3; @@ -666,6 +690,7 @@ static int max_subband_chan(int orig_cur_chan, * with our userspace regulatory agent to get lower bounds. */ static struct ieee80211_regdomain *country_ie_2_rd( + enum ieee80211_band band, u8 *country_ie, u8 country_ie_len, u32 *checksum) @@ -743,8 +768,11 @@ static struct ieee80211_regdomain *country_ie_2_rd( if (triplet->chans.num_channels == 0) return NULL; + if (!chan_in_band(triplet->chans.first_channel, band)) + return NULL; + /* 2 GHz */ - if (triplet->chans.first_channel <= 14) + if (band == IEEE80211_BAND_2GHZ) end_channel = triplet->chans.first_channel + triplet->chans.num_channels - 1; else @@ -767,7 +795,8 @@ static struct ieee80211_regdomain *country_ie_2_rd( * or for whatever reason sends triplets with multiple channels * separated when in fact they should be together. 
*/ - end_channel = max_subband_chan(cur_channel, + end_channel = max_subband_chan(band, + cur_channel, end_channel, triplet->chans.max_power, &country_ie, @@ -775,6 +804,9 @@ static struct ieee80211_regdomain *country_ie_2_rd( if (!end_channel) return NULL; + if (!chan_in_band(end_channel, band)) + return NULL; + cur_sub_max_channel = end_channel; /* Basic sanity check */ @@ -867,14 +899,15 @@ static struct ieee80211_regdomain *country_ie_2_rd( reg_rule->flags = flags; /* 2 GHz */ - if (triplet->chans.first_channel <= 14) + if (band == IEEE80211_BAND_2GHZ) end_channel = triplet->chans.first_channel + triplet->chans.num_channels -1; else end_channel = triplet->chans.first_channel + (4 * (triplet->chans.num_channels - 1)); - end_channel = max_subband_chan(triplet->chans.first_channel, + end_channel = max_subband_chan(band, + triplet->chans.first_channel, end_channel, triplet->chans.max_power, &country_ie, @@ -1981,8 +2014,9 @@ static bool reg_same_country_ie_hint(struct wiphy *wiphy, * therefore cannot iterate over the rdev list here. */ void regulatory_hint_11d(struct wiphy *wiphy, - u8 *country_ie, - u8 country_ie_len) + enum ieee80211_band band, + u8 *country_ie, + u8 country_ie_len) { struct ieee80211_regdomain *rd = NULL; char alpha2[2]; @@ -2028,7 +2062,7 @@ void regulatory_hint_11d(struct wiphy *wiphy, wiphy_idx_valid(last_request->wiphy_idx))) goto out; - rd = country_ie_2_rd(country_ie, country_ie_len, &checksum); + rd = country_ie_2_rd(band, country_ie, country_ie_len, &checksum); if (!rd) { REG_DBG_PRINT("cfg80211: Ignoring bogus country IE\n"); goto out; diff --git a/net/wireless/reg.h b/net/wireless/reg.h index 3362c7c069b2..3018508226ab 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -41,14 +41,25 @@ int regulatory_hint_found_beacon(struct wiphy *wiphy, * regulatory_hint_11d - hints a country IE as a regulatory domain * @wiphy: the wireless device giving the hint (used only for reporting * conflicts) + * @band: the band on which the country IE was received on. This determines + * the band we'll process the country IE channel triplets for. * @country_ie: pointer to the country IE * @country_ie_len: length of the country IE * * We will intersect the rd with the what CRDA tells us should apply * for the alpha2 this country IE belongs to, this prevents APs from * sending us incorrect or outdated information against a country. + * + * The AP is expected to provide Country IE channel triplets for the + * band it is on. It is technically possible for APs to send channel + * country IE triplets even for channels outside of the band they are + * in but for that they would have to use the regulatory extension + * in combination with a triplet but this behaviour is currently + * not observed. For this reason if a triplet is seen with channel + * information for a band the BSS is not present in it will be ignored. 
*/ void regulatory_hint_11d(struct wiphy *wiphy, + enum ieee80211_band band, u8 *country_ie, u8 country_ie_len); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 2333d78187e4..2ce5e1609a3d 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -454,6 +454,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, * - and country_ie[1] which is the IE length */ regulatory_hint_11d(wdev->wiphy, + bss->channel->band, country_ie + 2, country_ie[1]); } -- cgit v1.2.2 From 43d3534344bbdcfa9c61a6b38490cd4cbb2f6bb6 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 15 Jan 2010 03:00:48 +0100 Subject: mac80211: force use_short_slot=true for 5GHz Normally 5 GHz does not have a concept of long vs. short slot time; however, the slot time it ends up using is the same as for 2.4 GHz with use_short_slot == true. Because of that, it makes more sense to force use_short_slot = true whenever 5 GHz is being used, so that this particular check does not have to be in every single driver that uses this flag. Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 7 +++++++ net/mac80211/mlme.c | 2 ++ 2 files changed, 9 insertions(+) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 8286df5822d5..b0102c538b30 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1085,6 +1085,13 @@ static int ieee80211_change_bss(struct wiphy *wiphy, params->use_short_preamble; changed |= BSS_CHANGED_ERP_PREAMBLE; } + + if (!sdata->vif.bss_conf.use_short_slot && + sdata->local->hw.conf.channel->band == IEEE80211_BAND_5GHZ) { + sdata->vif.bss_conf.use_short_slot = true; + changed |= BSS_CHANGED_ERP_SLOT; + } + if (params->use_short_slot_time >= 0) { sdata->vif.bss_conf.use_short_slot = params->use_short_slot_time; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2746391248d3..a82564e73d91 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -670,6 +670,8 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, } use_short_slot = !!(capab & WLAN_CAPABILITY_SHORT_SLOT_TIME); + if (sdata->local->hw.conf.channel->band == IEEE80211_BAND_5GHZ) + use_short_slot = true; if (use_protection != bss_conf->use_cts_prot) { bss_conf->use_cts_prot = use_protection; -- cgit v1.2.2 From 145b6d1a56f224d15c61aa7ecfda9a1171b47b6a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 15 Jan 2010 21:44:21 +0800 Subject: mac80211: fix memory leak in ieee80211_send_probe_req This patch fixes a memory leak in ieee80211_send_probe_req, which was introduced in 7c12ce8b854df346388ea56d684784e3484012cf ("mac80211: use Probe Request template when sending a direct scan"). The patch is against the latest wireless-test tree. Signed-off-by: Ming Lei Reviewed-by: Kalle Valo Signed-off-by: John W.
Linville --- net/mac80211/util.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/util.c b/net/mac80211/util.c index e278f97c8305..ca170b417da6 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1028,6 +1028,7 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; ieee80211_tx_skb(sdata, skb); + kfree(buf); } u32 ieee80211_sta_get_rates(struct ieee80211_local *local, -- cgit v1.2.2 From 73c89c15b959adf06366722c4be8d2eddec0a529 Mon Sep 17 00:00:00 2001 From: Tobias Brunner Date: Sun, 17 Jan 2010 21:52:11 +1100 Subject: crypto: gcm - Add RFC4543 wrapper for GCM This patch adds the RFC4543 (GMAC) wrapper for GCM similar to the existing RFC4106 wrapper. The main differences between GCM and GMAC are the contents of the AAD and that the plaintext is empty for the latter. Signed-off-by: Tobias Brunner Signed-off-by: Herbert Xu --- net/xfrm/xfrm_algo.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'net') diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 743c0134a6a9..8b4d6e3246e5 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -125,6 +125,22 @@ static struct xfrm_algo_desc aead_list[] = { .sadb_alg_maxbits = 256 } }, +{ + .name = "rfc4543(gcm(aes))", + + .uinfo = { + .aead = { + .icv_truncbits = 128, + } + }, + + .desc = { + .sadb_alg_id = SADB_X_EALG_NULL_AES_GMAC, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 256 + } +}, }; static struct xfrm_algo_desc aalg_list[] = { -- cgit v1.2.2 From 72659ecce68588b74f6c46862c2b4cec137d7a5a Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Sun, 17 Jan 2010 19:09:39 -0800 Subject: tcp: account SYN-ACK timeouts & retransmissions Currently we don't increment SYN-ACK timeouts & retransmissions although we do increment the same stats for SYN. We seem to have lost the SYN-ACK accounting with the introduction of tcp_syn_recv_timer (commit 2248761e in the netdev-vger-cvs tree). This patch fixes this issue. In the process we also rename the v4/v6 syn/ack retransmit functions for clarity. We also add a new request_socket operations (syn_ack_timeout) so we can keep code in inet_connection_sock.c protocol agnostic. Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 2 ++ net/ipv4/tcp_ipv4.c | 18 ++++++++++-------- net/ipv4/tcp_timer.c | 6 ++++++ net/ipv6/tcp_ipv6.c | 12 ++++++++++-- 4 files changed, 28 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index ee16475f8fc3..8da6429269dd 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -529,6 +529,8 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, syn_ack_recalc(req, thresh, max_retries, queue->rskq_defer_accept, &expire, &resend); + if (req->rsk_ops->syn_ack_timeout) + req->rsk_ops->syn_ack_timeout(parent, req); if (!expire && (!resend || !req->rsk_ops->rtx_syn_ack(parent, req, NULL) || diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 382f667238ec..356f544c4c10 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -742,9 +742,9 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, * This still operates on a request_sock only, not on a big * socket. 
*/ -static int __tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, - struct request_sock *req, - struct request_values *rvp) +static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, + struct request_sock *req, + struct request_values *rvp) { const struct inet_request_sock *ireq = inet_rsk(req); int err = -1; @@ -775,10 +775,11 @@ static int __tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, return err; } -static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, +static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req, struct request_values *rvp) { - return __tcp_v4_send_synack(sk, NULL, req, rvp); + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); + return tcp_v4_send_synack(sk, NULL, req, rvp); } /* @@ -1192,10 +1193,11 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) struct request_sock_ops tcp_request_sock_ops __read_mostly = { .family = PF_INET, .obj_size = sizeof(struct tcp_request_sock), - .rtx_syn_ack = tcp_v4_send_synack, + .rtx_syn_ack = tcp_v4_rtx_synack, .send_ack = tcp_v4_reqsk_send_ack, .destructor = tcp_v4_reqsk_destructor, .send_reset = tcp_v4_send_reset, + .syn_ack_timeout = tcp_syn_ack_timeout, }; #ifdef CONFIG_TCP_MD5SIG @@ -1373,8 +1375,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) } tcp_rsk(req)->snt_isn = isn; - if (__tcp_v4_send_synack(sk, dst, req, - (struct request_values *)&tmp_ext) || + if (tcp_v4_send_synack(sk, dst, req, + (struct request_values *)&tmp_ext) || want_cookie) goto drop_and_free; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 8816a20c2597..de7d1bf9114f 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -474,6 +474,12 @@ static void tcp_synack_timer(struct sock *sk) TCP_TIMEOUT_INIT, TCP_RTO_MAX); } +void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req) +{ + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEOUTS); +} +EXPORT_SYMBOL(tcp_syn_ack_timeout); + void tcp_set_keepalive(struct sock *sk, int val) { if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1c832bf198b3..82f2dea0e39e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -520,6 +520,13 @@ done: return err; } +static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req, + struct request_values *rvp) +{ + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); + return tcp_v6_send_synack(sk, req, rvp); +} + static inline void syn_flood_warning(struct sk_buff *skb) { #ifdef CONFIG_SYN_COOKIES @@ -890,10 +897,11 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb) struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .family = AF_INET6, .obj_size = sizeof(struct tcp6_request_sock), - .rtx_syn_ack = tcp_v6_send_synack, + .rtx_syn_ack = tcp_v6_rtx_synack, .send_ack = tcp_v6_reqsk_send_ack, .destructor = tcp_v6_reqsk_destructor, - .send_reset = tcp_v6_send_reset + .send_reset = tcp_v6_send_reset, + .syn_ack_timeout = tcp_syn_ack_timeout, }; #ifdef CONFIG_TCP_MD5SIG -- cgit v1.2.2 From 2c8c1e7297e19bdef3c178c3ea41d898a7716e3e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 17 Jan 2010 03:35:32 +0000 Subject: net: spread __net_init, __net_exit __net_init/__net_exit are apparently not going away, so use them to full extent. In some cases __net_init was removed, because it was called from __net_exit code. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- net/8021q/vlan.c | 4 ++-- net/8021q/vlanproc.c | 2 +- net/bridge/br_if.c | 2 +- net/core/fib_rules.c | 2 +- net/core/rtnetlink.c | 4 ++-- net/core/sock.c | 4 ++-- net/dccp/ipv4.c | 4 ++-- net/dccp/ipv6.c | 4 ++-- net/ipv4/fib_frontend.c | 4 ++-- net/ipv4/igmp.c | 4 ++-- net/ipv4/ip_fragment.c | 8 ++++---- net/ipv4/ip_gre.c | 4 ++-- net/ipv4/ipip.c | 7 +++---- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv4/udp.c | 4 ++-- net/ipv4/udplite.c | 4 ++-- net/ipv6/addrconf.c | 8 ++++---- net/ipv6/af_inet6.c | 4 ++-- net/ipv6/anycast.c | 2 +- net/ipv6/fib6_rules.c | 4 ++-- net/ipv6/icmp.c | 2 +- net/ipv6/ip6_fib.c | 6 +++--- net/ipv6/ip6_flowlabel.c | 9 ++++----- net/ipv6/ip6_tunnel.c | 9 ++++----- net/ipv6/mcast.c | 13 ++++++------- net/ipv6/ndisc.c | 4 ++-- net/ipv6/proc.c | 4 ++-- net/ipv6/raw.c | 4 ++-- net/ipv6/reassembly.c | 8 ++++---- net/ipv6/route.c | 6 +++--- net/ipv6/sit.c | 9 ++++----- net/ipv6/sysctl_net_ipv6.c | 4 ++-- net/ipv6/tcp_ipv6.c | 8 ++++---- net/ipv6/udp.c | 2 +- net/ipv6/udplite.c | 4 ++-- net/key/af_key.c | 6 +++--- net/packet/af_packet.c | 4 ++-- net/phonet/pn_dev.c | 4 ++-- net/sysctl_net.c | 4 ++-- net/unix/af_unix.c | 4 ++-- net/unix/sysctl_net_unix.c | 2 +- net/wireless/wext-proc.c | 4 ++-- net/xfrm/xfrm_sysctl.c | 4 ++-- 43 files changed, 101 insertions(+), 106 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 33f90e7362cc..453512266ea1 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -663,7 +663,7 @@ out: return err; } -static int vlan_init_net(struct net *net) +static int __net_init vlan_init_net(struct net *net) { struct vlan_net *vn = net_generic(net, vlan_net_id); int err; @@ -675,7 +675,7 @@ static int vlan_init_net(struct net *net) return err; } -static void vlan_exit_net(struct net *net) +static void __net_exit vlan_exit_net(struct net *net) { vlan_proc_cleanup(net); } diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index 9ec1f057c03a..afead353e215 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -140,7 +140,7 @@ void vlan_proc_cleanup(struct net *net) * Create /proc/net/vlan entries */ -int vlan_proc_init(struct net *net) +int __net_init vlan_proc_init(struct net *net) { struct vlan_net *vn = net_generic(net, vlan_net_id); diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index a2cbe61f6e65..7bc0604069c7 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -467,7 +467,7 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) return 0; } -void br_net_exit(struct net *net) +void __net_exit br_net_exit(struct net *net) { struct net_device *dev; LIST_HEAD(list); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 02a3b2c69c1e..9a24377146bf 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -708,7 +708,7 @@ static struct notifier_block fib_rules_notifier = { .notifier_call = fib_rules_event, }; -static int fib_rules_net_init(struct net *net) +static int __net_init fib_rules_net_init(struct net *net) { INIT_LIST_HEAD(&net->rules_ops); spin_lock_init(&net->rules_mod_lock); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 794bcb897ff0..62f3878a6010 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1386,7 +1386,7 @@ static struct notifier_block rtnetlink_dev_notifier = { }; -static int rtnetlink_net_init(struct net *net) +static int __net_init rtnetlink_net_init(struct net *net) { struct sock *sk; sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX, @@ -1397,7 +1397,7 @@ static int rtnetlink_net_init(struct net *net) 
return 0; } -static void rtnetlink_net_exit(struct net *net) +static void __net_exit rtnetlink_net_exit(struct net *net) { netlink_kernel_release(net->rtnl); net->rtnl = NULL; diff --git a/net/core/sock.c b/net/core/sock.c index 10b1d3243a72..ceef50bd131b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2140,13 +2140,13 @@ int sock_prot_inuse_get(struct net *net, struct proto *prot) } EXPORT_SYMBOL_GPL(sock_prot_inuse_get); -static int sock_inuse_init_net(struct net *net) +static int __net_init sock_inuse_init_net(struct net *net) { net->core.inuse = alloc_percpu(struct prot_inuse); return net->core.inuse ? 0 : -ENOMEM; } -static void sock_inuse_exit_net(struct net *net) +static void __net_exit sock_inuse_exit_net(struct net *net) { free_percpu(net->core.inuse); } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index dad7bc4878e0..b195c4feaa0a 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -996,7 +996,7 @@ static struct inet_protosw dccp_v4_protosw = { .flags = INET_PROTOSW_ICSK, }; -static int dccp_v4_init_net(struct net *net) +static int __net_init dccp_v4_init_net(struct net *net) { int err; @@ -1005,7 +1005,7 @@ static int dccp_v4_init_net(struct net *net) return err; } -static void dccp_v4_exit_net(struct net *net) +static void __net_exit dccp_v4_exit_net(struct net *net) { inet_ctl_sock_destroy(net->dccp.v4_ctl_sk); } diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index baf05cf43c28..1aec6349e858 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1189,7 +1189,7 @@ static struct inet_protosw dccp_v6_protosw = { .flags = INET_PROTOSW_ICSK, }; -static int dccp_v6_init_net(struct net *net) +static int __net_init dccp_v6_init_net(struct net *net) { int err; @@ -1198,7 +1198,7 @@ static int dccp_v6_init_net(struct net *net) return err; } -static void dccp_v6_exit_net(struct net *net) +static void __net_exit dccp_v6_exit_net(struct net *net) { inet_ctl_sock_destroy(net->dccp.v6_ctl_sk); } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 82dbf711d6d0..9b3e28ed5240 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -883,7 +883,7 @@ static void nl_fib_input(struct sk_buff *skb) netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT); } -static int nl_fib_lookup_init(struct net *net) +static int __net_init nl_fib_lookup_init(struct net *net) { struct sock *sk; sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0, @@ -1004,7 +1004,7 @@ fail: return err; } -static void __net_exit ip_fib_net_exit(struct net *net) +static void ip_fib_net_exit(struct net *net) { unsigned int i; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 76c08402c933..8f5468393f01 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2603,7 +2603,7 @@ static const struct file_operations igmp_mcf_seq_fops = { .release = seq_release_net, }; -static int igmp_net_init(struct net *net) +static int __net_init igmp_net_init(struct net *net) { struct proc_dir_entry *pde; @@ -2621,7 +2621,7 @@ out_igmp: return -ENOMEM; } -static void igmp_net_exit(struct net *net) +static void __net_exit igmp_net_exit(struct net *net) { proc_net_remove(net, "mcfilter"); proc_net_remove(net, "igmp"); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 86964b353c31..891c72aea520 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -646,7 +646,7 @@ static struct ctl_table ip4_frags_ctl_table[] = { { } }; -static int ip4_frags_ns_ctl_register(struct net *net) +static int __net_init ip4_frags_ns_ctl_register(struct net *net) { struct ctl_table *table; struct 
ctl_table_header *hdr; @@ -676,7 +676,7 @@ err_alloc: return -ENOMEM; } -static void ip4_frags_ns_ctl_unregister(struct net *net) +static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net) { struct ctl_table *table; @@ -704,7 +704,7 @@ static inline void ip4_frags_ctl_register(void) } #endif -static int ipv4_frags_init_net(struct net *net) +static int __net_init ipv4_frags_init_net(struct net *net) { /* * Fragment cache limits. We will commit 256K at one time. Should we @@ -726,7 +726,7 @@ static int ipv4_frags_init_net(struct net *net) return ip4_frags_ns_ctl_register(net); } -static void ipv4_frags_exit_net(struct net *net) +static void __net_exit ipv4_frags_exit_net(struct net *net) { ip4_frags_ns_ctl_unregister(net); inet_frags_exit_net(&net->ipv4.frags, &ip4_frags); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f36ce156cac6..7631b20490f5 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1307,7 +1307,7 @@ static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head) } } -static int ipgre_init_net(struct net *net) +static int __net_init ipgre_init_net(struct net *net) { struct ipgre_net *ign = net_generic(net, ipgre_net_id); int err; @@ -1334,7 +1334,7 @@ err_alloc_dev: return err; } -static void ipgre_exit_net(struct net *net) +static void __net_exit ipgre_exit_net(struct net *net) { struct ipgre_net *ign; LIST_HEAD(list); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index eda04fed3379..95db732e542b 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -130,7 +130,6 @@ struct ipip_net { struct net_device *fb_tunnel_dev; }; -static void ipip_fb_tunnel_init(struct net_device *dev); static void ipip_tunnel_init(struct net_device *dev); static void ipip_tunnel_setup(struct net_device *dev); @@ -730,7 +729,7 @@ static void ipip_tunnel_init(struct net_device *dev) ipip_tunnel_bind_dev(dev); } -static void ipip_fb_tunnel_init(struct net_device *dev) +static void __net_init ipip_fb_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; @@ -773,7 +772,7 @@ static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head) } } -static int ipip_init_net(struct net *net) +static int __net_init ipip_init_net(struct net *net) { struct ipip_net *ipn = net_generic(net, ipip_net_id); int err; @@ -806,7 +805,7 @@ err_alloc_dev: return err; } -static void ipip_exit_net(struct net *net) +static void __net_exit ipip_exit_net(struct net *net) { struct ipip_net *ipn = net_generic(net, ipip_net_id); LIST_HEAD(list); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 356f544c4c10..c3588b4fd979 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2430,12 +2430,12 @@ static struct tcp_seq_afinfo tcp4_seq_afinfo = { }, }; -static int tcp4_proc_init_net(struct net *net) +static int __net_init tcp4_proc_init_net(struct net *net) { return tcp_proc_register(net, &tcp4_seq_afinfo); } -static void tcp4_proc_exit_net(struct net *net) +static void __net_exit tcp4_proc_exit_net(struct net *net) { tcp_proc_unregister(net, &tcp4_seq_afinfo); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f0126fdd7e04..4f7d2122d818 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2027,12 +2027,12 @@ static struct udp_seq_afinfo udp4_seq_afinfo = { }, }; -static int udp4_proc_init_net(struct net *net) +static int __net_init udp4_proc_init_net(struct net *net) { return udp_proc_register(net, &udp4_seq_afinfo); } -static void udp4_proc_exit_net(struct net *net) +static void __net_exit 
udp4_proc_exit_net(struct net *net) { udp_proc_unregister(net, &udp4_seq_afinfo); } diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 66f79513f4a5..6610bf76369f 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -81,12 +81,12 @@ static struct udp_seq_afinfo udplite4_seq_afinfo = { }, }; -static int udplite4_proc_init_net(struct net *net) +static int __net_init udplite4_proc_init_net(struct net *net) { return udp_proc_register(net, &udplite4_seq_afinfo); } -static void udplite4_proc_exit_net(struct net *net) +static void __net_exit udplite4_proc_exit_net(struct net *net) { udp_proc_unregister(net, &udplite4_seq_afinfo); } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index de7a194a64ab..1593289155ff 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3027,14 +3027,14 @@ static const struct file_operations if6_fops = { .release = seq_release_net, }; -static int if6_proc_net_init(struct net *net) +static int __net_init if6_proc_net_init(struct net *net) { if (!proc_net_fops_create(net, "if_inet6", S_IRUGO, &if6_fops)) return -ENOMEM; return 0; } -static void if6_proc_net_exit(struct net *net) +static void __net_exit if6_proc_net_exit(struct net *net) { proc_net_remove(net, "if_inet6"); } @@ -4418,7 +4418,7 @@ static void addrconf_sysctl_unregister(struct inet6_dev *idev) #endif -static int addrconf_init_net(struct net *net) +static int __net_init addrconf_init_net(struct net *net) { int err; struct ipv6_devconf *all, *dflt; @@ -4467,7 +4467,7 @@ err_alloc_all: return err; } -static void addrconf_exit_net(struct net *net) +static void __net_exit addrconf_exit_net(struct net *net) { #ifdef CONFIG_SYSCTL __addrconf_sysctl_unregister(net->ipv6.devconf_dflt); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 12e69d364dd5..e29160ff4a0f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -999,7 +999,7 @@ err_udplite_mib: return -ENOMEM; } -static void __net_exit ipv6_cleanup_mibs(struct net *net) +static void ipv6_cleanup_mibs(struct net *net) { snmp_mib_free((void **)net->mib.udp_stats_in6); snmp_mib_free((void **)net->mib.udplite_stats_in6); @@ -1042,7 +1042,7 @@ out: #endif } -static void inet6_net_exit(struct net *net) +static void __net_exit inet6_net_exit(struct net *net) { #ifdef CONFIG_PROC_FS udp6_proc_exit(net); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index f1c74c8ef9de..c4f6ca32fa74 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -538,7 +538,7 @@ static const struct file_operations ac6_seq_fops = { .release = seq_release_net, }; -int ac6_proc_init(struct net *net) +int __net_init ac6_proc_init(struct net *net) { if (!proc_net_fops_create(net, "anycast6", S_IRUGO, &ac6_seq_fops)) return -ENOMEM; diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index b7aa7c64cc4a..551882b9dfd6 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -262,7 +262,7 @@ static struct fib_rules_ops fib6_rules_ops_template = { .fro_net = &init_net, }; -static int fib6_rules_net_init(struct net *net) +static int __net_init fib6_rules_net_init(struct net *net) { struct fib_rules_ops *ops; int err = -ENOMEM; @@ -291,7 +291,7 @@ out_fib6_rules_ops: goto out; } -static void fib6_rules_net_exit(struct net *net) +static void __net_exit fib6_rules_net_exit(struct net *net) { fib_rules_unregister(net->ipv6.fib6_rules_ops); } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 4ae661bc3677..217dbc2e28d4 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -951,7 +951,7 @@ ctl_table ipv6_icmp_table_template[] = { { }, }; 
-struct ctl_table *ipv6_icmp_sysctl_init(struct net *net) +struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net) { struct ctl_table *table; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 0e93ca56eb69..f626ea2b304f 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -239,7 +239,7 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) return NULL; } -static void fib6_tables_init(struct net *net) +static void __net_init fib6_tables_init(struct net *net) { fib6_link_table(net, net->ipv6.fib6_main_tbl); fib6_link_table(net, net->ipv6.fib6_local_tbl); @@ -262,7 +262,7 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl, return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl, flags); } -static void fib6_tables_init(struct net *net) +static void __net_init fib6_tables_init(struct net *net) { fib6_link_table(net, net->ipv6.fib6_main_tbl); } @@ -1469,7 +1469,7 @@ static void fib6_gc_timer_cb(unsigned long arg) fib6_run_gc(0, (struct net *)arg); } -static int fib6_net_init(struct net *net) +static int __net_init fib6_net_init(struct net *net) { setup_timer(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, (unsigned long)net); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 6e7bffa2205e..e41eba8aacf1 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -154,7 +154,7 @@ static void ip6_fl_gc(unsigned long dummy) write_unlock(&ip6_fl_lock); } -static void ip6_fl_purge(struct net *net) +static void __net_exit ip6_fl_purge(struct net *net) { int i; @@ -735,7 +735,7 @@ static const struct file_operations ip6fl_seq_fops = { .release = seq_release_net, }; -static int ip6_flowlabel_proc_init(struct net *net) +static int __net_init ip6_flowlabel_proc_init(struct net *net) { if (!proc_net_fops_create(net, "ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops)) @@ -743,7 +743,7 @@ static int ip6_flowlabel_proc_init(struct net *net) return 0; } -static void ip6_flowlabel_proc_fini(struct net *net) +static void __net_exit ip6_flowlabel_proc_fini(struct net *net) { proc_net_remove(net, "ip6_flowlabel"); } @@ -754,11 +754,10 @@ static inline int ip6_flowlabel_proc_init(struct net *net) } static inline void ip6_flowlabel_proc_fini(struct net *net) { - return ; } #endif -static inline void ip6_flowlabel_net_exit(struct net *net) +static void __net_exit ip6_flowlabel_net_exit(struct net *net) { ip6_fl_purge(net); ip6_flowlabel_proc_fini(net); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index d453d07b0dfe..fbd786981aa9 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -74,7 +74,6 @@ MODULE_LICENSE("GPL"); (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \ (HASH_SIZE - 1)) -static void ip6_fb_tnl_dev_init(struct net_device *dev); static void ip6_tnl_dev_init(struct net_device *dev); static void ip6_tnl_dev_setup(struct net_device *dev); @@ -1364,7 +1363,7 @@ static void ip6_tnl_dev_init(struct net_device *dev) * Return: 0 **/ -static void ip6_fb_tnl_dev_init(struct net_device *dev) +static void __net_init ip6_fb_tnl_dev_init(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct net *net = dev_net(dev); @@ -1388,7 +1387,7 @@ static struct xfrm6_tunnel ip6ip6_handler = { .priority = 1, }; -static void ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n) +static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n) { int h; struct ip6_tnl *t; @@ -1407,7 +1406,7 @@ static void ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n) unregister_netdevice_many(&list); } -static int 
ip6_tnl_init_net(struct net *net) +static int __net_init ip6_tnl_init_net(struct net *net) { struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); int err; @@ -1436,7 +1435,7 @@ err_alloc_dev: return err; } -static void ip6_tnl_exit_net(struct net *net) +static void __net_exit ip6_tnl_exit_net(struct net *net) { struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 1f9c44442e65..25f6cca79e6b 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2646,7 +2646,7 @@ static const struct file_operations igmp6_mcf_seq_fops = { .release = seq_release_net, }; -static int igmp6_proc_init(struct net *net) +static int __net_init igmp6_proc_init(struct net *net) { int err; @@ -2666,23 +2666,22 @@ out_proc_net_igmp6: goto out; } -static void igmp6_proc_exit(struct net *net) +static void __net_exit igmp6_proc_exit(struct net *net) { proc_net_remove(net, "mcfilter6"); proc_net_remove(net, "igmp6"); } #else -static int igmp6_proc_init(struct net *net) +static inline int igmp6_proc_init(struct net *net) { return 0; } -static void igmp6_proc_exit(struct net *net) +static inline void igmp6_proc_exit(struct net *net) { - ; } #endif -static int igmp6_net_init(struct net *net) +static int __net_init igmp6_net_init(struct net *net) { int err; @@ -2708,7 +2707,7 @@ out_sock_create: goto out; } -static void igmp6_net_exit(struct net *net) +static void __net_exit igmp6_net_exit(struct net *net) { inet_ctl_sock_destroy(net->ipv6.igmp_sk); igmp6_proc_exit(net); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index c45852798092..2dfec6bb8ada 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1772,7 +1772,7 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void __user *bu #endif -static int ndisc_net_init(struct net *net) +static int __net_init ndisc_net_init(struct net *net) { struct ipv6_pinfo *np; struct sock *sk; @@ -1797,7 +1797,7 @@ static int ndisc_net_init(struct net *net) return 0; } -static void ndisc_net_exit(struct net *net) +static void __net_exit ndisc_net_exit(struct net *net) { inet_ctl_sock_destroy(net->ipv6.ndisc_sk); } diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 7b197b7132e0..02f20016b4c9 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -267,7 +267,7 @@ int snmp6_unregister_dev(struct inet6_dev *idev) return 0; } -static int ipv6_proc_init_net(struct net *net) +static int __net_init ipv6_proc_init_net(struct net *net) { if (!proc_net_fops_create(net, "sockstat6", S_IRUGO, &sockstat6_seq_fops)) @@ -288,7 +288,7 @@ proc_dev_snmp6_fail: return -ENOMEM; } -static void ipv6_proc_exit_net(struct net *net) +static void __net_exit ipv6_proc_exit_net(struct net *net) { proc_net_remove(net, "sockstat6"); proc_net_remove(net, "dev_snmp6"); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 926ce8eeffaf..ed31c37c6e39 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1275,7 +1275,7 @@ static const struct file_operations raw6_seq_fops = { .release = seq_release_net, }; -static int raw6_init_net(struct net *net) +static int __net_init raw6_init_net(struct net *net) { if (!proc_net_fops_create(net, "raw6", S_IRUGO, &raw6_seq_fops)) return -ENOMEM; @@ -1283,7 +1283,7 @@ static int raw6_init_net(struct net *net) return 0; } -static void raw6_exit_net(struct net *net) +static void __net_exit raw6_exit_net(struct net *net) { proc_net_remove(net, "raw6"); } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 2cddea3bd6be..fa38fc7cc6e9 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c 
@@ -672,7 +672,7 @@ static struct ctl_table ip6_frags_ctl_table[] = { { } }; -static int ip6_frags_ns_sysctl_register(struct net *net) +static int __net_init ip6_frags_ns_sysctl_register(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; @@ -702,7 +702,7 @@ err_alloc: return -ENOMEM; } -static void ip6_frags_ns_sysctl_unregister(struct net *net) +static void __net_exit ip6_frags_ns_sysctl_unregister(struct net *net) { struct ctl_table *table; @@ -745,7 +745,7 @@ static inline void ip6_frags_sysctl_unregister(void) } #endif -static int ipv6_frags_init_net(struct net *net) +static int __net_init ipv6_frags_init_net(struct net *net) { net->ipv6.frags.high_thresh = 256 * 1024; net->ipv6.frags.low_thresh = 192 * 1024; @@ -756,7 +756,7 @@ static int ipv6_frags_init_net(struct net *net) return ip6_frags_ns_sysctl_register(net); } -static void ipv6_frags_exit_net(struct net *net) +static void __net_exit ipv6_frags_exit_net(struct net *net) { ip6_frags_ns_sysctl_unregister(net); inet_frags_exit_net(&net->ipv6.frags, &ip6_frags); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c2bd74c5f8d9..8500156f2637 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2612,7 +2612,7 @@ ctl_table ipv6_route_table_template[] = { { } }; -struct ctl_table *ipv6_route_sysctl_init(struct net *net) +struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) { struct ctl_table *table; @@ -2637,7 +2637,7 @@ struct ctl_table *ipv6_route_sysctl_init(struct net *net) } #endif -static int ip6_route_net_init(struct net *net) +static int __net_init ip6_route_net_init(struct net *net) { int ret = -ENOMEM; @@ -2702,7 +2702,7 @@ out_ip6_dst_ops: goto out; } -static void ip6_route_net_exit(struct net *net) +static void __net_exit ip6_route_net_exit(struct net *net) { #ifdef CONFIG_PROC_FS proc_net_remove(net, "ipv6_route"); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 976e68244b99..10207cc8cc0e 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -62,7 +62,6 @@ #define HASH_SIZE 16 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) -static void ipip6_fb_tunnel_init(struct net_device *dev); static void ipip6_tunnel_init(struct net_device *dev); static void ipip6_tunnel_setup(struct net_device *dev); @@ -1120,7 +1119,7 @@ static void ipip6_tunnel_init(struct net_device *dev) ipip6_tunnel_bind_dev(dev); } -static void ipip6_fb_tunnel_init(struct net_device *dev) +static void __net_init ipip6_fb_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; @@ -1145,7 +1144,7 @@ static struct xfrm_tunnel sit_handler = { .priority = 1, }; -static void sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head) +static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head) { int prio; @@ -1162,7 +1161,7 @@ static void sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head) } } -static int sit_init_net(struct net *net) +static int __net_init sit_init_net(struct net *net) { struct sit_net *sitn = net_generic(net, sit_net_id); int err; @@ -1195,7 +1194,7 @@ err_alloc_dev: return err; } -static void sit_exit_net(struct net *net) +static void __net_exit sit_exit_net(struct net *net) { struct sit_net *sitn = net_generic(net, sit_net_id); LIST_HEAD(list); diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index c690736885b4..f841d93bf987 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -55,7 +55,7 @@ struct ctl_path 
net_ipv6_ctl_path[] = { }; EXPORT_SYMBOL_GPL(net_ipv6_ctl_path); -static int ipv6_sysctl_net_init(struct net *net) +static int __net_init ipv6_sysctl_net_init(struct net *net) { struct ctl_table *ipv6_table; struct ctl_table *ipv6_route_table; @@ -98,7 +98,7 @@ out_ipv6_table: goto out; } -static void ipv6_sysctl_net_exit(struct net *net) +static void __net_exit ipv6_sysctl_net_exit(struct net *net) { struct ctl_table *ipv6_table; struct ctl_table *ipv6_route_table; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 82f2dea0e39e..6963a6b6763e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2113,7 +2113,7 @@ static struct tcp_seq_afinfo tcp6_seq_afinfo = { }, }; -int tcp6_proc_init(struct net *net) +int __net_init tcp6_proc_init(struct net *net) { return tcp_proc_register(net, &tcp6_seq_afinfo); } @@ -2182,18 +2182,18 @@ static struct inet_protosw tcpv6_protosw = { INET_PROTOSW_ICSK, }; -static int tcpv6_net_init(struct net *net) +static int __net_init tcpv6_net_init(struct net *net) { return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6, SOCK_RAW, IPPROTO_TCP, net); } -static void tcpv6_net_exit(struct net *net) +static void __net_exit tcpv6_net_exit(struct net *net) { inet_ctl_sock_destroy(net->ipv6.tcp_sk); } -static void tcpv6_net_exit_batch(struct list_head *net_exit_list) +static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list) { inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6); } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 69ebdbe78c47..34efb3589ffa 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1396,7 +1396,7 @@ static struct udp_seq_afinfo udp6_seq_afinfo = { }, }; -int udp6_proc_init(struct net *net) +int __net_init udp6_proc_init(struct net *net) { return udp_proc_register(net, &udp6_seq_afinfo); } diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 6ea6938919e6..5f48fadc27f7 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -104,12 +104,12 @@ static struct udp_seq_afinfo udplite6_seq_afinfo = { }, }; -static int udplite6_proc_init_net(struct net *net) +static int __net_init udplite6_proc_init_net(struct net *net) { return udp_proc_register(net, &udplite6_seq_afinfo); } -static void udplite6_proc_exit_net(struct net *net) +static void __net_exit udplite6_proc_exit_net(struct net *net) { udp_proc_unregister(net, &udplite6_seq_afinfo); } diff --git a/net/key/af_key.c b/net/key/af_key.c index 76fa6fef6473..4744b1f6372f 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3738,17 +3738,17 @@ static int __net_init pfkey_init_proc(struct net *net) return 0; } -static void pfkey_exit_proc(struct net *net) +static void __net_exit pfkey_exit_proc(struct net *net) { proc_net_remove(net, "pfkey"); } #else -static int __net_init pfkey_init_proc(struct net *net) +static inline int pfkey_init_proc(struct net *net) { return 0; } -static void pfkey_exit_proc(struct net *net) +static inline void pfkey_exit_proc(struct net *net) { } #endif diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index e0516a22be2e..a97acfe7e770 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2448,7 +2448,7 @@ static const struct file_operations packet_seq_fops = { #endif -static int packet_net_init(struct net *net) +static int __net_init packet_net_init(struct net *net) { rwlock_init(&net->packet.sklist_lock); INIT_HLIST_HEAD(&net->packet.sklist); @@ -2459,7 +2459,7 @@ static int packet_net_init(struct net *net) return 0; } -static void packet_net_exit(struct net *net) +static void __net_exit 
packet_net_exit(struct net *net) { proc_net_remove(net, "packet"); } diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index bc4a33bf2d3d..c597cc53a6fb 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -311,7 +311,7 @@ static struct notifier_block phonet_device_notifier = { }; /* Per-namespace Phonet devices handling */ -static int phonet_init_net(struct net *net) +static int __net_init phonet_init_net(struct net *net) { struct phonet_net *pnn = net_generic(net, phonet_net_id); @@ -324,7 +324,7 @@ static int phonet_init_net(struct net *net) return 0; } -static void phonet_exit_net(struct net *net) +static void __net_exit phonet_exit_net(struct net *net) { struct phonet_net *pnn = net_generic(net, phonet_net_id); struct net_device *dev; diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 0b15d7250c40..53196009160a 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -71,7 +71,7 @@ static struct ctl_table_root net_sysctl_ro_root = { .permissions = net_ctl_ro_header_perms, }; -static int sysctl_net_init(struct net *net) +static int __net_init sysctl_net_init(struct net *net) { setup_sysctl_set(&net->sysctls, &net_sysctl_ro_root.default_set, @@ -79,7 +79,7 @@ static int sysctl_net_init(struct net *net) return 0; } -static void sysctl_net_exit(struct net *net) +static void __net_exit sysctl_net_exit(struct net *net) { WARN_ON(!list_empty(&net->sysctls.list)); return; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f25511903115..9bc9b92bc099 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2224,7 +2224,7 @@ static const struct net_proto_family unix_family_ops = { }; -static int unix_net_init(struct net *net) +static int __net_init unix_net_init(struct net *net) { int error = -ENOMEM; @@ -2243,7 +2243,7 @@ out: return error; } -static void unix_net_exit(struct net *net) +static void __net_exit unix_net_exit(struct net *net) { unix_sysctl_unregister(net); proc_net_remove(net, "unix"); diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c index 708f5df6b7f0..d095c7be10d0 100644 --- a/net/unix/sysctl_net_unix.c +++ b/net/unix/sysctl_net_unix.c @@ -31,7 +31,7 @@ static struct ctl_path unix_path[] = { { }, }; -int unix_sysctl_register(struct net *net) +int __net_init unix_sysctl_register(struct net *net) { struct ctl_table *table; diff --git a/net/wireless/wext-proc.c b/net/wireless/wext-proc.c index 273a7f77c834..8bafa31fa9f8 100644 --- a/net/wireless/wext-proc.c +++ b/net/wireless/wext-proc.c @@ -140,7 +140,7 @@ static const struct file_operations wireless_seq_fops = { .release = seq_release_net, }; -int wext_proc_init(struct net *net) +int __net_init wext_proc_init(struct net *net) { /* Create /proc/net/wireless entry */ if (!proc_net_fops_create(net, "wireless", S_IRUGO, &wireless_seq_fops)) @@ -149,7 +149,7 @@ int wext_proc_init(struct net *net) return 0; } -void wext_proc_exit(struct net *net) +void __net_exit wext_proc_exit(struct net *net) { proc_net_remove(net, "wireless"); } diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c index 2e221f2cad7e..2c4d6cdcba49 100644 --- a/net/xfrm/xfrm_sysctl.c +++ b/net/xfrm/xfrm_sysctl.c @@ -2,7 +2,7 @@ #include #include -static void __xfrm_sysctl_init(struct net *net) +static void __net_init __xfrm_sysctl_init(struct net *net) { net->xfrm.sysctl_aevent_etime = XFRM_AE_ETIME; net->xfrm.sysctl_aevent_rseqth = XFRM_AE_SEQT_SIZE; @@ -64,7 +64,7 @@ out_kmemdup: return -ENOMEM; } -void xfrm_sysctl_fini(struct net *net) +void __net_exit xfrm_sysctl_fini(struct net *net) { struct ctl_table 
*table; -- cgit v1.2.2 From 83fc81024bd8572f31db784f8c0079e999a4fa44 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 18 Jan 2010 08:07:50 +0100 Subject: netfilter: xt_connlimit: netns support Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- net/netfilter/xt_connlimit.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 8103bef78e44..0d9d18ea2b09 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -90,7 +90,8 @@ same_source_net(const union nf_inet_addr *addr, } } -static int count_them(struct xt_connlimit_data *data, +static int count_them(struct net *net, + struct xt_connlimit_data *data, const struct nf_conntrack_tuple *tuple, const union nf_inet_addr *addr, const union nf_inet_addr *mask, @@ -113,7 +114,7 @@ static int count_them(struct xt_connlimit_data *data, /* check the saved connections */ list_for_each_entry_safe(conn, tmp, hash, list) { - found = nf_conntrack_find_get(&init_net, &conn->tuple); + found = nf_conntrack_find_get(net, &conn->tuple); found_ct = NULL; if (found != NULL) @@ -171,6 +172,7 @@ static int count_them(struct xt_connlimit_data *data, static bool connlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) { + struct net *net = dev_net(par->in ? par->in : par->out); const struct xt_connlimit_info *info = par->matchinfo; union nf_inet_addr addr; struct nf_conntrack_tuple tuple; @@ -195,7 +197,7 @@ connlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) } spin_lock_bh(&info->data->lock); - connections = count_them(info->data, tuple_ptr, &addr, + connections = count_them(net, info->data, tuple_ptr, &addr, &info->mask, par->family); spin_unlock_bh(&info->data->lock); -- cgit v1.2.2 From a1004d8e3d463012f231bab104325ecb15637f78 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 18 Jan 2010 08:14:50 +0100 Subject: netfilter: xt_hashlimit: simplify seqfile code Simply pass hashtable to seqfile iterators, proc entry itself is not needed. 
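For reference, the simplification relies on the standard seq_file idiom: resolve PDE(inode)->data once in the ->open handler, stash the result in seq_file->private, and let the ->start/->next/->stop/->show iterators read s->private directly. A minimal sketch of that idiom, using placeholder foo_* names rather than the dl_seq_* functions touched below:

static int foo_seq_open(struct inode *inode, struct file *file)
{
	int ret = seq_open(file, &foo_seq_ops);

	if (!ret) {
		struct seq_file *sf = file->private_data;

		/* store the table itself, not the proc_dir_entry */
		sf->private = PDE(inode)->data;
	}
	return ret;
}

static void *foo_seq_start(struct seq_file *s, loff_t *pos)
{
	struct foo_table *table = s->private;	/* no PDE() hop needed */

	spin_lock_bh(&table->lock);
	return foo_get_bucket(table, *pos);
}
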
Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- net/netfilter/xt_hashlimit.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 5bdc1fbf3ad7..944fd11c8989 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -841,8 +841,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { static void *dl_seq_start(struct seq_file *s, loff_t *pos) __acquires(htable->lock) { - struct proc_dir_entry *pde = s->private; - struct xt_hashlimit_htable *htable = pde->data; + struct xt_hashlimit_htable *htable = s->private; unsigned int *bucket; spin_lock_bh(&htable->lock); @@ -859,8 +858,7 @@ static void *dl_seq_start(struct seq_file *s, loff_t *pos) static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos) { - struct proc_dir_entry *pde = s->private; - struct xt_hashlimit_htable *htable = pde->data; + struct xt_hashlimit_htable *htable = s->private; unsigned int *bucket = (unsigned int *)v; *pos = ++(*bucket); @@ -874,8 +872,7 @@ static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos) static void dl_seq_stop(struct seq_file *s, void *v) __releases(htable->lock) { - struct proc_dir_entry *pde = s->private; - struct xt_hashlimit_htable *htable = pde->data; + struct xt_hashlimit_htable *htable = s->private; unsigned int *bucket = (unsigned int *)v; kfree(bucket); @@ -917,8 +914,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, static int dl_seq_show(struct seq_file *s, void *v) { - struct proc_dir_entry *pde = s->private; - struct xt_hashlimit_htable *htable = pde->data; + struct xt_hashlimit_htable *htable = s->private; unsigned int *bucket = (unsigned int *)v; struct dsthash_ent *ent; struct hlist_node *pos; @@ -944,7 +940,7 @@ static int dl_proc_open(struct inode *inode, struct file *file) if (!ret) { struct seq_file *sf = file->private_data; - sf->private = PDE(inode); + sf->private = PDE(inode)->data; } return ret; } -- cgit v1.2.2 From a83d8e8d099fc373a5ca7112ad08c553bb2c180f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 18 Jan 2010 08:21:13 +0100 Subject: netfilter: xtables: add struct xt_mtchk_param::net Some complex match modules (like xt_hashlimit/xt_recent) want netns information at constructor and destructor time. We propably can play games at match destruction time, because netns can be passed in object, but I think it's cleaner to explicitly pass netns. Add ->net, make sure it's set from ebtables/iptables/ip6tables code. 
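The payoff is on the match side: once the table code fills in mtpar.net (as the hunks below do for ebtables, iptables and ip6tables), a match constructor can look up state for the namespace the rule belongs to instead of hardcoding init_net. A rough sketch of such a consumer, with hypothetical foo_* names standing in for a real match module:

static bool foo_mt_check(const struct xt_mtchk_param *par)
{
	/* state of the namespace the rule is being loaded into,
	 * not of the initial namespace */
	struct foo_net *fn = net_generic(par->net, foo_net_id);

	return foo_table_get(fn, par->matchinfo);
}
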
Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- net/bridge/netfilter/ebtables.c | 14 +++++++++----- net/ipv4/netfilter/ip_tables.c | 24 ++++++++++++++---------- net/ipv6/netfilter/ip6_tables.c | 14 ++++++++------ 3 files changed, 31 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index bd1c65425d4f..c77bab986696 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -619,7 +619,9 @@ ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt) } static inline int -ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, +ebt_check_entry(struct ebt_entry *e, + struct net *net, + struct ebt_table_info *newinfo, const char *name, unsigned int *cnt, struct ebt_cl_stack *cl_s, unsigned int udc_cnt) { @@ -671,6 +673,7 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, } i = 0; + mtpar.net = net; mtpar.table = tgpar.table = name; mtpar.entryinfo = tgpar.entryinfo = e; mtpar.hook_mask = tgpar.hook_mask = hookmask; @@ -808,7 +811,8 @@ letscontinue: } /* do the parsing of the table/chains/entries/matches/watchers/targets, heh */ -static int translate_table(char *name, struct ebt_table_info *newinfo) +static int translate_table(struct net *net, char *name, + struct ebt_table_info *newinfo) { unsigned int i, j, k, udc_cnt; int ret; @@ -917,7 +921,7 @@ static int translate_table(char *name, struct ebt_table_info *newinfo) /* used to know what we need to clean up if something goes wrong */ i = 0; ret = EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, - ebt_check_entry, newinfo, name, &i, cl_s, udc_cnt); + ebt_check_entry, net, newinfo, name, &i, cl_s, udc_cnt); if (ret != 0) { EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, ebt_cleanup_entry, &i); @@ -1017,7 +1021,7 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) if (ret != 0) goto free_counterstmp; - ret = translate_table(tmp.name, newinfo); + ret = translate_table(net, tmp.name, newinfo); if (ret != 0) goto free_counterstmp; @@ -1154,7 +1158,7 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table) newinfo->hook_entry[i] = p + ((char *)repl->hook_entry[i] - repl->entries); } - ret = translate_table(repl->name, newinfo); + ret = translate_table(net, repl->name, newinfo); if (ret != 0) { BUGPRINT("Translate_table failed\n"); goto free_chainstack; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 572330a552ef..a069d72d9482 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -661,8 +661,8 @@ static int check_target(struct ipt_entry *e, const char *name) } static int -find_check_entry(struct ipt_entry *e, const char *name, unsigned int size, - unsigned int *i) +find_check_entry(struct ipt_entry *e, struct net *net, const char *name, + unsigned int size, unsigned int *i) { struct ipt_entry_target *t; struct xt_target *target; @@ -675,6 +675,7 @@ find_check_entry(struct ipt_entry *e, const char *name, unsigned int size, return ret; j = 0; + mtpar.net = net; mtpar.table = name; mtpar.entryinfo = &e->ip; mtpar.hook_mask = e->comefrom; @@ -798,7 +799,8 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i) /* Checks and translates the user-supplied table segment (held in newinfo) */ static int -translate_table(const char *name, +translate_table(struct net *net, + const char *name, unsigned int valid_hooks, struct xt_table_info *newinfo, void *entry0, @@ -860,7 +862,7 @@ 
translate_table(const char *name, /* Finally, each sanity check must pass */ i = 0; ret = IPT_ENTRY_ITERATE(entry0, newinfo->size, - find_check_entry, name, size, &i); + find_check_entry, net, name, size, &i); if (ret != 0) { IPT_ENTRY_ITERATE(entry0, newinfo->size, @@ -1303,7 +1305,7 @@ do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_table(tmp.name, tmp.valid_hooks, + ret = translate_table(net, tmp.name, tmp.valid_hooks, newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, tmp.hook_entry, tmp.underflow); if (ret != 0) @@ -1655,7 +1657,7 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, } static int -compat_check_entry(struct ipt_entry *e, const char *name, +compat_check_entry(struct ipt_entry *e, struct net *net, const char *name, unsigned int *i) { struct xt_mtchk_param mtpar; @@ -1663,6 +1665,7 @@ compat_check_entry(struct ipt_entry *e, const char *name, int ret; j = 0; + mtpar.net = net; mtpar.table = name; mtpar.entryinfo = &e->ip; mtpar.hook_mask = e->comefrom; @@ -1684,7 +1687,8 @@ compat_check_entry(struct ipt_entry *e, const char *name, } static int -translate_compat_table(const char *name, +translate_compat_table(struct net *net, + const char *name, unsigned int valid_hooks, struct xt_table_info **pinfo, void **pentry0, @@ -1773,7 +1777,7 @@ translate_compat_table(const char *name, i = 0; ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry, - name, &i); + net, name, &i); if (ret) { j -= i; COMPAT_IPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i, @@ -1833,7 +1837,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_compat_table(tmp.name, tmp.valid_hooks, + ret = translate_compat_table(net, tmp.name, tmp.valid_hooks, &newinfo, &loc_cpu_entry, tmp.size, tmp.num_entries, tmp.hook_entry, tmp.underflow); @@ -2086,7 +2090,7 @@ struct xt_table *ipt_register_table(struct net *net, loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; memcpy(loc_cpu_entry, repl->entries, repl->size); - ret = translate_table(table->name, table->valid_hooks, + ret = translate_table(net, table->name, table->valid_hooks, newinfo, loc_cpu_entry, repl->size, repl->num_entries, repl->hook_entry, diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 480d7f8c9802..a825940a92ef 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -693,8 +693,8 @@ static int check_target(struct ip6t_entry *e, const char *name) } static int -find_check_entry(struct ip6t_entry *e, const char *name, unsigned int size, - unsigned int *i) +find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, + unsigned int size, unsigned int *i) { struct ip6t_entry_target *t; struct xt_target *target; @@ -707,6 +707,7 @@ find_check_entry(struct ip6t_entry *e, const char *name, unsigned int size, return ret; j = 0; + mtpar.net = net; mtpar.table = name; mtpar.entryinfo = &e->ipv6; mtpar.hook_mask = e->comefrom; @@ -830,7 +831,8 @@ cleanup_entry(struct ip6t_entry *e, unsigned int *i) /* Checks and translates the user-supplied table segment (held in newinfo) */ static int -translate_table(const char *name, +translate_table(struct net *net, + const char *name, unsigned int valid_hooks, struct xt_table_info *newinfo, void *entry0, @@ -892,7 +894,7 @@ translate_table(const char *name, /* Finally, each sanity check must pass */ i = 0; ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size, - find_check_entry, name, size, &i); + 
find_check_entry, net, name, size, &i); if (ret != 0) { IP6T_ENTRY_ITERATE(entry0, newinfo->size, @@ -1336,7 +1338,7 @@ do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_table(tmp.name, tmp.valid_hooks, + ret = translate_table(net, tmp.name, tmp.valid_hooks, newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, tmp.hook_entry, tmp.underflow); if (ret != 0) @@ -2121,7 +2123,7 @@ struct xt_table *ip6t_register_table(struct net *net, loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; memcpy(loc_cpu_entry, repl->entries, repl->size); - ret = translate_table(table->name, table->valid_hooks, + ret = translate_table(net, table->name, table->valid_hooks, newinfo, loc_cpu_entry, repl->size, repl->num_entries, repl->hook_entry, -- cgit v1.2.2 From f54e9367f8499a9bf6b2afbc0dce63e1d53c525a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 18 Jan 2010 08:25:47 +0100 Subject: netfilter: xtables: add struct xt_mtdtor_param::net Add ->net to match destructor list like ->net in constructor list. Make sure it's set in ebtables/iptables/ip6tables, this requires to propagate netns up to *_unregister_table(). Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- net/bridge/netfilter/ebtable_broute.c | 2 +- net/bridge/netfilter/ebtable_filter.c | 2 +- net/bridge/netfilter/ebtable_nat.c | 2 +- net/bridge/netfilter/ebtables.c | 19 ++++++++--------- net/ipv4/netfilter/ip_tables.c | 25 ++++++++++++----------- net/ipv4/netfilter/iptable_filter.c | 2 +- net/ipv4/netfilter/iptable_mangle.c | 2 +- net/ipv4/netfilter/iptable_raw.c | 2 +- net/ipv4/netfilter/iptable_security.c | 2 +- net/ipv4/netfilter/nf_nat_rule.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 37 ++++++++++++++++++---------------- net/ipv6/netfilter/ip6table_filter.c | 2 +- net/ipv6/netfilter/ip6table_mangle.c | 2 +- net/ipv6/netfilter/ip6table_raw.c | 2 +- net/ipv6/netfilter/ip6table_security.c | 2 +- 15 files changed, 55 insertions(+), 50 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index d32ab13e728c..ae3f106c3908 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -71,7 +71,7 @@ static int __net_init broute_net_init(struct net *net) static void __net_exit broute_net_exit(struct net *net) { - ebt_unregister_table(net->xt.broute_table); + ebt_unregister_table(net, net->xt.broute_table); } static struct pernet_operations broute_net_ops = { diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 60b1a6ca7185..42e6bd094574 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -107,7 +107,7 @@ static int __net_init frame_filter_net_init(struct net *net) static void __net_exit frame_filter_net_exit(struct net *net) { - ebt_unregister_table(net->xt.frame_filter); + ebt_unregister_table(net, net->xt.frame_filter); } static struct pernet_operations frame_filter_net_ops = { diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 4a98804203b0..6dc2f878ae05 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -107,7 +107,7 @@ static int __net_init frame_nat_net_init(struct net *net) static void __net_exit frame_nat_net_exit(struct net *net) { - ebt_unregister_table(net->xt.frame_nat); + ebt_unregister_table(net, net->xt.frame_nat); } static struct pernet_operations frame_nat_net_ops = { diff --git 
a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index c77bab986696..1aa0e4c1f52d 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -561,13 +561,14 @@ ebt_get_udc_positions(struct ebt_entry *e, struct ebt_table_info *newinfo, } static inline int -ebt_cleanup_match(struct ebt_entry_match *m, unsigned int *i) +ebt_cleanup_match(struct ebt_entry_match *m, struct net *net, unsigned int *i) { struct xt_mtdtor_param par; if (i && (*i)-- == 0) return 1; + par.net = net; par.match = m->u.match; par.matchinfo = m->data; par.family = NFPROTO_BRIDGE; @@ -595,7 +596,7 @@ ebt_cleanup_watcher(struct ebt_entry_watcher *w, unsigned int *i) } static inline int -ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt) +ebt_cleanup_entry(struct ebt_entry *e, struct net *net, unsigned int *cnt) { struct xt_tgdtor_param par; struct ebt_entry_target *t; @@ -606,7 +607,7 @@ ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt) if (cnt && (*cnt)-- == 0) return 1; EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, NULL); - EBT_MATCH_ITERATE(e, ebt_cleanup_match, NULL); + EBT_MATCH_ITERATE(e, ebt_cleanup_match, net, NULL); t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); par.target = t->u.target; @@ -731,7 +732,7 @@ ebt_check_entry(struct ebt_entry *e, cleanup_watchers: EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, &j); cleanup_matches: - EBT_MATCH_ITERATE(e, ebt_cleanup_match, &i); + EBT_MATCH_ITERATE(e, ebt_cleanup_match, net, &i); return ret; } @@ -924,7 +925,7 @@ static int translate_table(struct net *net, char *name, ebt_check_entry, net, newinfo, name, &i, cl_s, udc_cnt); if (ret != 0) { EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, - ebt_cleanup_entry, &i); + ebt_cleanup_entry, net, &i); } vfree(cl_s); return ret; @@ -1074,7 +1075,7 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) /* decrease module count and free resources */ EBT_ENTRY_ITERATE(table->entries, table->entries_size, - ebt_cleanup_entry, NULL); + ebt_cleanup_entry, net, NULL); vfree(table->entries); if (table->chainstack) { @@ -1091,7 +1092,7 @@ free_unlock: mutex_unlock(&ebt_mutex); free_iterate: EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, - ebt_cleanup_entry, NULL); + ebt_cleanup_entry, net, NULL); free_counterstmp: vfree(counterstmp); /* can be initialized in translate_table() */ @@ -1208,7 +1209,7 @@ out: return ERR_PTR(ret); } -void ebt_unregister_table(struct ebt_table *table) +void ebt_unregister_table(struct net *net, struct ebt_table *table) { int i; @@ -1220,7 +1221,7 @@ void ebt_unregister_table(struct ebt_table *table) list_del(&table->list); mutex_unlock(&ebt_mutex); EBT_ENTRY_ITERATE(table->private->entries, table->private->entries_size, - ebt_cleanup_entry, NULL); + ebt_cleanup_entry, net, NULL); if (table->private->nentries) module_put(table->me); vfree(table->private->entries); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index a069d72d9482..cfaba0e2e6fc 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -553,13 +553,14 @@ mark_source_chains(struct xt_table_info *newinfo, } static int -cleanup_match(struct ipt_entry_match *m, unsigned int *i) +cleanup_match(struct ipt_entry_match *m, struct net *net, unsigned int *i) { struct xt_mtdtor_param par; if (i && (*i)-- == 0) return 1; + par.net = net; par.match = m->u.kernel.match; par.matchinfo = m->data; par.family = NFPROTO_IPV4; @@ -705,7 +706,7 @@ find_check_entry(struct ipt_entry *e, struct 
net *net, const char *name, err: module_put(t->u.kernel.target->me); cleanup_matches: - IPT_MATCH_ITERATE(e, cleanup_match, &j); + IPT_MATCH_ITERATE(e, cleanup_match, net, &j); return ret; } @@ -775,7 +776,7 @@ check_entry_size_and_hooks(struct ipt_entry *e, } static int -cleanup_entry(struct ipt_entry *e, unsigned int *i) +cleanup_entry(struct ipt_entry *e, struct net *net, unsigned int *i) { struct xt_tgdtor_param par; struct ipt_entry_target *t; @@ -784,7 +785,7 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i) return 1; /* Cleanup all matches */ - IPT_MATCH_ITERATE(e, cleanup_match, NULL); + IPT_MATCH_ITERATE(e, cleanup_match, net, NULL); t = ipt_get_target(e); par.target = t->u.kernel.target; @@ -866,7 +867,7 @@ translate_table(struct net *net, if (ret != 0) { IPT_ENTRY_ITERATE(entry0, newinfo->size, - cleanup_entry, &i); + cleanup_entry, net, &i); return ret; } @@ -1260,7 +1261,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, - NULL); + net, NULL); xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, sizeof(struct xt_counters) * num_counters) != 0) @@ -1320,7 +1321,7 @@ do_replace(struct net *net, void __user *user, unsigned int len) return 0; free_newinfo_untrans: - IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); + IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, net, NULL); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -1682,7 +1683,7 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name, return 0; cleanup_matches: - IPT_MATCH_ITERATE(e, cleanup_match, &j); + IPT_MATCH_ITERATE(e, cleanup_match, net, &j); return ret; } @@ -1782,7 +1783,7 @@ translate_compat_table(struct net *net, j -= i; COMPAT_IPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i, compat_release_entry, &j); - IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i); + IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, net, &i); xt_free_table_info(newinfo); return ret; } @@ -1853,7 +1854,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return 0; free_newinfo_untrans: - IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); + IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, net, NULL); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -2112,7 +2113,7 @@ out: return ERR_PTR(ret); } -void ipt_unregister_table(struct xt_table *table) +void ipt_unregister_table(struct net *net, struct xt_table *table) { struct xt_table_info *private; void *loc_cpu_entry; @@ -2122,7 +2123,7 @@ void ipt_unregister_table(struct xt_table *table) /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; - IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL); + IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, net, NULL); if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index df566cbd68e5..dee90eb8aa47 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -138,7 +138,7 @@ static int __net_init iptable_filter_net_init(struct net *net) static void __net_exit iptable_filter_net_exit(struct net *net) { - 
ipt_unregister_table(net->ipv4.iptable_filter); + ipt_unregister_table(net, net->ipv4.iptable_filter); } static struct pernet_operations iptable_filter_net_ops = { diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index fae78c3076c4..e07bf242343a 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -208,7 +208,7 @@ static int __net_init iptable_mangle_net_init(struct net *net) static void __net_exit iptable_mangle_net_exit(struct net *net) { - ipt_unregister_table(net->ipv4.iptable_mangle); + ipt_unregister_table(net, net->ipv4.iptable_mangle); } static struct pernet_operations iptable_mangle_net_ops = { diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 993edc23be09..40f2b9f611a2 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -100,7 +100,7 @@ static int __net_init iptable_raw_net_init(struct net *net) static void __net_exit iptable_raw_net_exit(struct net *net) { - ipt_unregister_table(net->ipv4.iptable_raw); + ipt_unregister_table(net, net->ipv4.iptable_raw); } static struct pernet_operations iptable_raw_net_ops = { diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 3bd3d6388da5..7ce2366e4305 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -138,7 +138,7 @@ static int __net_init iptable_security_net_init(struct net *net) static void __net_exit iptable_security_net_exit(struct net *net) { - ipt_unregister_table(net->ipv4.iptable_security); + ipt_unregister_table(net, net->ipv4.iptable_security); } static struct pernet_operations iptable_security_net_ops = { diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 9e81e0dfb4ec..85da34fdc755 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -195,7 +195,7 @@ static int __net_init nf_nat_rule_net_init(struct net *net) static void __net_exit nf_nat_rule_net_exit(struct net *net) { - ipt_unregister_table(net->ipv4.nat_table); + ipt_unregister_table(net, net->ipv4.nat_table); } static struct pernet_operations nf_nat_rule_net_ops = { diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index a825940a92ef..9f1d45f2ba8f 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -585,13 +585,14 @@ mark_source_chains(struct xt_table_info *newinfo, } static int -cleanup_match(struct ip6t_entry_match *m, unsigned int *i) +cleanup_match(struct ip6t_entry_match *m, struct net *net, unsigned int *i) { struct xt_mtdtor_param par; if (i && (*i)-- == 0) return 1; + par.net = net; par.match = m->u.kernel.match; par.matchinfo = m->data; par.family = NFPROTO_IPV6; @@ -737,7 +738,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, err: module_put(t->u.kernel.target->me); cleanup_matches: - IP6T_MATCH_ITERATE(e, cleanup_match, &j); + IP6T_MATCH_ITERATE(e, cleanup_match, net, &j); return ret; } @@ -807,7 +808,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e, } static int -cleanup_entry(struct ip6t_entry *e, unsigned int *i) +cleanup_entry(struct ip6t_entry *e, struct net *net, unsigned int *i) { struct xt_tgdtor_param par; struct ip6t_entry_target *t; @@ -816,7 +817,7 @@ cleanup_entry(struct ip6t_entry *e, unsigned int *i) return 1; /* Cleanup all matches */ - IP6T_MATCH_ITERATE(e, cleanup_match, NULL); + IP6T_MATCH_ITERATE(e, cleanup_match, net, NULL); t = 
ip6t_get_target(e); par.target = t->u.kernel.target; @@ -898,7 +899,7 @@ translate_table(struct net *net, if (ret != 0) { IP6T_ENTRY_ITERATE(entry0, newinfo->size, - cleanup_entry, &i); + cleanup_entry, net, &i); return ret; } @@ -1293,7 +1294,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, - NULL); + net, NULL); xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, sizeof(struct xt_counters) * num_counters) != 0) @@ -1353,7 +1354,7 @@ do_replace(struct net *net, void __user *user, unsigned int len) return 0; free_newinfo_untrans: - IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); + IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, net, NULL); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -1692,14 +1693,15 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, return ret; } -static int compat_check_entry(struct ip6t_entry *e, const char *name, - unsigned int *i) +static int compat_check_entry(struct ip6t_entry *e, struct net *net, + const char *name, unsigned int *i) { unsigned int j; int ret; struct xt_mtchk_param mtpar; j = 0; + mtpar.net = net; mtpar.table = name; mtpar.entryinfo = &e->ipv6; mtpar.hook_mask = e->comefrom; @@ -1716,12 +1718,13 @@ static int compat_check_entry(struct ip6t_entry *e, const char *name, return 0; cleanup_matches: - IP6T_MATCH_ITERATE(e, cleanup_match, &j); + IP6T_MATCH_ITERATE(e, cleanup_match, net, &j); return ret; } static int -translate_compat_table(const char *name, +translate_compat_table(struct net *net, + const char *name, unsigned int valid_hooks, struct xt_table_info **pinfo, void **pentry0, @@ -1810,12 +1813,12 @@ translate_compat_table(const char *name, i = 0; ret = IP6T_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry, - name, &i); + net, name, &i); if (ret) { j -= i; COMPAT_IP6T_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i, compat_release_entry, &j); - IP6T_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i); + IP6T_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, net, &i); xt_free_table_info(newinfo); return ret; } @@ -1870,7 +1873,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_compat_table(tmp.name, tmp.valid_hooks, + ret = translate_compat_table(net, tmp.name, tmp.valid_hooks, &newinfo, &loc_cpu_entry, tmp.size, tmp.num_entries, tmp.hook_entry, tmp.underflow); @@ -1886,7 +1889,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return 0; free_newinfo_untrans: - IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); + IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, net, NULL); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -2144,7 +2147,7 @@ out: return ERR_PTR(ret); } -void ip6t_unregister_table(struct xt_table *table) +void ip6t_unregister_table(struct net *net, struct xt_table *table) { struct xt_table_info *private; void *loc_cpu_entry; @@ -2154,7 +2157,7 @@ void ip6t_unregister_table(struct xt_table *table) /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; - IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL); + IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, net, NULL); if (private->number > 
private->initial_entries) module_put(table_owner); xt_free_table_info(private); diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index ad378efd0eb8..33ddfe53e18d 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -131,7 +131,7 @@ static int __net_init ip6table_filter_net_init(struct net *net) static void __net_exit ip6table_filter_net_exit(struct net *net) { - ip6t_unregister_table(net->ipv6.ip6table_filter); + ip6t_unregister_table(net, net->ipv6.ip6table_filter); } static struct pernet_operations ip6table_filter_net_ops = { diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index a929c19d30e3..9bc483f000e5 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -182,7 +182,7 @@ static int __net_init ip6table_mangle_net_init(struct net *net) static void __net_exit ip6table_mangle_net_exit(struct net *net) { - ip6t_unregister_table(net->ipv6.ip6table_mangle); + ip6t_unregister_table(net, net->ipv6.ip6table_mangle); } static struct pernet_operations ip6table_mangle_net_ops = { diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index ed1a1180f3b3..4c90b552e433 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -94,7 +94,7 @@ static int __net_init ip6table_raw_net_init(struct net *net) static void __net_exit ip6table_raw_net_exit(struct net *net) { - ip6t_unregister_table(net->ipv6.ip6table_raw); + ip6t_unregister_table(net, net->ipv6.ip6table_raw); } static struct pernet_operations ip6table_raw_net_ops = { diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 41b444c60934..baa8d4ef3b0a 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -134,7 +134,7 @@ static int __net_init ip6table_security_net_init(struct net *net) static void __net_exit ip6table_security_net_exit(struct net *net) { - ip6t_unregister_table(net->ipv6.ip6table_security); + ip6t_unregister_table(net, net->ipv6.ip6table_security); } static struct pernet_operations ip6table_security_net_ops = { -- cgit v1.2.2 From 7d07d5632b672c892a65882c2a119345fd9596c9 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 18 Jan 2010 08:31:00 +0100 Subject: netfilter: xt_recent: netns support Make recent table list per-netns. Make proc files per-netns. 
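The conversion follows the usual pernet_operations shape: declare a small per-netns struct, let the core allocate one instance per namespace by setting .id and .size, and fetch it with net_generic(). A condensed sketch of that shape with generic foo names (the real code below is recent_net/recent_net_ops, which additionally carries the per-netns proc directories):

struct foo_net {
	struct list_head tables;
};

static int foo_net_id;

static int __net_init foo_net_init(struct net *net)
{
	struct foo_net *fn = net_generic(net, foo_net_id);

	INIT_LIST_HEAD(&fn->tables);
	return 0;
}

static void __net_exit foo_net_exit(struct net *net)
{
	struct foo_net *fn = net_generic(net, foo_net_id);

	/* all tables should be gone once the match destructors ran */
	BUG_ON(!list_empty(&fn->tables));
}

static struct pernet_operations foo_net_ops = {
	.init = foo_net_init,
	.exit = foo_net_exit,
	.id   = &foo_net_id,
	.size = sizeof(struct foo_net),
};

Registering this is then a single register_pernet_subsys(&foo_net_ops) call from module init, placed before xt_register_matches() so the per-netns state already exists by the time ->checkentry can first run.
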
Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- net/netfilter/xt_recent.c | 136 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 95 insertions(+), 41 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 768d01ff1fea..203333107367 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -78,15 +79,26 @@ struct recent_table { struct list_head iphash[0]; }; -static LIST_HEAD(tables); +struct recent_net { + struct list_head tables; +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *xt_recent; +#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT + struct proc_dir_entry *ipt_recent; +#endif +#endif +}; + +static int recent_net_id; +static inline struct recent_net *recent_pernet(struct net *net) +{ + return net_generic(net, recent_net_id); +} + static DEFINE_SPINLOCK(recent_lock); static DEFINE_MUTEX(recent_mutex); #ifdef CONFIG_PROC_FS -#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT -static struct proc_dir_entry *proc_old_dir; -#endif -static struct proc_dir_entry *recent_proc_dir; static const struct file_operations recent_old_fops, recent_mt_fops; #endif @@ -172,11 +184,12 @@ static void recent_entry_update(struct recent_table *t, struct recent_entry *e) list_move_tail(&e->lru_list, &t->lru_list); } -static struct recent_table *recent_table_lookup(const char *name) +static struct recent_table *recent_table_lookup(struct recent_net *recent_net, + const char *name) { struct recent_table *t; - list_for_each_entry(t, &tables, list) + list_for_each_entry(t, &recent_net->tables, list) if (!strcmp(t->name, name)) return t; return NULL; @@ -195,6 +208,8 @@ static void recent_table_flush(struct recent_table *t) static bool recent_mt(const struct sk_buff *skb, const struct xt_match_param *par) { + struct net *net = dev_net(par->in ? par->in : par->out); + struct recent_net *recent_net = recent_pernet(net); const struct xt_recent_mtinfo *info = par->matchinfo; struct recent_table *t; struct recent_entry *e; @@ -227,7 +242,7 @@ recent_mt(const struct sk_buff *skb, const struct xt_match_param *par) ttl++; spin_lock_bh(&recent_lock); - t = recent_table_lookup(info->name); + t = recent_table_lookup(recent_net, info->name); e = recent_entry_lookup(t, &addr, par->match->family, (info->check_set & XT_RECENT_TTL) ? 
ttl : 0); if (e == NULL) { @@ -271,6 +286,7 @@ out: static bool recent_mt_check(const struct xt_mtchk_param *par) { + struct recent_net *recent_net = recent_pernet(par->net); const struct xt_recent_mtinfo *info = par->matchinfo; struct recent_table *t; #ifdef CONFIG_PROC_FS @@ -297,7 +313,7 @@ static bool recent_mt_check(const struct xt_mtchk_param *par) return false; mutex_lock(&recent_mutex); - t = recent_table_lookup(info->name); + t = recent_table_lookup(recent_net, info->name); if (t != NULL) { t->refcnt++; ret = true; @@ -314,7 +330,7 @@ static bool recent_mt_check(const struct xt_mtchk_param *par) for (i = 0; i < ip_list_hash_size; i++) INIT_LIST_HEAD(&t->iphash[i]); #ifdef CONFIG_PROC_FS - pde = proc_create_data(t->name, ip_list_perms, recent_proc_dir, + pde = proc_create_data(t->name, ip_list_perms, recent_net->xt_recent, &recent_mt_fops, t); if (pde == NULL) { kfree(t); @@ -323,10 +339,10 @@ static bool recent_mt_check(const struct xt_mtchk_param *par) pde->uid = ip_list_uid; pde->gid = ip_list_gid; #ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT - pde = proc_create_data(t->name, ip_list_perms, proc_old_dir, + pde = proc_create_data(t->name, ip_list_perms, recent_net->ipt_recent, &recent_old_fops, t); if (pde == NULL) { - remove_proc_entry(t->name, proc_old_dir); + remove_proc_entry(t->name, recent_net->xt_recent); kfree(t); goto out; } @@ -335,7 +351,7 @@ static bool recent_mt_check(const struct xt_mtchk_param *par) #endif #endif spin_lock_bh(&recent_lock); - list_add_tail(&t->list, &tables); + list_add_tail(&t->list, &recent_net->tables); spin_unlock_bh(&recent_lock); ret = true; out: @@ -345,20 +361,21 @@ out: static void recent_mt_destroy(const struct xt_mtdtor_param *par) { + struct recent_net *recent_net = recent_pernet(par->net); const struct xt_recent_mtinfo *info = par->matchinfo; struct recent_table *t; mutex_lock(&recent_mutex); - t = recent_table_lookup(info->name); + t = recent_table_lookup(recent_net, info->name); if (--t->refcnt == 0) { spin_lock_bh(&recent_lock); list_del(&t->list); spin_unlock_bh(&recent_lock); #ifdef CONFIG_PROC_FS #ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT - remove_proc_entry(t->name, proc_old_dir); + remove_proc_entry(t->name, recent_net->ipt_recent); #endif - remove_proc_entry(t->name, recent_proc_dir); + remove_proc_entry(t->name, recent_net->xt_recent); #endif recent_table_flush(t); kfree(t); @@ -607,8 +624,65 @@ static const struct file_operations recent_mt_fops = { .release = seq_release_private, .owner = THIS_MODULE, }; + +static int __net_init recent_proc_net_init(struct net *net) +{ + struct recent_net *recent_net = recent_pernet(net); + + recent_net->xt_recent = proc_mkdir("xt_recent", net->proc_net); + if (!recent_net->xt_recent) + return -ENOMEM; +#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT + recent_net->ipt_recent = proc_mkdir("ipt_recent", net->proc_net); + if (!recent_net->ipt_recent) { + proc_net_remove(net, "xt_recent"); + return -ENOMEM; + } +#endif + return 0; +} + +static void __net_exit recent_proc_net_exit(struct net *net) +{ +#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT + proc_net_remove(net, "ipt_recent"); +#endif + proc_net_remove(net, "xt_recent"); +} +#else +static inline int recent_proc_net_init(struct net *net) +{ + return 0; +} + +static inline void recent_proc_net_exit(struct net *net) +{ +} #endif /* CONFIG_PROC_FS */ +static int __net_init recent_net_init(struct net *net) +{ + struct recent_net *recent_net = recent_pernet(net); + + INIT_LIST_HEAD(&recent_net->tables); + return 
recent_proc_net_init(net); +} + +static void __net_exit recent_net_exit(struct net *net) +{ + struct recent_net *recent_net = recent_pernet(net); + + BUG_ON(!list_empty(&recent_net->tables)); + recent_proc_net_exit(net); +} + +static struct pernet_operations recent_net_ops = { + .init = recent_net_init, + .exit = recent_net_exit, + .id = &recent_net_id, + .size = sizeof(struct recent_net), +}; + static struct xt_match recent_mt_reg[] __read_mostly = { { .name = "recent", @@ -640,39 +714,19 @@ static int __init recent_mt_init(void) return -EINVAL; ip_list_hash_size = 1 << fls(ip_list_tot); - err = xt_register_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg)); -#ifdef CONFIG_PROC_FS + err = register_pernet_subsys(&recent_net_ops); if (err) return err; - recent_proc_dir = proc_mkdir("xt_recent", init_net.proc_net); - if (recent_proc_dir == NULL) { - xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg)); - err = -ENOMEM; - } -#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT - if (err < 0) - return err; - proc_old_dir = proc_mkdir("ipt_recent", init_net.proc_net); - if (proc_old_dir == NULL) { - remove_proc_entry("xt_recent", init_net.proc_net); - xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg)); - err = -ENOMEM; - } -#endif -#endif + err = xt_register_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg)); + if (err) + unregister_pernet_subsys(&recent_net_ops); return err; } static void __exit recent_mt_exit(void) { - BUG_ON(!list_empty(&tables)); xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg)); -#ifdef CONFIG_PROC_FS -#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT - remove_proc_entry("ipt_recent", init_net.proc_net); -#endif - remove_proc_entry("xt_recent", init_net.proc_net); -#endif + unregister_pernet_subsys(&recent_net_ops); } module_init(recent_mt_init); -- cgit v1.2.2 From e89fc3f1b06d9241f65e580b002789abaa6d11ac Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 18 Jan 2010 08:33:28 +0100 Subject: netfilter: xt_hashlimit: netns support Make hashtable per-netns. Make proc files per-netns. 
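As with xt_recent above, the notable part of the init/exit rework is the ordering: the pernet subsystem is brought up before the matches are registered and torn down after they are unregistered, so per-netns state is available whenever ->checkentry or ->destroy can run. A condensed sketch of that ordering, reusing the foo names from the earlier sketch and leaving out the slab-cache setup that the real hashlimit_mt_init() also performs:

static int __init foo_mt_init(void)
{
	int err;

	err = register_pernet_subsys(&foo_net_ops);
	if (err < 0)
		return err;

	err = xt_register_matches(foo_mt_reg, ARRAY_SIZE(foo_mt_reg));
	if (err < 0)
		unregister_pernet_subsys(&foo_net_ops);
	return err;
}

static void __exit foo_mt_exit(void)
{
	xt_unregister_matches(foo_mt_reg, ARRAY_SIZE(foo_mt_reg));
	unregister_pernet_subsys(&foo_net_ops);
}
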
Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- net/netfilter/xt_hashlimit.c | 141 ++++++++++++++++++++++++++++++------------- 1 file changed, 98 insertions(+), 43 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 944fd11c8989..fb7fcb773a3f 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -26,6 +26,7 @@ #endif #include +#include #include #include @@ -40,9 +41,19 @@ MODULE_DESCRIPTION("Xtables: per hash-bucket rate-limit match"); MODULE_ALIAS("ipt_hashlimit"); MODULE_ALIAS("ip6t_hashlimit"); +struct hashlimit_net { + struct hlist_head htables; + struct proc_dir_entry *ipt_hashlimit; + struct proc_dir_entry *ip6t_hashlimit; +}; + +static int hashlimit_net_id; +static inline struct hashlimit_net *hashlimit_pernet(struct net *net) +{ + return net_generic(net, hashlimit_net_id); +} + /* need to declare this at the top */ -static struct proc_dir_entry *hashlimit_procdir4; -static struct proc_dir_entry *hashlimit_procdir6; static const struct file_operations dl_file_ops; /* hash table crap */ @@ -93,13 +104,13 @@ struct xt_hashlimit_htable { /* seq_file stuff */ struct proc_dir_entry *pde; + struct net *net; struct hlist_head hash[0]; /* hashtable itself */ }; static DEFINE_SPINLOCK(hashlimit_lock); /* protects htables list */ static DEFINE_MUTEX(hlimit_mutex); /* additional checkentry protection */ -static HLIST_HEAD(hashlimit_htables); static struct kmem_cache *hashlimit_cachep __read_mostly; static inline bool dst_cmp(const struct dsthash_ent *ent, @@ -185,8 +196,9 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent) } static void htable_gc(unsigned long htlong); -static int htable_create_v0(struct xt_hashlimit_info *minfo, u_int8_t family) +static int htable_create_v0(struct net *net, struct xt_hashlimit_info *minfo, u_int8_t family) { + struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); struct xt_hashlimit_htable *hinfo; unsigned int size; unsigned int i; @@ -239,26 +251,29 @@ static int htable_create_v0(struct xt_hashlimit_info *minfo, u_int8_t family) spin_lock_init(&hinfo->lock); hinfo->pde = proc_create_data(minfo->name, 0, (family == NFPROTO_IPV4) ? - hashlimit_procdir4 : hashlimit_procdir6, + hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit, &dl_file_ops, hinfo); if (!hinfo->pde) { vfree(hinfo); return -1; } + hinfo->net = net; setup_timer(&hinfo->timer, htable_gc, (unsigned long )hinfo); hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval); add_timer(&hinfo->timer); spin_lock_bh(&hashlimit_lock); - hlist_add_head(&hinfo->node, &hashlimit_htables); + hlist_add_head(&hinfo->node, &hashlimit_net->htables); spin_unlock_bh(&hashlimit_lock); return 0; } -static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, u_int8_t family) +static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, + u_int8_t family) { + struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); struct xt_hashlimit_htable *hinfo; unsigned int size; unsigned int i; @@ -301,19 +316,20 @@ static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, u_int8_t family) hinfo->pde = proc_create_data(minfo->name, 0, (family == NFPROTO_IPV4) ? 
- hashlimit_procdir4 : hashlimit_procdir6, + hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit, &dl_file_ops, hinfo); if (hinfo->pde == NULL) { vfree(hinfo); return -1; } + hinfo->net = net; setup_timer(&hinfo->timer, htable_gc, (unsigned long)hinfo); hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval); add_timer(&hinfo->timer); spin_lock_bh(&hashlimit_lock); - hlist_add_head(&hinfo->node, &hashlimit_htables); + hlist_add_head(&hinfo->node, &hashlimit_net->htables); spin_unlock_bh(&hashlimit_lock); return 0; @@ -364,24 +380,30 @@ static void htable_gc(unsigned long htlong) static void htable_destroy(struct xt_hashlimit_htable *hinfo) { + struct hashlimit_net *hashlimit_net = hashlimit_pernet(hinfo->net); + struct proc_dir_entry *parent; + del_timer_sync(&hinfo->timer); - /* remove proc entry */ - remove_proc_entry(hinfo->pde->name, - hinfo->family == NFPROTO_IPV4 ? hashlimit_procdir4 : - hashlimit_procdir6); + if (hinfo->family == NFPROTO_IPV4) + parent = hashlimit_net->ipt_hashlimit; + else + parent = hashlimit_net->ip6t_hashlimit; + remove_proc_entry(hinfo->pde->name, parent); htable_selective_cleanup(hinfo, select_all); vfree(hinfo); } -static struct xt_hashlimit_htable *htable_find_get(const char *name, +static struct xt_hashlimit_htable *htable_find_get(struct net *net, + const char *name, u_int8_t family) { + struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); struct xt_hashlimit_htable *hinfo; struct hlist_node *pos; spin_lock_bh(&hashlimit_lock); - hlist_for_each_entry(hinfo, pos, &hashlimit_htables, node) { + hlist_for_each_entry(hinfo, pos, &hashlimit_net->htables, node) { if (!strcmp(name, hinfo->pde->name) && hinfo->family == family) { atomic_inc(&hinfo->use); @@ -665,6 +687,7 @@ hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par) { + struct net *net = par->net; struct xt_hashlimit_info *r = par->matchinfo; /* Check for overflow. */ @@ -694,8 +717,8 @@ static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par) * the list of htable's in htable_create(), since then we would * create duplicate proc files. -HW */ mutex_lock(&hlimit_mutex); - r->hinfo = htable_find_get(r->name, par->match->family); - if (!r->hinfo && htable_create_v0(r, par->match->family) != 0) { + r->hinfo = htable_find_get(net, r->name, par->match->family); + if (!r->hinfo && htable_create_v0(net, r, par->match->family) != 0) { mutex_unlock(&hlimit_mutex); return false; } @@ -706,6 +729,7 @@ static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par) static bool hashlimit_mt_check(const struct xt_mtchk_param *par) { + struct net *net = par->net; struct xt_hashlimit_mtinfo1 *info = par->matchinfo; /* Check for overflow. */ @@ -735,8 +759,8 @@ static bool hashlimit_mt_check(const struct xt_mtchk_param *par) * the list of htable's in htable_create(), since then we would * create duplicate proc files. 
-HW */ mutex_lock(&hlimit_mutex); - info->hinfo = htable_find_get(info->name, par->match->family); - if (!info->hinfo && htable_create(info, par->match->family) != 0) { + info->hinfo = htable_find_get(net, info->name, par->match->family); + if (!info->hinfo && htable_create(net, info, par->match->family) != 0) { mutex_unlock(&hlimit_mutex); return false; } @@ -953,10 +977,61 @@ static const struct file_operations dl_file_ops = { .release = seq_release }; +static int __net_init hashlimit_proc_net_init(struct net *net) +{ + struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); + + hashlimit_net->ipt_hashlimit = proc_mkdir("ipt_hashlimit", net->proc_net); + if (!hashlimit_net->ipt_hashlimit) + return -ENOMEM; +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) + hashlimit_net->ip6t_hashlimit = proc_mkdir("ip6t_hashlimit", net->proc_net); + if (!hashlimit_net->ip6t_hashlimit) { + proc_net_remove(net, "ipt_hashlimit"); + return -ENOMEM; + } +#endif + return 0; +} + +static void __net_exit hashlimit_proc_net_exit(struct net *net) +{ + proc_net_remove(net, "ipt_hashlimit"); +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) + proc_net_remove(net, "ip6t_hashlimit"); +#endif +} + +static int __net_init hashlimit_net_init(struct net *net) +{ + struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); + + INIT_HLIST_HEAD(&hashlimit_net->htables); + return hashlimit_proc_net_init(net); +} + +static void __net_exit hashlimit_net_exit(struct net *net) +{ + struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); + + BUG_ON(!hlist_empty(&hashlimit_net->htables)); + hashlimit_proc_net_exit(net); +} + +static struct pernet_operations hashlimit_net_ops = { + .init = hashlimit_net_init, + .exit = hashlimit_net_exit, + .id = &hashlimit_net_id, + .size = sizeof(struct hashlimit_net), +}; + static int __init hashlimit_mt_init(void) { int err; + err = register_pernet_subsys(&hashlimit_net_ops); + if (err < 0) + return err; err = xt_register_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg)); if (err < 0) @@ -970,41 +1045,21 @@ static int __init hashlimit_mt_init(void) printk(KERN_ERR "xt_hashlimit: unable to create slab cache\n"); goto err2; } - hashlimit_procdir4 = proc_mkdir("ipt_hashlimit", init_net.proc_net); - if (!hashlimit_procdir4) { - printk(KERN_ERR "xt_hashlimit: unable to create proc dir " - "entry\n"); - goto err3; - } - err = 0; -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) - hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", init_net.proc_net); - if (!hashlimit_procdir6) { - printk(KERN_ERR "xt_hashlimit: unable to create proc dir " - "entry\n"); - err = -ENOMEM; - } -#endif - if (!err) - return 0; - remove_proc_entry("ipt_hashlimit", init_net.proc_net); -err3: - kmem_cache_destroy(hashlimit_cachep); + return 0; + err2: xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg)); err1: + unregister_pernet_subsys(&hashlimit_net_ops); return err; } static void __exit hashlimit_mt_exit(void) { - remove_proc_entry("ipt_hashlimit", init_net.proc_net); -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) - remove_proc_entry("ip6t_hashlimit", init_net.proc_net); -#endif kmem_cache_destroy(hashlimit_cachep); xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg)); + unregister_pernet_subsys(&hashlimit_net_ops); } module_init(hashlimit_mt_init); -- cgit v1.2.2 From a5d896adf019143adf72d08826fe5359b6a8762c Mon Sep 17 00:00:00 2001 From: Eric Leblond 
Date: Mon, 18 Jan 2010 09:44:39 +0100 Subject: netfilter: nfnetlink_queue: simplify warning message This patch remove variable part from a debug message to have message concatenation from syslog. Signed-off-by: Eric Leblond Signed-off-by: Patrick McHardy --- net/netfilter/nfnetlink_queue.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 5c589b27d6eb..90cf36decea3 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -414,8 +414,8 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) queue->queue_dropped++; if (net_ratelimit()) printk(KERN_WARNING "nf_queue: full at %d entries, " - "dropping packets(s). Dropped: %d\n", - queue->queue_total, queue->queue_dropped); + "dropping packets(s).\n", + queue->queue_total); goto err_out_free_nskb; } -- cgit v1.2.2 From 6d955180b2f9ccff444df06265160868cabb289a Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Mon, 18 Jan 2010 12:58:44 +0000 Subject: ipv4: allow warming up the ARP cache with request type gratuitous ARP If the per device ARP_ACCEPT option is enable, currently we only allow creating new ARP cache entries for response type gratuitous ARP. Allowing gratuitous ARP to create new ARP entries (not only to update existing ones) is useful when we want to avoid unnecessary delays for the first packet of a stream. This patch allows request type gratuitous ARP to create new ARP cache entries as well. This is useful when we want to populate the ARP cache entries for a large number of hosts on the same LAN. Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- net/ipv4/arp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 078709233bc4..1940b4df7699 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -907,7 +907,8 @@ static int arp_process(struct sk_buff *skb) devices (strip is candidate) */ if (n == NULL && - arp->ar_op == htons(ARPOP_REPLY) && + (arp->ar_op == htons(ARPOP_REPLY) || + (arp->ar_op == htons(ARPOP_REQUEST) && tip == sip)) && inet_addr_type(net, sip) == RTN_UNICAST) n = __neigh_lookup(&arp_tbl, &sip, dev, 1); } -- cgit v1.2.2 From 135d01899b1fba17045961febff7e5141db6048f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 19 Jan 2010 19:06:59 +0100 Subject: netfilter: nf_conntrack_sip: fix off-by-one in compact header parsing In a string like "v:SIP/2.0..." it was checking for !isalpha('S') when it meant to be inspecting the ':'. 
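Spelling the off-by-one out with the commit's own example, the compact "Via" form (cname "v", so hdr->clen is 1):

/*
 * Illustration only, not part of the patch.  With dptr pointing at
 * "v:SIP/2.0/UDP ..." the two candidate bytes are:
 *
 *	*(dptr + hdr->clen)	== ':'	the byte that should be tested
 *	*(dptr + hdr->clen + 1)	== 'S'	the byte the old code tested
 *
 * isalpha('S') is true, so the old check !isalpha(*(dptr + hdr->clen + 1))
 * failed and a valid compact header went unrecognized whenever its value
 * started with a letter; testing *(dptr + hdr->clen) sees the ':' instead.
 */
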
Patch by Greg Alexander Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_sip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 4b572163784b..023966b569bf 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -376,7 +376,7 @@ int ct_sip_get_header(const struct nf_conn *ct, const char *dptr, dptr += hdr->len; else if (hdr->cname && limit - dptr >= hdr->clen + 1 && strnicmp(dptr, hdr->cname, hdr->clen) == 0 && - !isalpha(*(dptr + hdr->clen + 1))) + !isalpha(*(dptr + hdr->clen))) dptr += hdr->clen; else continue; -- cgit v1.2.2 From 861a57cd01f97e984320b5aeeee019ede48c714d Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 12 Jan 2010 04:08:26 +0100 Subject: mac80211: fix WMM AC default for non-QoS data frames The WMM AC selection added to the monitor mode selection function accidentally assigns non-QoS data frames to the same AC as mgmt frames (VO). This is not serious, but should be fixed anyway. This patch assigns them to the BE AC instead. Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- net/mac80211/iface.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index edf21cebeee8..09fff4662e80 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -695,10 +695,14 @@ static u16 ieee80211_monitor_select_queue(struct net_device *dev, hdr = (void *)((u8 *)skb->data + le16_to_cpu(rtap->it_len)); - if (!ieee80211_is_data_qos(hdr->frame_control)) { + if (!ieee80211_is_data(hdr->frame_control)) { skb->priority = 7; return ieee802_1d_to_ac[skb->priority]; } + if (!ieee80211_is_data_qos(hdr->frame_control)) { + skb->priority = 0; + return ieee802_1d_to_ac[skb->priority]; + } p = ieee80211_get_qos_ctl(hdr); skb->priority = *p & IEEE80211_QOS_CTL_TAG1D_MASK; -- cgit v1.2.2 From 8e9310c1790566ea2de2e8b6e1c04bacbbee648c Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Sat, 16 Jan 2010 14:36:53 -0500 Subject: mac80211: pid: replace open-coded msecs_to_jiffies Code directly scaling by HZ and rounding can be more efficiently and clearly performed with msecs_to_jiffies. Signed-off-by: Bob Copeland Signed-off-by: John W. Linville --- net/mac80211/rc80211_pid_algo.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c index 29bc4c516238..2652a374974e 100644 --- a/net/mac80211/rc80211_pid_algo.c +++ b/net/mac80211/rc80211_pid_algo.c @@ -157,9 +157,7 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo, /* In case nothing happened during the previous control interval, turn * the sharpening factor on. */ - period = (HZ * pinfo->sampling_period + 500) / 1000; - if (!period) - period = 1; + period = msecs_to_jiffies(pinfo->sampling_period); if (jiffies - spinfo->last_sample > 2 * period) spinfo->sharp_cnt = pinfo->sharpen_duration; @@ -252,9 +250,7 @@ static void rate_control_pid_tx_status(void *priv, struct ieee80211_supported_ba } /* Update PID controller state. 
*/ - period = (HZ * pinfo->sampling_period + 500) / 1000; - if (!period) - period = 1; + period = msecs_to_jiffies(pinfo->sampling_period); if (time_after(jiffies, spinfo->last_sample + period)) rate_control_pid_sample(pinfo, sband, sta, spinfo); } -- cgit v1.2.2 From edc6ccb7b992bd9ea5db4555c8f0bf74c656f964 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 17 Jan 2010 01:47:55 +0100 Subject: mac80211: move and rename misc tx handler This TX handler is used only for assigning the station pointer in the control information, so give it a better name. Also move it before rate control. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/tx.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index daf81048c1f7..a74ab797fed9 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -559,6 +559,17 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) return TX_CONTINUE; } +static ieee80211_tx_result debug_noinline +ieee80211_tx_h_sta(struct ieee80211_tx_data *tx) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + + if (tx->sta) + info->control.sta = &tx->sta->sta; + + return TX_CONTINUE; +} + static ieee80211_tx_result debug_noinline ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) { @@ -733,17 +744,6 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) return TX_CONTINUE; } -static ieee80211_tx_result debug_noinline -ieee80211_tx_h_misc(struct ieee80211_tx_data *tx) -{ - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); - - if (tx->sta) - info->control.sta = &tx->sta->sta; - - return TX_CONTINUE; -} - static ieee80211_tx_result debug_noinline ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) { @@ -1292,10 +1292,10 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) CALL_TXH(ieee80211_tx_h_check_assoc); CALL_TXH(ieee80211_tx_h_ps_buf); CALL_TXH(ieee80211_tx_h_select_key); + CALL_TXH(ieee80211_tx_h_sta); CALL_TXH(ieee80211_tx_h_michael_mic_add); if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)) CALL_TXH(ieee80211_tx_h_rate_ctrl); - CALL_TXH(ieee80211_tx_h_misc); CALL_TXH(ieee80211_tx_h_sequence); CALL_TXH(ieee80211_tx_h_fragment); /* handlers after fragment must be aware of tx info fragmentation! */ -- cgit v1.2.2 From 697e6a0fb0c8783695d4b4a5d7131476b296d623 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 17 Jan 2010 01:47:56 +0100 Subject: mac80211: clear TX control on filtered frames When an skb survived a round-trip through the driver and needs to be re-used, its control information is definitely not valid any more, the driver will have overwritten it. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/status.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 0ebcdda24200..9e171b178276 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -69,6 +69,14 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, */ goto drop; + /* + * This skb 'survived' a round-trip through the driver, and + * hopefully the driver didn't mangle it too badly. However, + * we can definitely not rely on the the control information + * being correct. Clear it so we don't get junk there. 
+ */ + memset(&info->control, 0, sizeof(info->control)); + sta->tx_filtered_count++; /* -- cgit v1.2.2 From a6bae9e7ab19876a157c91019852395539e4f20e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 17 Jan 2010 01:47:57 +0100 Subject: mac80211: remove useless setting of IEEE80211_TX_INTFL_DONT_ENCRYPT There's no value in setting a flag that will never be checked after this point, this seems to be legacy code -- I think previously the flag was used to check whether to encrypt the frame or not. Now, however, the flag need not be set, and setting it actually interferes if the frame will be processed again later. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/tx.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index a74ab797fed9..9afbee0d53c0 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -553,9 +553,6 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) } } - if (!tx->key || !(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) - info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; - return TX_CONTINUE; } -- cgit v1.2.2 From 813d76694043d00b59475baa1fbfaf54a2eb7fad Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 17 Jan 2010 01:47:58 +0100 Subject: mac80211: move control.hw_key assignment When mac80211 asks a driver to encrypt a frame, it must assign the control.hw_key pointer for it to know which key to use etc. Currently, mac80211 does this whenever it would software-encrypt a frame. Change the logic of this code to assign the hw_key pointer when selecting the key, and later check it when deciding whether to encrypt the frame or let it be encrypted by the hardware. This allows us to later simply skip the encryption function since it no longer modifies the TX control. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/tkip.c | 11 ++++++----- net/mac80211/tx.c | 9 +++++++++ net/mac80211/wep.c | 18 ++++++++--------- net/mac80211/wpa.c | 57 +++++++++++++++++++++-------------------------------- 4 files changed, 46 insertions(+), 49 deletions(-) (limited to 'net') diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c index b73454a507f9..14fe49332c02 100644 --- a/net/mac80211/tkip.c +++ b/net/mac80211/tkip.c @@ -195,11 +195,13 @@ void ieee80211_get_tkip_key(struct ieee80211_key_conf *keyconf, } EXPORT_SYMBOL(ieee80211_get_tkip_key); -/* Encrypt packet payload with TKIP using @key. @pos is a pointer to the +/* + * Encrypt packet payload with TKIP using @key. @pos is a pointer to the * beginning of the buffer containing payload. This payload must include - * headroom of eight octets for IV and Ext. IV and taildroom of four octets - * for ICV. @payload_len is the length of payload (_not_ including extra - * headroom and tailroom). @ta is the transmitter addresses. */ + * the IV/Ext.IV and space for (taildroom) four octets for ICV. + * @payload_len is the length of payload (_not_ including IV/ICV length). + * @ta is the transmitter addresses. 
+ */ void ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm, struct ieee80211_key *key, u8 *pos, size_t payload_len, u8 *ta) @@ -214,7 +216,6 @@ void ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm, tkip_mixing_phase2(tk, ctx, ctx->iv16, rc4key); - pos = ieee80211_tkip_add_iv(pos, key, key->u.tkip.tx.iv16); ieee80211_wep_encrypt_data(tfm, rc4key, 16, pos, payload_len); } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 9afbee0d53c0..e3d8ff533ee6 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -529,6 +529,8 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) tx->key = NULL; if (tx->key) { + bool skip_hw = false; + tx->key->tx_rx_count++; /* TODO: add threshold stuff again */ @@ -545,12 +547,19 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) !ieee80211_use_mfp(hdr->frame_control, tx->sta, tx->skb)) tx->key = NULL; + skip_hw = (tx->key->conf.flags & + IEEE80211_KEY_FLAG_SW_MGMT) && + ieee80211_is_mgmt(hdr->frame_control); break; case ALG_AES_CMAC: if (!ieee80211_is_mgmt(hdr->frame_control)) tx->key = NULL; break; } + + if (!skip_hw && + tx->key->conf.flags & KEY_FLAG_UPLOADED_TO_HARDWARE) + info->control.hw_key = &tx->key->conf; } return TX_CONTINUE; diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index 247123fe1a7a..0a4c641c9605 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -305,20 +305,20 @@ static int wep_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - if (!(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) { + if (!info->control.hw_key) { if (ieee80211_wep_encrypt(tx->local, skb, tx->key->conf.key, tx->key->conf.keylen, tx->key->conf.keyidx)) return -1; - } else { - info->control.hw_key = &tx->key->conf; - if (tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) { - if (!ieee80211_wep_add_iv(tx->local, skb, - tx->key->conf.keylen, - tx->key->conf.keyidx)) - return -1; - } } + + if (info->control.hw_key->flags & IEEE80211_KEY_FLAG_GENERATE_IV) { + if (!ieee80211_wep_add_iv(tx->local, skb, + tx->key->conf.keylen, + tx->key->conf.keyidx)) + return -1; + } + return 0; } diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 5332014cb229..f4971cd45c64 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -31,8 +31,8 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) unsigned int hdrlen; struct ieee80211_hdr *hdr; struct sk_buff *skb = tx->skb; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); int authenticator; - int wpa_test = 0; int tail; hdr = (struct ieee80211_hdr *)skb->data; @@ -47,16 +47,15 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) data = skb->data + hdrlen; data_len = skb->len - hdrlen; - if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && + if (info->control.hw_key && !(tx->flags & IEEE80211_TX_FRAGMENTED) && - !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) && - !wpa_test) { - /* hwaccel - with no need for preallocated room for MMIC */ + !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) { + /* hwaccel - with no need for SW-generated MMIC */ return TX_CONTINUE; } tail = MICHAEL_MIC_LEN; - if (!(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) + if (!info->control.hw_key) tail += TKIP_ICV_LEN; if (WARN_ON(skb_tailroom(skb) < tail || @@ -147,17 +146,16 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) int len, tail; u8 *pos; - if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && - !(tx->key->conf.flags & 
IEEE80211_KEY_FLAG_GENERATE_IV)) { - /* hwaccel - with no need for preallocated room for IV/ICV */ - info->control.hw_key = &tx->key->conf; + if (info->control.hw_key && + !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { + /* hwaccel - with no need for software-generated IV */ return 0; } hdrlen = ieee80211_hdrlen(hdr->frame_control); len = skb->len - hdrlen; - if (tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) + if (info->control.hw_key) tail = 0; else tail = TKIP_ICV_LEN; @@ -175,13 +173,11 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) if (key->u.tkip.tx.iv16 == 0) key->u.tkip.tx.iv32++; - if (tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) { - /* hwaccel - with preallocated room for IV */ - ieee80211_tkip_add_iv(pos, key, key->u.tkip.tx.iv16); + pos = ieee80211_tkip_add_iv(pos, key, key->u.tkip.tx.iv16); - info->control.hw_key = &tx->key->conf; + /* hwaccel - with software IV */ + if (info->control.hw_key) return 0; - } /* Add room for ICV */ skb_put(skb, TKIP_ICV_LEN); @@ -363,24 +359,20 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) int hdrlen, len, tail; u8 *pos, *pn; int i; - bool skip_hw; - - skip_hw = (tx->key->conf.flags & IEEE80211_KEY_FLAG_SW_MGMT) && - ieee80211_is_mgmt(hdr->frame_control); - if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && - !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) && - !skip_hw) { - /* hwaccel - with no need for preallocated room for CCMP - * header or MIC fields */ - info->control.hw_key = &tx->key->conf; + if (info->control.hw_key && + !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { + /* + * hwaccel has no need for preallocated room for CCMP + * header or MIC fields + */ return 0; } hdrlen = ieee80211_hdrlen(hdr->frame_control); len = skb->len - hdrlen; - if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) + if (info->control.hw_key) tail = 0; else tail = CCMP_MIC_LEN; @@ -405,11 +397,9 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) ccmp_pn2hdr(pos, pn, key->conf.keyidx); - if ((key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && !skip_hw) { - /* hwaccel - with preallocated room for CCMP header */ - info->control.hw_key = &tx->key->conf; + /* hwaccel - with software CCMP header */ + if (info->control.hw_key) return 0; - } pos += CCMP_HDR_LEN; ccmp_special_blocks(skb, pn, key->u.ccmp.tx_crypto_buf, 0); @@ -525,11 +515,8 @@ ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx) u8 *pn, aad[20]; int i; - if (tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) { - /* hwaccel */ - info->control.hw_key = &tx->key->conf; + if (info->control.hw_key) return 0; - } if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie))) return TX_DROP; -- cgit v1.2.2 From c6fcf6bcfc3cfc1c00cc7fd9610cfa2b1a18041f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 17 Jan 2010 01:47:59 +0100 Subject: mac80211: re-enable re-transmission of filtered frames In an earlier commit, mac80211: disable software retry for now Pavel Roskin reported a problem that seems to be due to software retry of already transmitted frames. It turns out that we've never done that correctly, but due to some recent changes it now crashes in the TX code. I've added a comment in the patch that explains the problem better and also points to possible solutions -- which I can't implement right now. I disabled software retry of failed/filtered frames because it was broken. 
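For reference, and purely as an illustrative sketch rather than part of this patch: the path through ieee80211_handle_filtered_frame() is reached when a driver hands a frame back to mac80211 with the TX_FILTERED status flag set, roughly like this hypothetical driver helper (names are invented for illustration):

    #include <net/mac80211.h>

    /*
     * Hypothetical driver-side helper: report a frame that the hardware
     * filtered (e.g. peer went into powersave) back to mac80211, which is
     * what eventually lands in ieee80211_handle_filtered_frame().
     */
    static void drv_report_filtered_frame(struct ieee80211_hw *hw,
                                          struct sk_buff *skb)
    {
            struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);

            ieee80211_tx_info_clear_status(info);          /* wipe stale status */
            info->flags |= IEEE80211_TX_STAT_TX_FILTERED;  /* not acked, filtered */
            ieee80211_tx_status(hw, skb);                  /* mac80211 may retry it */
    }
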
With the work of the previous patches, it now becomes fairly easy to re-enable it by adding a flag indicating that the frame shouldn't be modified, but still running it through the transmit handlers to populate the control information. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/status.c | 32 +++++--------------------------- net/mac80211/tx.c | 7 ++++++- 2 files changed, 11 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 9e171b178276..800b6777e0ed 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -44,38 +44,17 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - /* - * XXX: This is temporary! - * - * The problem here is that when we get here, the driver will - * quite likely have pretty much overwritten info->control by - * using info->driver_data or info->rate_driver_data. Thus, - * when passing out the frame to the driver again, we would be - * passing completely bogus data since the driver would then - * expect a properly filled info->control. In mac80211 itself - * the same problem occurs, since we need info->control.vif - * internally. - * - * To fix this, we should send the frame through TX processing - * again. However, it's not that simple, since the frame will - * have been software-encrypted (if applicable) already, and - * encrypting it again doesn't do much good. So to properly do - * that, we not only have to skip the actual 'raw' encryption - * (key selection etc. still has to be done!) but also the - * sequence number assignment since that impacts the crypto - * encapsulation, of course. - * - * Hence, for now, fix the bug by just dropping the frame. - */ - goto drop; - /* * This skb 'survived' a round-trip through the driver, and * hopefully the driver didn't mangle it too badly. However, * we can definitely not rely on the the control information - * being correct. Clear it so we don't get junk there. + * being correct. Clear it so we don't get junk there, and + * indicate that it needs new processing, but must not be + * modified/encrypted again. 
*/ memset(&info->control, 0, sizeof(info->control)); + info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING | + IEEE80211_TX_INTFL_RETRANSMISSION; sta->tx_filtered_count++; @@ -130,7 +109,6 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, return; } - drop: #ifdef CONFIG_MAC80211_VERBOSE_DEBUG if (net_ratelimit()) printk(KERN_DEBUG "%s: dropped TX filtered frame, " diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index e3d8ff533ee6..da557b0d0114 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1285,6 +1285,7 @@ static int __ieee80211_tx(struct ieee80211_local *local, static int invoke_tx_handlers(struct ieee80211_tx_data *tx) { struct sk_buff *skb = tx->skb; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); ieee80211_tx_result res = TX_DROP; #define CALL_TXH(txh) \ @@ -1299,9 +1300,13 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) CALL_TXH(ieee80211_tx_h_ps_buf); CALL_TXH(ieee80211_tx_h_select_key); CALL_TXH(ieee80211_tx_h_sta); - CALL_TXH(ieee80211_tx_h_michael_mic_add); if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)) CALL_TXH(ieee80211_tx_h_rate_ctrl); + + if (unlikely(info->flags & IEEE80211_TX_INTFL_RETRANSMISSION)) + goto txh_done; + + CALL_TXH(ieee80211_tx_h_michael_mic_add); CALL_TXH(ieee80211_tx_h_sequence); CALL_TXH(ieee80211_tx_h_fragment); /* handlers after fragment must be aware of tx info fragmentation! */ -- cgit v1.2.2 From 5f2aa25e0e5b221a176ab3d1c51d51da265cb4a7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 17 Jan 2010 15:49:02 +0100 Subject: cfg80211: rcu-ify rdev and wdev Future code will need to look up rdev and wdev within atomic sections, but currently we need to lock a mutex for such lookups. Change the list handling for both to be RCU-safe so that we can look them up in rcu sections instead in the future. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/core.c | 26 +++++++++++++++----------- net/wireless/core.h | 3 ++- 2 files changed, 17 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 20db90246de5..d07f57c906db 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1,7 +1,7 @@ /* * This is the linux wireless configuration interface. * - * Copyright 2006-2009 Johannes Berg + * Copyright 2006-2010 Johannes Berg */ #include @@ -31,15 +31,10 @@ MODULE_AUTHOR("Johannes Berg"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("wireless configuration support"); -/* RCU might be appropriate here since we usually - * only read the list, and that can happen quite - * often because we need to do it for each command */ +/* RCU-protected (and cfg80211_mutex for writers) */ LIST_HEAD(cfg80211_rdev_list); int cfg80211_rdev_list_generation; -/* - * This is used to protect the cfg80211_rdev_list - */ DEFINE_MUTEX(cfg80211_mutex); /* for debugfs */ @@ -477,7 +472,7 @@ int wiphy_register(struct wiphy *wiphy) /* set up regulatory info */ wiphy_update_regulatory(wiphy, NL80211_REGDOM_SET_BY_CORE); - list_add(&rdev->list, &cfg80211_rdev_list); + list_add_rcu(&rdev->list, &cfg80211_rdev_list); cfg80211_rdev_list_generation++; mutex_unlock(&cfg80211_mutex); @@ -554,7 +549,8 @@ void wiphy_unregister(struct wiphy *wiphy) * it impossible to find from userspace. */ debugfs_remove_recursive(rdev->wiphy.debugfsdir); - list_del(&rdev->list); + list_del_rcu(&rdev->list); + synchronize_rcu(); /* * Try to grab rdev->mtx. 
If a command is still in progress, @@ -670,7 +666,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, INIT_LIST_HEAD(&wdev->event_list); spin_lock_init(&wdev->event_lock); mutex_lock(&rdev->devlist_mtx); - list_add(&wdev->list, &rdev->netdev_list); + list_add_rcu(&wdev->list, &rdev->netdev_list); rdev->devlist_generation++; /* can only change netns with wiphy */ dev->features |= NETIF_F_NETNS_LOCAL; @@ -782,13 +778,21 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, */ if (!list_empty(&wdev->list)) { sysfs_remove_link(&dev->dev.kobj, "phy80211"); - list_del_init(&wdev->list); + list_del_rcu(&wdev->list); rdev->devlist_generation++; #ifdef CONFIG_CFG80211_WEXT kfree(wdev->wext.keys); #endif } mutex_unlock(&rdev->devlist_mtx); + /* + * synchronise (so that we won't find this netdev + * from other code any more) and then clear the list + * head so that the above code can safely check for + * !list_empty() to avoid double-cleanup. + */ + synchronize_rcu(); + INIT_LIST_HEAD(&wdev->list); break; case NETDEV_PRE_UP: if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype))) diff --git a/net/wireless/core.h b/net/wireless/core.h index 2d6a6b9c0c43..c326a667022a 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -1,7 +1,7 @@ /* * Wireless configuration interface internals. * - * Copyright 2006-2009 Johannes Berg + * Copyright 2006-2010 Johannes Berg */ #ifndef __NET_WIRELESS_CORE_H #define __NET_WIRELESS_CORE_H @@ -48,6 +48,7 @@ struct cfg80211_registered_device { /* associate netdev list */ struct mutex devlist_mtx; + /* protected by devlist_mtx or RCU */ struct list_head netdev_list; int devlist_generation; int opencount; /* also protected by devlist_mtx */ -- cgit v1.2.2 From ce9058aedd75f14785400dcc49a2bc352ca38871 Mon Sep 17 00:00:00 2001 From: Benoit Papillault Date: Sun, 17 Jan 2010 22:45:23 +0100 Subject: mac80211: removed useless code in IBSS management ieee82011_sta_find_ibss() and ieee80211_sta_merge_ibss() are always called with a defined state. So it's useless to check it or set it in those function. Signed-off-by: Benoit Papillault Signed-off-by: John W. 
Linville --- net/mac80211/ibss.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 5bcde4c3fba1..c2a708e3a18c 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -454,6 +454,9 @@ static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata) return active; } +/* + * This function is called with state == IEEE80211_IBSS_MLME_JOINED + */ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata) { @@ -519,6 +522,10 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) capability, 0); } +/* + * This function is called with state == IEEE80211_IBSS_MLME_SEARCH + */ + static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; @@ -575,18 +582,14 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) #endif /* CONFIG_MAC80211_IBSS_DEBUG */ /* Selected IBSS not found in current scan results - try to scan */ - if (ifibss->state == IEEE80211_IBSS_MLME_JOINED && - !ieee80211_sta_active_ibss(sdata)) { - mod_timer(&ifibss->timer, - round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL)); - } else if (time_after(jiffies, ifibss->last_scan_completed + + if (time_after(jiffies, ifibss->last_scan_completed + IEEE80211_SCAN_INTERVAL)) { printk(KERN_DEBUG "%s: Trigger new scan to find an IBSS to " "join\n", sdata->name); ieee80211_request_internal_scan(sdata, ifibss->ssid, ifibss->ssid_len); - } else if (ifibss->state != IEEE80211_IBSS_MLME_JOINED) { + } else { int interval = IEEE80211_SCAN_INTERVAL; if (time_after(jiffies, ifibss->ibss_join_req + @@ -604,7 +607,6 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) interval = IEEE80211_SCAN_INTERVAL_SLOW; } - ifibss->state = IEEE80211_IBSS_MLME_SEARCH; mod_timer(&ifibss->timer, round_jiffies(jiffies + interval)); } -- cgit v1.2.2 From a98bfec2985221d8e0904a526cbe88590eaad2a6 Mon Sep 17 00:00:00 2001 From: Benoit Papillault Date: Sun, 17 Jan 2010 22:45:24 +0100 Subject: mac80211: Fixed a bug in IBSS merge First, both beacons and probe responses can be used for IBSS merge. Next, sdata->u.ibss.bssid was always true (and thus IBSS merge was disabled). We should use sdata->u.ibss.fixed_bssid instead. Signed-off-by: Benoit Papillault Signed-off-by: John W. Linville --- net/mac80211/ibss.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index c2a708e3a18c..f95750b423e3 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -293,12 +293,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, /* check if we need to merge IBSS */ - /* merge only on beacons (???) */ - if (!beacon) - goto put_bss; - /* we use a fixed BSSID */ - if (sdata->u.ibss.bssid) + if (sdata->u.ibss.fixed_bssid) goto put_bss; /* not an IBSS */ -- cgit v1.2.2 From 11380a4b2d86fae9a6bce75c9373668cc323fe57 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 19 Jan 2010 13:46:10 -0800 Subject: net: Unexport napi_gro_flush(). Nothing outside of net/core/dev.c uses it. Signed-off-by: David S. 
Miller --- net/core/dev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index a008f6987a95..5747b9edc1bb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2582,7 +2582,7 @@ out: return netif_receive_skb(skb); } -void napi_gro_flush(struct napi_struct *napi) +static void napi_gro_flush(struct napi_struct *napi) { struct sk_buff *skb, *next; @@ -2595,7 +2595,6 @@ void napi_gro_flush(struct napi_struct *napi) napi->gro_count = 0; napi->gro_list = NULL; } -EXPORT_SYMBOL(napi_gro_flush); enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { -- cgit v1.2.2 From 27e310c91c3433ab9997b925b3fd65e660634c76 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Tue, 19 Jan 2010 10:53:30 -0800 Subject: nl80211: Allow association to change channels during reassociation nl80211_associate() was rejecting (re)association attempts with EBUSY in some cases where we are currently associated with an AP that uses different channel from the destination AP. Fix this by passing the current wdev to rdev_fixed_channel() in the same way that was already done for join-IBSS and connect commands. This allows the fixedchan check to skipped for the current wdev and allows the reassociation to proceed. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4af7991a9ec8..5b79ecf17bea 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3571,6 +3571,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev; struct net_device *dev; + struct wireless_dev *wdev; struct cfg80211_crypto_settings crypto; struct ieee80211_channel *chan, *fixedchan; const u8 *bssid, *ssid, *ie = NULL, *prev_bssid = NULL; @@ -3616,7 +3617,8 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) } mutex_lock(&rdev->devlist_mtx); - fixedchan = rdev_fixed_channel(rdev, NULL); + wdev = dev->ieee80211_ptr; + fixedchan = rdev_fixed_channel(rdev, wdev); if (fixedchan && chan != fixedchan) { err = -EBUSY; mutex_unlock(&rdev->devlist_mtx); -- cgit v1.2.2 From 7981d6f6b280d28779343cff4a88029fe53d1b47 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 19 Jan 2010 14:23:57 -0800 Subject: tipc: Clean up configuration file This patch addresses a number of minor (mostly cosmetic) issues relating to the configuration of TIPC, including the following: - Corrects range limits for maximum number of ports per node - Adds missing range limits for size of log buffer - Removes configuration setting relating to unsupported slave node capability - Standardizes description and help text wording for configuration settings - Removes unneeded blank spaces Signed-off-by: Allan Stephens Signed-off-by: David S. Miller --- net/tipc/Kconfig | 73 +++++++++++++++++++++----------------------------------- 1 file changed, 27 insertions(+), 46 deletions(-) (limited to 'net') diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig index dafbd533067c..b74f78d0c033 100644 --- a/net/tipc/Kconfig +++ b/net/tipc/Kconfig @@ -10,7 +10,7 @@ menuconfig TIPC specially designed for intra cluster communication. This protocol originates from Ericsson where it has been used in carrier grade cluster applications for many years. - + For more information about TIPC, see http://tipc.sourceforge.net. 
This protocol support is also available as a module ( = code which @@ -23,24 +23,23 @@ menuconfig TIPC if TIPC config TIPC_ADVANCED - bool "TIPC: Advanced configuration" + bool "Advanced TIPC configuration" default n help - Saying Y here will open some advanced configuration - for TIPC. Most users do not need to bother, so if - unsure, just say N. + Saying Y here will open some advanced configuration for TIPC. + Most users do not need to bother; if unsure, just say N. config TIPC_ZONES - int "Maximum number of zones in network" + int "Maximum number of zones in a network" depends on TIPC_ADVANCED range 1 255 default "3" help - Max number of zones inside TIPC network. Max supported value - is 255 zones, minimum is 1 + Specifies how many zones can be supported in a TIPC network. + Can range from 1 to 255 zones; default is 3. - Default is 3 zones in a network; setting this to higher - allows more zones but might use more memory. + Setting this to a smaller value saves some memory; + setting it to a higher value allows for more zones. config TIPC_CLUSTERS int "Maximum number of clusters in a zone" @@ -48,70 +47,52 @@ config TIPC_CLUSTERS range 1 1 default "1" help - ***Only 1 (one cluster in a zone) is supported by current code.*** - - (Max number of clusters inside TIPC zone. Max supported - value is 4095 clusters, minimum is 1. + Specifies how many clusters can be supported in a TIPC zone. - Default is 1; setting this to smaller value might save - some memory, setting it to higher - allows more clusters and might consume more memory.) + *** Currently TIPC only supports a single cluster per zone. *** config TIPC_NODES - int "Maximum number of nodes in cluster" + int "Maximum number of nodes in a cluster" depends on TIPC_ADVANCED range 8 2047 default "255" help - Maximum number of nodes inside a TIPC cluster. Maximum - supported value is 2047 nodes, minimum is 8. - - Setting this to a smaller value saves some memory, - setting it to higher allows more nodes. - -config TIPC_SLAVE_NODES - int "Maximum number of slave nodes in cluster" - depends on TIPC_ADVANCED - range 0 2047 - default "0" - help - ***This capability is not supported by current code.*** - - Maximum number of slave nodes inside a TIPC cluster. Maximum - supported value is 2047 nodes, minimum is 0. + Specifies how many nodes can be supported in a TIPC cluster. + Can range from 8 to 2047 nodes; default is 255. - Setting this to a smaller value saves some memory, - setting it to higher allows more nodes. + Setting this to a smaller value saves some memory; + setting it to higher allows for more nodes. config TIPC_PORTS int "Maximum number of ports in a node" depends on TIPC_ADVANCED - range 217 65536 + range 127 65535 default "8191" help - Maximum number of ports within a node. Maximum - supported value is 64535 nodes, minimum is 127. + Specifies how many ports can be supported by a node. + Can range from 127 to 65535 ports; default is 8191. Setting this to a smaller value saves some memory, - setting it to higher allows more ports. + setting it to higher allows for more ports. config TIPC_LOG int "Size of log buffer" depends on TIPC_ADVANCED - default 0 + range 0 32768 + default "0" help - Size (in bytes) of TIPC's internal log buffer, which records the - occurrence of significant events. Maximum supported value - is 32768 bytes, minimum is 0. + Size (in bytes) of TIPC's internal log buffer, which records the + occurrence of significant events. Can range from 0 to 32768 bytes; + default is 0. 
There is no need to enable the log buffer unless the node will be managed remotely via TIPC. config TIPC_DEBUG - bool "Enable debugging support" + bool "Enable debug messages" default n help - This will enable debugging of TIPC. + This enables debugging of TIPC. Only say Y here if you are having trouble with TIPC. It will enable the display of detailed information about what is going on. -- cgit v1.2.2 From b38f6eddeee510ce8178c2d2db54ed25f1d7cb63 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Wed, 20 Jan 2010 10:39:14 +0100 Subject: netfilter: nf_conntrack_ipv6: delete the redundant macro definitions The following three macro definitions are never used, so delete them. Signed-off-by: Shan Wei Acked-by: David S. Miller Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/nf_conntrack_reasm.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 312c20adc83f..1030ce1e6c79 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -45,9 +45,6 @@ #include #include -#define NF_CT_FRAG6_HIGH_THRESH 262144 /* == 256*1024 */ -#define NF_CT_FRAG6_LOW_THRESH 196608 /* == 192*1024 */ -#define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT struct nf_ct_frag6_skb_cb { -- cgit v1.2.2 From 7c070aa947d1a4105742378579c267f6e7fd08a1 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Wed, 20 Jan 2010 10:42:41 +0100 Subject: IPv6: reassembly: replace magic number with macro definitions Use macro to define high/low thresh value, refer to IPV6_FRAG_TIMEOUT. Signed-off-by: Shan Wei Acked-by: David S. Miller Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/nf_conntrack_reasm.c | 4 ++-- net/ipv6/reassembly.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 1030ce1e6c79..744ea49de356 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -666,8 +666,8 @@ int nf_ct_frag6_init(void) nf_frags.frag_expire = nf_ct_frag6_expire; nf_frags.secret_interval = 10 * 60 * HZ; nf_init_frags.timeout = IPV6_FRAG_TIMEOUT; - nf_init_frags.high_thresh = 256 * 1024; - nf_init_frags.low_thresh = 192 * 1024; + nf_init_frags.high_thresh = IPV6_FRAG_HIGH_THRESH; + nf_init_frags.low_thresh = IPV6_FRAG_LOW_THRESH; inet_frags_init_net(&nf_init_frags); inet_frags_init(&nf_frags); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 2cddea3bd6be..15bb122e1ce4 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -747,8 +747,8 @@ static inline void ip6_frags_sysctl_unregister(void) static int ipv6_frags_init_net(struct net *net) { - net->ipv6.frags.high_thresh = 256 * 1024; - net->ipv6.frags.low_thresh = 192 * 1024; + net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; + net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH; net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT; inet_frags_init_net(&net->ipv6.frags); -- cgit v1.2.2 From 4b258461c0b31ded170a1a56b944b0fded1c887b Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Thu, 21 Jan 2010 01:26:29 -0800 Subject: net: Optimize non-gso test checks Avoid checking twice whether skb needs to be linearized, if one skb_linearize was already done. Signed-off-by: Krishna Kumar Signed-off-by: David S. 
Miller --- net/core/dev.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 5747b9edc1bb..4fad9db417b1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1982,6 +1982,21 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, return rc; } +/* + * Returns true if either: + * 1. skb has frag_list and the device doesn't support FRAGLIST, or + * 2. skb is fragmented and the device does not support SG, or if + * at least one of fragments is in highmem and device does not + * support DMA from it. + */ +static inline int skb_needs_linearize(struct sk_buff *skb, + struct net_device *dev) +{ + return (skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) || + (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || + illegal_highdma(dev, skb))); +} + /** * dev_queue_xmit - transmit a buffer * @skb: buffer to transmit @@ -2018,18 +2033,8 @@ int dev_queue_xmit(struct sk_buff *skb) if (netif_needs_gso(dev, skb)) goto gso; - if (skb_has_frags(skb) && - !(dev->features & NETIF_F_FRAGLIST) && - __skb_linearize(skb)) - goto out_kfree_skb; - - /* Fragmented skb is linearized if device does not support SG, - * or if at least one of fragments is in highmem and device - * does not support DMA from it. - */ - if (skb_shinfo(skb)->nr_frags && - (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) && - __skb_linearize(skb)) + /* Convert a paged skb to linear, if required */ + if (skb_needs_linearize(skb, dev) && __skb_linearize(skb)) goto out_kfree_skb; /* If packet is not checksummed and device does not support -- cgit v1.2.2 From 09cb47a2c68f9596927bc05dab0453edb35cd32d Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 21 Jan 2010 02:43:20 -0800 Subject: net/sctp: Eliminate useless code The variable newinet is initialized twice to the same (side effect-free) expression. Drop one initialization. A simplified version of the semantic match that finds this problem is: (http://coccinelle.lip6.fr/) // @forall@ idexpression *x; identifier f!=ERR_PTR; @@ x = f(...) ... when != x ( x = f(...,<+...x...+>,...) | * x = f(...) ) // Signed-off-by: Julia Lawall Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 67fdac9d2d33..f6d1e59c4151 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6359,7 +6359,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, struct sctp_association *asoc) { struct inet_sock *inet = inet_sk(sk); - struct inet_sock *newinet = inet_sk(newsk); + struct inet_sock *newinet; newsk->sk_type = sk->sk_type; newsk->sk_bound_dev_if = sk->sk_bound_dev_if; -- cgit v1.2.2 From e4fca007b06165900d0e44e8d5e251376819bf5d Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Fri, 22 Jan 2010 12:33:09 -0500 Subject: mac80211: avoid NULL ptr deref when using WEP "mac80211: move control.hw_key assignment" changed an if-else into two separate if statments, but the if-else is needed to prevent dereferencing a null info->control.hw_key. This fixes avoids a lock-up during association on my machine when using WEP. Signed-off-by: John W. 
Linville --- net/mac80211/wep.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index 0a4c641c9605..5d745f2d7236 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -310,9 +310,8 @@ static int wep_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) tx->key->conf.keylen, tx->key->conf.keyidx)) return -1; - } - - if (info->control.hw_key->flags & IEEE80211_KEY_FLAG_GENERATE_IV) { + } else if (info->control.hw_key->flags & + IEEE80211_KEY_FLAG_GENERATE_IV) { if (!ieee80211_wep_add_iv(tx->local, skb, tx->key->conf.keylen, tx->key->conf.keyidx)) -- cgit v1.2.2 From b3fbdcf49f940d0703c356441e0daf045e64e076 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 21 Jan 2010 11:40:47 +0100 Subject: mac80211: pass vif and station to update_tkip_key When a TKIP key is updated, we should pass the station pointer instead of just the address, since drivers can use that to store their own data. We also need to pass the virtual interface pointer. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/driver-ops.h | 14 ++++++++++---- net/mac80211/driver-trace.h | 15 +++++++++------ net/mac80211/tkip.c | 12 +++++------- 3 files changed, 24 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index de91d39e0276..40c6e9a89864 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -137,16 +137,22 @@ static inline int drv_set_key(struct ieee80211_local *local, } static inline void drv_update_tkip_key(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, struct ieee80211_key_conf *conf, - const u8 *address, u32 iv32, + struct sta_info *sta, u32 iv32, u16 *phase1key) { + struct ieee80211_sta *ista = NULL; + might_sleep(); + if (sta) + ista = &sta->sta; + if (local->ops->update_tkip_key) - local->ops->update_tkip_key(&local->hw, conf, address, - iv32, phase1key); - trace_drv_update_tkip_key(local, conf, address, iv32); + local->ops->update_tkip_key(&local->hw, &sdata->vif, conf, + ista, iv32, phase1key); + trace_drv_update_tkip_key(local, sdata, conf, ista, iv32); } static inline int drv_hw_scan(struct ieee80211_local *local, diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 0ea258123b8e..fefa6e6b01bc 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -331,26 +331,29 @@ TRACE_EVENT(drv_set_key, TRACE_EVENT(drv_update_tkip_key, TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, struct ieee80211_key_conf *conf, - const u8 *address, u32 iv32), + struct ieee80211_sta *sta, u32 iv32), - TP_ARGS(local, conf, address, iv32), + TP_ARGS(local, sdata, conf, sta, iv32), TP_STRUCT__entry( LOCAL_ENTRY - __array(u8, addr, 6) + VIF_ENTRY + STA_ENTRY __field(u32, iv32) ), TP_fast_assign( LOCAL_ASSIGN; - memcpy(__entry->addr, address, 6); + VIF_ASSIGN; + STA_ASSIGN; __entry->iv32 = iv32; ), TP_printk( - LOCAL_PR_FMT " addr:%pM iv32:%#x", - LOCAL_PR_ARG, __entry->addr, __entry->iv32 + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " iv32:%#x", + LOCAL_PR_ARG,VIF_PR_ARG,STA_PR_ARG, __entry->iv32 ) ); diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c index 14fe49332c02..7ef491e9d66d 100644 --- a/net/mac80211/tkip.c +++ b/net/mac80211/tkip.c @@ -304,14 +304,12 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, if (key->local->ops->update_tkip_key && key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && key->u.tkip.rx[queue].state != 
TKIP_STATE_PHASE1_HW_UPLOADED) { - static const u8 bcast[ETH_ALEN] = - {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - const u8 *sta_addr = key->sta->sta.addr; + struct ieee80211_sub_if_data *sdata = key->sdata; - if (is_multicast_ether_addr(ra)) - sta_addr = bcast; - - drv_update_tkip_key(key->local, &key->conf, sta_addr, + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata = container_of(key->sdata->bss, + struct ieee80211_sub_if_data, u.ap); + drv_update_tkip_key(key->local, sdata, &key->conf, key->sta, iv32, key->u.tkip.rx[queue].p1k); key->u.tkip.rx[queue].state = TKIP_STATE_PHASE1_HW_UPLOADED; } -- cgit v1.2.2 From ef15aac6073b27fd4f70007784d2d52ed394bf43 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 20 Jan 2010 12:02:33 +0100 Subject: cfg80211: export multiple MAC addresses in sysfs If a device has multiple MAC addresses, userspace will need to know about that. Similarly, if it allows the MAC addresses to vary by a bitmask. If a driver exports multiple addresses, it is assumed that it will be able to deal with that many different addresses, which need not necessarily match the ones programmed into the device; if a mask is set then the device should deal addresses within that mask based on an arbitrary "base address". To test it all and show how it is used, add support to hwsim even though it can't actually deal with addresses different from the default. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/core.c | 12 ++++++++++++ net/wireless/sysfs.c | 20 ++++++++++++++++++++ 2 files changed, 32 insertions(+) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index d07f57c906db..71b6b3a9cf1f 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -413,6 +413,18 @@ int wiphy_register(struct wiphy *wiphy) int i; u16 ifmodes = wiphy->interface_modes; + if (WARN_ON(wiphy->addresses && !wiphy->n_addresses)) + return -EINVAL; + + if (WARN_ON(wiphy->addresses && + !is_zero_ether_addr(wiphy->perm_addr) && + memcmp(wiphy->perm_addr, wiphy->addresses[0].addr, + ETH_ALEN))) + return -EINVAL; + + if (wiphy->addresses) + memcpy(wiphy->perm_addr, wiphy->addresses[0].addr, ETH_ALEN); + /* sanity check ifmodes */ WARN_ON(!ifmodes); ifmodes &= ((1 << __NL80211_IFTYPE_AFTER_LAST) - 1) & ~1; diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index efe3c5c92b2d..9f2cef3e0ca0 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -33,10 +33,30 @@ static ssize_t name ## _show(struct device *dev, \ SHOW_FMT(index, "%d", wiphy_idx); SHOW_FMT(macaddress, "%pM", wiphy.perm_addr); +SHOW_FMT(address_mask, "%pM", wiphy.addr_mask); + +static ssize_t addresses_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct wiphy *wiphy = &dev_to_rdev(dev)->wiphy; + char *start = buf; + int i; + + if (!wiphy->addresses) + return sprintf(buf, "%pM\n", wiphy->perm_addr); + + for (i = 0; i < wiphy->n_addresses; i++) + buf += sprintf(buf, "%pM\n", &wiphy->addresses[i].addr); + + return buf - start; +} static struct device_attribute ieee80211_dev_attrs[] = { __ATTR_RO(index), __ATTR_RO(macaddress), + __ATTR_RO(address_mask), + __ATTR_RO(addresses), {} }; -- cgit v1.2.2 From d0dd2de0d055f0ffb1e2ecdc21380de9d12a85e2 Mon Sep 17 00:00:00 2001 From: Andriy Tkachuk Date: Wed, 20 Jan 2010 13:55:06 +0200 Subject: mac80211: Account HT Control field in Data frame hdrlen according to 802.11n-2009 ieee80211_hdrlen() should account account new HT Control field in 802.11 data frame header introduced by IEEE 802.11n standard. 
According to 802.11n-2009 HT Control field is present in data frames when both of following are met: 1. It is QoS data frame. 2. Order bit is set in Frame Control field. The change might be totally compatible with legacy non-11n aware frames, because 802.11-2007 standard states that "all QoS STAs set this subfield to 0". Signed-off-by: Andriy V. Tkachuk Acked-by : Benoit Papillault Signed-off-by: John W. Linville --- net/wireless/util.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/util.c b/net/wireless/util.c index 23557c1d0a9c..be2ab8c59e3a 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -227,8 +227,11 @@ unsigned int ieee80211_hdrlen(__le16 fc) if (ieee80211_is_data(fc)) { if (ieee80211_has_a4(fc)) hdrlen = 30; - if (ieee80211_is_data_qos(fc)) + if (ieee80211_is_data_qos(fc)) { hdrlen += IEEE80211_QOS_CTL_LEN; + if (ieee80211_has_order(fc)) + hdrlen += IEEE80211_HT_CTL_LEN; + } goto out; } -- cgit v1.2.2 From 4bb29f8c390fb7be207ec3f11b9d30ccdf1cb6ac Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 22 Jan 2010 00:36:39 +0100 Subject: mac80211: fix rx data handling for non-data frames on multiple vifs The loop that passes non-data frames to all relevant vifs inside the __ieee80211_rx_handle_packet keeps a pointer to the previous sdata to avoid having to make unnecessary copies of the frame it's handling. This led to a bug that caused it to apply the ieee80211_rx_data state to the wrong interface, thereby either missing the rx.sta pointer or having it assigned where it shouldn't be. This breaks (among other things) aggregation on some vifs, as action frame exchages are dropped to the cooked monitor interface due to rx->sta being NULL. Fix this by restructuring the loop so that it prepares the rx data just before making the skb copy and calling the rx handlers. Cc: stable@kernel.org Signed-off-by: Felix Fietkau Signed-off-by: John W. 
Linville --- net/mac80211/rx.c | 45 ++++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index a8e15b84c05b..7e0b3e340389 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2348,22 +2348,6 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, sdata->vif.type == NL80211_IFTYPE_AP_VLAN) continue; - rx.sta = sta_info_get(sdata, hdr->addr2); - - rx.flags |= IEEE80211_RX_RA_MATCH; - prepares = prepare_for_handlers(sdata, &rx, hdr); - - if (!prepares) - continue; - - if (status->flag & RX_FLAG_MMIC_ERROR) { - rx.sdata = sdata; - if (rx.flags & IEEE80211_RX_RA_MATCH) - ieee80211_rx_michael_mic_report(hdr, - &rx); - continue; - } - /* * frame is destined for this interface, but if it's * not also for the previous one we handle that after @@ -2375,6 +2359,22 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, continue; } + rx.sta = sta_info_get(prev, hdr->addr2); + + rx.flags |= IEEE80211_RX_RA_MATCH; + prepares = prepare_for_handlers(prev, &rx, hdr); + + if (!prepares) + goto next; + + if (status->flag & RX_FLAG_MMIC_ERROR) { + rx.sdata = prev; + if (rx.flags & IEEE80211_RX_RA_MATCH) + ieee80211_rx_michael_mic_report(hdr, + &rx); + goto next; + } + /* * frame was destined for the previous interface * so invoke RX handlers for it @@ -2387,11 +2387,22 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, "multicast frame for %s\n", wiphy_name(local->hw.wiphy), prev->name); - continue; + goto next; } ieee80211_invoke_rx_handlers(prev, &rx, skb_new, rate); +next: prev = sdata; } + + if (prev) { + rx.sta = sta_info_get(prev, hdr->addr2); + + rx.flags |= IEEE80211_RX_RA_MATCH; + prepares = prepare_for_handlers(prev, &rx, hdr); + + if (!prepares) + prev = NULL; + } } if (prev) ieee80211_invoke_rx_handlers(prev, &rx, skb, rate); -- cgit v1.2.2 From 477781477a88f60c89003c852def4aedc6f78101 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 22 Jan 2010 22:21:18 +0100 Subject: netfiltr: ipt_CLUSTERIP: simplify seq_file codeA Pass "struct clusterip_config" itself to seq_file iterators and save one dereference. Proc entry itself isn't interesting. 
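As a generic illustration of the pattern being applied here (hypothetical foo_* names, assuming an ordinary struct seq_operations table exists; not part of the patch), the object the iterators actually need is stashed in seq_file->private at open time, so they never have to go through the proc entry:

    #include <linux/proc_fs.h>
    #include <linux/seq_file.h>

    /* assumed to be defined elsewhere in this hypothetical example */
    extern const struct seq_operations foo_seq_ops;

    static int foo_proc_open(struct inode *inode, struct file *file)
    {
            int ret = seq_open(file, &foo_seq_ops);

            if (!ret) {
                    struct seq_file *sf = file->private_data;

                    /* point straight at the interesting object,
                     * not at the proc_dir_entry */
                    sf->private = PDE(inode)->data;
            }
            return ret;
    }
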
Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 40ca2d240abb..0886f96c736b 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -560,8 +560,7 @@ struct clusterip_seq_position { static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) { - const struct proc_dir_entry *pde = s->private; - struct clusterip_config *c = pde->data; + struct clusterip_config *c = s->private; unsigned int weight; u_int32_t local_nodes; struct clusterip_seq_position *idx; @@ -632,10 +631,9 @@ static int clusterip_proc_open(struct inode *inode, struct file *file) if (!ret) { struct seq_file *sf = file->private_data; - struct proc_dir_entry *pde = PDE(inode); - struct clusterip_config *c = pde->data; + struct clusterip_config *c = PDE(inode)->data; - sf->private = pde; + sf->private = c; clusterip_config_get(c); } @@ -645,8 +643,7 @@ static int clusterip_proc_open(struct inode *inode, struct file *file) static int clusterip_proc_release(struct inode *inode, struct file *file) { - struct proc_dir_entry *pde = PDE(inode); - struct clusterip_config *c = pde->data; + struct clusterip_config *c = PDE(inode)->data; int ret; ret = seq_release(inode, file); @@ -660,10 +657,9 @@ static int clusterip_proc_release(struct inode *inode, struct file *file) static ssize_t clusterip_proc_write(struct file *file, const char __user *input, size_t size, loff_t *ofs) { + struct clusterip_config *c = PDE(file->f_path.dentry->d_inode)->data; #define PROC_WRITELEN 10 char buffer[PROC_WRITELEN+1]; - const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); - struct clusterip_config *c = pde->data; unsigned long nodenum; if (copy_from_user(buffer, input, PROC_WRITELEN)) -- cgit v1.2.2 From f12553ebe045a8a40ab33fa500fb57d10706e226 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 22 Jan 2010 22:07:59 +0100 Subject: mac80211: add missing key check ieee80211_tx_h_select_key might decide that a frame need not be encrypted at all, in which case it will clear tx->key. In that case it may crash if a key was previously selected, e.g. as the default key. This is also due to my patch "mac80211: move control.hw_key assignment". Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index da557b0d0114..fcfa988a37a3 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -557,7 +557,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) break; } - if (!skip_hw && + if (!skip_hw && tx->key && tx->key->conf.flags & KEY_FLAG_UPLOADED_TO_HARDWARE) info->control.hw_key = &tx->key->conf; } -- cgit v1.2.2 From 81c1ebfc4379f529b001e23164dd5c2282bdc0ec Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 22 Jan 2010 10:16:05 +0000 Subject: neigh: simplify seq_file code Simpily pass 'struct neigh_table' with seq_file private pointer, and save one dereference. Proc entry itself isn't interesting. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- net/core/neighbour.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index f35377b643e4..f2efd72da799 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2417,8 +2417,7 @@ EXPORT_SYMBOL(neigh_seq_stop); static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos) { - struct proc_dir_entry *pde = seq->private; - struct neigh_table *tbl = pde->data; + struct neigh_table *tbl = seq->private; int cpu; if (*pos == 0) @@ -2435,8 +2434,7 @@ static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos) static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct proc_dir_entry *pde = seq->private; - struct neigh_table *tbl = pde->data; + struct neigh_table *tbl = seq->private; int cpu; for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { @@ -2455,8 +2453,7 @@ static void neigh_stat_seq_stop(struct seq_file *seq, void *v) static int neigh_stat_seq_show(struct seq_file *seq, void *v) { - struct proc_dir_entry *pde = seq->private; - struct neigh_table *tbl = pde->data; + struct neigh_table *tbl = seq->private; struct neigh_statistics *st = v; if (v == SEQ_START_TOKEN) { @@ -2501,7 +2498,7 @@ static int neigh_stat_seq_open(struct inode *inode, struct file *file) if (!ret) { struct seq_file *sf = file->private_data; - sf->private = PDE(inode); + sf->private = PDE(inode)->data; } return ret; }; -- cgit v1.2.2 From 5833929cc2ad2b3064b4fac8c44e293972d240d8 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 22 Jan 2010 10:17:26 +0000 Subject: net: constify MIB name tables Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv4/proc.c | 4 ++-- net/ipv6/proc.c | 12 ++++++------ net/sctp/proc.c | 2 +- net/xfrm/xfrm_proc.c | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index f25542c48b7d..1b09a6dde7c0 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -127,8 +127,8 @@ static const struct snmp_mib snmp4_ipextstats_list[] = { SNMP_MIB_SENTINEL }; -static struct { - char *name; +static const struct { + const char *name; int index; } icmpmibmap[] = { { "DestUnreachs", ICMP_DEST_UNREACH }, diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 02f20016b4c9..bfe2598dd563 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -59,7 +59,7 @@ static const struct file_operations sockstat6_seq_fops = { .release = single_release_net, }; -static struct snmp_mib snmp6_ipstats_list[] = { +static const struct snmp_mib snmp6_ipstats_list[] = { /* ipv6 mib according to RFC 2465 */ SNMP_MIB_ITEM("Ip6InReceives", IPSTATS_MIB_INPKTS), SNMP_MIB_ITEM("Ip6InHdrErrors", IPSTATS_MIB_INHDRERRORS), @@ -92,7 +92,7 @@ static struct snmp_mib snmp6_ipstats_list[] = { SNMP_MIB_SENTINEL }; -static struct snmp_mib snmp6_icmp6_list[] = { +static const struct snmp_mib snmp6_icmp6_list[] = { /* icmpv6 mib according to RFC 2466 */ SNMP_MIB_ITEM("Icmp6InMsgs", ICMP6_MIB_INMSGS), SNMP_MIB_ITEM("Icmp6InErrors", ICMP6_MIB_INERRORS), @@ -120,7 +120,7 @@ static const char *const icmp6type2name[256] = { }; -static struct snmp_mib snmp6_udp6_list[] = { +static const struct snmp_mib snmp6_udp6_list[] = { SNMP_MIB_ITEM("Udp6InDatagrams", UDP_MIB_INDATAGRAMS), SNMP_MIB_ITEM("Udp6NoPorts", UDP_MIB_NOPORTS), SNMP_MIB_ITEM("Udp6InErrors", UDP_MIB_INERRORS), @@ -128,7 +128,7 @@ static struct snmp_mib snmp6_udp6_list[] = { SNMP_MIB_SENTINEL }; -static struct snmp_mib snmp6_udplite6_list[] = { +static const struct 
snmp_mib snmp6_udplite6_list[] = { SNMP_MIB_ITEM("UdpLite6InDatagrams", UDP_MIB_INDATAGRAMS), SNMP_MIB_ITEM("UdpLite6NoPorts", UDP_MIB_NOPORTS), SNMP_MIB_ITEM("UdpLite6InErrors", UDP_MIB_INERRORS), @@ -170,8 +170,8 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void **mib) return; } -static inline void -snmp6_seq_show_item(struct seq_file *seq, void **mib, struct snmp_mib *itemlist) +static void snmp6_seq_show_item(struct seq_file *seq, void **mib, + const struct snmp_mib *itemlist) { int i; for (i=0; itemlist[i].name; i++) diff --git a/net/sctp/proc.c b/net/sctp/proc.c index d093cbfeaac4..a5ac6e0a8d9c 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -40,7 +40,7 @@ #include #include /* for snmp_fold_field */ -static struct snmp_mib sctp_snmp_list[] = { +static const struct snmp_mib sctp_snmp_list[] = { SNMP_MIB_ITEM("SctpCurrEstab", SCTP_MIB_CURRESTAB), SNMP_MIB_ITEM("SctpActiveEstabs", SCTP_MIB_ACTIVEESTABS), SNMP_MIB_ITEM("SctpPassiveEstabs", SCTP_MIB_PASSIVEESTABS), diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index fef8db553e8d..c083a4e4e796 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -15,7 +15,7 @@ #include #include -static struct snmp_mib xfrm_mib_list[] = { +static const struct snmp_mib xfrm_mib_list[] = { SNMP_MIB_ITEM("XfrmInError", LINUX_MIB_XFRMINERROR), SNMP_MIB_ITEM("XfrmInBufferError", LINUX_MIB_XFRMINBUFFERERROR), SNMP_MIB_ITEM("XfrmInHdrError", LINUX_MIB_XFRMINHDRERROR), -- cgit v1.2.2 From e754834e65220b2b674c55c3b6dfb2fb1a2804d0 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 22 Jan 2010 10:18:25 +0000 Subject: icmp: move icmp_err_convert[] to .rodata Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index fe11f60ce41b..4b4c2bcd15db 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -114,7 +114,7 @@ struct icmp_bxm { /* An array of errno for error messages from dest unreach. */ /* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */ -struct icmp_err icmp_err_convert[] = { +const struct icmp_err icmp_err_convert[] = { { .errno = ENETUNREACH, /* ICMP_NET_UNREACH */ .fatal = 0, -- cgit v1.2.2 From e9017b55189355e9e6569990a18919e83f35bccb Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Sat, 23 Jan 2010 01:57:42 -0800 Subject: IP: Send an ICMP "Fragment Reassembly Timeout" message when enabling connection track No matter whether connection track is enabled, an end host should send an ICMPv4 "Fragment Reassembly Timeout" message when defrag timeout. The reasons are following two points: 1. RFC 792 says: >>>> >> > > If a host reassembling a fragmented datagram cannot complete the >>>> >> > > reassembly due to missing fragments within its time limit it >>>> >> > > discards the datagram, and it may send a time exceeded message. >>>> >> > > >>>> >> > > If fragment zero is not available then no time exceeded need be >>>> >> > > sent at all. >>>> >> > > >>>> >> > > Read more: http://www.faqs.org/rfcs/rfc792.html#ixzz0aOXRD7Wp 2. Patrick McHardy also agrees with this opinion. :-) About the discussion of this opinion, refer to http://patchwork.ozlabs.org/patch/41649 The patch fixed the problem like this: When enabling connection track, fragments are received at PRE_ROUTING HOOK. If they are failed to reassemble, ip_expire() will be called. 
Before sending an ICMP "Fragment Reassembly Timeout" message, the patch searches router table to get the destination entry only for host type. The patch has been tested on both host type and route type. Signed-off-by: Shan Wei Signed-off-by: David S. Miller --- net/ipv4/ip_fragment.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 891c72aea520..9f41bd311754 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -32,6 +32,8 @@ #include #include #include +#include +#include #include #include #include @@ -205,13 +207,37 @@ static void ip_expire(unsigned long arg) if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { struct sk_buff *head = qp->q.fragments; - /* Send an ICMP "Fragment Reassembly Timeout" message. */ rcu_read_lock(); head->dev = dev_get_by_index_rcu(net, qp->iif); - if (head->dev) - icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); - rcu_read_unlock(); + if (!head->dev) + goto out_rcu_unlock; + + /* + * Only search router table for the head fragment, + * when defraging timeout at PRE_ROUTING HOOK. + */ + if (qp->user == IP_DEFRAG_CONNTRACK_IN && !skb_dst(head)) { + const struct iphdr *iph = ip_hdr(head); + int err = ip_route_input(head, iph->daddr, iph->saddr, + iph->tos, head->dev); + if (unlikely(err)) + goto out_rcu_unlock; + + /* + * Only an end host needs to send an ICMP + * "Fragment Reassembly Timeout" message, per RFC792. + */ + if (skb_rtable(head)->rt_type != RTN_LOCAL) + goto out_rcu_unlock; + + } + + /* Send an ICMP "Fragment Reassembly Timeout" message. */ + icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); } + +out_rcu_unlock: + rcu_read_unlock(); out: spin_unlock(&qp->q.lock); ipq_put(qp); -- cgit v1.2.2 From 3b43a18743421cccd33902e29016fa49b2d52dbb Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Sat, 23 Jan 2010 20:27:14 +0200 Subject: mac80211: fix tx select key null pointer crash with hostapd Pavel Roskin reported a crash in ieee80211_tx_h_select_key(): http://marc.info/?l=linux-wireless&m=126419655108528&w=2 This is a regression from patch "mac80211: move control.hw_key assignment". Fix it as suggested by Johannes, adding an else statement to make sure that tx->key is not accessed when it's null. Compile-tested only. Reported-by: Pavel Roskin Cc: Johannes Berg Signed-off-by: Kalle Valo Tested-by: Bob Copeland Signed-off-by: John W. Linville --- net/mac80211/tx.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index fcfa988a37a3..d017b3530d4d 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -547,9 +547,10 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) !ieee80211_use_mfp(hdr->frame_control, tx->sta, tx->skb)) tx->key = NULL; - skip_hw = (tx->key->conf.flags & - IEEE80211_KEY_FLAG_SW_MGMT) && - ieee80211_is_mgmt(hdr->frame_control); + else + skip_hw = (tx->key->conf.flags & + IEEE80211_KEY_FLAG_SW_MGMT) && + ieee80211_is_mgmt(hdr->frame_control); break; case ALG_AES_CMAC: if (!ieee80211_is_mgmt(hdr->frame_control)) -- cgit v1.2.2 From 32e7bfc41110bc8f29ec0f293c3bcee6645fef34 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 25 Jan 2010 13:36:10 -0800 Subject: net: use helpers to access uc list V2 This patch introduces three macros to work with uc list from net drivers. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 4fad9db417b1..2cba5c521e56 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3665,10 +3665,10 @@ void __dev_set_rx_mode(struct net_device *dev) /* Unicast addresses changes may only happen under the rtnl, * therefore calling __dev_set_promiscuity here is safe. */ - if (dev->uc.count > 0 && !dev->uc_promisc) { + if (!netdev_uc_empty(dev) && !dev->uc_promisc) { __dev_set_promiscuity(dev, 1); dev->uc_promisc = 1; - } else if (dev->uc.count == 0 && dev->uc_promisc) { + } else if (netdev_uc_empty(dev) && dev->uc_promisc) { __dev_set_promiscuity(dev, -1); dev->uc_promisc = 0; } -- cgit v1.2.2 From eb807fb23878bc319e029ed8ce3d835d239723a5 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Sun, 24 Jan 2010 14:55:12 +0200 Subject: mac80211: fix update_tkip_key() documentation about the context Johannes noticed that I had incorrectly documented the context of update_tkip_key() driver operation. It must be atomic because all RX code is run inside rcu critical section. Reported-by: Johannes Berg Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- net/mac80211/driver-ops.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 40c6e9a89864..6c31f38ac7f5 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -144,8 +144,6 @@ static inline void drv_update_tkip_key(struct ieee80211_local *local, { struct ieee80211_sta *ista = NULL; - might_sleep(); - if (sta) ista = &sta->sta; -- cgit v1.2.2 From 1396b231b0369c4146988c2f42fb416ae19e2572 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 24 Jan 2010 20:44:35 +0100 Subject: mac80211: fix WARN_ON in the new work code ieee80211_work_rx_mgmt currently enqueues various management frames, including deauth and disassoc frames, however the function ieee80211_work_rx_queued_mgmt does not handle these, as they should only occur if the AP is buggy. It does emit a WARN_ON when this happens though, and several users have reported such instances. Fix the WARN_ON by not queueing such frames in the first place. Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- net/mac80211/work.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/work.c b/net/mac80211/work.c index 81bd5d592bb4..df8277cdb4d0 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -1022,8 +1022,6 @@ ieee80211_rx_result ieee80211_work_rx_mgmt(struct ieee80211_sub_if_data *sdata, case IEEE80211_STYPE_PROBE_RESP: case IEEE80211_STYPE_ASSOC_RESP: case IEEE80211_STYPE_REASSOC_RESP: - case IEEE80211_STYPE_DEAUTH: - case IEEE80211_STYPE_DISASSOC: skb_queue_tail(&local->work_skb_queue, skb); ieee80211_queue_work(&local->hw, &local->work_work); return RX_QUEUED; -- cgit v1.2.2 From 382b16559d599c4260aeb82a5ea5ba44459d1cd2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 25 Jan 2010 11:36:16 +0100 Subject: mac80211: fix sw crypto What a stupid mistake. In commit 813d76694043d00b59475baa1fbfaf54a2eb7fad Author: Johannes Berg Date: Sun Jan 17 01:47:58 2010 +0100 mac80211: move control.hw_key assignment I inserted code testing the wrong flags field, which means that the test is almost always true (it's really testing for the peer's WMM support) and thus the later parts of the stack assume hw crypto will be done even if that's not true. Obviously, that broke software crypto. 
Maxim said so specifically, and Jochen probably uses some cipher that iwl3945 doesn't support in hardware, which might also explain that Maxim reports that even hw crypto is broken. Fix this to test the right flags field. Reported-by: Maxim Levitsky Reported-by: Jochen Friedrich Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index d017b3530d4d..14c70452c245 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -559,7 +559,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) } if (!skip_hw && tx->key && - tx->key->conf.flags & KEY_FLAG_UPLOADED_TO_HARDWARE) + tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) info->control.hw_key = &tx->key->conf; } -- cgit v1.2.2 From 723bae7ee44fd79c1cd3c7531ed581d373920774 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 25 Jan 2010 13:36:36 +0100 Subject: mac80211: track work started through callbacks Currently, the remain_on_channel work callback needs to track in its own data structure whether the work was just started or not. By reordering some code this becomes unnecessary, the generic wk->started variable can still be 'false' on the first invocation and only be 'true' on actual timeout invocations, so that the extra variable can be removed. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 1 - net/mac80211/work.c | 17 ++++++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index c18f576f1848..3067fbd69d63 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -299,7 +299,6 @@ struct ieee80211_work { } assoc; struct { u32 duration; - bool started; } remain; }; diff --git a/net/mac80211/work.c b/net/mac80211/work.c index df8277cdb4d0..7e708d5c88b4 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -535,8 +535,7 @@ ieee80211_remain_on_channel_timeout(struct ieee80211_work *wk) * First time we run, do nothing -- the generic code will * have switched to the right channel etc. 
*/ - if (!wk->remain.started) { - wk->remain.started = true; + if (!wk->started) { wk->timeout = jiffies + msecs_to_jiffies(wk->remain.duration); cfg80211_ready_on_channel(wk->sdata->dev, (unsigned long) wk, @@ -821,15 +820,17 @@ static void ieee80211_work_work(struct work_struct *work) mutex_lock(&local->work_mtx); list_for_each_entry_safe(wk, tmp, &local->work_list, list) { + bool started = wk->started; + /* mark work as started if it's on the current off-channel */ - if (!wk->started && local->tmp_channel && + if (!started && local->tmp_channel && wk->chan == local->tmp_channel && wk->chan_type == local->tmp_channel_type) { - wk->started = true; + started = true; wk->timeout = jiffies; } - if (!wk->started && !local->tmp_channel) { + if (!started && !local->tmp_channel) { /* * TODO: could optimize this by leaving the * station vifs in awake mode if they @@ -842,12 +843,12 @@ static void ieee80211_work_work(struct work_struct *work) local->tmp_channel = wk->chan; local->tmp_channel_type = wk->chan_type; ieee80211_hw_config(local, 0); - wk->started = true; + started = true; wk->timeout = jiffies; } /* don't try to work with items that aren't started */ - if (!wk->started) + if (!started) continue; if (time_is_after_jiffies(wk->timeout)) { @@ -882,6 +883,8 @@ static void ieee80211_work_work(struct work_struct *work) break; } + wk->started = started; + switch (rma) { case WORK_ACT_NONE: /* might have changed the timeout */ -- cgit v1.2.2 From 18c949070b57d2cbcc0b25c5cfa003ece204e468 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 25 Jan 2010 19:07:39 +0100 Subject: mac80211: fill jiffies/vif on filtered frames Filtered frames not only need their control information cleared to avoid wrong checks, but also need to have jiffies and vif assigned so they can be processed or expired. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/status.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 800b6777e0ed..e57ad6b1d7ea 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -53,6 +53,9 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, * modified/encrypted again. */ memset(&info->control, 0, sizeof(info->control)); + + info->control.jiffies = jiffies; + info->control.vif = &sta->sdata->vif; info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING | IEEE80211_TX_INTFL_RETRANSMISSION; -- cgit v1.2.2 From c92b544bd5d8e7ed7d81c77bbecab6df2a95aa53 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Tue, 26 Jan 2010 02:40:38 +0000 Subject: ipv6: conntrack: Add member of user to nf_ct_frag6_queue structure The commit 0b5ccb2(title:ipv6: reassembly: use seperate reassembly queues for conntrack and local delivery) has broken the saddr&&daddr member of nf_ct_frag6_queue when creating new queue. And then hash value generated by nf_hashfn() was not equal with that generated by fq_find(). So, a new received fragment can't be inserted to right queue. The patch fixes the bug with adding member of user to nf_ct_frag6_queue structure. Signed-off-by: Shan Wei Acked-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv6/netfilter/nf_conntrack_reasm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 312c20adc83f..624a54832a7c 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -63,6 +63,7 @@ struct nf_ct_frag6_queue struct inet_frag_queue q; __be32 id; /* fragment id */ + u32 user; struct in6_addr saddr; struct in6_addr daddr; -- cgit v1.2.2 From e578756c35859a459d78d8416195bc5f5ff897d0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 26 Jan 2010 17:04:02 +0100 Subject: netfilter: ctnetlink: fix expectation mask dump The protocol number is not initialized, so userspace can't interpret the layer 4 data properly. Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_netlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 59d8064eb522..42f21c01a93e 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1437,8 +1437,9 @@ ctnetlink_exp_dump_mask(struct sk_buff *skb, struct nlattr *nest_parms; memset(&m, 0xFF, sizeof(m)); - m.src.u.all = mask->src.u.all; memcpy(&m.src.u3, &mask->src.u3, sizeof(m.src.u3)); + m.src.u.all = mask->src.u.all; + m.dst.protonum = tuple->dst.protonum; nest_parms = nla_nest_start(skb, CTA_EXPECT_MASK | NLA_F_NESTED); if (!nest_parms) -- cgit v1.2.2 From 3092ad054406f069991ca561adc74f2d9fbb6867 Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Tue, 26 Jan 2010 15:58:57 +0800 Subject: mac80211: fix NULL pointer dereference when ftrace is enabled I got below kernel oops when I try to bring down the network interface if ftrace is enabled. The root cause is drv_ampdu_action() is passed with a NULL ssn pointer in the BA session tear down case. We need to check and avoid dereferencing it in trace entry assignment. BUG: unable to handle kernel NULL pointer dereference Modules linked in: at (null) IP: [] ftrace_raw_event_drv_ampdu_action+0x10a/0x160 [mac80211] *pde = 00000000 Oops: 0000 [#1] SMP DEBUG_PAGEALLOC [...] Call Trace: [] ? ftrace_raw_event_drv_ampdu_action+0x0/0x160 [mac80211] [] ? __ieee80211_stop_rx_ba_session+0xfc/0x220 [mac80211] [] ? ieee80211_sta_tear_down_BA_sessions+0x3b/0x50 [mac80211] [] ? ieee80211_set_disassoc+0xe6/0x230 [mac80211] [] ? ieee80211_set_disassoc+0x9c/0x230 [mac80211] [] ? ieee80211_mgd_deauth+0x158/0x170 [mac80211] [] ? ieee80211_deauth+0x1b/0x20 [mac80211] [] ? __cfg80211_mlme_deauth+0xe9/0x120 [cfg80211] [] ? __cfg80211_disconnect+0x170/0x1d0 [cfg80211] Cc: Johannes Berg Cc: stable@kernel.org Signed-off-by: Zhu Yi Signed-off-by: John W. Linville --- net/mac80211/driver-trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index ee94ea0c67e9..da8497ef7063 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -680,7 +680,7 @@ TRACE_EVENT(drv_ampdu_action, __entry->ret = ret; __entry->action = action; __entry->tid = tid; - __entry->ssn = *ssn; + __entry->ssn = ssn ? *ssn : 0; ), TP_printk( -- cgit v1.2.2 From c21dbf9214bce129f92e1af05552553ff0e318ed Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 26 Jan 2010 14:15:46 +0100 Subject: cfg80211: export cfg80211_find_ie This new function (previously a static function called just "find_ie" can be used to find a specific IE in a buffer of IEs. 
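A minimal usage sketch may help here; it is not part of the patch. It assumes the helper ends up declared in net/cfg80211.h as part of this export, and the function name plus the ies/ies_len parameters are invented for illustration; only cfg80211_find_ie() and WLAN_EID_SSID come from the code in the diff below.

#include <linux/kernel.h>
#include <linux/ieee80211.h>
#include <net/cfg80211.h>

/* Hypothetical caller: locate and print the SSID element in a buffer of
 * information elements (e.g. from a beacon or probe response). */
static void print_ssid_of(const u8 *ies, int ies_len)
{
	const u8 *ssid_ie = cfg80211_find_ie(WLAN_EID_SSID, ies, ies_len);

	if (!ssid_ie)
		return;		/* no SSID element in this buffer */

	/* ssid_ie[0] is the element ID, ssid_ie[1] the payload length,
	 * and the SSID octets start at ssid_ie + 2. */
	printk(KERN_DEBUG "SSID: %.*s\n", ssid_ie[1], ssid_ie + 2);
}
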
Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/scan.c | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 06b0231ee5e3..978cac3414b5 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -143,9 +143,9 @@ void cfg80211_bss_expire(struct cfg80211_registered_device *dev) dev->bss_generation++; } -static u8 *find_ie(u8 num, u8 *ies, int len) +const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len) { - while (len > 2 && ies[0] != num) { + while (len > 2 && ies[0] != eid) { len -= ies[1] + 2; ies += ies[1] + 2; } @@ -155,11 +155,12 @@ static u8 *find_ie(u8 num, u8 *ies, int len) return NULL; return ies; } +EXPORT_SYMBOL(cfg80211_find_ie); static int cmp_ies(u8 num, u8 *ies1, size_t len1, u8 *ies2, size_t len2) { - const u8 *ie1 = find_ie(num, ies1, len1); - const u8 *ie2 = find_ie(num, ies2, len2); + const u8 *ie1 = cfg80211_find_ie(num, ies1, len1); + const u8 *ie2 = cfg80211_find_ie(num, ies2, len2); int r; if (!ie1 && !ie2) @@ -185,9 +186,9 @@ static bool is_bss(struct cfg80211_bss *a, if (!ssid) return true; - ssidie = find_ie(WLAN_EID_SSID, - a->information_elements, - a->len_information_elements); + ssidie = cfg80211_find_ie(WLAN_EID_SSID, + a->information_elements, + a->len_information_elements); if (!ssidie) return false; if (ssidie[1] != ssid_len) @@ -204,9 +205,9 @@ static bool is_mesh(struct cfg80211_bss *a, if (!is_zero_ether_addr(a->bssid)) return false; - ie = find_ie(WLAN_EID_MESH_ID, - a->information_elements, - a->len_information_elements); + ie = cfg80211_find_ie(WLAN_EID_MESH_ID, + a->information_elements, + a->len_information_elements); if (!ie) return false; if (ie[1] != meshidlen) @@ -214,9 +215,9 @@ static bool is_mesh(struct cfg80211_bss *a, if (memcmp(ie + 2, meshid, meshidlen)) return false; - ie = find_ie(WLAN_EID_MESH_CONFIG, - a->information_elements, - a->len_information_elements); + ie = cfg80211_find_ie(WLAN_EID_MESH_CONFIG, + a->information_elements, + a->len_information_elements); if (!ie) return false; if (ie[1] != sizeof(struct ieee80211_meshconf_ie)) @@ -395,11 +396,12 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, if (is_zero_ether_addr(res->pub.bssid)) { /* must be mesh, verify */ - meshid = find_ie(WLAN_EID_MESH_ID, res->pub.information_elements, - res->pub.len_information_elements); - meshcfg = find_ie(WLAN_EID_MESH_CONFIG, - res->pub.information_elements, - res->pub.len_information_elements); + meshid = cfg80211_find_ie(WLAN_EID_MESH_ID, + res->pub.information_elements, + res->pub.len_information_elements); + meshcfg = cfg80211_find_ie(WLAN_EID_MESH_CONFIG, + res->pub.information_elements, + res->pub.len_information_elements); if (!meshid || !meshcfg || meshcfg[1] != sizeof(struct ieee80211_meshconf_ie)) { /* bogus mesh */ -- cgit v1.2.2 From 56007a028c51cbf800a6c969d6f6431d23443b99 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 26 Jan 2010 14:19:52 +0100 Subject: mac80211: wait for beacon before enabling powersave Because DTIM information is required for powersave but is only conveyed in beacons, wait for a beacon before enabling powersave, and change the way the information is conveyed to the driver accordingly. mwl8k doesn't currently seem to implement PS but requires the DTIM period in a different way; after talking to Lennert we agreed to just have mwl8k do the parsing itself in the finalize_join work. 
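A rough sketch of the resulting sleep-period calculation follows; it is not taken from the patch. The function name and the example numbers are invented, while min_t() and the dtim_period/latency/beacon-interval relationship mirror the mlme.c hunk in the diff below.

#include <linux/kernel.h>

/* Illustration only: how the DTIM period, beacon interval and allowed
 * latency combine into hw.conf.max_sleep_period. */
static int example_max_sleep_period(u8 dtim_period, s32 latency_us,
				    s32 beaconint_us)
{
	int maxslp = 1;

	/* The real code leaves powersave disabled in this case. */
	if (beaconint_us > latency_us)
		return 0;

	/* Invalid or missing TIM element: pretend the DTIM period is 1. */
	if (!dtim_period)
		dtim_period = 1;
	else if (dtim_period > 1)
		maxslp = min_t(int, dtim_period, latency_us / beaconint_us);

	/* e.g. DTIM 3, 100 TU beacons (102400 us), 1 s latency -> 3 */
	return maxslp;
}
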
Signed-off-by: Johannes Berg Acked-by: Lennert Buytenhek Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 27 ++++++++++++++++++++++++--- net/mac80211/scan.c | 4 ---- 2 files changed, 24 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 1e1d16c55ee5..86c6ad1b058d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -484,6 +484,7 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) if (count == 1 && found->u.mgd.powersave && found->u.mgd.associated && + found->u.mgd.associated->beacon_ies && !(found->u.mgd.flags & (IEEE80211_STA_BEACON_POLL | IEEE80211_STA_CONNECTION_POLL))) { s32 beaconint_us; @@ -497,14 +498,22 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) if (beaconint_us > latency) { local->ps_sdata = NULL; } else { - u8 dtimper = found->vif.bss_conf.dtim_period; + struct ieee80211_bss *bss; int maxslp = 1; + u8 dtimper; - if (dtimper > 1) + bss = (void *)found->u.mgd.associated->priv; + dtimper = bss->dtim_period; + + /* If the TIM IE is invalid, pretend the value is 1 */ + if (!dtimper) + dtimper = 1; + else if (dtimper > 1) maxslp = min_t(int, dtimper, latency / beaconint_us); local->hw.conf.max_sleep_period = maxslp; + local->hw.conf.ps_dtim_period = dtimper; local->ps_sdata = found; } } else { @@ -702,7 +711,6 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, /* set timing information */ sdata->vif.bss_conf.beacon_int = cbss->beacon_interval; sdata->vif.bss_conf.timestamp = cbss->tsf; - sdata->vif.bss_conf.dtim_period = bss->dtim_period; bss_info_changed |= BSS_CHANGED_BEACON_INT; bss_info_changed |= ieee80211_handle_bss_capability(sdata, @@ -1168,6 +1176,13 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, int freq; struct ieee80211_bss *bss; struct ieee80211_channel *channel; + bool need_ps = false; + + if (sdata->u.mgd.associated) { + bss = (void *)sdata->u.mgd.associated->priv; + /* not previously set so we may need to recalc */ + need_ps = !bss->dtim_period; + } if (elems->ds_params && elems->ds_params_len == 1) freq = ieee80211_channel_to_frequency(elems->ds_params[0]); @@ -1187,6 +1202,12 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, if (!sdata->u.mgd.associated) return; + if (need_ps) { + mutex_lock(&local->iflist_mtx); + ieee80211_recalc_ps(local, -1); + mutex_unlock(&local->iflist_mtx); + } + if (elems->ch_switch_elem && (elems->ch_switch_elem_len == 3) && (memcmp(mgmt->bssid, sdata->u.mgd.associated->bssid, ETH_ALEN) == 0)) { diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 9afe2f9885dc..bc061f629674 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -111,10 +111,6 @@ ieee80211_bss_info_update(struct ieee80211_local *local, bss->dtim_period = tim_ie->dtim_period; } - /* set default value for buggy AP/no TIM element */ - if (bss->dtim_period == 0) - bss->dtim_period = 1; - bss->supp_rates_len = 0; if (elems->supp_rates) { clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len; -- cgit v1.2.2 From 07396051a5c6901693a97e35cb731a01b0b348e4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 Jan 2010 14:03:47 -0500 Subject: SUNRPC: Use rpc_pton() in ip_map_parse() The existing logic in ip_map_parse() can not currently parse shorthanded IPv6 addresses (anything with a double colon), nor can it parse an IPv6 presentation address with a scope ID. An IPv6-enabled mountd can pass down both. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- net/sunrpc/svcauth_unix.c | 47 ++++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index d8c041114497..97f0e9e12024 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -15,6 +15,7 @@ #include #define RPCDBG_FACILITY RPCDBG_AUTH +#include /* * AUTHUNIX and AUTHNULL credentials are both handled here. @@ -187,10 +188,13 @@ static int ip_map_parse(struct cache_detail *cd, * for scratch: */ char *buf = mesg; int len; - int b1, b2, b3, b4, b5, b6, b7, b8; - char c; char class[8]; - struct in6_addr addr; + union { + struct sockaddr sa; + struct sockaddr_in s4; + struct sockaddr_in6 s6; + } address; + struct sockaddr_in6 sin6; int err; struct ip_map *ipmp; @@ -209,24 +213,24 @@ static int ip_map_parse(struct cache_detail *cd, len = qword_get(&mesg, buf, mlen); if (len <= 0) return -EINVAL; - if (sscanf(buf, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) == 4) { - addr.s6_addr32[0] = 0; - addr.s6_addr32[1] = 0; - addr.s6_addr32[2] = htonl(0xffff); - addr.s6_addr32[3] = - htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4); - } else if (sscanf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x%c", - &b1, &b2, &b3, &b4, &b5, &b6, &b7, &b8, &c) == 8) { - addr.s6_addr16[0] = htons(b1); - addr.s6_addr16[1] = htons(b2); - addr.s6_addr16[2] = htons(b3); - addr.s6_addr16[3] = htons(b4); - addr.s6_addr16[4] = htons(b5); - addr.s6_addr16[5] = htons(b6); - addr.s6_addr16[6] = htons(b7); - addr.s6_addr16[7] = htons(b8); - } else + if (rpc_pton(buf, len, &address.sa, sizeof(address)) == 0) return -EINVAL; + switch (address.sa.sa_family) { + case AF_INET: + /* Form a mapped IPv4 address in sin6 */ + memset(&sin6, 0, sizeof(sin6)); + sin6.sin6_family = AF_INET6; + sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); + sin6.sin6_addr.s6_addr32[3] = address.s4.sin_addr.s_addr; + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + memcpy(&sin6, &address.s6, sizeof(sin6)); + break; +#endif + default: + return -EINVAL; + } expiry = get_expiry(&mesg); if (expiry ==0) @@ -243,7 +247,8 @@ static int ip_map_parse(struct cache_detail *cd, } else dom = NULL; - ipmp = ip_map_lookup(class, &addr); + /* IPv6 scope IDs are ignored for now */ + ipmp = ip_map_lookup(class, &sin6.sin6_addr); if (ipmp) { err = ip_map_update(ipmp, container_of(dom, struct unix_domain, h), -- cgit v1.2.2 From 205ba42308729f4f41f21d314a4435e7de5c9a2e Mon Sep 17 00:00:00 2001 From: Aime Le Rouzic Date: Tue, 26 Jan 2010 14:03:56 -0500 Subject: NFSD: Support AF_INET6 in svc_addsock() function Relax the address family check at the top of svc_addsock() to allow AF_INET6 listener sockets to be specified via /proc/fs/nfsd/portlist. Signed-off-by: Aime Le Rouzic Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- net/sunrpc/svcsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 870929e08e5d..9e093910c479 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1357,7 +1357,7 @@ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return, if (!so) return err; - if (so->sk->sk_family != AF_INET) + if ((so->sk->sk_family != PF_INET) && (so->sk->sk_family != PF_INET6)) err = -EAFNOSUPPORT; else if (so->sk->sk_protocol != IPPROTO_TCP && so->sk->sk_protocol != IPPROTO_UDP) -- cgit v1.2.2 From d6783b2b6c4050df0ba0a84c6842cf5bc2212ef9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 Jan 2010 14:04:04 -0500 Subject: SUNRPC: Bury "#ifdef IPV6" in svc_create_xprt() Clean up: Bruce observed we have more or less common logic in each of svc_create_xprt()'s callers: the check to create an IPv6 RPC listener socket only if CONFIG_IPV6 is set. I'm about to add another case that does just the same. If we move the ifdefs into __svc_xpo_create(), then svc_create_xprt() call sites can get rid of the "#ifdef" ugliness, and can use the same logic with or without IPv6 support available in the kernel. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 7d1f9e928f69..f886ff367c80 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -173,11 +173,13 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, .sin_addr.s_addr = htonl(INADDR_ANY), .sin_port = htons(port), }; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) struct sockaddr_in6 sin6 = { .sin6_family = AF_INET6, .sin6_addr = IN6ADDR_ANY_INIT, .sin6_port = htons(port), }; +#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ struct sockaddr *sap; size_t len; @@ -186,10 +188,12 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, sap = (struct sockaddr *)&sin; len = sizeof(sin); break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case PF_INET6: sap = (struct sockaddr *)&sin6; len = sizeof(sin6); break; +#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ default: return ERR_PTR(-EAFNOSUPPORT); } -- cgit v1.2.2 From 68717908155a9dcd4161f4d730fea478712d9794 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 Jan 2010 14:04:13 -0500 Subject: SUNRPC: NFS kernel APIs shouldn't return ENOENT for "transport not found" write_ports() converts svc_create_xprt()'s ENOENT error return to EPROTONOSUPPORT so that rpc.nfsd (in user space) can report an error message that makes sense. It turns out that several of the other kernel APIs rpc.nfsd use can also return ENOENT from svc_create_xprt(), by way of lockd_up(). On the client side, an NFSv2 or NFSv3 mount request can also return the result of lockd_up(). This error may also be returned during an NFSv4 mount request, since the NFSv4 callback service uses svc_create_xprt() to create the callback listener. An ENOENT error return results in a confusing error message from the mount command. Let's have svc_create_xprt() return EPROTONOSUPPORT instead of ENOENT. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- net/sunrpc/svc_xprt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index f886ff367c80..d7ec5caf998c 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -235,7 +235,10 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name, err: spin_unlock(&svc_xprt_class_lock); dprintk("svc: transport %s not found\n", xprt_name); - return -ENOENT; + + /* This errno is exposed to user space. Provide a reasonable + * perror msg for a bad transport. */ + return -EPROTONOSUPPORT; } EXPORT_SYMBOL_GPL(svc_create_xprt); -- cgit v1.2.2 From 99824461ea72ca0044cc6508f02c0e1cabf37ba5 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 26 Jan 2010 11:40:00 +0000 Subject: net/atm: Convert printk to pr_ Add #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ Remove function names from output Use single line pr_debug instead of broken multiple uses without newline Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/atm/br2684.c | 18 ++--- net/atm/clip.c | 60 ++++++++-------- net/atm/common.c | 52 +++++++------- net/atm/ioctl.c | 7 +- net/atm/lec.c | 192 ++++++++++++++++++++++------------------------------ net/atm/mpc.c | 4 +- net/atm/mpoa_proc.c | 3 +- net/atm/pppoatm.c | 17 ++--- net/atm/raw.c | 5 +- net/atm/resources.c | 12 ++-- net/atm/signaling.c | 19 +++--- net/atm/svc.c | 13 ++-- 12 files changed, 190 insertions(+), 212 deletions(-) (limited to 'net') diff --git a/net/atm/br2684.c b/net/atm/br2684.c index c9230c398697..d72b1a579911 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -6,6 +6,8 @@ * Eric Kinzie, 2006-2007, US Naval Research Laboratory */ +#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ + #include #include #include @@ -148,7 +150,7 @@ static void br2684_pop(struct atm_vcc *vcc, struct sk_buff *skb) struct br2684_vcc *brvcc = BR2684_VCC(vcc); struct net_device *net_dev = skb->dev; - pr_debug("br2684_pop(vcc %p ; net_dev %p )\n", vcc, net_dev); + pr_debug("(vcc %p ; net_dev %p )\n", vcc, net_dev); brvcc->old_pop(vcc, skb); if (!net_dev) @@ -244,7 +246,7 @@ static netdev_tx_t br2684_start_xmit(struct sk_buff *skb, struct br2684_dev *brdev = BRPRIV(dev); struct br2684_vcc *brvcc; - pr_debug("br2684_start_xmit, skb_dst(skb)=%p\n", skb_dst(skb)); + pr_debug("skb_dst(skb)=%p\n", skb_dst(skb)); read_lock(&devs_lock); brvcc = pick_outgoing_vcc(skb, brdev); if (brvcc == NULL) { @@ -352,7 +354,7 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) struct net_device *net_dev = brvcc->device; struct br2684_dev *brdev = BRPRIV(net_dev); - pr_debug("br2684_push\n"); + pr_debug("\n"); if (unlikely(skb == NULL)) { /* skb==NULL means VCC is being destroyed */ @@ -479,8 +481,7 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg) write_lock_irq(&devs_lock); net_dev = br2684_find_dev(&be.ifspec); if (net_dev == NULL) { - printk(KERN_ERR - "br2684: tried to attach to non-existant device\n"); + pr_err("tried to attach to non-existant device\n"); err = -ENXIO; goto error; } @@ -503,8 +504,7 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg) err = -EINVAL; goto error; } - pr_debug("br2684_regvcc vcc=%p, encaps=%d, brvcc=%p\n", atmvcc, - be.encaps, brvcc); + pr_debug("vcc=%p, encaps=%d, brvcc=%p\n", atmvcc, be.encaps, brvcc); if (list_empty(&brdev->brvccs) && !brdev->mac_was_set) { unsigned char *esi = atmvcc->dev->esi; if (esi[0] | esi[1] | esi[2] | esi[3] | esi[4] | esi[5]) @@ -595,7 +595,7 @@ static 
int br2684_create(void __user * arg) struct atm_newif_br2684 ni; enum br2684_payload payload; - pr_debug("br2684_create\n"); + pr_debug("\n"); if (copy_from_user(&ni, arg, sizeof ni)) { return -EFAULT; @@ -624,7 +624,7 @@ static int br2684_create(void __user * arg) /* open, stop, do_ioctl ? */ err = register_netdev(netdev); if (err < 0) { - printk(KERN_ERR "br2684_create: register_netdev failed\n"); + pr_err("register_netdev failed\n"); free_netdev(netdev); return err; } diff --git a/net/atm/clip.c b/net/atm/clip.c index 64629c354343..d7939fd58cd3 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -2,6 +2,8 @@ /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ +#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ + #include #include #include /* for UINT_MAX */ @@ -51,7 +53,7 @@ static int to_atmarpd(enum atmarp_ctrl_type type, int itf, __be32 ip) struct atmarp_ctrl *ctrl; struct sk_buff *skb; - pr_debug("to_atmarpd(%d)\n", type); + pr_debug("(%d)\n", type); if (!atmarpd) return -EUNATCH; skb = alloc_skb(sizeof(struct atmarp_ctrl),GFP_ATOMIC); @@ -71,8 +73,7 @@ static int to_atmarpd(enum atmarp_ctrl_type type, int itf, __be32 ip) static void link_vcc(struct clip_vcc *clip_vcc, struct atmarp_entry *entry) { - pr_debug("link_vcc %p to entry %p (neigh %p)\n", clip_vcc, entry, - entry->neigh); + pr_debug("%p to entry %p (neigh %p)\n", clip_vcc, entry, entry->neigh); clip_vcc->entry = entry; clip_vcc->xoff = 0; /* @@@ may overrun buffer by one packet */ clip_vcc->next = entry->vccs; @@ -86,7 +87,7 @@ static void unlink_clip_vcc(struct clip_vcc *clip_vcc) struct clip_vcc **walk; if (!entry) { - printk(KERN_CRIT "!clip_vcc->entry (clip_vcc %p)\n", clip_vcc); + pr_crit("!clip_vcc->entry (clip_vcc %p)\n", clip_vcc); return; } netif_tx_lock_bh(entry->neigh->dev); /* block clip_start_xmit() */ @@ -106,12 +107,10 @@ static void unlink_clip_vcc(struct clip_vcc *clip_vcc) error = neigh_update(entry->neigh, NULL, NUD_NONE, NEIGH_UPDATE_F_ADMIN); if (error) - printk(KERN_CRIT "unlink_clip_vcc: " - "neigh_update failed with %d\n", error); + pr_crit("neigh_update failed with %d\n", error); goto out; } - printk(KERN_CRIT "ATMARP: unlink_clip_vcc failed (entry %p, vcc " - "0x%p)\n", entry, clip_vcc); + pr_crit("ATMARP: failed (entry %p, vcc 0x%p)\n", entry, clip_vcc); out: netif_tx_unlock_bh(entry->neigh->dev); } @@ -127,7 +126,7 @@ static int neigh_check_cb(struct neighbour *n) if (cv->idle_timeout && time_after(jiffies, exp)) { pr_debug("releasing vcc %p->%p of entry %p\n", - cv, cv->vcc, entry); + cv, cv->vcc, entry); vcc_release_async(cv->vcc, -ETIMEDOUT); } } @@ -139,7 +138,7 @@ static int neigh_check_cb(struct neighbour *n) struct sk_buff *skb; pr_debug("destruction postponed with ref %d\n", - atomic_read(&n->refcnt)); + atomic_read(&n->refcnt)); while ((skb = skb_dequeue(&n->arp_queue)) != NULL) dev_kfree_skb(skb); @@ -163,7 +162,7 @@ static int clip_arp_rcv(struct sk_buff *skb) { struct atm_vcc *vcc; - pr_debug("clip_arp_rcv\n"); + pr_debug("\n"); vcc = ATM_SKB(skb)->vcc; if (!vcc || !atm_charge(vcc, skb->truesize)) { dev_kfree_skb_any(skb); @@ -188,7 +187,7 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb) { struct clip_vcc *clip_vcc = CLIP_VCC(vcc); - pr_debug("clip push\n"); + pr_debug("\n"); if (!skb) { pr_debug("removing VCC %p\n", clip_vcc); if (clip_vcc->entry) @@ -239,7 +238,7 @@ static void clip_pop(struct atm_vcc *vcc, struct sk_buff *skb) int old; unsigned long flags; - pr_debug("clip_pop(vcc %p)\n", vcc); + pr_debug("(vcc %p)\n", vcc); clip_vcc->old_pop(vcc, 
skb); /* skb->dev == NULL in outbound ARP packets */ if (!dev) @@ -255,7 +254,7 @@ static void clip_pop(struct atm_vcc *vcc, struct sk_buff *skb) static void clip_neigh_solicit(struct neighbour *neigh, struct sk_buff *skb) { - pr_debug("clip_neigh_solicit (neigh %p, skb %p)\n", neigh, skb); + pr_debug("(neigh %p, skb %p)\n", neigh, skb); to_atmarpd(act_need, PRIV(neigh->dev)->number, NEIGH2ENTRY(neigh)->ip); } @@ -284,7 +283,7 @@ static int clip_constructor(struct neighbour *neigh) struct in_device *in_dev; struct neigh_parms *parms; - pr_debug("clip_constructor (neigh %p, entry %p)\n", neigh, entry); + pr_debug("(neigh %p, entry %p)\n", neigh, entry); neigh->type = inet_addr_type(&init_net, entry->ip); if (neigh->type != RTN_UNICAST) return -EINVAL; @@ -369,9 +368,9 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb, int old; unsigned long flags; - pr_debug("clip_start_xmit (skb %p)\n", skb); + pr_debug("(skb %p)\n", skb); if (!skb_dst(skb)) { - printk(KERN_ERR "clip_start_xmit: skb_dst(skb) == NULL\n"); + pr_err("skb_dst(skb) == NULL\n"); dev_kfree_skb(skb); dev->stats.tx_dropped++; return NETDEV_TX_OK; @@ -385,7 +384,7 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb, return 0; } #endif - printk(KERN_ERR "clip_start_xmit: NO NEIGHBOUR !\n"); + pr_err("NO NEIGHBOUR !\n"); dev_kfree_skb(skb); dev->stats.tx_dropped++; return NETDEV_TX_OK; @@ -421,7 +420,7 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb, pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, vcc, vcc->dev); old = xchg(&entry->vccs->xoff, 1); /* assume XOFF ... */ if (old) { - printk(KERN_WARNING "clip_start_xmit: XOFF->XOFF transition\n"); + pr_warning("XOFF->XOFF transition\n"); return NETDEV_TX_OK; } dev->stats.tx_packets++; @@ -456,7 +455,7 @@ static int clip_mkip(struct atm_vcc *vcc, int timeout) clip_vcc = kmalloc(sizeof(struct clip_vcc), GFP_KERNEL); if (!clip_vcc) return -ENOMEM; - pr_debug("mkip clip_vcc %p vcc %p\n", clip_vcc, vcc); + pr_debug("%p vcc %p\n", clip_vcc, vcc); clip_vcc->vcc = vcc; vcc->user_back = clip_vcc; set_bit(ATM_VF_IS_CLIP, &vcc->flags); @@ -506,16 +505,16 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip) struct rtable *rt; if (vcc->push != clip_push) { - printk(KERN_WARNING "clip_setentry: non-CLIP VCC\n"); + pr_warning("non-CLIP VCC\n"); return -EBADF; } clip_vcc = CLIP_VCC(vcc); if (!ip) { if (!clip_vcc->entry) { - printk(KERN_ERR "hiding hidden ATMARP entry\n"); + pr_err("hiding hidden ATMARP entry\n"); return 0; } - pr_debug("setentry: remove\n"); + pr_debug("remove\n"); unlink_clip_vcc(clip_vcc); return 0; } @@ -529,9 +528,9 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip) entry = NEIGH2ENTRY(neigh); if (entry != clip_vcc->entry) { if (!clip_vcc->entry) - pr_debug("setentry: add\n"); + pr_debug("add\n"); else { - pr_debug("setentry: update\n"); + pr_debug("update\n"); unlink_clip_vcc(clip_vcc); } link_vcc(clip_vcc, entry); @@ -614,16 +613,16 @@ static int clip_device_event(struct notifier_block *this, unsigned long event, switch (event) { case NETDEV_UP: - pr_debug("clip_device_event NETDEV_UP\n"); + pr_debug("NETDEV_UP\n"); to_atmarpd(act_up, PRIV(dev)->number, 0); break; case NETDEV_GOING_DOWN: - pr_debug("clip_device_event NETDEV_DOWN\n"); + pr_debug("NETDEV_DOWN\n"); to_atmarpd(act_down, PRIV(dev)->number, 0); break; case NETDEV_CHANGE: case NETDEV_CHANGEMTU: - pr_debug("clip_device_event NETDEV_CHANGE*\n"); + pr_debug("NETDEV_CHANGE*\n"); to_atmarpd(act_change, PRIV(dev)->number, 0); break; } @@ -660,7 +659,7 @@ static struct notifier_block 
clip_inet_notifier = { static void atmarpd_close(struct atm_vcc *vcc) { - pr_debug("atmarpd_close\n"); + pr_debug("\n"); rtnl_lock(); atmarpd = NULL; @@ -950,8 +949,7 @@ static int __init atm_clip_init(void) p = proc_create("arp", S_IRUGO, atm_proc_root, &arp_seq_fops); if (!p) { - printk(KERN_ERR "Unable to initialize " - "/proc/net/atm/arp\n"); + pr_err("Unable to initialize /proc/net/atm/arp\n"); atm_clip_exit_noproc(); return -ENOMEM; } diff --git a/net/atm/common.c b/net/atm/common.c index d61e051e0a3f..17f7e5f2131b 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -2,6 +2,7 @@ /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ +#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ #include #include @@ -64,8 +65,7 @@ static struct sk_buff *alloc_tx(struct atm_vcc *vcc,unsigned int size) if (sk_wmem_alloc_get(sk) && !atm_may_send(vcc, size)) { pr_debug("Sorry: wmem_alloc = %d, size = %d, sndbuf = %d\n", - sk_wmem_alloc_get(sk), size, - sk->sk_sndbuf); + sk_wmem_alloc_get(sk), size, sk->sk_sndbuf); return NULL; } while (!(skb = alloc_skb(size, GFP_KERNEL))) @@ -249,8 +249,7 @@ static int adjust_tp(struct atm_trafprm *tp,unsigned char aal) max_sdu = ATM_MAX_AAL34_PDU; break; default: - printk(KERN_WARNING "ATM: AAL problems ... " - "(%d)\n",aal); + pr_warning("AAL problems ... (%d)\n", aal); /* fall through */ case ATM_AAL5: max_sdu = ATM_MAX_AAL5_PDU; @@ -385,11 +384,17 @@ static int __vcc_connect(struct atm_vcc *vcc, struct atm_dev *dev, short vpi, if (!error) error = adjust_tp(&vcc->qos.rxtp,vcc->qos.aal); if (error) goto fail; - pr_debug("VCC %d.%d, AAL %d\n",vpi,vci,vcc->qos.aal); - pr_debug(" TX: %d, PCR %d..%d, SDU %d\n",vcc->qos.txtp.traffic_class, - vcc->qos.txtp.min_pcr,vcc->qos.txtp.max_pcr,vcc->qos.txtp.max_sdu); - pr_debug(" RX: %d, PCR %d..%d, SDU %d\n",vcc->qos.rxtp.traffic_class, - vcc->qos.rxtp.min_pcr,vcc->qos.rxtp.max_pcr,vcc->qos.rxtp.max_sdu); + pr_debug("VCC %d.%d, AAL %d\n", vpi, vci, vcc->qos.aal); + pr_debug(" TX: %d, PCR %d..%d, SDU %d\n", + vcc->qos.txtp.traffic_class, + vcc->qos.txtp.min_pcr, + vcc->qos.txtp.max_pcr, + vcc->qos.txtp.max_sdu); + pr_debug(" RX: %d, PCR %d..%d, SDU %d\n", + vcc->qos.rxtp.traffic_class, + vcc->qos.rxtp.min_pcr, + vcc->qos.rxtp.max_pcr, + vcc->qos.rxtp.max_sdu); if (dev->ops->open) { if ((error = dev->ops->open(vcc))) @@ -413,7 +418,7 @@ int vcc_connect(struct socket *sock, int itf, short vpi, int vci) struct atm_vcc *vcc = ATM_SD(sock); int error; - pr_debug("vcc_connect (vpi %d, vci %d)\n",vpi,vci); + pr_debug("(vpi %d, vci %d)\n", vpi, vci); if (sock->state == SS_CONNECTED) return -EISCONN; if (sock->state != SS_UNCONNECTED) @@ -426,14 +431,15 @@ int vcc_connect(struct socket *sock, int itf, short vpi, int vci) else if (test_bit(ATM_VF_PARTIAL,&vcc->flags)) return -EINVAL; - pr_debug("vcc_connect (TX: cl %d,bw %d-%d,sdu %d; " - "RX: cl %d,bw %d-%d,sdu %d,AAL %s%d)\n", - vcc->qos.txtp.traffic_class,vcc->qos.txtp.min_pcr, - vcc->qos.txtp.max_pcr,vcc->qos.txtp.max_sdu, - vcc->qos.rxtp.traffic_class,vcc->qos.rxtp.min_pcr, - vcc->qos.rxtp.max_pcr,vcc->qos.rxtp.max_sdu, - vcc->qos.aal == ATM_AAL5 ? "" : vcc->qos.aal == ATM_AAL0 ? "" : - " ??? code ",vcc->qos.aal == ATM_AAL0 ? 0 : vcc->qos.aal); + pr_debug("(TX: cl %d,bw %d-%d,sdu %d; " + "RX: cl %d,bw %d-%d,sdu %d,AAL %s%d)\n", + vcc->qos.txtp.traffic_class, vcc->qos.txtp.min_pcr, + vcc->qos.txtp.max_pcr, vcc->qos.txtp.max_sdu, + vcc->qos.rxtp.traffic_class, vcc->qos.rxtp.min_pcr, + vcc->qos.rxtp.max_pcr,vcc->qos.rxtp.max_sdu, + vcc->qos.aal == ATM_AAL5 ? 
"" : + vcc->qos.aal == ATM_AAL0 ? "" : " ??? code ", + vcc->qos.aal == ATM_AAL0 ? 0 : vcc->qos.aal); if (!test_bit(ATM_VF_HASQOS, &vcc->flags)) return -EBADFD; if (vcc->qos.txtp.traffic_class == ATM_ANYCLASS || @@ -497,7 +503,7 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, if (error) return error; sock_recv_ts_and_drops(msg, sk, skb); - pr_debug("RcvM %d -= %d\n", atomic_read(&sk->sk_rmem_alloc), skb->truesize); + pr_debug("%d -= %d\n", atomic_read(&sk->sk_rmem_alloc), skb->truesize); atm_return(vcc, skb->truesize); skb_free_datagram(sk, skb); return copied; @@ -772,19 +778,19 @@ static int __init atm_init(void) goto out; if ((error = atmpvc_init()) < 0) { - printk(KERN_ERR "atmpvc_init() failed with %d\n", error); + pr_err("atmpvc_init() failed with %d\n", error); goto out_unregister_vcc_proto; } if ((error = atmsvc_init()) < 0) { - printk(KERN_ERR "atmsvc_init() failed with %d\n", error); + pr_err("atmsvc_init() failed with %d\n", error); goto out_atmpvc_exit; } if ((error = atm_proc_init()) < 0) { - printk(KERN_ERR "atm_proc_init() failed with %d\n",error); + pr_err("atm_proc_init() failed with %d\n", error); goto out_atmsvc_exit; } if ((error = atm_sysfs_init()) < 0) { - printk(KERN_ERR "atm_sysfs_init() failed with %d\n",error); + pr_err("atm_sysfs_init() failed with %d\n", error); goto out_atmproc_exit; } out: diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c index 2ea40995dced..b75afba1f72b 100644 --- a/net/atm/ioctl.c +++ b/net/atm/ioctl.c @@ -3,6 +3,7 @@ /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ /* 2003 John Levon */ +#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ #include #include @@ -97,8 +98,8 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg goto done; case ATM_SETSC: if (net_ratelimit()) - printk(KERN_WARNING "ATM_SETSC is obsolete; used by %s:%d\n", - current->comm, task_pid_nr(current)); + pr_warning("ATM_SETSC is obsolete; used by %s:%d\n", + current->comm, task_pid_nr(current)); error = 0; goto done; case ATMSIGD_CTRL: @@ -123,7 +124,7 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg to think about it at all. dwmw2. 
*/ if (compat) { if (net_ratelimit()) - printk(KERN_WARNING "32-bit task cannot be atmsigd\n"); + pr_warning("32-bit task cannot be atmsigd\n"); error = -EINVAL; goto done; } diff --git a/net/atm/lec.c b/net/atm/lec.c index 42749b7b917c..6873813c3c99 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -4,6 +4,8 @@ * Marko Kiiskila */ +#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ + #include #include #include @@ -242,7 +244,7 @@ lec_send(struct atm_vcc *vcc, struct sk_buff *skb) static void lec_tx_timeout(struct net_device *dev) { - printk(KERN_INFO "%s: tx timeout\n", dev->name); + pr_info("%s\n", dev->name); dev->trans_start = jiffies; netif_wake_queue(dev); } @@ -266,7 +268,7 @@ static netdev_tx_t lec_start_xmit(struct sk_buff *skb, int i = 0; #endif /* DUMP_PACKETS >0 */ - pr_debug("lec_start_xmit called\n"); + pr_debug("called\n"); if (!priv->lecd) { printk("%s:No lecd attached\n", dev->name); dev->stats.tx_errors++; @@ -276,8 +278,8 @@ static netdev_tx_t lec_start_xmit(struct sk_buff *skb, } pr_debug("skbuff head:%lx data:%lx tail:%lx end:%lx\n", - (long)skb->head, (long)skb->data, (long)skb_tail_pointer(skb), - (long)skb_end_pointer(skb)); + (long)skb->head, (long)skb->data, (long)skb_tail_pointer(skb), + (long)skb_end_pointer(skb)); #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) if (memcmp(skb->data, bridge_ula_lec, sizeof(bridge_ula_lec)) == 0) lec_handle_bridge(skb, dev); @@ -286,7 +288,7 @@ static netdev_tx_t lec_start_xmit(struct sk_buff *skb, /* Make sure we have room for lec_id */ if (skb_headroom(skb) < 2) { - pr_debug("lec_start_xmit: reallocating skb\n"); + pr_debug("reallocating skb\n"); skb2 = skb_realloc_headroom(skb, LEC_HEADER_LEN); kfree_skb(skb); if (skb2 == NULL) @@ -367,19 +369,16 @@ static netdev_tx_t lec_start_xmit(struct sk_buff *skb, #endif entry = NULL; vcc = lec_arp_resolve(priv, dst, is_rdesc, &entry); - pr_debug("%s:vcc:%p vcc_flags:%lx, entry:%p\n", dev->name, - vcc, vcc ? vcc->flags : 0, entry); + pr_debug("%s:vcc:%p vcc_flags:%lx, entry:%p\n", + dev->name, vcc, vcc ? 
vcc->flags : 0, entry); if (!vcc || !test_bit(ATM_VF_READY, &vcc->flags)) { if (entry && (entry->tx_wait.qlen < LEC_UNRES_QUE_LEN)) { - pr_debug("%s:lec_start_xmit: queuing packet, ", - dev->name); - pr_debug("MAC address %pM\n", lec_h->h_dest); + pr_debug("%s:queuing packet, MAC address %pM\n", + dev->name, lec_h->h_dest); skb_queue_tail(&entry->tx_wait, skb); } else { - pr_debug - ("%s:lec_start_xmit: tx queue full or no arp entry, dropping, ", - dev->name); - pr_debug("MAC address %pM\n", lec_h->h_dest); + pr_debug("%s:tx queue full or no arp entry, dropping, MAC address: %pM\n", + dev->name, lec_h->h_dest); dev->stats.tx_dropped++; dev_kfree_skb(skb); } @@ -390,8 +389,7 @@ static netdev_tx_t lec_start_xmit(struct sk_buff *skb, #endif /* DUMP_PACKETS > 0 */ while (entry && (skb2 = skb_dequeue(&entry->tx_wait))) { - pr_debug("lec.c: emptying tx queue, "); - pr_debug("MAC address %pM\n", lec_h->h_dest); + pr_debug("emptying tx queue, MAC address %pM\n", lec_h->h_dest); lec_send(vcc, skb2); } @@ -477,10 +475,10 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) mesg->content.normal.atm_addr, mesg->content.normal.flag, mesg->content.normal.targetless_le_arp); - pr_debug("lec: in l_arp_update\n"); + pr_debug("in l_arp_update\n"); if (mesg->sizeoftlvs != 0) { /* LANE2 3.1.5 */ - pr_debug("lec: LANE2 3.1.5, got tlvs, size %d\n", - mesg->sizeoftlvs); + pr_debug("LANE2 3.1.5, got tlvs, size %d\n", + mesg->sizeoftlvs); lane2_associate_ind(dev, mesg->content.normal.mac_addr, tmp, mesg->sizeoftlvs); } @@ -531,9 +529,8 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) struct sk_buff *skb2; struct sock *sk; - pr_debug - ("%s: entry found, responding to zeppelin\n", - dev->name); + pr_debug("%s: entry found, responding to zeppelin\n", + dev->name); skb2 = alloc_skb(sizeof(struct atmlec_msg), GFP_ATOMIC); @@ -633,7 +630,7 @@ send_to_lecd(struct lec_priv *priv, atmlec_msg_type type, sk->sk_data_ready(sk, skb->len); if (data != NULL) { - pr_debug("lec: about to send %d bytes of data\n", data->len); + pr_debug("about to send %d bytes of data\n", data->len); atm_force_charge(priv->lecd, data->truesize); skb_queue_tail(&sk->sk_receive_queue, data); sk->sk_data_ready(sk, skb->len); @@ -692,35 +689,27 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb) struct lec_priv *priv = netdev_priv(dev); #if DUMP_PACKETS >0 - int i = 0; - char buf[300]; - - printk("%s: lec_push vcc vpi:%d vci:%d\n", dev->name, - vcc->vpi, vcc->vci); + printk(KERN_DEBUG "%s: vcc vpi:%d vci:%d\n", + dev->name, vcc->vpi, vcc->vci); #endif if (!skb) { pr_debug("%s: null skb\n", dev->name); lec_vcc_close(priv, vcc); return; } -#if DUMP_PACKETS > 0 - printk("%s: rcv datalen:%ld lecid:%4.4x\n", dev->name, - skb->len, priv->lecid); #if DUMP_PACKETS >= 2 - for (i = 0; i < skb->len && i < 99; i++) { - sprintf(buf + i * 3, "%2.2x ", 0xff & skb->data[i]); - } +#define MAX_SKB_DUMP 99 #elif DUMP_PACKETS >= 1 - for (i = 0; i < skb->len && i < 30; i++) { - sprintf(buf + i * 3, "%2.2x ", 0xff & skb->data[i]); - } -#endif /* DUMP_PACKETS >= 1 */ - if (i == skb->len) - printk("%s\n", buf); - else - printk("%s...\n", buf); +#define MAX_SKB_DUMP 30 +#endif +#if DUMP_PACKETS > 0 + printk(KERN_DEBUG "%s: rcv datalen:%ld lecid:%4.4x\n", + dev->name, skb->len, priv->lecid); + print_hex_dump(KERN_DEBUG, "", DUMP_OFFSET, 16, 1, + skb->data, min(MAX_SKB_DUMP, skb->len), true); #endif /* DUMP_PACKETS > 0 */ - if (memcmp(skb->data, lec_ctrl_magic, 4) == 0) { /* Control frame, to daemon */ + if (memcmp(skb->data, 
lec_ctrl_magic, 4) == 0) { + /* Control frame, to daemon */ struct sock *sk = sk_atm(vcc); pr_debug("%s: To daemon\n", dev->name); @@ -801,7 +790,7 @@ static void lec_pop(struct atm_vcc *vcc, struct sk_buff *skb) struct net_device *dev = skb->dev; if (vpriv == NULL) { - printk("lec_pop(): vpriv = NULL!?!?!?\n"); + pr_info("vpriv = NULL!?!?!?\n"); return; } @@ -822,11 +811,8 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg) /* Lecd must be up in this case */ bytes_left = copy_from_user(&ioc_data, arg, sizeof(struct atmlec_ioc)); - if (bytes_left != 0) { - printk - ("lec: lec_vcc_attach, copy from user failed for %d bytes\n", - bytes_left); - } + if (bytes_left != 0) + pr_info("copy from user failed for %d bytes\n", bytes_left); if (ioc_data.dev_num < 0 || ioc_data.dev_num >= MAX_LEC_ITF || !dev_lec[ioc_data.dev_num]) return -EINVAL; @@ -1199,7 +1185,7 @@ static int __init lane_module_init(void) p = proc_create("lec", S_IRUGO, atm_proc_root, &lec_seq_fops); if (!p) { - printk(KERN_ERR "Unable to initialize /proc/net/atm/lec\n"); + pr_err("Unable to initialize /proc/net/atm/lec\n"); return -ENOMEM; } #endif @@ -1381,7 +1367,7 @@ static void lane2_associate_ind(struct net_device *dev, const u8 *mac_addr, #if 0 #define pr_debug(format,args...) /* -#define pr_debug printk + #define pr_debug printk */ #endif #define DEBUG_ARP_TABLE 0 @@ -1450,10 +1436,7 @@ lec_arp_add(struct lec_priv *priv, struct lec_arp_table *entry) tmp = &priv->lec_arp_tables[HASH(entry->mac_addr[ETH_ALEN - 1])]; hlist_add_head(&entry->next, tmp); - pr_debug("LEC_ARP: Added entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", - 0xff & entry->mac_addr[0], 0xff & entry->mac_addr[1], - 0xff & entry->mac_addr[2], 0xff & entry->mac_addr[3], - 0xff & entry->mac_addr[4], 0xff & entry->mac_addr[5]); + pr_debug("Added entry:%pM\n", entry->mac_addr); } /* @@ -1492,10 +1475,7 @@ lec_arp_remove(struct lec_priv *priv, struct lec_arp_table *to_remove) } skb_queue_purge(&to_remove->tx_wait); /* FIXME: good place for this? */ - pr_debug("LEC_ARP: Removed entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", - 0xff & to_remove->mac_addr[0], 0xff & to_remove->mac_addr[1], - 0xff & to_remove->mac_addr[2], 0xff & to_remove->mac_addr[3], - 0xff & to_remove->mac_addr[4], 0xff & to_remove->mac_addr[5]); + pr_debug("Removed entry:%pM\n", to_remove->mac_addr); return 0; } @@ -1714,9 +1694,7 @@ static struct lec_arp_table *lec_arp_find(struct lec_priv *priv, struct hlist_head *head; struct lec_arp_table *entry; - pr_debug("LEC_ARP: lec_arp_find :%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", - mac_addr[0] & 0xff, mac_addr[1] & 0xff, mac_addr[2] & 0xff, - mac_addr[3] & 0xff, mac_addr[4] & 0xff, mac_addr[5] & 0xff); + pr_debug("%pM\n", mac_addr); head = &priv->lec_arp_tables[HASH(mac_addr[ETH_ALEN - 1])]; hlist_for_each_entry(entry, node, head, next) { @@ -1755,7 +1733,7 @@ static void lec_arp_expire_arp(unsigned long data) entry = (struct lec_arp_table *)data; - pr_debug("lec_arp_expire_arp\n"); + pr_debug("\n"); if (entry->status == ESI_ARP_PENDING) { if (entry->no_tries <= entry->priv->max_retry_count) { if (entry->is_rdesc) @@ -1779,10 +1757,10 @@ static void lec_arp_expire_vcc(unsigned long data) del_timer(&to_remove->timer); - pr_debug("LEC_ARP %p %p: lec_arp_expire_vcc vpi:%d vci:%d\n", - to_remove, priv, - to_remove->vcc ? to_remove->recv_vcc->vpi : 0, - to_remove->vcc ? to_remove->recv_vcc->vci : 0); + pr_debug("%p %p: vpi:%d vci:%d\n", + to_remove, priv, + to_remove->vcc ? to_remove->recv_vcc->vpi : 0, + to_remove->vcc ? 
to_remove->recv_vcc->vci : 0); spin_lock_irqsave(&priv->lec_arp_lock, flags); hlist_del(&to_remove->next); @@ -1819,7 +1797,7 @@ static void lec_arp_check_expire(struct work_struct *work) unsigned long time_to_check; int i; - pr_debug("lec_arp_check_expire %p\n", priv); + pr_debug("%p\n", priv); now = jiffies; restart: spin_lock_irqsave(&priv->lec_arp_lock, flags); @@ -1832,12 +1810,12 @@ restart: time_to_check = priv->aging_time; pr_debug("About to expire: %lx - %lx > %lx\n", - now, entry->last_used, time_to_check); + now, entry->last_used, time_to_check); if (time_after(now, entry->last_used + time_to_check) && !(entry->flags & LEC_PERMANENT_FLAG) && !(entry->mac_addr[0] & 0x01)) { /* LANE2: 7.1.20 */ /* Remove entry */ - pr_debug("LEC:Entry timed out\n"); + pr_debug("Entry timed out\n"); lec_arp_remove(priv, entry); lec_arp_put(entry); } else { @@ -1934,7 +1912,7 @@ static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv, entry->packets_flooded < priv->maximum_unknown_frame_count) { entry->packets_flooded++; - pr_debug("LEC_ARP: Flooding..\n"); + pr_debug("Flooding..\n"); found = priv->mcast_vcc; goto out; } @@ -1945,13 +1923,13 @@ static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv, */ lec_arp_hold(entry); *ret_entry = entry; - pr_debug("lec: entry->status %d entry->vcc %p\n", entry->status, - entry->vcc); + pr_debug("entry->status %d entry->vcc %p\n", entry->status, + entry->vcc); found = NULL; } else { /* No matching entry was found */ entry = make_entry(priv, mac_to_find); - pr_debug("LEC_ARP: Making entry\n"); + pr_debug("Making entry\n"); if (!entry) { found = priv->mcast_vcc; goto out; @@ -1988,7 +1966,7 @@ lec_addr_delete(struct lec_priv *priv, const unsigned char *atm_addr, struct lec_arp_table *entry; int i; - pr_debug("lec_addr_delete\n"); + pr_debug("\n"); spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_tables[i], next) { @@ -2019,10 +1997,8 @@ lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr, struct lec_arp_table *entry, *tmp; int i; - pr_debug("lec:%s", (targetless_le_arp) ? "targetless " : " "); - pr_debug("lec_arp_update mac:%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", - mac_addr[0], mac_addr[1], mac_addr[2], mac_addr[3], - mac_addr[4], mac_addr[5]); + pr_debug("%smac:%pM\n", + (targetless_le_arp) ? "targetless " : "", mac_addr); spin_lock_irqsave(&priv->lec_arp_lock, flags); entry = lec_arp_find(priv, mac_addr); @@ -2149,19 +2125,17 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, * Vcc which we don't want to make default vcc, * attach it anyway. 
*/ - pr_debug - ("LEC_ARP:Attaching data direct, not default: " - "%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", - ioc_data->atm_addr[0], ioc_data->atm_addr[1], - ioc_data->atm_addr[2], ioc_data->atm_addr[3], - ioc_data->atm_addr[4], ioc_data->atm_addr[5], - ioc_data->atm_addr[6], ioc_data->atm_addr[7], - ioc_data->atm_addr[8], ioc_data->atm_addr[9], - ioc_data->atm_addr[10], ioc_data->atm_addr[11], - ioc_data->atm_addr[12], ioc_data->atm_addr[13], - ioc_data->atm_addr[14], ioc_data->atm_addr[15], - ioc_data->atm_addr[16], ioc_data->atm_addr[17], - ioc_data->atm_addr[18], ioc_data->atm_addr[19]); + pr_debug("LEC_ARP:Attaching data direct, not default: %2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", + ioc_data->atm_addr[0], ioc_data->atm_addr[1], + ioc_data->atm_addr[2], ioc_data->atm_addr[3], + ioc_data->atm_addr[4], ioc_data->atm_addr[5], + ioc_data->atm_addr[6], ioc_data->atm_addr[7], + ioc_data->atm_addr[8], ioc_data->atm_addr[9], + ioc_data->atm_addr[10], ioc_data->atm_addr[11], + ioc_data->atm_addr[12], ioc_data->atm_addr[13], + ioc_data->atm_addr[14], ioc_data->atm_addr[15], + ioc_data->atm_addr[16], ioc_data->atm_addr[17], + ioc_data->atm_addr[18], ioc_data->atm_addr[19]); entry = make_entry(priv, bus_mac); if (entry == NULL) goto out; @@ -2177,19 +2151,17 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, dump_arp_table(priv); goto out; } - pr_debug - ("LEC_ARP:Attaching data direct, default: " - "%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", - ioc_data->atm_addr[0], ioc_data->atm_addr[1], - ioc_data->atm_addr[2], ioc_data->atm_addr[3], - ioc_data->atm_addr[4], ioc_data->atm_addr[5], - ioc_data->atm_addr[6], ioc_data->atm_addr[7], - ioc_data->atm_addr[8], ioc_data->atm_addr[9], - ioc_data->atm_addr[10], ioc_data->atm_addr[11], - ioc_data->atm_addr[12], ioc_data->atm_addr[13], - ioc_data->atm_addr[14], ioc_data->atm_addr[15], - ioc_data->atm_addr[16], ioc_data->atm_addr[17], - ioc_data->atm_addr[18], ioc_data->atm_addr[19]); + pr_debug("LEC_ARP:Attaching data direct, default: %2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", + ioc_data->atm_addr[0], ioc_data->atm_addr[1], + ioc_data->atm_addr[2], ioc_data->atm_addr[3], + ioc_data->atm_addr[4], ioc_data->atm_addr[5], + ioc_data->atm_addr[6], ioc_data->atm_addr[7], + ioc_data->atm_addr[8], ioc_data->atm_addr[9], + ioc_data->atm_addr[10], ioc_data->atm_addr[11], + ioc_data->atm_addr[12], ioc_data->atm_addr[13], + ioc_data->atm_addr[14], ioc_data->atm_addr[15], + ioc_data->atm_addr[16], ioc_data->atm_addr[17], + ioc_data->atm_addr[18], ioc_data->atm_addr[19]); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { hlist_for_each_entry(entry, node, &priv->lec_arp_tables[i], next) { if (memcmp @@ -2197,9 +2169,9 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, ATM_ESA_LEN) == 0) { pr_debug("LEC_ARP: Attaching data direct\n"); pr_debug("Currently -> Vcc: %d, Rvcc:%d\n", - entry->vcc ? entry->vcc->vci : 0, - entry->recv_vcc ? entry->recv_vcc-> - vci : 0); + entry->vcc ? entry->vcc->vci : 0, + entry->recv_vcc ? 
entry->recv_vcc-> + vci : 0); found_entry = 1; del_timer(&entry->timer); entry->vcc = vcc; @@ -2271,7 +2243,7 @@ static void lec_flush_complete(struct lec_priv *priv, unsigned long tran_id) struct lec_arp_table *entry; int i; - pr_debug("LEC:lec_flush_complete %lx\n", tran_id); + pr_debug("%lx\n", tran_id); restart: spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { @@ -2312,7 +2284,7 @@ lec_set_flush_tran_id(struct lec_priv *priv, if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN)) { entry->flush_tran_id = tran_id; pr_debug("Set flush transaction id to %lx for %p\n", - tran_id, entry); + tran_id, entry); } } spin_unlock_irqrestore(&priv->lec_arp_lock, flags); diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 38a6cb0863f0..3c45aef47b7e 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -1,3 +1,5 @@ +#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ + #include #include #include @@ -1447,7 +1449,7 @@ static __init int atm_mpoa_init(void) register_atm_ioctl(&atm_ioctl_ops); if (mpc_proc_init() != 0) - printk(KERN_INFO "mpoa: failed to initialize /proc/mpoa\n"); + pr_info("failed to initialize /proc/mpoa\n"); printk("mpc.c: " __DATE__ " " __TIME__ " initialized\n"); diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c index 1a0f5ccea9c4..0603ab478cf5 100644 --- a/net/atm/mpoa_proc.c +++ b/net/atm/mpoa_proc.c @@ -1,3 +1,4 @@ +#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ #ifdef CONFIG_PROC_FS #include @@ -278,7 +279,7 @@ int mpc_proc_init(void) p = proc_create(STAT_FILE_NAME, 0, atm_proc_root, &mpc_file_operations); if (!p) { - printk(KERN_ERR "Unable to initialize /proc/atm/%s\n", STAT_FILE_NAME); + pr_err("Unable to initialize /proc/atm/%s\n", STAT_FILE_NAME); return -ENOMEM; } return 0; diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c index 0af84cd4f65b..62db6d71dbab 100644 --- a/net/atm/pppoatm.c +++ b/net/atm/pppoatm.c @@ -33,6 +33,8 @@ * These hooks are not yet available in ppp_generic */ +#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ + #include #include #include @@ -132,7 +134,7 @@ static void pppoatm_unassign_vcc(struct atm_vcc *atmvcc) static void pppoatm_push(struct atm_vcc *atmvcc, struct sk_buff *skb) { struct pppoatm_vcc *pvcc = atmvcc_to_pvcc(atmvcc); - pr_debug("pppoatm push\n"); + pr_debug("\n"); if (skb == NULL) { /* VCC was closed */ pr_debug("removing ATMPPP VCC %p\n", pvcc); pppoatm_unassign_vcc(atmvcc); @@ -165,10 +167,9 @@ static void pppoatm_push(struct atm_vcc *atmvcc, struct sk_buff *skb) pvcc->chan.mtu += LLC_LEN; break; } - pr_debug("Couldn't autodetect yet " - "(skb: %02X %02X %02X %02X %02X %02X)\n", - skb->data[0], skb->data[1], skb->data[2], - skb->data[3], skb->data[4], skb->data[5]); + pr_debug("Couldn't autodetect yet (skb: %02X %02X %02X %02X %02X %02X)\n", + skb->data[0], skb->data[1], skb->data[2], + skb->data[3], skb->data[4], skb->data[5]); goto error; case e_vc: break; @@ -194,7 +195,7 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb) { struct pppoatm_vcc *pvcc = chan_to_pvcc(chan); ATM_SKB(skb)->vcc = pvcc->atmvcc; - pr_debug("pppoatm_send (skb=0x%p, vcc=0x%p)\n", skb, pvcc->atmvcc); + pr_debug("(skb=0x%p, vcc=0x%p)\n", skb, pvcc->atmvcc); if (skb->data[0] == '\0' && (pvcc->flags & SC_COMP_PROT)) (void) skb_pull(skb, 1); switch (pvcc->encaps) { /* LLC encapsulation needed */ @@ -226,8 +227,8 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb) atomic_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc); ATM_SKB(skb)->atm_options = 
ATM_SKB(skb)->vcc->atm_options; - pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, ATM_SKB(skb)->vcc, - ATM_SKB(skb)->vcc->dev); + pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", + skb, ATM_SKB(skb)->vcc, ATM_SKB(skb)->vcc->dev); return ATM_SKB(skb)->vcc->send(ATM_SKB(skb)->vcc, skb) ? DROP_PACKET : 1; nospace: diff --git a/net/atm/raw.c b/net/atm/raw.c index cbfcc71a17b1..fb8a9497653c 100644 --- a/net/atm/raw.c +++ b/net/atm/raw.c @@ -2,6 +2,7 @@ /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ +#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ #include #include @@ -32,8 +33,8 @@ static void atm_pop_raw(struct atm_vcc *vcc,struct sk_buff *skb) { struct sock *sk = sk_atm(vcc); - pr_debug("APopR (%d) %d -= %d\n", vcc->vci, - sk_wmem_alloc_get(sk), skb->truesize); + pr_debug("(%d) %d -= %d\n", + vcc->vci, sk_wmem_alloc_get(sk), skb->truesize); atomic_sub(skb->truesize, &sk->sk_wmem_alloc); dev_kfree_skb_any(skb); sk->sk_write_space(sk); diff --git a/net/atm/resources.c b/net/atm/resources.c index 56b7322ff461..0d4c0ee090db 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -7,6 +7,7 @@ * 2002/01 - don't free the whole struct sock on sk->destruct time, * use the default destruct function initialized by sock_init_data */ +#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ #include #include @@ -79,8 +80,7 @@ struct atm_dev *atm_dev_register(const char *type, const struct atmdev_ops *ops, dev = __alloc_atm_dev(type); if (!dev) { - printk(KERN_ERR "atm_dev_register: no space for dev %s\n", - type); + pr_err("no space for dev %s\n", type); return NULL; } mutex_lock(&atm_dev_mutex); @@ -109,16 +109,12 @@ struct atm_dev *atm_dev_register(const char *type, const struct atmdev_ops *ops, atomic_set(&dev->refcnt, 1); if (atm_proc_dev_register(dev) < 0) { - printk(KERN_ERR "atm_dev_register: " - "atm_proc_dev_register failed for dev %s\n", - type); + pr_err("atm_proc_dev_register failed for dev %s\n", type); goto out_fail; } if (atm_register_sysfs(dev) < 0) { - printk(KERN_ERR "atm_dev_register: " - "atm_register_sysfs failed for dev %s\n", - type); + pr_err("atm_register_sysfs failed for dev %s\n", type); atm_proc_dev_deregister(dev); goto out_fail; } diff --git a/net/atm/signaling.c b/net/atm/signaling.c index 229921400522..28df4edf9ca4 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -2,6 +2,7 @@ /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ +#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ #include /* error codes */ #include /* printk */ @@ -37,14 +38,14 @@ static void sigd_put_skb(struct sk_buff *skb) add_wait_queue(&sigd_sleep,&wait); while (!sigd) { set_current_state(TASK_UNINTERRUPTIBLE); - pr_debug("atmsvc: waiting for signaling demon...\n"); + pr_debug("atmsvc: waiting for signaling daemon...\n"); schedule(); } current->state = TASK_RUNNING; remove_wait_queue(&sigd_sleep,&wait); #else if (!sigd) { - pr_debug("atmsvc: no signaling demon\n"); + pr_debug("atmsvc: no signaling daemon\n"); kfree_skb(skb); return; } @@ -90,8 +91,7 @@ static int sigd_send(struct atm_vcc *vcc,struct sk_buff *skb) msg = (struct atmsvc_msg *) skb->data; atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); vcc = *(struct atm_vcc **) &msg->vcc; - pr_debug("sigd_send %d (0x%lx)\n",(int) msg->type, - (unsigned long) vcc); + pr_debug("%d (0x%lx)\n", (int)msg->type, (unsigned long)vcc); sk = sk_atm(vcc); switch (msg->type) { @@ -150,8 +150,7 @@ as_indicate_complete: clear_bit(ATM_VF_WAITING, &vcc->flags); break; default: - printk(KERN_ALERT "sigd_send: bad 
message type %d\n", - (int) msg->type); + pr_alert("bad message type %d\n", (int)msg->type); return -EINVAL; } sk->sk_state_change(sk); @@ -169,7 +168,7 @@ void sigd_enq2(struct atm_vcc *vcc,enum atmsvc_msg_type type, struct atmsvc_msg *msg; static unsigned session = 0; - pr_debug("sigd_enq %d (0x%p)\n",(int) type,vcc); + pr_debug("%d (0x%p)\n", (int)type, vcc); while (!(skb = alloc_skb(sizeof(struct atmsvc_msg),GFP_KERNEL))) schedule(); msg = (struct atmsvc_msg *) skb_put(skb,sizeof(struct atmsvc_msg)); @@ -219,10 +218,10 @@ static void sigd_close(struct atm_vcc *vcc) struct sock *s; int i; - pr_debug("sigd_close\n"); + pr_debug("\n"); sigd = NULL; if (skb_peek(&sk_atm(vcc)->sk_receive_queue)) - printk(KERN_ERR "sigd_close: closing with requests pending\n"); + pr_err("closing with requests pending\n"); skb_queue_purge(&sk_atm(vcc)->sk_receive_queue); read_lock(&vcc_sklist_lock); @@ -256,7 +255,7 @@ static struct atm_dev sigd_dev = { int sigd_attach(struct atm_vcc *vcc) { if (sigd) return -EADDRINUSE; - pr_debug("sigd_attach\n"); + pr_debug("\n"); sigd = vcc; vcc->dev = &sigd_dev; vcc_insert_socket(sk_atm(vcc)); diff --git a/net/atm/svc.c b/net/atm/svc.c index 66e1d9b3e5de..251ddbc42e4b 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -2,6 +2,7 @@ /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ +#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ #include #include /* struct socket, struct proto_ops */ @@ -46,7 +47,7 @@ static void svc_disconnect(struct atm_vcc *vcc) struct sk_buff *skb; struct sock *sk = sk_atm(vcc); - pr_debug("svc_disconnect %p\n",vcc); + pr_debug("%p\n",vcc); if (test_bit(ATM_VF_REGIS,&vcc->flags)) { prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); sigd_enq(vcc,as_close,NULL,NULL,NULL); @@ -76,7 +77,7 @@ static int svc_release(struct socket *sock) if (sk) { vcc = ATM_SD(sock); - pr_debug("svc_release %p\n", vcc); + pr_debug("%p\n", vcc); clear_bit(ATM_VF_READY, &vcc->flags); /* VCC pointer is used as a reference, so we must not free it (thereby subjecting it to re-use) before all pending connections @@ -153,7 +154,7 @@ static int svc_connect(struct socket *sock,struct sockaddr *sockaddr, struct atm_vcc *vcc = ATM_SD(sock); int error; - pr_debug("svc_connect %p\n",vcc); + pr_debug("%p\n",vcc); lock_sock(sk); if (sockaddr_len != sizeof(struct sockaddr_atmsvc)) { error = -EINVAL; @@ -286,7 +287,7 @@ static int svc_listen(struct socket *sock,int backlog) struct atm_vcc *vcc = ATM_SD(sock); int error; - pr_debug("svc_listen %p\n",vcc); + pr_debug("%p\n", vcc); lock_sock(sk); /* let server handle listen on unbound sockets */ if (test_bit(ATM_VF_SESSION,&vcc->flags)) { @@ -336,7 +337,7 @@ static int svc_accept(struct socket *sock,struct socket *newsock,int flags) new_vcc = ATM_SD(newsock); - pr_debug("svc_accept %p -> %p\n",old_vcc,new_vcc); + pr_debug("%p -> %p\n", old_vcc, new_vcc); while (1) { DEFINE_WAIT(wait); @@ -540,7 +541,7 @@ static int svc_addparty(struct socket *sock, struct sockaddr *sockaddr, error = -EINPROGRESS; goto out; } - pr_debug("svc_addparty added wait queue\n"); + pr_debug("added wait queue\n"); while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { schedule(); prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); -- cgit v1.2.2 From c39f01d7883f944ae75961cc1a31d348c7970599 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 26 Jan 2010 11:40:01 +0000 Subject: net/atm/addr.c: Convert include Signed-off-by: David S. 
Miller --- net/atm/addr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/atm/addr.c b/net/atm/addr.c index 82e85abc303d..cf3ae8b47572 100644 --- a/net/atm/addr.c +++ b/net/atm/addr.c @@ -4,7 +4,7 @@ #include #include -#include +#include #include "signaling.h" #include "addr.h" -- cgit v1.2.2 From 3356b4d41605f9b0977b0963c65f466eee213fe9 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 26 Jan 2010 11:40:02 +0000 Subject: net/atm/atm_misc.c: checkpatch cleanups Moved EXPORT_SYMBOL to follow definition Add space after commas Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/atm/atm_misc.c | 40 ++++++++++++++++------------------------ 1 file changed, 16 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/atm/atm_misc.c b/net/atm/atm_misc.c index 02cc7e71efea..fc63526d8695 100644 --- a/net/atm/atm_misc.c +++ b/net/atm/atm_misc.c @@ -2,37 +2,35 @@ /* Written 1995-2000 by Werner Almesberger, EPFL ICA */ - #include #include #include #include #include #include +#include #include -#include - -int atm_charge(struct atm_vcc *vcc,int truesize) +int atm_charge(struct atm_vcc *vcc, int truesize) { - atm_force_charge(vcc,truesize); + atm_force_charge(vcc, truesize); if (atomic_read(&sk_atm(vcc)->sk_rmem_alloc) <= sk_atm(vcc)->sk_rcvbuf) return 1; - atm_return(vcc,truesize); + atm_return(vcc, truesize); atomic_inc(&vcc->stats->rx_drop); return 0; } +EXPORT_SYMBOL(atm_charge); - -struct sk_buff *atm_alloc_charge(struct atm_vcc *vcc,int pdu_size, - gfp_t gfp_flags) +struct sk_buff *atm_alloc_charge(struct atm_vcc *vcc, int pdu_size, + gfp_t gfp_flags) { struct sock *sk = sk_atm(vcc); int guess = atm_guess_pdu2truesize(pdu_size); - atm_force_charge(vcc,guess); + atm_force_charge(vcc, guess); if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) { - struct sk_buff *skb = alloc_skb(pdu_size,gfp_flags); + struct sk_buff *skb = alloc_skb(pdu_size, gfp_flags); if (skb) { atomic_add(skb->truesize-guess, @@ -40,10 +38,11 @@ struct sk_buff *atm_alloc_charge(struct atm_vcc *vcc,int pdu_size, return skb; } } - atm_return(vcc,guess); + atm_return(vcc, guess); atomic_inc(&vcc->stats->rx_drop); return NULL; } +EXPORT_SYMBOL(atm_alloc_charge); /* @@ -73,7 +72,6 @@ struct sk_buff *atm_alloc_charge(struct atm_vcc *vcc,int pdu_size, * else * */ - int atm_pcr_goal(const struct atm_trafprm *tp) { if (tp->pcr && tp->pcr != ATM_MAX_PCR) @@ -84,26 +82,20 @@ int atm_pcr_goal(const struct atm_trafprm *tp) return -tp->max_pcr; return 0; } +EXPORT_SYMBOL(atm_pcr_goal); - -void sonet_copy_stats(struct k_sonet_stats *from,struct sonet_stats *to) +void sonet_copy_stats(struct k_sonet_stats *from, struct sonet_stats *to) { #define __HANDLE_ITEM(i) to->i = atomic_read(&from->i) __SONET_ITEMS #undef __HANDLE_ITEM } +EXPORT_SYMBOL(sonet_copy_stats); - -void sonet_subtract_stats(struct k_sonet_stats *from,struct sonet_stats *to) +void sonet_subtract_stats(struct k_sonet_stats *from, struct sonet_stats *to) { -#define __HANDLE_ITEM(i) atomic_sub(to->i,&from->i) +#define __HANDLE_ITEM(i) atomic_sub(to->i, &from->i) __SONET_ITEMS #undef __HANDLE_ITEM } - - -EXPORT_SYMBOL(atm_charge); -EXPORT_SYMBOL(atm_alloc_charge); -EXPORT_SYMBOL(atm_pcr_goal); -EXPORT_SYMBOL(sonet_copy_stats); EXPORT_SYMBOL(sonet_subtract_stats); -- cgit v1.2.2 From f0a6cb118d7f9e72799a9c0c13f75d1b236a6546 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 26 Jan 2010 11:40:03 +0000 Subject: net/atm/atm_sysfs.c: checkpatch cleanups Add space after for Indent switch/case statements 80 column 
wrapping Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/atm/atm_sysfs.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c index b5674dc2083d..f693b78eb467 100644 --- a/net/atm/atm_sysfs.c +++ b/net/atm/atm_sysfs.c @@ -42,13 +42,14 @@ static ssize_t show_atmaddress(struct device *cdev, spin_lock_irqsave(&adev->lock, flags); list_for_each_entry(aaddr, &adev->local, entry) { - for(i = 0, j = 0; i < ATM_ESA_LEN; ++i, ++j) { + for (i = 0, j = 0; i < ATM_ESA_LEN; ++i, ++j) { if (j == *fmt) { pos += sprintf(pos, "."); ++fmt; j = 0; } - pos += sprintf(pos, "%02x", aaddr->addr.sas_addr.prv[i]); + pos += sprintf(pos, "%02x", + aaddr->addr.sas_addr.prv[i]); } pos += sprintf(pos, "\n"); } @@ -78,17 +79,17 @@ static ssize_t show_link_rate(struct device *cdev, /* show the link rate, not the data rate */ switch (adev->link_rate) { - case ATM_OC3_PCR: - link_rate = 155520000; - break; - case ATM_OC12_PCR: - link_rate = 622080000; - break; - case ATM_25_PCR: - link_rate = 25600000; - break; - default: - link_rate = adev->link_rate * 8 * 53; + case ATM_OC3_PCR: + link_rate = 155520000; + break; + case ATM_OC12_PCR: + link_rate = 622080000; + break; + case ATM_25_PCR: + link_rate = 25600000; + break; + default: + link_rate = adev->link_rate * 8 * 53; } pos += sprintf(pos, "%d\n", link_rate); -- cgit v1.2.2 From 641d729eb6236db0ce33e2b07a7f93193e60ea5a Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 26 Jan 2010 11:40:04 +0000 Subject: net/atm/br2684.c: checkpatch cleanups Convert #include Signed-off-by: David S. Miller --- net/atm/br2684.c | 72 ++++++++++++++++++++++++-------------------------------- 1 file changed, 31 insertions(+), 41 deletions(-) (limited to 'net') diff --git a/net/atm/br2684.c b/net/atm/br2684.c index d72b1a579911..4d64d87e7578 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include @@ -28,20 +28,14 @@ #include "common.h" -#ifdef SKB_DEBUG static void skb_debug(const struct sk_buff *skb) { +#ifdef SKB_DEBUG #define NUM2PRINT 50 - char buf[NUM2PRINT * 3 + 1]; /* 3 chars per byte */ - int i = 0; - for (i = 0; i < skb->len && i < NUM2PRINT; i++) { - sprintf(buf + i * 3, "%2.2x ", 0xff & skb->data[i]); - } - printk(KERN_DEBUG "br2684: skb: %s\n", buf); -} -#else -#define skb_debug(skb) do {} while (0) + print_hex_dump(KERN_DEBUG, "br2684: skb: ", DUMP_OFFSET, + 16, 1, skb->data, min(NUM2PRINT, skb->len), true); #endif +} #define BR2684_ETHERTYPE_LEN 2 #define BR2684_PAD_LEN 2 @@ -70,7 +64,7 @@ struct br2684_vcc { struct atm_vcc *atmvcc; struct net_device *device; /* keep old push, pop functions for chaining */ - void (*old_push) (struct atm_vcc * vcc, struct sk_buff * skb); + void (*old_push)(struct atm_vcc *vcc, struct sk_buff *skb); void (*old_pop)(struct atm_vcc *vcc, struct sk_buff *skb); enum br2684_encaps encaps; struct list_head brvccs; @@ -302,7 +296,8 @@ static int br2684_setfilt(struct atm_vcc *atmvcc, void __user * arg) struct br2684_dev *brdev; read_lock(&devs_lock); brdev = BRPRIV(br2684_find_dev(&fs.ifspec)); - if (brdev == NULL || list_empty(&brdev->brvccs) || brdev->brvccs.next != brdev->brvccs.prev) /* >1 VCC */ + if (brdev == NULL || list_empty(&brdev->brvccs) || + brdev->brvccs.next != brdev->brvccs.prev) /* >1 VCC */ brvcc = NULL; else brvcc = list_entry_brvcc(brdev->brvccs.next); @@ -378,29 +373,25 @@ static void br2684_push(struct atm_vcc *atmvcc, struct 
sk_buff *skb) __skb_trim(skb, skb->len - 4); /* accept packets that have "ipv[46]" in the snap header */ - if ((skb->len >= (sizeof(llc_oui_ipv4))) - && - (memcmp - (skb->data, llc_oui_ipv4, - sizeof(llc_oui_ipv4) - BR2684_ETHERTYPE_LEN) == 0)) { - if (memcmp - (skb->data + 6, ethertype_ipv6, - sizeof(ethertype_ipv6)) == 0) + if ((skb->len >= (sizeof(llc_oui_ipv4))) && + (memcmp(skb->data, llc_oui_ipv4, + sizeof(llc_oui_ipv4) - BR2684_ETHERTYPE_LEN) == 0)) { + if (memcmp(skb->data + 6, ethertype_ipv6, + sizeof(ethertype_ipv6)) == 0) skb->protocol = htons(ETH_P_IPV6); - else if (memcmp - (skb->data + 6, ethertype_ipv4, - sizeof(ethertype_ipv4)) == 0) + else if (memcmp(skb->data + 6, ethertype_ipv4, + sizeof(ethertype_ipv4)) == 0) skb->protocol = htons(ETH_P_IP); else goto error; skb_pull(skb, sizeof(llc_oui_ipv4)); skb_reset_network_header(skb); skb->pkt_type = PACKET_HOST; - /* - * Let us waste some time for checking the encapsulation. - * Note, that only 7 char is checked so frames with a valid FCS - * are also accepted (but FCS is not checked of course). - */ + /* + * Let us waste some time for checking the encapsulation. + * Note, that only 7 char is checked so frames with a valid FCS + * are also accepted (but FCS is not checked of course). + */ } else if ((skb->len >= sizeof(llc_oui_pid_pad)) && (memcmp(skb->data, llc_oui_pid_pad, 7) == 0)) { skb_pull(skb, sizeof(llc_oui_pid_pad)); @@ -495,12 +486,12 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg) err = -EEXIST; goto error; } - if (be.fcs_in != BR2684_FCSIN_NO || be.fcs_out != BR2684_FCSOUT_NO || - be.fcs_auto || be.has_vpiid || be.send_padding || (be.encaps != - BR2684_ENCAPS_VC - && be.encaps != - BR2684_ENCAPS_LLC) - || be.min_size != 0) { + if (be.fcs_in != BR2684_FCSIN_NO || + be.fcs_out != BR2684_FCSOUT_NO || + be.fcs_auto || be.has_vpiid || be.send_padding || + (be.encaps != BR2684_ENCAPS_VC && + be.encaps != BR2684_ENCAPS_LLC) || + be.min_size != 0) { err = -EINVAL; goto error; } @@ -541,7 +532,8 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg) } __module_get(THIS_MODULE); return 0; - error: + +error: write_unlock_irq(&devs_lock); kfree(brvcc); return err; @@ -587,7 +579,7 @@ static void br2684_setup_routed(struct net_device *netdev) INIT_LIST_HEAD(&brdev->brvccs); } -static int br2684_create(void __user * arg) +static int br2684_create(void __user *arg) { int err; struct net_device *netdev; @@ -597,9 +589,8 @@ static int br2684_create(void __user * arg) pr_debug("\n"); - if (copy_from_user(&ni, arg, sizeof ni)) { + if (copy_from_user(&ni, arg, sizeof ni)) return -EFAULT; - } if (ni.media & BR2684_FLAG_ROUTED) payload = p_routed; @@ -607,9 +598,8 @@ static int br2684_create(void __user * arg) payload = p_bridged; ni.media &= 0xffff; /* strip flags */ - if (ni.media != BR2684_MEDIA_ETHERNET || ni.mtu != 1500) { + if (ni.media != BR2684_MEDIA_ETHERNET || ni.mtu != 1500) return -EINVAL; - } netdev = alloc_netdev(sizeof(struct br2684_dev), ni.ifname[0] ? ni.ifname : "nas%d", -- cgit v1.2.2 From e956ea1b7de1df5ae4d6dc3e7460ed9e140803cd Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 26 Jan 2010 11:40:05 +0000 Subject: net/atm/clip.c: checkpatch cleanups Convert #include Signed-off-by: David S. 
Miller --- net/atm/clip.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/atm/clip.c b/net/atm/clip.c index d7939fd58cd3..ebfa022008f7 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -32,10 +32,10 @@ #include #include /* for struct rtable and routing */ #include /* icmp_send */ -#include /* for HZ */ +#include /* for HZ */ +#include #include /* for htons etc. */ #include /* save/restore_flags */ -#include #include #include "common.h" @@ -56,10 +56,10 @@ static int to_atmarpd(enum atmarp_ctrl_type type, int itf, __be32 ip) pr_debug("(%d)\n", type); if (!atmarpd) return -EUNATCH; - skb = alloc_skb(sizeof(struct atmarp_ctrl),GFP_ATOMIC); + skb = alloc_skb(sizeof(struct atmarp_ctrl), GFP_ATOMIC); if (!skb) return -ENOMEM; - ctrl = (struct atmarp_ctrl *) skb_put(skb,sizeof(struct atmarp_ctrl)); + ctrl = (struct atmarp_ctrl *)skb_put(skb, sizeof(struct atmarp_ctrl)); ctrl->type = type; ctrl->itf_num = itf; ctrl->ip = ip; @@ -111,7 +111,7 @@ static void unlink_clip_vcc(struct clip_vcc *clip_vcc) goto out; } pr_crit("ATMARP: failed (entry %p, vcc 0x%p)\n", entry, clip_vcc); - out: +out: netif_tx_unlock_bh(entry->neigh->dev); } @@ -205,12 +205,12 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb) } ATM_SKB(skb)->vcc = vcc; skb_reset_mac_header(skb); - if (!clip_vcc->encap - || skb->len < RFC1483LLC_LEN - || memcmp(skb->data, llc_oui, sizeof (llc_oui))) + if (!clip_vcc->encap || + skb->len < RFC1483LLC_LEN || + memcmp(skb->data, llc_oui, sizeof(llc_oui))) skb->protocol = htons(ETH_P_IP); else { - skb->protocol = ((__be16 *) skb->data)[3]; + skb->protocol = ((__be16 *)skb->data)[3]; skb_pull(skb, RFC1483LLC_LEN); if (skb->protocol == htons(ETH_P_ARP)) { skb->dev->stats.rx_packets++; @@ -644,7 +644,6 @@ static int clip_inet_event(struct notifier_block *this, unsigned long event, return clip_device_event(this, NETDEV_CHANGE, in_dev->dev); } - static struct notifier_block clip_dev_notifier = { .notifier_call = clip_device_event, }; @@ -670,7 +669,6 @@ static void atmarpd_close(struct atm_vcc *vcc) module_put(THIS_MODULE); } - static struct atmdev_ops atmarpd_dev_ops = { .close = atmarpd_close }; @@ -692,11 +690,11 @@ static int atm_init_atmarp(struct atm_vcc *vcc) return -EADDRINUSE; } - mod_timer(&idle_timer, jiffies+CLIP_CHECK_INTERVAL*HZ); + mod_timer(&idle_timer, jiffies + CLIP_CHECK_INTERVAL * HZ); atmarpd = vcc; - set_bit(ATM_VF_META,&vcc->flags); - set_bit(ATM_VF_READY,&vcc->flags); + set_bit(ATM_VF_META, &vcc->flags); + set_bit(ATM_VF_READY, &vcc->flags); /* allow replies and avoid getting closed if signaling dies */ vcc->dev = &atmarpd_dev; vcc_insert_socket(sk_atm(vcc)); -- cgit v1.2.2 From a8147d737bd37bd51bb01737ac9c17a2cfc02a38 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 26 Jan 2010 11:40:06 +0000 Subject: net/atm/common.c: checkpatch cleanups Convert #include Signed-off-by: David S. 
Miller --- net/atm/common.c | 338 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 173 insertions(+), 165 deletions(-) (limited to 'net') diff --git a/net/atm/common.c b/net/atm/common.c index 17f7e5f2131b..74d095a081e3 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -19,11 +19,10 @@ #include #include #include /* struct sock */ +#include +#include -#include #include -#include - #include "resources.h" /* atm_find_dev */ #include "common.h" /* prototypes */ @@ -32,13 +31,15 @@ #include "signaling.h" /* for WAITING and sigd_attach */ struct hlist_head vcc_hash[VCC_HTABLE_SIZE]; +EXPORT_SYMBOL(vcc_hash); + DEFINE_RWLOCK(vcc_sklist_lock); +EXPORT_SYMBOL(vcc_sklist_lock); static void __vcc_insert_socket(struct sock *sk) { struct atm_vcc *vcc = atm_sk(sk); - struct hlist_head *head = &vcc_hash[vcc->vci & - (VCC_HTABLE_SIZE - 1)]; + struct hlist_head *head = &vcc_hash[vcc->vci & (VCC_HTABLE_SIZE - 1)]; sk->sk_hash = vcc->vci & (VCC_HTABLE_SIZE - 1); sk_add_node(sk, head); } @@ -49,6 +50,7 @@ void vcc_insert_socket(struct sock *sk) __vcc_insert_socket(sk); write_unlock_irq(&vcc_sklist_lock); } +EXPORT_SYMBOL(vcc_insert_socket); static void vcc_remove_socket(struct sock *sk) { @@ -57,8 +59,7 @@ static void vcc_remove_socket(struct sock *sk) write_unlock_irq(&vcc_sklist_lock); } - -static struct sk_buff *alloc_tx(struct atm_vcc *vcc,unsigned int size) +static struct sk_buff *alloc_tx(struct atm_vcc *vcc, unsigned int size) { struct sk_buff *skb; struct sock *sk = sk_atm(vcc); @@ -70,23 +71,20 @@ static struct sk_buff *alloc_tx(struct atm_vcc *vcc,unsigned int size) } while (!(skb = alloc_skb(size, GFP_KERNEL))) schedule(); - pr_debug("AlTx %d += %d\n", sk_wmem_alloc_get(sk), skb->truesize); + pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize); atomic_add(skb->truesize, &sk->sk_wmem_alloc); return skb; } - -EXPORT_SYMBOL(vcc_hash); -EXPORT_SYMBOL(vcc_sklist_lock); -EXPORT_SYMBOL(vcc_insert_socket); - static void vcc_sock_destruct(struct sock *sk) { if (atomic_read(&sk->sk_rmem_alloc)) - printk(KERN_DEBUG "vcc_sock_destruct: rmem leakage (%d bytes) detected.\n", atomic_read(&sk->sk_rmem_alloc)); + printk(KERN_DEBUG "%s: rmem leakage (%d bytes) detected.\n", + __func__, atomic_read(&sk->sk_rmem_alloc)); if (atomic_read(&sk->sk_wmem_alloc)) - printk(KERN_DEBUG "vcc_sock_destruct: wmem leakage (%d bytes) detected.\n", atomic_read(&sk->sk_wmem_alloc)); + printk(KERN_DEBUG "%s: wmem leakage (%d bytes) detected.\n", + __func__, atomic_read(&sk->sk_wmem_alloc)); } static void vcc_def_wakeup(struct sock *sk) @@ -142,8 +140,8 @@ int vcc_create(struct net *net, struct socket *sock, int protocol, int family) vcc = atm_sk(sk); vcc->dev = NULL; - memset(&vcc->local,0,sizeof(struct sockaddr_atmsvc)); - memset(&vcc->remote,0,sizeof(struct sockaddr_atmsvc)); + memset(&vcc->local, 0, sizeof(struct sockaddr_atmsvc)); + memset(&vcc->remote, 0, sizeof(struct sockaddr_atmsvc)); vcc->qos.txtp.max_sdu = 1 << 16; /* for meta VCs */ atomic_set(&sk->sk_wmem_alloc, 1); atomic_set(&sk->sk_rmem_alloc, 0); @@ -156,7 +154,6 @@ int vcc_create(struct net *net, struct socket *sock, int protocol, int family) return 0; } - static void vcc_destroy_socket(struct sock *sk) { struct atm_vcc *vcc = atm_sk(sk); @@ -171,7 +168,7 @@ static void vcc_destroy_socket(struct sock *sk) vcc->push(vcc, NULL); /* atmarpd has no push */ while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { - atm_return(vcc,skb->truesize); + atm_return(vcc, skb->truesize); kfree_skb(skb); } @@ -182,7 +179,6 @@ static void 
vcc_destroy_socket(struct sock *sk) vcc_remove_socket(sk); } - int vcc_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -197,7 +193,6 @@ int vcc_release(struct socket *sock) return 0; } - void vcc_release_async(struct atm_vcc *vcc, int reply) { struct sock *sk = sk_atm(vcc); @@ -208,8 +203,6 @@ void vcc_release_async(struct atm_vcc *vcc, int reply) clear_bit(ATM_VF_WAITING, &vcc->flags); sk->sk_state_change(sk); } - - EXPORT_SYMBOL(vcc_release_async); @@ -235,36 +228,37 @@ void atm_dev_release_vccs(struct atm_dev *dev) write_unlock_irq(&vcc_sklist_lock); } - -static int adjust_tp(struct atm_trafprm *tp,unsigned char aal) +static int adjust_tp(struct atm_trafprm *tp, unsigned char aal) { int max_sdu; - if (!tp->traffic_class) return 0; + if (!tp->traffic_class) + return 0; switch (aal) { - case ATM_AAL0: - max_sdu = ATM_CELL_SIZE-1; - break; - case ATM_AAL34: - max_sdu = ATM_MAX_AAL34_PDU; - break; - default: - pr_warning("AAL problems ... (%d)\n", aal); - /* fall through */ - case ATM_AAL5: - max_sdu = ATM_MAX_AAL5_PDU; + case ATM_AAL0: + max_sdu = ATM_CELL_SIZE-1; + break; + case ATM_AAL34: + max_sdu = ATM_MAX_AAL34_PDU; + break; + default: + pr_warning("AAL problems ... (%d)\n", aal); + /* fall through */ + case ATM_AAL5: + max_sdu = ATM_MAX_AAL5_PDU; } - if (!tp->max_sdu) tp->max_sdu = max_sdu; - else if (tp->max_sdu > max_sdu) return -EINVAL; - if (!tp->max_cdv) tp->max_cdv = ATM_MAX_CDV; + if (!tp->max_sdu) + tp->max_sdu = max_sdu; + else if (tp->max_sdu > max_sdu) + return -EINVAL; + if (!tp->max_cdv) + tp->max_cdv = ATM_MAX_CDV; return 0; } - static int check_ci(const struct atm_vcc *vcc, short vpi, int vci) { - struct hlist_head *head = &vcc_hash[vci & - (VCC_HTABLE_SIZE - 1)]; + struct hlist_head *head = &vcc_hash[vci & (VCC_HTABLE_SIZE - 1)]; struct hlist_node *node; struct sock *s; struct atm_vcc *walk; @@ -288,7 +282,6 @@ static int check_ci(const struct atm_vcc *vcc, short vpi, int vci) return 0; } - static int find_ci(const struct atm_vcc *vcc, short *vpi, int *vci) { static short p; /* poor man's per-device cache */ @@ -326,14 +319,13 @@ static int find_ci(const struct atm_vcc *vcc, short *vpi, int *vci) if ((c == ATM_NOT_RSV_VCI || *vci != ATM_VCI_ANY) && *vpi == ATM_VPI_ANY) { p++; - if (p >= 1 << vcc->dev->ci_range.vpi_bits) p = 0; + if (p >= 1 << vcc->dev->ci_range.vpi_bits) + p = 0; } - } - while (old_p != p || old_c != c); + } while (old_p != p || old_c != c); return -EADDRINUSE; } - static int __vcc_connect(struct atm_vcc *vcc, struct atm_dev *dev, short vpi, int vci) { @@ -361,27 +353,29 @@ static int __vcc_connect(struct atm_vcc *vcc, struct atm_dev *dev, short vpi, __vcc_insert_socket(sk); write_unlock_irq(&vcc_sklist_lock); switch (vcc->qos.aal) { - case ATM_AAL0: - error = atm_init_aal0(vcc); - vcc->stats = &dev->stats.aal0; - break; - case ATM_AAL34: - error = atm_init_aal34(vcc); - vcc->stats = &dev->stats.aal34; - break; - case ATM_NO_AAL: - /* ATM_AAL5 is also used in the "0 for default" case */ - vcc->qos.aal = ATM_AAL5; - /* fall through */ - case ATM_AAL5: - error = atm_init_aal5(vcc); - vcc->stats = &dev->stats.aal5; - break; - default: - error = -EPROTOTYPE; + case ATM_AAL0: + error = atm_init_aal0(vcc); + vcc->stats = &dev->stats.aal0; + break; + case ATM_AAL34: + error = atm_init_aal34(vcc); + vcc->stats = &dev->stats.aal34; + break; + case ATM_NO_AAL: + /* ATM_AAL5 is also used in the "0 for default" case */ + vcc->qos.aal = ATM_AAL5; + /* fall through */ + case ATM_AAL5: + error = atm_init_aal5(vcc); + vcc->stats = &dev->stats.aal5; + 
break; + default: + error = -EPROTOTYPE; } - if (!error) error = adjust_tp(&vcc->qos.txtp,vcc->qos.aal); - if (!error) error = adjust_tp(&vcc->qos.rxtp,vcc->qos.aal); + if (!error) + error = adjust_tp(&vcc->qos.txtp, vcc->qos.aal); + if (!error) + error = adjust_tp(&vcc->qos.rxtp, vcc->qos.aal); if (error) goto fail; pr_debug("VCC %d.%d, AAL %d\n", vpi, vci, vcc->qos.aal); @@ -397,7 +391,8 @@ static int __vcc_connect(struct atm_vcc *vcc, struct atm_dev *dev, short vpi, vcc->qos.rxtp.max_sdu); if (dev->ops->open) { - if ((error = dev->ops->open(vcc))) + error = dev->ops->open(vcc); + if (error) goto fail; } return 0; @@ -411,7 +406,6 @@ fail_module_put: return error; } - int vcc_connect(struct socket *sock, int itf, short vpi, int vci) { struct atm_dev *dev; @@ -427,16 +421,16 @@ int vcc_connect(struct socket *sock, int itf, short vpi, int vci) return -EINVAL; if (vpi != ATM_VPI_UNSPEC && vci != ATM_VCI_UNSPEC) - clear_bit(ATM_VF_PARTIAL,&vcc->flags); + clear_bit(ATM_VF_PARTIAL, &vcc->flags); else - if (test_bit(ATM_VF_PARTIAL,&vcc->flags)) + if (test_bit(ATM_VF_PARTIAL, &vcc->flags)) return -EINVAL; pr_debug("(TX: cl %d,bw %d-%d,sdu %d; " "RX: cl %d,bw %d-%d,sdu %d,AAL %s%d)\n", vcc->qos.txtp.traffic_class, vcc->qos.txtp.min_pcr, vcc->qos.txtp.max_pcr, vcc->qos.txtp.max_sdu, vcc->qos.rxtp.traffic_class, vcc->qos.rxtp.min_pcr, - vcc->qos.rxtp.max_pcr,vcc->qos.rxtp.max_sdu, + vcc->qos.rxtp.max_pcr, vcc->qos.rxtp.max_sdu, vcc->qos.aal == ATM_AAL5 ? "" : vcc->qos.aal == ATM_AAL0 ? "" : " ??? code ", vcc->qos.aal == ATM_AAL0 ? 0 : vcc->qos.aal); @@ -446,12 +440,14 @@ int vcc_connect(struct socket *sock, int itf, short vpi, int vci) vcc->qos.rxtp.traffic_class == ATM_ANYCLASS) return -EINVAL; if (likely(itf != ATM_ITF_ANY)) { - dev = try_then_request_module(atm_dev_lookup(itf), "atm-device-%d", itf); + dev = try_then_request_module(atm_dev_lookup(itf), + "atm-device-%d", itf); } else { dev = NULL; mutex_lock(&atm_dev_mutex); if (!list_empty(&atm_devs)) { - dev = list_entry(atm_devs.next, struct atm_dev, dev_list); + dev = list_entry(atm_devs.next, + struct atm_dev, dev_list); atm_dev_hold(dev); } mutex_unlock(&atm_dev_mutex); @@ -464,13 +460,12 @@ int vcc_connect(struct socket *sock, int itf, short vpi, int vci) return error; } if (vpi == ATM_VPI_UNSPEC || vci == ATM_VCI_UNSPEC) - set_bit(ATM_VF_PARTIAL,&vcc->flags); - if (test_bit(ATM_VF_READY,&ATM_SD(sock)->flags)) + set_bit(ATM_VF_PARTIAL, &vcc->flags); + if (test_bit(ATM_VF_READY, &ATM_SD(sock)->flags)) sock->state = SS_CONNECTED; return 0; } - int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { @@ -484,8 +479,8 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, if (flags & ~MSG_DONTWAIT) /* only handle MSG_DONTWAIT */ return -EOPNOTSUPP; vcc = ATM_SD(sock); - if (test_bit(ATM_VF_RELEASED,&vcc->flags) || - test_bit(ATM_VF_CLOSE,&vcc->flags) || + if (test_bit(ATM_VF_RELEASED, &vcc->flags) || + test_bit(ATM_VF_CLOSE, &vcc->flags) || !test_bit(ATM_VF_READY, &vcc->flags)) return 0; @@ -509,7 +504,6 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, return copied; } - int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len) { @@ -517,7 +511,7 @@ int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, DEFINE_WAIT(wait); struct atm_vcc *vcc; struct sk_buff *skb; - int eff,error; + int eff, error; const void __user *buff; int size; @@ -556,7 +550,7 @@ int vcc_sendmsg(struct kiocb *iocb, 
struct socket *sock, struct msghdr *m, eff = (size+3) & ~3; /* align to word boundary */ prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); error = 0; - while (!(skb = alloc_tx(vcc,eff))) { + while (!(skb = alloc_tx(vcc, eff))) { if (m->msg_flags & MSG_DONTWAIT) { error = -EAGAIN; break; @@ -566,9 +560,9 @@ int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, error = -ERESTARTSYS; break; } - if (test_bit(ATM_VF_RELEASED,&vcc->flags) || - test_bit(ATM_VF_CLOSE,&vcc->flags) || - !test_bit(ATM_VF_READY,&vcc->flags)) { + if (test_bit(ATM_VF_RELEASED, &vcc->flags) || + test_bit(ATM_VF_CLOSE, &vcc->flags) || + !test_bit(ATM_VF_READY, &vcc->flags)) { error = -EPIPE; send_sig(SIGPIPE, current, 0); break; @@ -580,20 +574,20 @@ int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, goto out; skb->dev = NULL; /* for paths shared with net_device interfaces */ ATM_SKB(skb)->atm_options = vcc->atm_options; - if (copy_from_user(skb_put(skb,size),buff,size)) { + if (copy_from_user(skb_put(skb, size), buff, size)) { kfree_skb(skb); error = -EFAULT; goto out; } - if (eff != size) memset(skb->data+size,0,eff-size); - error = vcc->dev->ops->send(vcc,skb); + if (eff != size) + memset(skb->data + size, 0, eff-size); + error = vcc->dev->ops->send(vcc, skb); error = error ? error : size; out: release_sock(sk); return error; } - unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; @@ -629,8 +623,7 @@ unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait) return mask; } - -static int atm_change_qos(struct atm_vcc *vcc,struct atm_qos *qos) +static int atm_change_qos(struct atm_vcc *vcc, struct atm_qos *qos) { int error; @@ -642,25 +635,31 @@ static int atm_change_qos(struct atm_vcc *vcc,struct atm_qos *qos) qos->rxtp.traffic_class != vcc->qos.rxtp.traffic_class || qos->txtp.traffic_class != vcc->qos.txtp.traffic_class) return -EINVAL; - error = adjust_tp(&qos->txtp,qos->aal); - if (!error) error = adjust_tp(&qos->rxtp,qos->aal); - if (error) return error; - if (!vcc->dev->ops->change_qos) return -EOPNOTSUPP; + error = adjust_tp(&qos->txtp, qos->aal); + if (!error) + error = adjust_tp(&qos->rxtp, qos->aal); + if (error) + return error; + if (!vcc->dev->ops->change_qos) + return -EOPNOTSUPP; if (sk_atm(vcc)->sk_family == AF_ATMPVC) - return vcc->dev->ops->change_qos(vcc,qos,ATM_MF_SET); - return svc_change_qos(vcc,qos); + return vcc->dev->ops->change_qos(vcc, qos, ATM_MF_SET); + return svc_change_qos(vcc, qos); } - static int check_tp(const struct atm_trafprm *tp) { /* @@@ Should be merged with adjust_tp */ - if (!tp->traffic_class || tp->traffic_class == ATM_ANYCLASS) return 0; + if (!tp->traffic_class || tp->traffic_class == ATM_ANYCLASS) + return 0; if (tp->traffic_class != ATM_UBR && !tp->min_pcr && !tp->pcr && - !tp->max_pcr) return -EINVAL; - if (tp->min_pcr == ATM_MAX_PCR) return -EINVAL; + !tp->max_pcr) + return -EINVAL; + if (tp->min_pcr == ATM_MAX_PCR) + return -EINVAL; if (tp->min_pcr && tp->max_pcr && tp->max_pcr != ATM_MAX_PCR && - tp->min_pcr > tp->max_pcr) return -EINVAL; + tp->min_pcr > tp->max_pcr) + return -EINVAL; /* * We allow pcr to be outside [min_pcr,max_pcr], because later * adjustment may still push it in the valid range. 
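[Illustrative aside, not part of any patch in this series: the check_tp() hunk above and the check_qos()/SO_ATMQOS handling that follows validate the QoS description a caller hands in through setsockopt(). The sketch below shows, from the user-space side, the kind of struct atm_qos those checks receive; it assumes the usual ATM uapi constants (SOL_ATM, SO_ATMQOS, ATM_AAL5, ATM_UBR), and the SDU size is made up.]

/* Illustration only: request best-effort (UBR) AAL5 service on an ATM socket. */
#include <string.h>
#include <sys/socket.h>
#include <linux/atm.h>	/* struct atm_qos, SOL_ATM, SO_ATMQOS, ATM_AAL5, ATM_UBR */

static int set_ubr_qos(int fd)
{
	struct atm_qos qos;

	memset(&qos, 0, sizeof(qos));
	qos.aal = ATM_AAL5;
	qos.txtp.traffic_class = ATM_UBR;	/* UBR: no PCR fields needed */
	qos.rxtp.traffic_class = ATM_UBR;
	qos.txtp.max_sdu = 1516;		/* example SDU size */
	qos.rxtp.max_sdu = 1516;

	/* ends up in vcc_setsockopt() -> check_qos() -> check_tp() */
	return setsockopt(fd, SOL_ATM, SO_ATMQOS, &qos, sizeof(qos));
}

[With ATM_UBR and no PCR fields set, check_tp() accepts the request immediately; a rate-limited traffic class would additionally have to satisfy the min_pcr/max_pcr ordering rules shown in the hunk above.]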
@@ -668,7 +667,6 @@ static int check_tp(const struct atm_trafprm *tp) return 0; } - static int check_qos(const struct atm_qos *qos) { int error; @@ -678,9 +676,11 @@ static int check_qos(const struct atm_qos *qos) if (qos->txtp.traffic_class != qos->rxtp.traffic_class && qos->txtp.traffic_class && qos->rxtp.traffic_class && qos->txtp.traffic_class != ATM_ANYCLASS && - qos->rxtp.traffic_class != ATM_ANYCLASS) return -EINVAL; + qos->rxtp.traffic_class != ATM_ANYCLASS) + return -EINVAL; error = check_tp(&qos->txtp); - if (error) return error; + if (error) + return error; return check_tp(&qos->rxtp); } @@ -696,37 +696,41 @@ int vcc_setsockopt(struct socket *sock, int level, int optname, vcc = ATM_SD(sock); switch (optname) { - case SO_ATMQOS: - { - struct atm_qos qos; - - if (copy_from_user(&qos,optval,sizeof(qos))) - return -EFAULT; - error = check_qos(&qos); - if (error) return error; - if (sock->state == SS_CONNECTED) - return atm_change_qos(vcc,&qos); - if (sock->state != SS_UNCONNECTED) - return -EBADFD; - vcc->qos = qos; - set_bit(ATM_VF_HASQOS,&vcc->flags); - return 0; - } - case SO_SETCLP: - if (get_user(value,(unsigned long __user *)optval)) - return -EFAULT; - if (value) vcc->atm_options |= ATM_ATMOPT_CLP; - else vcc->atm_options &= ~ATM_ATMOPT_CLP; - return 0; - default: - if (level == SOL_SOCKET) return -EINVAL; - break; + case SO_ATMQOS: + { + struct atm_qos qos; + + if (copy_from_user(&qos, optval, sizeof(qos))) + return -EFAULT; + error = check_qos(&qos); + if (error) + return error; + if (sock->state == SS_CONNECTED) + return atm_change_qos(vcc, &qos); + if (sock->state != SS_UNCONNECTED) + return -EBADFD; + vcc->qos = qos; + set_bit(ATM_VF_HASQOS, &vcc->flags); + return 0; } - if (!vcc->dev || !vcc->dev->ops->setsockopt) return -EINVAL; - return vcc->dev->ops->setsockopt(vcc,level,optname,optval,optlen); + case SO_SETCLP: + if (get_user(value, (unsigned long __user *)optval)) + return -EFAULT; + if (value) + vcc->atm_options |= ATM_ATMOPT_CLP; + else + vcc->atm_options &= ~ATM_ATMOPT_CLP; + return 0; + default: + if (level == SOL_SOCKET) + return -EINVAL; + break; + } + if (!vcc->dev || !vcc->dev->ops->setsockopt) + return -EINVAL; + return vcc->dev->ops->setsockopt(vcc, level, optname, optval, optlen); } - int vcc_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { @@ -740,33 +744,33 @@ int vcc_getsockopt(struct socket *sock, int level, int optname, vcc = ATM_SD(sock); switch (optname) { - case SO_ATMQOS: - if (!test_bit(ATM_VF_HASQOS,&vcc->flags)) - return -EINVAL; - return copy_to_user(optval,&vcc->qos,sizeof(vcc->qos)) ? - -EFAULT : 0; - case SO_SETCLP: - return put_user(vcc->atm_options & ATM_ATMOPT_CLP ? 1 : - 0,(unsigned long __user *)optval) ? -EFAULT : 0; - case SO_ATMPVC: - { - struct sockaddr_atmpvc pvc; - - if (!vcc->dev || - !test_bit(ATM_VF_ADDR,&vcc->flags)) - return -ENOTCONN; - pvc.sap_family = AF_ATMPVC; - pvc.sap_addr.itf = vcc->dev->number; - pvc.sap_addr.vpi = vcc->vpi; - pvc.sap_addr.vci = vcc->vci; - return copy_to_user(optval,&pvc,sizeof(pvc)) ? - -EFAULT : 0; - } - default: - if (level == SOL_SOCKET) return -EINVAL; + case SO_ATMQOS: + if (!test_bit(ATM_VF_HASQOS, &vcc->flags)) + return -EINVAL; + return copy_to_user(optval, &vcc->qos, sizeof(vcc->qos)) + ? -EFAULT : 0; + case SO_SETCLP: + return put_user(vcc->atm_options & ATM_ATMOPT_CLP ? 1 : 0, + (unsigned long __user *)optval) ? 
-EFAULT : 0; + case SO_ATMPVC: + { + struct sockaddr_atmpvc pvc; + + if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags)) + return -ENOTCONN; + pvc.sap_family = AF_ATMPVC; + pvc.sap_addr.itf = vcc->dev->number; + pvc.sap_addr.vpi = vcc->vpi; + pvc.sap_addr.vci = vcc->vci; + return copy_to_user(optval, &pvc, sizeof(pvc)) ? -EFAULT : 0; + } + default: + if (level == SOL_SOCKET) + return -EINVAL; break; } - if (!vcc->dev || !vcc->dev->ops->getsockopt) return -EINVAL; + if (!vcc->dev || !vcc->dev->ops->getsockopt) + return -EINVAL; return vcc->dev->ops->getsockopt(vcc, level, optname, optval, len); } @@ -774,22 +778,26 @@ static int __init atm_init(void) { int error; - if ((error = proto_register(&vcc_proto, 0)) < 0) + error = proto_register(&vcc_proto, 0); + if (error < 0) goto out; - - if ((error = atmpvc_init()) < 0) { + error = atmpvc_init(); + if (error < 0) { pr_err("atmpvc_init() failed with %d\n", error); goto out_unregister_vcc_proto; } - if ((error = atmsvc_init()) < 0) { + error = atmsvc_init(); + if (error < 0) { pr_err("atmsvc_init() failed with %d\n", error); goto out_atmpvc_exit; } - if ((error = atm_proc_init()) < 0) { + error = atm_proc_init(); + if (error < 0) { pr_err("atm_proc_init() failed with %d\n", error); goto out_atmsvc_exit; } - if ((error = atm_sysfs_init()) < 0) { + error = atm_sysfs_init(); + if (error < 0) { pr_err("atm_sysfs_init() failed with %d\n", error); goto out_atmproc_exit; } -- cgit v1.2.2 From 5ff7ef7911d100b6568c731b1d078f819da82d03 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 26 Jan 2010 11:40:07 +0000 Subject: net/atm/ioctl.c: checkpatch cleanups Spacing cleanups Moved EXPORT_SYMBOL Mostly 80 column wrapped. switch/case cleanups Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/atm/ioctl.c | 195 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 97 insertions(+), 98 deletions(-) (limited to 'net') diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c index b75afba1f72b..62dc8bfe6fe7 100644 --- a/net/atm/ioctl.c +++ b/net/atm/ioctl.c @@ -37,6 +37,7 @@ void register_atm_ioctl(struct atm_ioctl *ioctl) list_add_tail(&ioctl->list, &ioctl_list); mutex_unlock(&ioctl_mutex); } +EXPORT_SYMBOL(register_atm_ioctl); void deregister_atm_ioctl(struct atm_ioctl *ioctl) { @@ -44,129 +45,128 @@ void deregister_atm_ioctl(struct atm_ioctl *ioctl) list_del(&ioctl->list); mutex_unlock(&ioctl_mutex); } - -EXPORT_SYMBOL(register_atm_ioctl); EXPORT_SYMBOL(deregister_atm_ioctl); -static int do_vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg, int compat) +static int do_vcc_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg, int compat) { struct sock *sk = sock->sk; struct atm_vcc *vcc; int error; - struct list_head * pos; + struct list_head *pos; void __user *argp = (void __user *)arg; vcc = ATM_SD(sock); switch (cmd) { - case SIOCOUTQ: - if (sock->state != SS_CONNECTED || - !test_bit(ATM_VF_READY, &vcc->flags)) { - error = -EINVAL; - goto done; - } - error = put_user(sk->sk_sndbuf - sk_wmem_alloc_get(sk), - (int __user *) argp) ? -EFAULT : 0; + case SIOCOUTQ: + if (sock->state != SS_CONNECTED || + !test_bit(ATM_VF_READY, &vcc->flags)) { + error = -EINVAL; + goto done; + } + error = put_user(sk->sk_sndbuf - sk_wmem_alloc_get(sk), + (int __user *)argp) ? 
-EFAULT : 0; + goto done; + case SIOCINQ: + { + struct sk_buff *skb; + + if (sock->state != SS_CONNECTED) { + error = -EINVAL; goto done; - case SIOCINQ: - { - struct sk_buff *skb; - - if (sock->state != SS_CONNECTED) { - error = -EINVAL; - goto done; - } - skb = skb_peek(&sk->sk_receive_queue); - error = put_user(skb ? skb->len : 0, - (int __user *)argp) ? -EFAULT : 0; - goto done; - } - case SIOCGSTAMP: /* borrowed from IP */ + } + skb = skb_peek(&sk->sk_receive_queue); + error = put_user(skb ? skb->len : 0, + (int __user *)argp) ? -EFAULT : 0; + goto done; + } + case SIOCGSTAMP: /* borrowed from IP */ #ifdef CONFIG_COMPAT - if (compat) - error = compat_sock_get_timestamp(sk, argp); - else + if (compat) + error = compat_sock_get_timestamp(sk, argp); + else #endif - error = sock_get_timestamp(sk, argp); - goto done; - case SIOCGSTAMPNS: /* borrowed from IP */ + error = sock_get_timestamp(sk, argp); + goto done; + case SIOCGSTAMPNS: /* borrowed from IP */ #ifdef CONFIG_COMPAT - if (compat) - error = compat_sock_get_timestampns(sk, argp); - else + if (compat) + error = compat_sock_get_timestampns(sk, argp); + else #endif - error = sock_get_timestampns(sk, argp); + error = sock_get_timestampns(sk, argp); + goto done; + case ATM_SETSC: + if (net_ratelimit()) + pr_warning("ATM_SETSC is obsolete; used by %s:%d\n", + current->comm, task_pid_nr(current)); + error = 0; + goto done; + case ATMSIGD_CTRL: + if (!capable(CAP_NET_ADMIN)) { + error = -EPERM; goto done; - case ATM_SETSC: - if (net_ratelimit()) - pr_warning("ATM_SETSC is obsolete; used by %s:%d\n", - current->comm, task_pid_nr(current)); - error = 0; + } + /* + * The user/kernel protocol for exchanging signalling + * info uses kernel pointers as opaque references, + * so the holder of the file descriptor can scribble + * on the kernel... so we should make sure that we + * have the same privileges that /proc/kcore needs + */ + if (!capable(CAP_SYS_RAWIO)) { + error = -EPERM; goto done; - case ATMSIGD_CTRL: - if (!capable(CAP_NET_ADMIN)) { - error = -EPERM; - goto done; - } - /* - * The user/kernel protocol for exchanging signalling - * info uses kernel pointers as opaque references, - * so the holder of the file descriptor can scribble - * on the kernel... so we should make sure that we - * have the same privileges that /proc/kcore needs - */ - if (!capable(CAP_SYS_RAWIO)) { - error = -EPERM; - goto done; - } + } #ifdef CONFIG_COMPAT - /* WTF? I don't even want to _think_ about making this - work for 32-bit userspace. TBH I don't really want - to think about it at all. dwmw2. */ - if (compat) { - if (net_ratelimit()) - pr_warning("32-bit task cannot be atmsigd\n"); - error = -EINVAL; - goto done; - } + /* WTF? I don't even want to _think_ about making this + work for 32-bit userspace. TBH I don't really want + to think about it at all. dwmw2. 
*/ + if (compat) { + if (net_ratelimit()) + pr_warning("32-bit task cannot be atmsigd\n"); + error = -EINVAL; + goto done; + } #endif - error = sigd_attach(vcc); - if (!error) - sock->state = SS_CONNECTED; + error = sigd_attach(vcc); + if (!error) + sock->state = SS_CONNECTED; + goto done; + case ATM_SETBACKEND: + case ATM_NEWBACKENDIF: + { + atm_backend_t backend; + error = get_user(backend, (atm_backend_t __user *)argp); + if (error) goto done; - case ATM_SETBACKEND: - case ATM_NEWBACKENDIF: - { - atm_backend_t backend; - error = get_user(backend, (atm_backend_t __user *) argp); - if (error) - goto done; - switch (backend) { - case ATM_BACKEND_PPP: - request_module("pppoatm"); - break; - case ATM_BACKEND_BR2684: - request_module("br2684"); - break; - } - } - break; - case ATMMPC_CTRL: - case ATMMPC_DATA: - request_module("mpoa"); - break; - case ATMARPD_CTRL: - request_module("clip"); + switch (backend) { + case ATM_BACKEND_PPP: + request_module("pppoatm"); break; - case ATMLEC_CTRL: - request_module("lec"); + case ATM_BACKEND_BR2684: + request_module("br2684"); break; + } + break; + } + case ATMMPC_CTRL: + case ATMMPC_DATA: + request_module("mpoa"); + break; + case ATMARPD_CTRL: + request_module("clip"); + break; + case ATMLEC_CTRL: + request_module("lec"); + break; } error = -ENOIOCTLCMD; mutex_lock(&ioctl_mutex); list_for_each(pos, &ioctl_list) { - struct atm_ioctl * ic = list_entry(pos, struct atm_ioctl, list); + struct atm_ioctl *ic = list_entry(pos, struct atm_ioctl, list); if (try_module_get(ic->owner)) { error = ic->ioctl(sock, cmd, arg); module_put(ic->owner); @@ -185,7 +185,6 @@ done: return error; } - int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { return do_vcc_ioctl(sock, cmd, arg, 0); @@ -288,8 +287,8 @@ static int do_atmif_sioc(struct socket *sock, unsigned int cmd, sioc = compat_alloc_user_space(sizeof(*sioc)); sioc32 = compat_ptr(arg); - if (copy_in_user(&sioc->number, &sioc32->number, 2 * sizeof(int)) - || get_user(data, &sioc32->arg)) + if (copy_in_user(&sioc->number, &sioc32->number, 2 * sizeof(int)) || + get_user(data, &sioc32->arg)) return -EFAULT; datap = compat_ptr(data); if (put_user(datap, &sioc->arg)) -- cgit v1.2.2 From c48192a7075fb218d92810fbe76ddd8732f2bbb8 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 26 Jan 2010 11:40:08 +0000 Subject: net/atm/lec.c: checkpatch cleanups Convert #include Use print_hex_dump Move embedded assigns out of tests Move trailing statements to new lines Remove unnecessary braces around single line statements switch/case cleanups Removed paren around returns Use %pM Moved leading continuation logical tests to end of previous line Signed-off-by: Joe Perches Signed-off-by: David S. 
Miller --- net/atm/lec.c | 284 ++++++++++++++++++++++++++++------------------------------ 1 file changed, 136 insertions(+), 148 deletions(-) (limited to 'net') diff --git a/net/atm/lec.c b/net/atm/lec.c index 6873813c3c99..aefd278d6af6 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include @@ -87,17 +87,19 @@ static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv, int is_rdesc, struct lec_arp_table **ret_entry); static void lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr, - const unsigned char *atm_addr, unsigned long remoteflag, + const unsigned char *atm_addr, + unsigned long remoteflag, unsigned int targetless_le_arp); static void lec_flush_complete(struct lec_priv *priv, unsigned long tran_id); static int lec_mcast_make(struct lec_priv *priv, struct atm_vcc *vcc); static void lec_set_flush_tran_id(struct lec_priv *priv, const unsigned char *atm_addr, unsigned long tran_id); -static void lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, +static void lec_vcc_added(struct lec_priv *priv, + const struct atmlec_ioc *ioc_data, struct atm_vcc *vcc, - void (*old_push) (struct atm_vcc *vcc, - struct sk_buff *skb)); + void (*old_push)(struct atm_vcc *vcc, + struct sk_buff *skb)); static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc); /* must be done under lec_arp_lock */ @@ -112,7 +114,6 @@ static inline void lec_arp_put(struct lec_arp_table *entry) kfree(entry); } - static struct lane2_ops lane2_ops = { lane2_resolve, /* resolve, spec 3.1.3 */ lane2_associate_req, /* associate_req, spec 3.1.4 */ @@ -150,7 +151,8 @@ static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev) mesg = (struct atmlec_msg *)skb2->data; mesg->type = l_topology_change; buff += 4; - mesg->content.normal.flag = *buff & 0x01; /* 0x01 is topology change */ + mesg->content.normal.flag = *buff & 0x01; + /* 0x01 is topology change */ priv = netdev_priv(dev); atm_force_charge(priv->lecd, skb2->truesize); @@ -263,14 +265,10 @@ static netdev_tx_t lec_start_xmit(struct sk_buff *skb, unsigned char rdesc[ETH_ALEN]; /* Token Ring route descriptor */ #endif int is_rdesc; -#if DUMP_PACKETS > 0 - char buf[300]; - int i = 0; -#endif /* DUMP_PACKETS >0 */ pr_debug("called\n"); if (!priv->lecd) { - printk("%s:No lecd attached\n", dev->name); + pr_info("%s:No lecd attached\n", dev->name); dev->stats.tx_errors++; netif_stop_queue(dev); kfree_skb(skb); @@ -315,23 +313,17 @@ static netdev_tx_t lec_start_xmit(struct sk_buff *skb, } #endif -#if DUMP_PACKETS > 0 - printk("%s: send datalen:%ld lecid:%4.4x\n", dev->name, - skb->len, priv->lecid); #if DUMP_PACKETS >= 2 - for (i = 0; i < skb->len && i < 99; i++) { - sprintf(buf + i * 3, "%2.2x ", 0xff & skb->data[i]); - } +#define MAX_DUMP_SKB 99 #elif DUMP_PACKETS >= 1 - for (i = 0; i < skb->len && i < 30; i++) { - sprintf(buf + i * 3, "%2.2x ", 0xff & skb->data[i]); - } +#define MAX_DUMP_SKB 30 +#endif +#if DUMP_PACKETS >= 1 + printk(KERN_DEBUG "%s: send datalen:%ld lecid:%4.4x\n", + dev->name, skb->len, priv->lecid); + print_hex_dump(KERN_DEBUG, "", DUMP_OFFSET, 16, 1, + skb->data, min(skb->len, MAX_DUMP_SKB), true); #endif /* DUMP_PACKETS >= 1 */ - if (i == skb->len) - printk("%s\n", buf); - else - printk("%s...\n", buf); -#endif /* DUMP_PACKETS > 0 */ /* Minimum ethernet-frame size */ #ifdef CONFIG_TR @@ -385,7 +377,8 @@ static netdev_tx_t lec_start_xmit(struct sk_buff *skb, goto out; } #if DUMP_PACKETS > 0 - printk("%s:sending to 
vpi:%d vci:%d\n", dev->name, vcc->vpi, vcc->vci); + printk(KERN_DEBUG "%s:sending to vpi:%d vci:%d\n", + dev->name, vcc->vpi, vcc->vci); #endif /* DUMP_PACKETS > 0 */ while (entry && (skb2 = skb_dequeue(&entry->tx_wait))) { @@ -442,14 +435,12 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) pr_debug("%s: msg from zeppelin:%d\n", dev->name, mesg->type); switch (mesg->type) { case l_set_mac_addr: - for (i = 0; i < 6; i++) { + for (i = 0; i < 6; i++) dev->dev_addr[i] = mesg->content.normal.mac_addr[i]; - } break; case l_del_mac_addr: - for (i = 0; i < 6; i++) { + for (i = 0; i < 6; i++) dev->dev_addr[i] = 0; - } break; case l_addr_delete: lec_addr_delete(priv, mesg->content.normal.atm_addr, @@ -497,13 +488,14 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) priv->flush_timeout = (mesg->content.config.flush_timeout * HZ); priv->path_switching_delay = (mesg->content.config.path_switching_delay * HZ); - priv->lane_version = mesg->content.config.lane_version; /* LANE2 */ + priv->lane_version = mesg->content.config.lane_version; + /* LANE2 */ priv->lane2_ops = NULL; if (priv->lane_version > 1) priv->lane2_ops = &lane2_ops; if (dev_set_mtu(dev, mesg->content.config.mtu)) - printk("%s: change_mtu to %d failed\n", dev->name, - mesg->content.config.mtu); + pr_info("%s: change_mtu to %d failed\n", + dev->name, mesg->content.config.mtu); priv->is_proxy = mesg->content.config.is_proxy; break; case l_flush_tran_id: @@ -524,16 +516,15 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) break; if (br_fdb_test_addr_hook(dev, - mesg->content.proxy.mac_addr)) { + mesg->content.proxy.mac_addr)) { /* hit from bridge table, send LE_ARP_RESPONSE */ struct sk_buff *skb2; struct sock *sk; pr_debug("%s: entry found, responding to zeppelin\n", dev->name); - skb2 = - alloc_skb(sizeof(struct atmlec_msg), - GFP_ATOMIC); + skb2 = alloc_skb(sizeof(struct atmlec_msg), + GFP_ATOMIC); if (skb2 == NULL) break; skb2->len = sizeof(struct atmlec_msg); @@ -548,7 +539,7 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) #endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */ break; default: - printk("%s: Unknown message type %d\n", dev->name, mesg->type); + pr_info("%s: Unknown message type %d\n", dev->name, mesg->type); dev_kfree_skb(skb); return -EINVAL; } @@ -569,14 +560,13 @@ static void lec_atm_close(struct atm_vcc *vcc) lec_arp_destroy(priv); if (skb_peek(&sk_atm(vcc)->sk_receive_queue)) - printk("%s lec_atm_close: closing with messages pending\n", - dev->name); + pr_info("%s closing with messages pending\n", dev->name); while ((skb = skb_dequeue(&sk_atm(vcc)->sk_receive_queue)) != NULL) { atm_return(vcc, skb->truesize); dev_kfree_skb(skb); } - printk("%s: Shut down!\n", dev->name); + pr_info("%s: Shut down!\n", dev->name); module_put(THIS_MODULE); } @@ -605,9 +595,8 @@ send_to_lecd(struct lec_priv *priv, atmlec_msg_type type, struct sk_buff *skb; struct atmlec_msg *mesg; - if (!priv || !priv->lecd) { + if (!priv || !priv->lecd) return -1; - } skb = alloc_skb(sizeof(struct atmlec_msg), GFP_ATOMIC); if (!skb) return -1; @@ -688,7 +677,7 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb) struct net_device *dev = (struct net_device *)vcc->proto_data; struct lec_priv *priv = netdev_priv(dev); -#if DUMP_PACKETS >0 +#if DUMP_PACKETS > 0 printk(KERN_DEBUG "%s: vcc vpi:%d vci:%d\n", dev->name, vcc->vpi, vcc->vci); #endif @@ -767,9 +756,8 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb) dev_kfree_skb(skb); return; } 
- if (!hlist_empty(&priv->lec_arp_empty_ones)) { + if (!hlist_empty(&priv->lec_arp_empty_ones)) lec_arp_check_empties(priv, vcc, skb); - } skb_pull(skb, 2); /* skip lec_id */ #ifdef CONFIG_TR if (priv->is_trdev) @@ -816,7 +804,8 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg) if (ioc_data.dev_num < 0 || ioc_data.dev_num >= MAX_LEC_ITF || !dev_lec[ioc_data.dev_num]) return -EINVAL; - if (!(vpriv = kmalloc(sizeof(struct lec_vcc_priv), GFP_KERNEL))) + vpriv = kmalloc(sizeof(struct lec_vcc_priv), GFP_KERNEL); + if (!vpriv) return -ENOMEM; vpriv->xoff = 0; vpriv->old_pop = vcc->pop; @@ -907,9 +896,8 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) priv->flush_timeout = (4 * HZ); priv->path_switching_delay = (6 * HZ); - if (dev_lec[i]->flags & IFF_UP) { + if (dev_lec[i]->flags & IFF_UP) netif_start_queue(dev_lec[i]); - } __module_get(THIS_MODULE); return i; } @@ -1111,7 +1099,9 @@ static int lec_seq_show(struct seq_file *seq, void *v) else { struct lec_state *state = seq->private; struct net_device *dev = state->dev; - struct lec_arp_table *entry = hlist_entry(state->node, struct lec_arp_table, next); + struct lec_arp_table *entry = hlist_entry(state->node, + struct lec_arp_table, + next); seq_printf(seq, "%s ", dev->name); lec_info(seq, entry); @@ -1191,7 +1181,7 @@ static int __init lane_module_init(void) #endif register_atm_ioctl(&lane_ioctl_ops); - printk("lec.c: " __DATE__ " " __TIME__ " initialized\n"); + pr_info("lec.c: " __DATE__ " " __TIME__ " initialized\n"); return 0; } @@ -1280,13 +1270,13 @@ static int lane2_associate_req(struct net_device *dev, const u8 *lan_dst, struct lec_priv *priv = netdev_priv(dev); if (compare_ether_addr(lan_dst, dev->dev_addr)) - return (0); /* not our mac address */ + return 0; /* not our mac address */ kfree(priv->tlvs); /* NULL if there was no previous association */ priv->tlvs = kmemdup(tlvs, sizeoftlvs, GFP_KERNEL); if (priv->tlvs == NULL) - return (0); + return 0; priv->sizeoftlvs = sizeoftlvs; skb = alloc_skb(sizeoftlvs, GFP_ATOMIC); @@ -1296,12 +1286,12 @@ static int lane2_associate_req(struct net_device *dev, const u8 *lan_dst, skb_copy_to_linear_data(skb, tlvs, sizeoftlvs); retval = send_to_lecd(priv, l_associate_req, NULL, NULL, skb); if (retval != 0) - printk("lec.c: lane2_associate_req() failed\n"); + pr_info("lec.c: lane2_associate_req() failed\n"); /* * If the previous association has changed we must * somehow notify other LANE entities about the change */ - return (1); + return 1; } /* @@ -1334,12 +1324,12 @@ static void lane2_associate_ind(struct net_device *dev, const u8 *mac_addr, entry->sizeoftlvs = sizeoftlvs; #endif #if 0 - printk("lec.c: lane2_associate_ind()\n"); - printk("dump of tlvs, sizeoftlvs=%d\n", sizeoftlvs); + pr_info("\n"); + pr_info("dump of tlvs, sizeoftlvs=%d\n", sizeoftlvs); while (i < sizeoftlvs) - printk("%02x ", tlvs[i++]); + pr_cont("%02x ", tlvs[i++]); - printk("\n"); + pr_cont("\n"); #endif /* tell MPOA about the TLVs we saw */ @@ -1359,13 +1349,13 @@ static void lane2_associate_ind(struct net_device *dev, const u8 *mac_addr, #include #include -#include +#include #include #include #include #if 0 -#define pr_debug(format,args...) +#define pr_debug(format, args...) 
/* #define pr_debug printk */ @@ -1381,7 +1371,7 @@ static void lec_arp_expire_arp(unsigned long data); * Arp table funcs */ -#define HASH(ch) (ch & (LEC_ARP_TABLE_SIZE -1)) +#define HASH(ch) (ch & (LEC_ARP_TABLE_SIZE - 1)) /* * Initialization of arp-cache @@ -1390,9 +1380,8 @@ static void lec_arp_init(struct lec_priv *priv) { unsigned short i; - for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { + for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) INIT_HLIST_HEAD(&priv->lec_arp_tables[i]); - } INIT_HLIST_HEAD(&priv->lec_arp_empty_ones); INIT_HLIST_HEAD(&priv->lec_no_forward); INIT_HLIST_HEAD(&priv->mcast_fwds); @@ -1449,20 +1438,23 @@ lec_arp_remove(struct lec_priv *priv, struct lec_arp_table *to_remove) struct lec_arp_table *entry; int i, remove_vcc = 1; - if (!to_remove) { + if (!to_remove) return -1; - } hlist_del(&to_remove->next); del_timer(&to_remove->timer); - /* If this is the only MAC connected to this VCC, also tear down the VCC */ + /* + * If this is the only MAC connected to this VCC, + * also tear down the VCC + */ if (to_remove->status >= ESI_FLUSH_PENDING) { /* * ESI_FLUSH_PENDING, ESI_FORWARD_DIRECT */ for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(entry, node, &priv->lec_arp_tables[i], next) { + hlist_for_each_entry(entry, node, + &priv->lec_arp_tables[i], next) { if (memcmp(to_remove->atm_addr, entry->atm_addr, ATM_ESA_LEN) == 0) { remove_vcc = 0; @@ -1493,9 +1485,8 @@ static const char *get_status_string(unsigned char st) return "ESI_FLUSH_PENDING"; case ESI_FORWARD_DIRECT: return "ESI_FORWARD_DIRECT"; - default: - return ""; } + return ""; } static void dump_arp_table(struct lec_priv *priv) @@ -1505,18 +1496,15 @@ static void dump_arp_table(struct lec_priv *priv) char buf[256]; int i, j, offset; - printk("Dump %p:\n", priv); + pr_info("Dump %p:\n", priv); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(rulla, node, &priv->lec_arp_tables[i], next) { + hlist_for_each_entry(rulla, node, + &priv->lec_arp_tables[i], next) { offset = 0; offset += sprintf(buf, "%d: %p\n", i, rulla); - offset += sprintf(buf + offset, "Mac:"); - for (j = 0; j < ETH_ALEN; j++) { - offset += sprintf(buf + offset, - "%2.2x ", - rulla->mac_addr[j] & 0xff); - } - offset += sprintf(buf + offset, "Atm:"); + offset += sprintf(buf + offset, "Mac: %pM", + rulla->mac_addr); + offset += sprintf(buf + offset, " Atm:"); for (j = 0; j < ATM_ESA_LEN; j++) { offset += sprintf(buf + offset, "%2.2x ", @@ -1536,20 +1524,16 @@ static void dump_arp_table(struct lec_priv *priv) "Flags:%x, Packets_flooded:%x, Status: %s ", rulla->flags, rulla->packets_flooded, get_status_string(rulla->status)); - printk("%s\n", buf); + pr_info("%s\n", buf); } } if (!hlist_empty(&priv->lec_no_forward)) - printk("No forward\n"); + pr_info("No forward\n"); hlist_for_each_entry(rulla, node, &priv->lec_no_forward, next) { offset = 0; - offset += sprintf(buf + offset, "Mac:"); - for (j = 0; j < ETH_ALEN; j++) { - offset += sprintf(buf + offset, "%2.2x ", - rulla->mac_addr[j] & 0xff); - } - offset += sprintf(buf + offset, "Atm:"); + offset += sprintf(buf + offset, "Mac: %pM", rulla->mac_addr); + offset += sprintf(buf + offset, " Atm:"); for (j = 0; j < ATM_ESA_LEN; j++) { offset += sprintf(buf + offset, "%2.2x ", rulla->atm_addr[j] & 0xff); @@ -1566,19 +1550,15 @@ static void dump_arp_table(struct lec_priv *priv) "Flags:%x, Packets_flooded:%x, Status: %s ", rulla->flags, rulla->packets_flooded, get_status_string(rulla->status)); - printk("%s\n", buf); + pr_info("%s\n", buf); } if (!hlist_empty(&priv->lec_arp_empty_ones)) - 
printk("Empty ones\n"); + pr_info("Empty ones\n"); hlist_for_each_entry(rulla, node, &priv->lec_arp_empty_ones, next) { offset = 0; - offset += sprintf(buf + offset, "Mac:"); - for (j = 0; j < ETH_ALEN; j++) { - offset += sprintf(buf + offset, "%2.2x ", - rulla->mac_addr[j] & 0xff); - } - offset += sprintf(buf + offset, "Atm:"); + offset += sprintf(buf + offset, "Mac: %pM", rulla->mac_addr); + offset += sprintf(buf + offset, " Atm:"); for (j = 0; j < ATM_ESA_LEN; j++) { offset += sprintf(buf + offset, "%2.2x ", rulla->atm_addr[j] & 0xff); @@ -1595,19 +1575,15 @@ static void dump_arp_table(struct lec_priv *priv) "Flags:%x, Packets_flooded:%x, Status: %s ", rulla->flags, rulla->packets_flooded, get_status_string(rulla->status)); - printk("%s", buf); + pr_info("%s", buf); } if (!hlist_empty(&priv->mcast_fwds)) - printk("Multicast Forward VCCs\n"); + pr_info("Multicast Forward VCCs\n"); hlist_for_each_entry(rulla, node, &priv->mcast_fwds, next) { offset = 0; - offset += sprintf(buf + offset, "Mac:"); - for (j = 0; j < ETH_ALEN; j++) { - offset += sprintf(buf + offset, "%2.2x ", - rulla->mac_addr[j] & 0xff); - } - offset += sprintf(buf + offset, "Atm:"); + offset += sprintf(buf + offset, "Mac: %pM", rulla->mac_addr); + offset += sprintf(buf + offset, " Atm:"); for (j = 0; j < ATM_ESA_LEN; j++) { offset += sprintf(buf + offset, "%2.2x ", rulla->atm_addr[j] & 0xff); @@ -1624,7 +1600,7 @@ static void dump_arp_table(struct lec_priv *priv) "Flags:%x, Packets_flooded:%x, Status: %s ", rulla->flags, rulla->packets_flooded, get_status_string(rulla->status)); - printk("%s\n", buf); + pr_info("%s\n", buf); } } @@ -1650,14 +1626,16 @@ static void lec_arp_destroy(struct lec_priv *priv) spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_tables[i], next) { + hlist_for_each_entry_safe(entry, node, next, + &priv->lec_arp_tables[i], next) { lec_arp_remove(priv, entry); lec_arp_put(entry); } INIT_HLIST_HEAD(&priv->lec_arp_tables[i]); } - hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_empty_ones, next) { + hlist_for_each_entry_safe(entry, node, next, + &priv->lec_arp_empty_ones, next) { del_timer_sync(&entry->timer); lec_arp_clear_vccs(entry); hlist_del(&entry->next); @@ -1665,7 +1643,8 @@ static void lec_arp_destroy(struct lec_priv *priv) } INIT_HLIST_HEAD(&priv->lec_arp_empty_ones); - hlist_for_each_entry_safe(entry, node, next, &priv->lec_no_forward, next) { + hlist_for_each_entry_safe(entry, node, next, + &priv->lec_no_forward, next) { del_timer_sync(&entry->timer); lec_arp_clear_vccs(entry); hlist_del(&entry->next); @@ -1698,9 +1677,8 @@ static struct lec_arp_table *lec_arp_find(struct lec_priv *priv, head = &priv->lec_arp_tables[HASH(mac_addr[ETH_ALEN - 1])]; hlist_for_each_entry(entry, node, head, next) { - if (!compare_ether_addr(mac_addr, entry->mac_addr)) { + if (!compare_ether_addr(mac_addr, entry->mac_addr)) return entry; - } } return NULL; } @@ -1712,7 +1690,7 @@ static struct lec_arp_table *make_entry(struct lec_priv *priv, to_return = kzalloc(sizeof(struct lec_arp_table), GFP_ATOMIC); if (!to_return) { - printk("LEC: Arp entry kmalloc failed\n"); + pr_info("LEC: Arp entry kmalloc failed\n"); return NULL; } memcpy(to_return->mac_addr, mac_addr, ETH_ALEN); @@ -1802,7 +1780,8 @@ static void lec_arp_check_expire(struct work_struct *work) restart: spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry_safe(entry, node, next, 
&priv->lec_arp_tables[i], next) { + hlist_for_each_entry_safe(entry, node, next, + &priv->lec_arp_tables[i], next) { if ((entry->flags) & LEC_REMOTE_FLAG && priv->topology_change) time_to_check = priv->forward_delay_time; @@ -1811,9 +1790,9 @@ restart: pr_debug("About to expire: %lx - %lx > %lx\n", now, entry->last_used, time_to_check); - if (time_after(now, entry->last_used + time_to_check) - && !(entry->flags & LEC_PERMANENT_FLAG) - && !(entry->mac_addr[0] & 0x01)) { /* LANE2: 7.1.20 */ + if (time_after(now, entry->last_used + time_to_check) && + !(entry->flags & LEC_PERMANENT_FLAG) && + !(entry->mac_addr[0] & 0x01)) { /* LANE2: 7.1.20 */ /* Remove entry */ pr_debug("Entry timed out\n"); lec_arp_remove(priv, entry); @@ -1821,11 +1800,10 @@ restart: } else { /* Something else */ if ((entry->status == ESI_VC_PENDING || - entry->status == ESI_ARP_PENDING) - && time_after_eq(now, - entry->timestamp + - priv-> - max_unknown_frame_time)) { + entry->status == ESI_ARP_PENDING) && + time_after_eq(now, + entry->timestamp + + priv->max_unknown_frame_time)) { entry->timestamp = jiffies; entry->packets_flooded = 0; if (entry->status == ESI_VC_PENDING) @@ -1834,8 +1812,7 @@ restart: entry->atm_addr, NULL); } - if (entry->status == ESI_FLUSH_PENDING - && + if (entry->status == ESI_FLUSH_PENDING && time_after_eq(now, entry->timestamp + priv->path_switching_delay)) { struct sk_buff *skb; @@ -1863,7 +1840,8 @@ restart: * */ static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv, - const unsigned char *mac_to_find, int is_rdesc, + const unsigned char *mac_to_find, + int is_rdesc, struct lec_arp_table **ret_entry) { unsigned long flags; @@ -1899,9 +1877,8 @@ static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv, * If the LE_ARP cache entry is still pending, reset count to 0 * so another LE_ARP request can be made for this frame. */ - if (entry->status == ESI_ARP_PENDING) { + if (entry->status == ESI_ARP_PENDING) entry->no_tries = 0; - } /* * Data direct VC not yet set up, check to see if the unknown * frame count is greater than the limit. If the limit has @@ -1969,10 +1946,11 @@ lec_addr_delete(struct lec_priv *priv, const unsigned char *atm_addr, pr_debug("\n"); spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_tables[i], next) { - if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN) - && (permanent || - !(entry->flags & LEC_PERMANENT_FLAG))) { + hlist_for_each_entry_safe(entry, node, next, + &priv->lec_arp_tables[i], next) { + if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN) && + (permanent || + !(entry->flags & LEC_PERMANENT_FLAG))) { lec_arp_remove(priv, entry); lec_arp_put(entry); } @@ -2008,7 +1986,8 @@ lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr, * we have no entry in the cache. 
7.1.30 */ if (!hlist_empty(&priv->lec_arp_empty_ones)) { - hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_empty_ones, next) { + hlist_for_each_entry_safe(entry, node, next, + &priv->lec_arp_empty_ones, next) { if (memcmp(entry->atm_addr, atm_addr, ATM_ESA_LEN) == 0) { hlist_del(&entry->next); del_timer(&entry->timer); @@ -2052,7 +2031,8 @@ lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr, memcpy(entry->atm_addr, atm_addr, ATM_ESA_LEN); del_timer(&entry->timer); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(tmp, node, &priv->lec_arp_tables[i], next) { + hlist_for_each_entry(tmp, node, + &priv->lec_arp_tables[i], next) { if (entry != tmp && !memcmp(tmp->atm_addr, atm_addr, ATM_ESA_LEN)) { /* Vcc to this host exists */ @@ -2097,14 +2077,13 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, int i, found_entry = 0; spin_lock_irqsave(&priv->lec_arp_lock, flags); + /* Vcc for Multicast Forward. No timer, LANEv2 7.1.20 and 2.3.5.3 */ if (ioc_data->receive == 2) { - /* Vcc for Multicast Forward. No timer, LANEv2 7.1.20 and 2.3.5.3 */ - pr_debug("LEC_ARP: Attaching mcast forward\n"); #if 0 entry = lec_arp_find(priv, bus_mac); if (!entry) { - printk("LEC_ARP: Multicast entry not found!\n"); + pr_info("LEC_ARP: Multicast entry not found!\n"); goto out; } memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); @@ -2163,7 +2142,8 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, ioc_data->atm_addr[16], ioc_data->atm_addr[17], ioc_data->atm_addr[18], ioc_data->atm_addr[19]); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(entry, node, &priv->lec_arp_tables[i], next) { + hlist_for_each_entry(entry, node, + &priv->lec_arp_tables[i], next) { if (memcmp (ioc_data->atm_addr, entry->atm_addr, ATM_ESA_LEN) == 0) { @@ -2247,14 +2227,16 @@ static void lec_flush_complete(struct lec_priv *priv, unsigned long tran_id) restart: spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(entry, node, &priv->lec_arp_tables[i], next) { - if (entry->flush_tran_id == tran_id - && entry->status == ESI_FLUSH_PENDING) { + hlist_for_each_entry(entry, node, + &priv->lec_arp_tables[i], next) { + if (entry->flush_tran_id == tran_id && + entry->status == ESI_FLUSH_PENDING) { struct sk_buff *skb; struct atm_vcc *vcc = entry->vcc; lec_arp_hold(entry); - spin_unlock_irqrestore(&priv->lec_arp_lock, flags); + spin_unlock_irqrestore(&priv->lec_arp_lock, + flags); while ((skb = skb_dequeue(&entry->tx_wait)) != NULL) lec_send(vcc, skb); entry->last_used = jiffies; @@ -2280,7 +2262,8 @@ lec_set_flush_tran_id(struct lec_priv *priv, spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) - hlist_for_each_entry(entry, node, &priv->lec_arp_tables[i], next) { + hlist_for_each_entry(entry, node, + &priv->lec_arp_tables[i], next) { if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN)) { entry->flush_tran_id = tran_id; pr_debug("Set flush transaction id to %lx for %p\n", @@ -2300,7 +2283,8 @@ static int lec_mcast_make(struct lec_priv *priv, struct atm_vcc *vcc) struct lec_vcc_priv *vpriv; int err = 0; - if (!(vpriv = kmalloc(sizeof(struct lec_vcc_priv), GFP_KERNEL))) + vpriv = kmalloc(sizeof(struct lec_vcc_priv), GFP_KERNEL); + if (!vpriv) return -ENOMEM; vpriv->xoff = 0; vpriv->old_pop = vcc->pop; @@ -2340,18 +2324,19 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 
0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_tables[i], next) { + hlist_for_each_entry_safe(entry, node, next, + &priv->lec_arp_tables[i], next) { if (vcc == entry->vcc) { lec_arp_remove(priv, entry); lec_arp_put(entry); - if (priv->mcast_vcc == vcc) { + if (priv->mcast_vcc == vcc) priv->mcast_vcc = NULL; - } } } } - hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_empty_ones, next) { + hlist_for_each_entry_safe(entry, node, next, + &priv->lec_arp_empty_ones, next) { if (entry->vcc == vcc) { lec_arp_clear_vccs(entry); del_timer(&entry->timer); @@ -2360,7 +2345,8 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) } } - hlist_for_each_entry_safe(entry, node, next, &priv->lec_no_forward, next) { + hlist_for_each_entry_safe(entry, node, next, + &priv->lec_no_forward, next) { if (entry->recv_vcc == vcc) { lec_arp_clear_vccs(entry); del_timer(&entry->timer); @@ -2401,14 +2387,16 @@ lec_arp_check_empties(struct lec_priv *priv, src = hdr->h_source; spin_lock_irqsave(&priv->lec_arp_lock, flags); - hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_empty_ones, next) { + hlist_for_each_entry_safe(entry, node, next, + &priv->lec_arp_empty_ones, next) { if (vcc == entry->vcc) { del_timer(&entry->timer); memcpy(entry->mac_addr, src, ETH_ALEN); entry->status = ESI_FORWARD_DIRECT; entry->last_used = jiffies; /* We might have got an entry */ - if ((tmp = lec_arp_find(priv, src))) { + tmp = lec_arp_find(priv, src); + if (tmp) { lec_arp_remove(priv, tmp); lec_arp_put(tmp); } -- cgit v1.2.2 From 5710044073ea734c0d7806d7fb9cd6308053704b Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 26 Jan 2010 11:40:09 +0000 Subject: net/atm/mpc.c: checkpatch cleanups Convert #include pr_info and pr_cont Moved labels to column 1 Move trailing statements to new lines switch/case cleanups remove unnecessary breaks after returns Remove unnecessary braces around single line statements Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/atm/mpc.c | 445 +++++++++++++++++++++++++++++++++------------------------- 1 file changed, 252 insertions(+), 193 deletions(-) (limited to 'net') diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 3c45aef47b7e..55dba22e44b3 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -15,8 +15,8 @@ #include #include #include +#include #include -#include #include /* for ip_fast_csum() */ #include #include @@ -38,15 +38,17 @@ */ #if 0 -#define dprintk printk /* debug */ +#define dprintk(format, args...) printk(KERN_DEBUG format, ##args) /* debug */ #else -#define dprintk(format,args...) +#define dprintk(format, args...) \ + do { if (0) printk(KERN_DEBUG format, ##args); } while (0) #endif #if 0 -#define ddprintk printk /* more debug */ +#define ddprintk printk(KERN_DEBUG format, ##args) /* more debug */ #else -#define ddprintk(format,args...) +#define ddprintk(format, args...) 
\ + do { if (0) printk(KERN_DEBUG format, ##args); } while (0) #endif @@ -54,15 +56,19 @@ #define MPOA_TAG_LEN 4 /* mpc_daemon -> kernel */ -static void MPOA_trigger_rcvd (struct k_message *msg, struct mpoa_client *mpc); +static void MPOA_trigger_rcvd(struct k_message *msg, struct mpoa_client *mpc); static void MPOA_res_reply_rcvd(struct k_message *msg, struct mpoa_client *mpc); static void ingress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc); static void egress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc); static void mps_death(struct k_message *msg, struct mpoa_client *mpc); -static void clean_up(struct k_message *msg, struct mpoa_client *mpc, int action); -static void MPOA_cache_impos_rcvd(struct k_message *msg, struct mpoa_client *mpc); -static void set_mpc_ctrl_addr_rcvd(struct k_message *mesg, struct mpoa_client *mpc); -static void set_mps_mac_addr_rcvd(struct k_message *mesg, struct mpoa_client *mpc); +static void clean_up(struct k_message *msg, struct mpoa_client *mpc, + int action); +static void MPOA_cache_impos_rcvd(struct k_message *msg, + struct mpoa_client *mpc); +static void set_mpc_ctrl_addr_rcvd(struct k_message *mesg, + struct mpoa_client *mpc); +static void set_mps_mac_addr_rcvd(struct k_message *mesg, + struct mpoa_client *mpc); static const uint8_t *copy_macs(struct mpoa_client *mpc, const uint8_t *router_mac, @@ -76,10 +82,11 @@ static int msg_from_mpoad(struct atm_vcc *vcc, struct sk_buff *skb); static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb); static netdev_tx_t mpc_send_packet(struct sk_buff *skb, - struct net_device *dev); -static int mpoa_event_listener(struct notifier_block *mpoa_notifier, unsigned long event, void *dev); + struct net_device *dev); +static int mpoa_event_listener(struct notifier_block *mpoa_notifier, + unsigned long event, void *dev); static void mpc_timer_refresh(void); -static void mpc_cache_check( unsigned long checking_time ); +static void mpc_cache_check(unsigned long checking_time); static struct llc_snap_hdr llc_snap_mpoa_ctrl = { 0xaa, 0xaa, 0x03, @@ -169,7 +176,7 @@ struct atm_mpoa_qos *atm_mpoa_add_qos(__be32 dst_ip, struct atm_qos *qos) entry = kmalloc(sizeof(struct atm_mpoa_qos), GFP_KERNEL); if (entry == NULL) { - printk("mpoa: atm_mpoa_add_qos: out of memory\n"); + pr_info("mpoa: out of memory\n"); return entry; } @@ -187,10 +194,9 @@ struct atm_mpoa_qos *atm_mpoa_search_qos(__be32 dst_ip) struct atm_mpoa_qos *qos; qos = qos_head; - while( qos != NULL ){ - if(qos->ipaddr == dst_ip) { + while (qos) { + if (qos->ipaddr == dst_ip) break; - } qos = qos->next; } @@ -202,10 +208,10 @@ struct atm_mpoa_qos *atm_mpoa_search_qos(__be32 dst_ip) */ int atm_mpoa_delete_qos(struct atm_mpoa_qos *entry) { - struct atm_mpoa_qos *curr; - if (entry == NULL) return 0; + if (entry == NULL) + return 0; if (entry == qos_head) { qos_head = qos_head->next; kfree(entry); @@ -236,9 +242,17 @@ void atm_mpoa_disp_qos(struct seq_file *m) while (qos != NULL) { seq_printf(m, "%pI4\n %-7d %-7d %-7d %-7d %-7d\n %-7d %-7d %-7d %-7d %-7d\n", - &qos->ipaddr, - qos->qos.txtp.max_pcr, qos->qos.txtp.pcr, qos->qos.txtp.min_pcr, qos->qos.txtp.max_cdv, qos->qos.txtp.max_sdu, - qos->qos.rxtp.max_pcr, qos->qos.rxtp.pcr, qos->qos.rxtp.min_pcr, qos->qos.rxtp.max_cdv, qos->qos.rxtp.max_sdu); + &qos->ipaddr, + qos->qos.txtp.max_pcr, + qos->qos.txtp.pcr, + qos->qos.txtp.min_pcr, + qos->qos.txtp.max_cdv, + qos->qos.txtp.max_sdu, + qos->qos.rxtp.max_pcr, + qos->qos.rxtp.pcr, + qos->qos.rxtp.min_pcr, + qos->qos.rxtp.max_cdv, + 
qos->qos.rxtp.max_sdu); qos = qos->next; } } @@ -258,7 +272,7 @@ static struct mpoa_client *alloc_mpc(void) { struct mpoa_client *mpc; - mpc = kzalloc(sizeof (struct mpoa_client), GFP_KERNEL); + mpc = kzalloc(sizeof(struct mpoa_client), GFP_KERNEL); if (mpc == NULL) return NULL; rwlock_init(&mpc->ingress_lock); @@ -268,7 +282,7 @@ static struct mpoa_client *alloc_mpc(void) mpc->parameters.mpc_p1 = MPC_P1; mpc->parameters.mpc_p2 = MPC_P2; - memset(mpc->parameters.mpc_p3,0,sizeof(mpc->parameters.mpc_p3)); + memset(mpc->parameters.mpc_p3, 0, sizeof(mpc->parameters.mpc_p3)); mpc->parameters.mpc_p4 = MPC_P4; mpc->parameters.mpc_p5 = MPC_P5; mpc->parameters.mpc_p6 = MPC_P6; @@ -290,7 +304,7 @@ static void start_mpc(struct mpoa_client *mpc, struct net_device *dev) dprintk("mpoa: (%s) start_mpc:\n", mpc->dev->name); if (!dev->netdev_ops) - printk("mpoa: (%s) start_mpc not starting\n", dev->name); + pr_info("(%s) not starting\n", dev->name); else { mpc->old_ops = dev->netdev_ops; mpc->new_ops = *mpc->old_ops; @@ -321,25 +335,18 @@ static const char *mpoa_device_type_string(char type) __attribute__ ((unused)); static const char *mpoa_device_type_string(char type) { - switch(type) { + switch (type) { case NON_MPOA: return "non-MPOA device"; - break; case MPS: return "MPS"; - break; case MPC: return "MPC"; - break; case MPS_AND_MPC: return "both MPS and MPC"; - break; - default: - return "unspecified (non-MPOA) device"; - break; } - return ""; /* not reached */ + return "unspecified (non-MPOA) device"; } /* @@ -368,22 +375,24 @@ static void lane2_assoc_ind(struct net_device *dev, const u8 *mac_addr, dprintk("total length of all TLVs %d\n", sizeoftlvs); mpc = find_mpc_by_lec(dev); /* Sampo-Fix: moved here from below */ if (mpc == NULL) { - printk("mpoa: (%s) lane2_assoc_ind: no mpc\n", dev->name); + pr_info("(%s) no mpc\n", dev->name); return; } end_of_tlvs = tlvs + sizeoftlvs; while (end_of_tlvs - tlvs >= 5) { - type = (tlvs[0] << 24) | (tlvs[1] << 16) | (tlvs[2] << 8) | tlvs[3]; + type = ((tlvs[0] << 24) | (tlvs[1] << 16) | + (tlvs[2] << 8) | tlvs[3]); length = tlvs[4]; tlvs += 5; dprintk(" type 0x%x length %02x\n", type, length); if (tlvs + length > end_of_tlvs) { - printk("TLV value extends past its buffer, aborting parse\n"); + pr_info("TLV value extends past its buffer, aborting parse\n"); return; } if (type == 0) { - printk("mpoa: (%s) lane2_assoc_ind: TLV type was 0, returning\n", dev->name); + pr_info("mpoa: (%s) TLV type was 0, returning\n", + dev->name); return; } @@ -393,39 +402,47 @@ static void lane2_assoc_ind(struct net_device *dev, const u8 *mac_addr, } mpoa_device_type = *tlvs++; number_of_mps_macs = *tlvs++; - dprintk("mpoa: (%s) MPOA device type '%s', ", dev->name, mpoa_device_type_string(mpoa_device_type)); + dprintk("mpoa: (%s) MPOA device type '%s', ", + dev->name, mpoa_device_type_string(mpoa_device_type)); if (mpoa_device_type == MPS_AND_MPC && length < (42 + number_of_mps_macs*ETH_ALEN)) { /* :) */ - printk("\nmpoa: (%s) lane2_assoc_ind: short MPOA Device Type TLV\n", - dev->name); + pr_info("(%s) short MPOA Device Type TLV\n", + dev->name); continue; } - if ((mpoa_device_type == MPS || mpoa_device_type == MPC) - && length < 22 + number_of_mps_macs*ETH_ALEN) { - printk("\nmpoa: (%s) lane2_assoc_ind: short MPOA Device Type TLV\n", - dev->name); + if ((mpoa_device_type == MPS || mpoa_device_type == MPC) && + length < 22 + number_of_mps_macs*ETH_ALEN) { + pr_info("(%s) short MPOA Device Type TLV\n", dev->name); continue; } - if (mpoa_device_type != MPS && mpoa_device_type != 
MPS_AND_MPC) { + if (mpoa_device_type != MPS && + mpoa_device_type != MPS_AND_MPC) { dprintk("ignoring non-MPS device\n"); - if (mpoa_device_type == MPC) tlvs += 20; + if (mpoa_device_type == MPC) + tlvs += 20; continue; /* we are only interested in MPSs */ } - if (number_of_mps_macs == 0 && mpoa_device_type == MPS_AND_MPC) { - printk("\nmpoa: (%s) lane2_assoc_ind: MPS_AND_MPC has zero MACs\n", dev->name); + if (number_of_mps_macs == 0 && + mpoa_device_type == MPS_AND_MPC) { + pr_info("(%s) MPS_AND_MPC has zero MACs\n", dev->name); continue; /* someone should read the spec */ } dprintk("this MPS has %d MAC addresses\n", number_of_mps_macs); - /* ok, now we can go and tell our daemon the control address of MPS */ + /* + * ok, now we can go and tell our daemon + * the control address of MPS + */ send_set_mps_ctrl_addr(tlvs, mpc); - tlvs = copy_macs(mpc, mac_addr, tlvs, number_of_mps_macs, mpoa_device_type); - if (tlvs == NULL) return; + tlvs = copy_macs(mpc, mac_addr, tlvs, + number_of_mps_macs, mpoa_device_type); + if (tlvs == NULL) + return; } if (end_of_tlvs - tlvs != 0) - printk("mpoa: (%s) lane2_assoc_ind: ignoring %Zd bytes of trailing TLV carbage\n", - dev->name, end_of_tlvs - tlvs); + pr_info("(%s) ignoring %Zd bytes of trailing TLV garbage\n", + dev->name, end_of_tlvs - tlvs); return; } @@ -443,11 +460,12 @@ static const uint8_t *copy_macs(struct mpoa_client *mpc, num_macs = (mps_macs > 1) ? mps_macs : 1; if (mpc->number_of_mps_macs != num_macs) { /* need to reallocate? */ - if (mpc->number_of_mps_macs != 0) kfree(mpc->mps_macs); + if (mpc->number_of_mps_macs != 0) + kfree(mpc->mps_macs); mpc->number_of_mps_macs = 0; - mpc->mps_macs = kmalloc(num_macs*ETH_ALEN, GFP_KERNEL); + mpc->mps_macs = kmalloc(num_macs * ETH_ALEN, GFP_KERNEL); if (mpc->mps_macs == NULL) { - printk("mpoa: (%s) copy_macs: out of mem\n", mpc->dev->name); + pr_info("(%s) out of mem\n", mpc->dev->name); return NULL; } } @@ -480,24 +498,30 @@ static int send_via_shortcut(struct sk_buff *skb, struct mpoa_client *mpc) iph = (struct iphdr *)buff; ipaddr = iph->daddr; - ddprintk("mpoa: (%s) send_via_shortcut: ipaddr 0x%x\n", mpc->dev->name, ipaddr); + ddprintk("mpoa: (%s) send_via_shortcut: ipaddr 0x%x\n", + mpc->dev->name, ipaddr); entry = mpc->in_ops->get(ipaddr, mpc); if (entry == NULL) { entry = mpc->in_ops->add_entry(ipaddr, mpc); - if (entry != NULL) mpc->in_ops->put(entry); + if (entry != NULL) + mpc->in_ops->put(entry); return 1; } - if (mpc->in_ops->cache_hit(entry, mpc) != OPEN){ /* threshold not exceeded or VCC not ready */ - ddprintk("mpoa: (%s) send_via_shortcut: cache_hit: returns != OPEN\n", mpc->dev->name); + /* threshold not exceeded or VCC not ready */ + if (mpc->in_ops->cache_hit(entry, mpc) != OPEN) { + ddprintk("mpoa: (%s) send_via_shortcut: cache_hit: returns != OPEN\n", + mpc->dev->name); mpc->in_ops->put(entry); return 1; } - ddprintk("mpoa: (%s) send_via_shortcut: using shortcut\n", mpc->dev->name); + ddprintk("mpoa: (%s) send_via_shortcut: using shortcut\n", + mpc->dev->name); /* MPOA spec A.1.4, MPOA client must decrement IP ttl at least by one */ if (iph->ttl <= 1) { - ddprintk("mpoa: (%s) send_via_shortcut: IP ttl = %u, using LANE\n", mpc->dev->name, iph->ttl); + ddprintk("mpoa: (%s) send_via_shortcut: IP ttl = %u, using LANE\n", + mpc->dev->name, iph->ttl); mpc->in_ops->put(entry); return 1; } @@ -506,15 +530,18 @@ static int send_via_shortcut(struct sk_buff *skb, struct mpoa_client *mpc) iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); if (entry->ctrl_info.tag != 0) { - 
ddprintk("mpoa: (%s) send_via_shortcut: adding tag 0x%x\n", mpc->dev->name, entry->ctrl_info.tag); + ddprintk("mpoa: (%s) send_via_shortcut: adding tag 0x%x\n", + mpc->dev->name, entry->ctrl_info.tag); tagged_llc_snap_hdr.tag = entry->ctrl_info.tag; - skb_pull(skb, ETH_HLEN); /* get rid of Eth header */ - skb_push(skb, sizeof(tagged_llc_snap_hdr)); /* add LLC/SNAP header */ + skb_pull(skb, ETH_HLEN); /* get rid of Eth header */ + skb_push(skb, sizeof(tagged_llc_snap_hdr)); + /* add LLC/SNAP header */ skb_copy_to_linear_data(skb, &tagged_llc_snap_hdr, sizeof(tagged_llc_snap_hdr)); } else { - skb_pull(skb, ETH_HLEN); /* get rid of Eth header */ - skb_push(skb, sizeof(struct llc_snap_hdr)); /* add LLC/SNAP header + tag */ + skb_pull(skb, ETH_HLEN); /* get rid of Eth header */ + skb_push(skb, sizeof(struct llc_snap_hdr)); + /* add LLC/SNAP header + tag */ skb_copy_to_linear_data(skb, &llc_snap_mpoa_data, sizeof(struct llc_snap_hdr)); } @@ -539,8 +566,8 @@ static netdev_tx_t mpc_send_packet(struct sk_buff *skb, int i = 0; mpc = find_mpc_by_lec(dev); /* this should NEVER fail */ - if(mpc == NULL) { - printk("mpoa: (%s) mpc_send_packet: no MPC found\n", dev->name); + if (mpc == NULL) { + pr_info("(%s) no MPC found\n", dev->name); goto non_ip; } @@ -556,14 +583,15 @@ static netdev_tx_t mpc_send_packet(struct sk_buff *skb, goto non_ip; while (i < mpc->number_of_mps_macs) { - if (!compare_ether_addr(eth->h_dest, (mpc->mps_macs + i*ETH_ALEN))) - if ( send_via_shortcut(skb, mpc) == 0 ) /* try shortcut */ - return NETDEV_TX_OK; /* success! */ + if (!compare_ether_addr(eth->h_dest, + (mpc->mps_macs + i*ETH_ALEN))) + if (send_via_shortcut(skb, mpc) == 0) /* try shortcut */ + return NETDEV_TX_OK; i++; } - non_ip: - return mpc->old_ops->ndo_start_xmit(skb,dev); +non_ip: + return mpc->old_ops->ndo_start_xmit(skb, dev); } static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg) @@ -576,7 +604,8 @@ static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg) bytes_left = copy_from_user(&ioc_data, arg, sizeof(struct atmmpc_ioc)); if (bytes_left != 0) { - printk("mpoa: mpc_vcc_attach: Short read (missed %d bytes) from userland\n", bytes_left); + pr_info("mpoa:Short read (missed %d bytes) from userland\n", + bytes_left); return -EFAULT; } ipaddr = ioc_data.ipaddr; @@ -589,18 +618,20 @@ static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg) if (ioc_data.type == MPC_SOCKET_INGRESS) { in_entry = mpc->in_ops->get(ipaddr, mpc); - if (in_entry == NULL || in_entry->entry_state < INGRESS_RESOLVED) { - printk("mpoa: (%s) mpc_vcc_attach: did not find RESOLVED entry from ingress cache\n", + if (in_entry == NULL || + in_entry->entry_state < INGRESS_RESOLVED) { + pr_info("(%s) did not find RESOLVED entry from ingress cache\n", mpc->dev->name); - if (in_entry != NULL) mpc->in_ops->put(in_entry); + if (in_entry != NULL) + mpc->in_ops->put(in_entry); return -EINVAL; } - printk("mpoa: (%s) mpc_vcc_attach: attaching ingress SVC, entry = %pI4\n", - mpc->dev->name, &in_entry->ctrl_info.in_dst_ip); + pr_info("(%s) attaching ingress SVC, entry = %pI4\n", + mpc->dev->name, &in_entry->ctrl_info.in_dst_ip); in_entry->shortcut = vcc; mpc->in_ops->put(in_entry); } else { - printk("mpoa: (%s) mpc_vcc_attach: attaching egress SVC\n", mpc->dev->name); + pr_info("(%s) attaching egress SVC\n", mpc->dev->name); } vcc->proto_data = mpc->dev; @@ -620,7 +651,7 @@ static void mpc_vcc_close(struct atm_vcc *vcc, struct net_device *dev) mpc = find_mpc_by_lec(dev); if (mpc == NULL) { - printk("mpoa: (%s) 
mpc_vcc_close: close for unknown MPC\n", dev->name); + pr_info("(%s) close for unknown MPC\n", dev->name); return; } @@ -628,19 +659,21 @@ static void mpc_vcc_close(struct atm_vcc *vcc, struct net_device *dev) in_entry = mpc->in_ops->get_by_vcc(vcc, mpc); if (in_entry) { dprintk("mpoa: (%s) mpc_vcc_close: ingress SVC closed ip = %pI4\n", - mpc->dev->name, &in_entry->ctrl_info.in_dst_ip); + mpc->dev->name, &in_entry->ctrl_info.in_dst_ip); in_entry->shortcut = NULL; mpc->in_ops->put(in_entry); } eg_entry = mpc->eg_ops->get_by_vcc(vcc, mpc); if (eg_entry) { - dprintk("mpoa: (%s) mpc_vcc_close: egress SVC closed\n", mpc->dev->name); + dprintk("mpoa: (%s) mpc_vcc_close: egress SVC closed\n", + mpc->dev->name); eg_entry->shortcut = NULL; mpc->eg_ops->put(eg_entry); } if (in_entry == NULL && eg_entry == NULL) - dprintk("mpoa: (%s) mpc_vcc_close: unused vcc closed\n", dev->name); + dprintk("mpoa: (%s) mpc_vcc_close: unused vcc closed\n", + dev->name); return; } @@ -656,16 +689,19 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb) ddprintk("mpoa: (%s) mpc_push:\n", dev->name); if (skb == NULL) { - dprintk("mpoa: (%s) mpc_push: null skb, closing VCC\n", dev->name); + dprintk("mpoa: (%s) mpc_push: null skb, closing VCC\n", + dev->name); mpc_vcc_close(vcc, dev); return; } skb->dev = dev; - if (memcmp(skb->data, &llc_snap_mpoa_ctrl, sizeof(struct llc_snap_hdr)) == 0) { + if (memcmp(skb->data, &llc_snap_mpoa_ctrl, + sizeof(struct llc_snap_hdr)) == 0) { struct sock *sk = sk_atm(vcc); - dprintk("mpoa: (%s) mpc_push: control packet arrived\n", dev->name); + dprintk("mpoa: (%s) mpc_push: control packet arrived\n", + dev->name); /* Pass control packets to daemon */ skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk, skb->len); @@ -677,20 +713,23 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb) mpc = find_mpc_by_lec(dev); if (mpc == NULL) { - printk("mpoa: (%s) mpc_push: unknown MPC\n", dev->name); + pr_info("(%s) unknown MPC\n", dev->name); return; } - if (memcmp(skb->data, &llc_snap_mpoa_data_tagged, sizeof(struct llc_snap_hdr)) == 0) { /* MPOA tagged data */ - ddprintk("mpoa: (%s) mpc_push: tagged data packet arrived\n", dev->name); + if (memcmp(skb->data, &llc_snap_mpoa_data_tagged, + sizeof(struct llc_snap_hdr)) == 0) { /* MPOA tagged data */ + ddprintk("mpoa: (%s) mpc_push: tagged data packet arrived\n", + dev->name); - } else if (memcmp(skb->data, &llc_snap_mpoa_data, sizeof(struct llc_snap_hdr)) == 0) { /* MPOA data */ - printk("mpoa: (%s) mpc_push: non-tagged data packet arrived\n", dev->name); - printk(" mpc_push: non-tagged data unsupported, purging\n"); + } else if (memcmp(skb->data, &llc_snap_mpoa_data, + sizeof(struct llc_snap_hdr)) == 0) { /* MPOA data */ + pr_info("(%s) Unsupported non-tagged data packet arrived. 
Purging\n", + dev->name); dev_kfree_skb_any(skb); return; } else { - printk("mpoa: (%s) mpc_push: garbage arrived, purging\n", dev->name); + pr_info("(%s) garbage arrived, purging\n", dev->name); dev_kfree_skb_any(skb); return; } @@ -700,8 +739,8 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb) eg = mpc->eg_ops->get_by_tag(tag, mpc); if (eg == NULL) { - printk("mpoa: (%s) mpc_push: Didn't find egress cache entry, tag = %u\n", - dev->name,tag); + pr_info("mpoa: (%s) Didn't find egress cache entry, tag = %u\n", + dev->name, tag); purge_egress_shortcut(vcc, NULL); dev_kfree_skb_any(skb); return; @@ -713,13 +752,15 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb) */ if (eg->shortcut == NULL) { eg->shortcut = vcc; - printk("mpoa: (%s) mpc_push: egress SVC in use\n", dev->name); + pr_info("(%s) egress SVC in use\n", dev->name); } - skb_pull(skb, sizeof(struct llc_snap_hdr) + sizeof(tag)); /* get rid of LLC/SNAP header */ - new_skb = skb_realloc_headroom(skb, eg->ctrl_info.DH_length); /* LLC/SNAP is shorter than MAC header :( */ + skb_pull(skb, sizeof(struct llc_snap_hdr) + sizeof(tag)); + /* get rid of LLC/SNAP header */ + new_skb = skb_realloc_headroom(skb, eg->ctrl_info.DH_length); + /* LLC/SNAP is shorter than MAC header :( */ dev_kfree_skb_any(skb); - if (new_skb == NULL){ + if (new_skb == NULL) { mpc->eg_ops->put(eg); return; } @@ -752,7 +793,7 @@ static struct atm_dev mpc_dev = { /* members not explicitly initialised will be 0 */ }; -static int atm_mpoa_mpoad_attach (struct atm_vcc *vcc, int arg) +static int atm_mpoa_mpoad_attach(struct atm_vcc *vcc, int arg) { struct mpoa_client *mpc; struct lec_priv *priv; @@ -772,15 +813,17 @@ static int atm_mpoa_mpoad_attach (struct atm_vcc *vcc, int arg) mpc = find_mpc_by_itfnum(arg); if (mpc == NULL) { - dprintk("mpoa: mpoad_attach: allocating new mpc for itf %d\n", arg); + dprintk("mpoa: mpoad_attach: allocating new mpc for itf %d\n", + arg); mpc = alloc_mpc(); if (mpc == NULL) return -ENOMEM; mpc->dev_num = arg; - mpc->dev = find_lec_by_itfnum(arg); /* NULL if there was no lec */ + mpc->dev = find_lec_by_itfnum(arg); + /* NULL if there was no lec */ } if (mpc->mpoad_vcc) { - printk("mpoa: mpoad_attach: mpoad is already present for itf %d\n", arg); + pr_info("mpoad is already present for itf %d\n", arg); return -EADDRINUSE; } @@ -796,8 +839,8 @@ static int atm_mpoa_mpoad_attach (struct atm_vcc *vcc, int arg) mpc->mpoad_vcc = vcc; vcc->dev = &mpc_dev; vcc_insert_socket(sk_atm(vcc)); - set_bit(ATM_VF_META,&vcc->flags); - set_bit(ATM_VF_READY,&vcc->flags); + set_bit(ATM_VF_META, &vcc->flags); + set_bit(ATM_VF_READY, &vcc->flags); if (mpc->dev) { char empty[ATM_ESA_LEN]; @@ -807,7 +850,7 @@ static int atm_mpoa_mpoad_attach (struct atm_vcc *vcc, int arg) /* set address if mpcd e.g. gets killed and restarted. 
* If we do not do it now we have to wait for the next LE_ARP */ - if ( memcmp(mpc->mps_ctrl_addr, empty, ATM_ESA_LEN) != 0 ) + if (memcmp(mpc->mps_ctrl_addr, empty, ATM_ESA_LEN) != 0) send_set_mps_ctrl_addr(mpc->mps_ctrl_addr, mpc); } @@ -819,7 +862,7 @@ static void send_set_mps_ctrl_addr(const char *addr, struct mpoa_client *mpc) { struct k_message mesg; - memcpy (mpc->mps_ctrl_addr, addr, ATM_ESA_LEN); + memcpy(mpc->mps_ctrl_addr, addr, ATM_ESA_LEN); mesg.type = SET_MPS_CTRL_ADDR; memcpy(mesg.MPS_ctrl, addr, ATM_ESA_LEN); @@ -835,11 +878,11 @@ static void mpoad_close(struct atm_vcc *vcc) mpc = find_mpc_by_vcc(vcc); if (mpc == NULL) { - printk("mpoa: mpoad_close: did not find MPC\n"); + pr_info("did not find MPC\n"); return; } if (!mpc->mpoad_vcc) { - printk("mpoa: mpoad_close: close for non-present mpoad\n"); + pr_info("close for non-present mpoad\n"); return; } @@ -859,7 +902,7 @@ static void mpoad_close(struct atm_vcc *vcc) kfree_skb(skb); } - printk("mpoa: (%s) going down\n", + pr_info("(%s) going down\n", (mpc->dev) ? mpc->dev->name : ""); module_put(THIS_MODULE); @@ -873,15 +916,16 @@ static int msg_from_mpoad(struct atm_vcc *vcc, struct sk_buff *skb) { struct mpoa_client *mpc = find_mpc_by_vcc(vcc); - struct k_message *mesg = (struct k_message*)skb->data; + struct k_message *mesg = (struct k_message *)skb->data; atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); if (mpc == NULL) { - printk("mpoa: msg_from_mpoad: no mpc found\n"); + pr_info("no mpc found\n"); return 0; } - dprintk("mpoa: (%s) msg_from_mpoad:", (mpc->dev) ? mpc->dev->name : ""); - switch(mesg->type) { + dprintk("mpoa: (%s) msg_from_mpoad:", + (mpc->dev) ? mpc->dev->name : ""); + switch (mesg->type) { case MPOA_RES_REPLY_RCVD: dprintk(" mpoa_res_reply_rcvd\n"); MPOA_res_reply_rcvd(mesg, mpc); @@ -942,7 +986,7 @@ int msg_to_mpoad(struct k_message *mesg, struct mpoa_client *mpc) struct sock *sk; if (mpc == NULL || !mpc->mpoad_vcc) { - printk("mpoa: msg_to_mpoad: mesg %d to a non-existent mpoad\n", mesg->type); + pr_info("mesg %d to a non-existent mpoad\n", mesg->type); return -ENXIO; } @@ -960,7 +1004,8 @@ int msg_to_mpoad(struct k_message *mesg, struct mpoa_client *mpc) return 0; } -static int mpoa_event_listener(struct notifier_block *mpoa_notifier, unsigned long event, void *dev_ptr) +static int mpoa_event_listener(struct notifier_block *mpoa_notifier, + unsigned long event, void *dev_ptr) { struct net_device *dev; struct mpoa_client *mpc; @@ -983,10 +1028,10 @@ static int mpoa_event_listener(struct notifier_block *mpoa_notifier, unsigned lo mpc = find_mpc_by_itfnum(priv->itfnum); if (mpc == NULL) { dprintk("mpoa: mpoa_event_listener: allocating new mpc for %s\n", - dev->name); + dev->name); mpc = alloc_mpc(); if (mpc == NULL) { - printk("mpoa: mpoa_event_listener: no new mpc"); + pr_info("no new mpc"); break; } } @@ -1010,9 +1055,8 @@ static int mpoa_event_listener(struct notifier_block *mpoa_notifier, unsigned lo mpc = find_mpc_by_lec(dev); if (mpc == NULL) break; - if (mpc->mpoad_vcc != NULL) { + if (mpc->mpoad_vcc != NULL) start_mpc(mpc, dev); - } break; case NETDEV_DOWN: /* the dev was ifconfig'ed down */ @@ -1022,9 +1066,8 @@ static int mpoa_event_listener(struct notifier_block *mpoa_notifier, unsigned lo mpc = find_mpc_by_lec(dev); if (mpc == NULL) break; - if (mpc->mpoad_vcc != NULL) { + if (mpc->mpoad_vcc != NULL) stop_mpc(mpc); - } break; case NETDEV_REBOOT: case NETDEV_CHANGE: @@ -1051,7 +1094,7 @@ static void MPOA_trigger_rcvd(struct k_message *msg, struct mpoa_client *mpc) in_cache_entry *entry; 
entry = mpc->in_ops->get(dst_ip, mpc); - if(entry == NULL){ + if (entry == NULL) { entry = mpc->in_ops->add_entry(dst_ip, mpc); entry->entry_state = INGRESS_RESOLVING; msg->type = SND_MPOA_RES_RQST; @@ -1062,7 +1105,7 @@ static void MPOA_trigger_rcvd(struct k_message *msg, struct mpoa_client *mpc) return; } - if(entry->entry_state == INGRESS_INVALID){ + if (entry->entry_state == INGRESS_INVALID) { entry->entry_state = INGRESS_RESOLVING; msg->type = SND_MPOA_RES_RQST; msg->content.in_info = entry->ctrl_info; @@ -1072,7 +1115,7 @@ static void MPOA_trigger_rcvd(struct k_message *msg, struct mpoa_client *mpc) return; } - printk("mpoa: (%s) MPOA_trigger_rcvd: entry already in resolving state\n", + pr_info("(%s) entry already in resolving state\n", (mpc->dev) ? mpc->dev->name : ""); mpc->in_ops->put(entry); return; @@ -1082,22 +1125,24 @@ static void MPOA_trigger_rcvd(struct k_message *msg, struct mpoa_client *mpc) * Things get complicated because we have to check if there's an egress * shortcut with suitable traffic parameters we could use. */ -static void check_qos_and_open_shortcut(struct k_message *msg, struct mpoa_client *client, in_cache_entry *entry) +static void check_qos_and_open_shortcut(struct k_message *msg, + struct mpoa_client *client, + in_cache_entry *entry) { __be32 dst_ip = msg->content.in_info.in_dst_ip; struct atm_mpoa_qos *qos = atm_mpoa_search_qos(dst_ip); eg_cache_entry *eg_entry = client->eg_ops->get_by_src_ip(dst_ip, client); - if(eg_entry && eg_entry->shortcut){ - if(eg_entry->shortcut->qos.txtp.traffic_class & - msg->qos.txtp.traffic_class & - (qos ? qos->qos.txtp.traffic_class : ATM_UBR | ATM_CBR)){ - if(eg_entry->shortcut->qos.txtp.traffic_class == ATM_UBR) - entry->shortcut = eg_entry->shortcut; - else if(eg_entry->shortcut->qos.txtp.max_pcr > 0) - entry->shortcut = eg_entry->shortcut; + if (eg_entry && eg_entry->shortcut) { + if (eg_entry->shortcut->qos.txtp.traffic_class & + msg->qos.txtp.traffic_class & + (qos ? qos->qos.txtp.traffic_class : ATM_UBR | ATM_CBR)) { + if (eg_entry->shortcut->qos.txtp.traffic_class == ATM_UBR) + entry->shortcut = eg_entry->shortcut; + else if (eg_entry->shortcut->qos.txtp.max_pcr > 0) + entry->shortcut = eg_entry->shortcut; } - if(entry->shortcut){ + if (entry->shortcut) { dprintk("mpoa: (%s) using egress SVC to reach %pI4\n", client->dev->name, &dst_ip); client->eg_ops->put(eg_entry); @@ -1109,12 +1154,13 @@ static void check_qos_and_open_shortcut(struct k_message *msg, struct mpoa_clien /* No luck in the egress cache we must open an ingress SVC */ msg->type = OPEN_INGRESS_SVC; - if (qos && (qos->qos.txtp.traffic_class == msg->qos.txtp.traffic_class)) - { + if (qos && + (qos->qos.txtp.traffic_class == msg->qos.txtp.traffic_class)) { msg->qos = qos->qos; - printk("mpoa: (%s) trying to get a CBR shortcut\n",client->dev->name); - } - else memset(&msg->qos,0,sizeof(struct atm_qos)); + pr_info("(%s) trying to get a CBR shortcut\n", + client->dev->name); + } else + memset(&msg->qos, 0, sizeof(struct atm_qos)); msg_to_mpoad(msg, client); return; } @@ -1126,15 +1172,17 @@ static void MPOA_res_reply_rcvd(struct k_message *msg, struct mpoa_client *mpc) dprintk("mpoa: (%s) MPOA_res_reply_rcvd: ip %pI4\n", mpc->dev->name, &dst_ip); - ddprintk("mpoa: (%s) MPOA_res_reply_rcvd() entry = %p", mpc->dev->name, entry); - if(entry == NULL){ - printk("\nmpoa: (%s) ARGH, received res. 
reply for an entry that doesn't exist.\n", mpc->dev->name); + ddprintk("mpoa: (%s) MPOA_res_reply_rcvd() entry = %p", + mpc->dev->name, entry); + if (entry == NULL) { + pr_info("(%s) ARGH, received res. reply for an entry that doesn't exist.\n", + mpc->dev->name); return; } ddprintk(" entry_state = %d ", entry->entry_state); if (entry->entry_state == INGRESS_RESOLVED) { - printk("\nmpoa: (%s) MPOA_res_reply_rcvd for RESOLVED entry!\n", mpc->dev->name); + pr_info("(%s) RESOLVED entry!\n", mpc->dev->name); mpc->in_ops->put(entry); return; } @@ -1145,15 +1193,16 @@ static void MPOA_res_reply_rcvd(struct k_message *msg, struct mpoa_client *mpc) entry->refresh_time = 0; ddprintk("entry->shortcut = %p\n", entry->shortcut); - if(entry->entry_state == INGRESS_RESOLVING && entry->shortcut != NULL){ + if (entry->entry_state == INGRESS_RESOLVING && + entry->shortcut != NULL) { entry->entry_state = INGRESS_RESOLVED; mpc->in_ops->put(entry); return; /* Shortcut already open... */ } if (entry->shortcut != NULL) { - printk("mpoa: (%s) MPOA_res_reply_rcvd: entry->shortcut != NULL, impossible!\n", - mpc->dev->name); + pr_info("(%s) entry->shortcut != NULL, impossible!\n", + mpc->dev->name); mpc->in_ops->put(entry); return; } @@ -1172,9 +1221,9 @@ static void ingress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc) __be32 mask = msg->ip_mask; in_cache_entry *entry = mpc->in_ops->get_with_mask(dst_ip, mpc, mask); - if(entry == NULL){ - printk("mpoa: (%s) ingress_purge_rcvd: purge for a non-existing entry, ip = %pI4\n", - mpc->dev->name, &dst_ip); + if (entry == NULL) { + pr_info("(%s) purge for a non-existing entry, ip = %pI4\n", + mpc->dev->name, &dst_ip); return; } @@ -1197,7 +1246,8 @@ static void egress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc) eg_cache_entry *entry = mpc->eg_ops->get_by_cache_id(cache_id, mpc); if (entry == NULL) { - dprintk("mpoa: (%s) egress_purge_rcvd: purge for a non-existing entry\n", mpc->dev->name); + dprintk("mpoa: (%s) egress_purge_rcvd: purge for a non-existing entry\n", + mpc->dev->name); return; } @@ -1218,13 +1268,13 @@ static void purge_egress_shortcut(struct atm_vcc *vcc, eg_cache_entry *entry) dprintk("mpoa: purge_egress_shortcut: entering\n"); if (vcc == NULL) { - printk("mpoa: purge_egress_shortcut: vcc == NULL\n"); + pr_info("vcc == NULL\n"); return; } skb = alloc_skb(sizeof(struct k_message), GFP_ATOMIC); if (skb == NULL) { - printk("mpoa: purge_egress_shortcut: out of memory\n"); + pr_info("out of memory\n"); return; } @@ -1249,14 +1299,14 @@ static void purge_egress_shortcut(struct atm_vcc *vcc, eg_cache_entry *entry) * Our MPS died. Tell our daemon to send NHRP data plane purge to each * of the egress shortcuts we have. 
*/ -static void mps_death( struct k_message * msg, struct mpoa_client * mpc ) +static void mps_death(struct k_message *msg, struct mpoa_client *mpc) { eg_cache_entry *entry; dprintk("mpoa: (%s) mps_death:\n", mpc->dev->name); - if(memcmp(msg->MPS_ctrl, mpc->mps_ctrl_addr, ATM_ESA_LEN)){ - printk("mpoa: (%s) mps_death: wrong MPS\n", mpc->dev->name); + if (memcmp(msg->MPS_ctrl, mpc->mps_ctrl_addr, ATM_ESA_LEN)) { + pr_info("(%s) wrong MPS\n", mpc->dev->name); return; } @@ -1275,20 +1325,21 @@ static void mps_death( struct k_message * msg, struct mpoa_client * mpc ) return; } -static void MPOA_cache_impos_rcvd( struct k_message * msg, struct mpoa_client * mpc) +static void MPOA_cache_impos_rcvd(struct k_message *msg, + struct mpoa_client *mpc) { uint16_t holding_time; eg_cache_entry *entry = mpc->eg_ops->get_by_cache_id(msg->content.eg_info.cache_id, mpc); holding_time = msg->content.eg_info.holding_time; dprintk("mpoa: (%s) MPOA_cache_impos_rcvd: entry = %p, holding_time = %u\n", - mpc->dev->name, entry, holding_time); - if(entry == NULL && holding_time) { + mpc->dev->name, entry, holding_time); + if (entry == NULL && holding_time) { entry = mpc->eg_ops->add_entry(msg, mpc); mpc->eg_ops->put(entry); return; } - if(holding_time){ + if (holding_time) { mpc->eg_ops->update(entry, holding_time); return; } @@ -1302,7 +1353,8 @@ static void MPOA_cache_impos_rcvd( struct k_message * msg, struct mpoa_client * return; } -static void set_mpc_ctrl_addr_rcvd(struct k_message *mesg, struct mpoa_client *mpc) +static void set_mpc_ctrl_addr_rcvd(struct k_message *mesg, + struct mpoa_client *mpc) { struct lec_priv *priv; int i, retval ; @@ -1318,33 +1370,38 @@ static void set_mpc_ctrl_addr_rcvd(struct k_message *mesg, struct mpoa_client *m memcpy(mpc->our_ctrl_addr, mesg->MPS_ctrl, ATM_ESA_LEN); dprintk("mpoa: (%s) setting MPC ctrl ATM address to ", - (mpc->dev) ? mpc->dev->name : ""); + (mpc->dev) ? 
mpc->dev->name : ""); for (i = 7; i < sizeof(tlv); i++) dprintk("%02x ", tlv[i]); dprintk("\n"); if (mpc->dev) { priv = netdev_priv(mpc->dev); - retval = priv->lane2_ops->associate_req(mpc->dev, mpc->dev->dev_addr, tlv, sizeof(tlv)); + retval = priv->lane2_ops->associate_req(mpc->dev, + mpc->dev->dev_addr, + tlv, sizeof(tlv)); if (retval == 0) - printk("mpoa: (%s) MPOA device type TLV association failed\n", mpc->dev->name); + pr_info("(%s) MPOA device type TLV association failed\n", + mpc->dev->name); retval = priv->lane2_ops->resolve(mpc->dev, NULL, 1, NULL, NULL); if (retval < 0) - printk("mpoa: (%s) targetless LE_ARP request failed\n", mpc->dev->name); + pr_info("(%s) targetless LE_ARP request failed\n", + mpc->dev->name); } return; } -static void set_mps_mac_addr_rcvd(struct k_message *msg, struct mpoa_client *client) +static void set_mps_mac_addr_rcvd(struct k_message *msg, + struct mpoa_client *client) { - if(client->number_of_mps_macs) + if (client->number_of_mps_macs) kfree(client->mps_macs); client->number_of_mps_macs = 0; client->mps_macs = kmemdup(msg->MPS_ctrl, ETH_ALEN, GFP_KERNEL); if (client->mps_macs == NULL) { - printk("mpoa: set_mps_mac_addr_rcvd: out of memory\n"); + pr_info("out of memory\n"); return; } client->number_of_mps_macs = 1; @@ -1365,11 +1422,11 @@ static void clean_up(struct k_message *msg, struct mpoa_client *mpc, int action) /* FIXME: This knows too much of the cache structure */ read_lock_irq(&mpc->egress_lock); entry = mpc->eg_cache; - while (entry != NULL){ - msg->content.eg_info = entry->ctrl_info; - dprintk("mpoa: cache_id %u\n", entry->ctrl_info.cache_id); - msg_to_mpoad(msg, mpc); - entry = entry->next; + while (entry != NULL) { + msg->content.eg_info = entry->ctrl_info; + dprintk("mpoa: cache_id %u\n", entry->ctrl_info.cache_id); + msg_to_mpoad(msg, mpc); + entry = entry->next; } read_unlock_irq(&mpc->egress_lock); @@ -1388,20 +1445,22 @@ static void mpc_timer_refresh(void) return; } -static void mpc_cache_check( unsigned long checking_time ) +static void mpc_cache_check(unsigned long checking_time) { struct mpoa_client *mpc = mpcs; static unsigned long previous_resolving_check_time; static unsigned long previous_refresh_time; - while( mpc != NULL ){ + while (mpc != NULL) { mpc->in_ops->clear_count(mpc); mpc->eg_ops->clear_expired(mpc); - if(checking_time - previous_resolving_check_time > mpc->parameters.mpc_p4 * HZ ){ + if (checking_time - previous_resolving_check_time > + mpc->parameters.mpc_p4 * HZ) { mpc->in_ops->check_resolving(mpc); previous_resolving_check_time = checking_time; } - if(checking_time - previous_refresh_time > mpc->parameters.mpc_p5 * HZ ){ + if (checking_time - previous_refresh_time > + mpc->parameters.mpc_p5 * HZ) { mpc->in_ops->refresh(mpc); previous_refresh_time = checking_time; } @@ -1412,7 +1471,8 @@ static void mpc_cache_check( unsigned long checking_time ) return; } -static int atm_mpoa_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +static int atm_mpoa_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg) { int err = 0; struct atm_vcc *vcc = ATM_SD(sock); @@ -1424,21 +1484,20 @@ static int atm_mpoa_ioctl(struct socket *sock, unsigned int cmd, unsigned long a return -EPERM; switch (cmd) { - case ATMMPC_CTRL: - err = atm_mpoa_mpoad_attach(vcc, (int)arg); - if (err >= 0) - sock->state = SS_CONNECTED; - break; - case ATMMPC_DATA: - err = atm_mpoa_vcc_attach(vcc, (void __user *)arg); - break; - default: - break; + case ATMMPC_CTRL: + err = atm_mpoa_mpoad_attach(vcc, (int)arg); + if (err >= 0) + 
sock->state = SS_CONNECTED; + break; + case ATMMPC_DATA: + err = atm_mpoa_vcc_attach(vcc, (void __user *)arg); + break; + default: + break; } return err; } - static struct atm_ioctl atm_ioctl_ops = { .owner = THIS_MODULE, .ioctl = atm_mpoa_ioctl, @@ -1451,7 +1510,7 @@ static __init int atm_mpoa_init(void) if (mpc_proc_init() != 0) pr_info("failed to initialize /proc/mpoa\n"); - printk("mpc.c: " __DATE__ " " __TIME__ " initialized\n"); + pr_info("mpc.c: " __DATE__ " " __TIME__ " initialized\n"); return 0; } -- cgit v1.2.2 From bee67d34b2dd495feafb1a37b5ea2d6179b74178 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 26 Jan 2010 11:40:10 +0000 Subject: net/atm/mpoa_caches.c: checkpatch cleanups Convert #include Signed-off-by: David S. Miller --- net/atm/mpoa_caches.c | 160 +++++++++++++++++++++++++++----------------------- 1 file changed, 88 insertions(+), 72 deletions(-) (limited to 'net') diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c index 4504a4b339bb..de21cc66feb4 100644 --- a/net/atm/mpoa_caches.c +++ b/net/atm/mpoa_caches.c @@ -11,15 +11,17 @@ */ #if 0 -#define dprintk printk /* debug */ +#define dprintk(format, args...) printk(KERN_DEBUG format, ##args) /* debug */ #else -#define dprintk(format,args...) +#define dprintk(format, args...) \ + do { if (0) printk(KERN_DEBUG format, ##args); } while (0) #endif #if 0 -#define ddprintk printk /* more debug */ +#define ddprintk printk(KERN_DEBUG format, ##args) /* more debug */ #else -#define ddprintk(format,args...) +#define ddprintk(format, args...) \ + do { if (0) printk(KERN_DEBUG format, ##args); } while (0) #endif static in_cache_entry *in_cache_get(__be32 dst_ip, @@ -29,8 +31,8 @@ static in_cache_entry *in_cache_get(__be32 dst_ip, read_lock_bh(&client->ingress_lock); entry = client->in_cache; - while(entry != NULL){ - if( entry->ctrl_info.in_dst_ip == dst_ip ){ + while (entry != NULL) { + if (entry->ctrl_info.in_dst_ip == dst_ip) { atomic_inc(&entry->use); read_unlock_bh(&client->ingress_lock); return entry; @@ -50,8 +52,8 @@ static in_cache_entry *in_cache_get_with_mask(__be32 dst_ip, read_lock_bh(&client->ingress_lock); entry = client->in_cache; - while(entry != NULL){ - if((entry->ctrl_info.in_dst_ip & mask) == (dst_ip & mask )){ + while (entry != NULL) { + if ((entry->ctrl_info.in_dst_ip & mask) == (dst_ip & mask)) { atomic_inc(&entry->use); read_unlock_bh(&client->ingress_lock); return entry; @@ -65,14 +67,14 @@ static in_cache_entry *in_cache_get_with_mask(__be32 dst_ip, } static in_cache_entry *in_cache_get_by_vcc(struct atm_vcc *vcc, - struct mpoa_client *client ) + struct mpoa_client *client) { in_cache_entry *entry; read_lock_bh(&client->ingress_lock); entry = client->in_cache; - while(entry != NULL){ - if(entry->shortcut == vcc) { + while (entry != NULL) { + if (entry->shortcut == vcc) { atomic_inc(&entry->use); read_unlock_bh(&client->ingress_lock); return entry; @@ -90,11 +92,12 @@ static in_cache_entry *in_cache_add_entry(__be32 dst_ip, in_cache_entry *entry = kzalloc(sizeof(in_cache_entry), GFP_KERNEL); if (entry == NULL) { - printk("mpoa: mpoa_caches.c: new_in_cache_entry: out of memory\n"); + pr_info("mpoa: mpoa_caches.c: new_in_cache_entry: out of memory\n"); return NULL; } - dprintk("mpoa: mpoa_caches.c: adding an ingress entry, ip = %pI4\n", &dst_ip); + dprintk("mpoa: mpoa_caches.c: adding an ingress entry, ip = %pI4\n", + &dst_ip); atomic_set(&entry->use, 1); dprintk("mpoa: mpoa_caches.c: new_in_cache_entry: about to lock\n"); @@ -126,39 +129,41 @@ static int cache_hit(in_cache_entry *entry, struct 
mpoa_client *mpc) struct k_message msg; entry->count++; - if(entry->entry_state == INGRESS_RESOLVED && entry->shortcut != NULL) + if (entry->entry_state == INGRESS_RESOLVED && entry->shortcut != NULL) return OPEN; - if(entry->entry_state == INGRESS_REFRESHING){ - if(entry->count > mpc->parameters.mpc_p1){ + if (entry->entry_state == INGRESS_REFRESHING) { + if (entry->count > mpc->parameters.mpc_p1) { msg.type = SND_MPOA_RES_RQST; msg.content.in_info = entry->ctrl_info; memcpy(msg.MPS_ctrl, mpc->mps_ctrl_addr, ATM_ESA_LEN); qos = atm_mpoa_search_qos(entry->ctrl_info.in_dst_ip); - if (qos != NULL) msg.qos = qos->qos; + if (qos != NULL) + msg.qos = qos->qos; msg_to_mpoad(&msg, mpc); do_gettimeofday(&(entry->reply_wait)); entry->entry_state = INGRESS_RESOLVING; } - if(entry->shortcut != NULL) + if (entry->shortcut != NULL) return OPEN; return CLOSED; } - if(entry->entry_state == INGRESS_RESOLVING && entry->shortcut != NULL) + if (entry->entry_state == INGRESS_RESOLVING && entry->shortcut != NULL) return OPEN; - if( entry->count > mpc->parameters.mpc_p1 && - entry->entry_state == INGRESS_INVALID){ + if (entry->count > mpc->parameters.mpc_p1 && + entry->entry_state == INGRESS_INVALID) { dprintk("mpoa: (%s) mpoa_caches.c: threshold exceeded for ip %pI4, sending MPOA res req\n", mpc->dev->name, &entry->ctrl_info.in_dst_ip); entry->entry_state = INGRESS_RESOLVING; - msg.type = SND_MPOA_RES_RQST; - memcpy(msg.MPS_ctrl, mpc->mps_ctrl_addr, ATM_ESA_LEN ); + msg.type = SND_MPOA_RES_RQST; + memcpy(msg.MPS_ctrl, mpc->mps_ctrl_addr, ATM_ESA_LEN); msg.content.in_info = entry->ctrl_info; qos = atm_mpoa_search_qos(entry->ctrl_info.in_dst_ip); - if (qos != NULL) msg.qos = qos->qos; - msg_to_mpoad( &msg, mpc); + if (qos != NULL) + msg.qos = qos->qos; + msg_to_mpoad(&msg, mpc); do_gettimeofday(&(entry->reply_wait)); } @@ -195,14 +200,15 @@ static void in_cache_remove_entry(in_cache_entry *entry, if (entry->next != NULL) entry->next->prev = entry->prev; client->in_ops->put(entry); - if(client->in_cache == NULL && client->eg_cache == NULL){ + if (client->in_cache == NULL && client->eg_cache == NULL) { msg.type = STOP_KEEP_ALIVE_SM; - msg_to_mpoad(&msg,client); + msg_to_mpoad(&msg, client); } /* Check if the egress side still uses this VCC */ if (vcc != NULL) { - eg_cache_entry *eg_entry = client->eg_ops->get_by_vcc(vcc, client); + eg_cache_entry *eg_entry = client->eg_ops->get_by_vcc(vcc, + client); if (eg_entry != NULL) { client->eg_ops->put(eg_entry); return; @@ -213,7 +219,6 @@ static void in_cache_remove_entry(in_cache_entry *entry, return; } - /* Call this every MPC-p2 seconds... Not exactly correct solution, but an easy one... 
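The two "if (qos != NULL) msg.qos = qos->qos;" splits in cache_hit() above are the "move trailing statements to new lines" rule in action; reduced to a generic helper (demo_clamp() is invented, not kernel code):

static void demo_clamp(int *value, int limit)
{
	/* old style: if (*value > limit) *value = limit; */
	if (*value > limit)
		*value = limit;
}

Keeping one statement per line is purely cosmetic, but it is what checkpatch enforces and it stops the conditional from hiding the side effect.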
*/ static void clear_count_and_expired(struct mpoa_client *client) @@ -225,11 +230,11 @@ static void clear_count_and_expired(struct mpoa_client *client) write_lock_bh(&client->ingress_lock); entry = client->in_cache; - while(entry != NULL){ - entry->count=0; + while (entry != NULL) { + entry->count = 0; next_entry = entry->next; - if((now.tv_sec - entry->tv.tv_sec) - > entry->ctrl_info.holding_time){ + if ((now.tv_sec - entry->tv.tv_sec) + > entry->ctrl_info.holding_time) { dprintk("mpoa: mpoa_caches.c: holding time expired, ip = %pI4\n", &entry->ctrl_info.in_dst_ip); client->in_ops->remove_entry(entry, client); @@ -250,33 +255,38 @@ static void check_resolving_entries(struct mpoa_client *client) struct timeval now; struct k_message msg; - do_gettimeofday( &now ); + do_gettimeofday(&now); read_lock_bh(&client->ingress_lock); entry = client->in_cache; - while( entry != NULL ){ - if(entry->entry_state == INGRESS_RESOLVING){ - if(now.tv_sec - entry->hold_down.tv_sec < client->parameters.mpc_p6){ - entry = entry->next; /* Entry in hold down */ + while (entry != NULL) { + if (entry->entry_state == INGRESS_RESOLVING) { + if ((now.tv_sec - entry->hold_down.tv_sec) < + client->parameters.mpc_p6) { + entry = entry->next; /* Entry in hold down */ continue; } - if( (now.tv_sec - entry->reply_wait.tv_sec) > - entry->retry_time ){ - entry->retry_time = MPC_C1*( entry->retry_time ); - if(entry->retry_time > client->parameters.mpc_p5){ - /* Retry time maximum exceeded, put entry in hold down. */ + if ((now.tv_sec - entry->reply_wait.tv_sec) > + entry->retry_time) { + entry->retry_time = MPC_C1 * (entry->retry_time); + /* + * Retry time maximum exceeded, + * put entry in hold down. + */ + if (entry->retry_time > client->parameters.mpc_p5) { do_gettimeofday(&(entry->hold_down)); entry->retry_time = client->parameters.mpc_p4; entry = entry->next; continue; } /* Ask daemon to send a resolution request. 
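The back-off that check_resolving_entries() walks through above is easier to follow with the list traversal and message plumbing stripped away. A sketch with plain integers; c1 and p4..p6 stand in for MPC_C1 and the mpc_p4..mpc_p6 client parameters, and demo_should_retry() is an invented name:

static int demo_should_retry(long now, long *reply_wait, long *hold_down,
			     long *retry_time, long c1,
			     long p4, long p5, long p6)
{
	if (now - *hold_down < p6)
		return 0;			/* entry still in hold-down */
	if (now - *reply_wait <= *retry_time)
		return 0;			/* not due for a retry yet */
	*retry_time *= c1;			/* back off geometrically */
	if (*retry_time > p5) {
		*hold_down = now;		/* cap reached: hold down, restart later */
		*retry_time = p4;
		return 0;
	}
	*hold_down = 0;
	*reply_wait = now;
	return 1;				/* caller sends SND_MPOA_RES_RTRY */
}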
*/ - memset(&(entry->hold_down),0,sizeof(struct timeval)); + memset(&(entry->hold_down), 0, sizeof(struct timeval)); msg.type = SND_MPOA_RES_RTRY; memcpy(msg.MPS_ctrl, client->mps_ctrl_addr, ATM_ESA_LEN); msg.content.in_info = entry->ctrl_info; qos = atm_mpoa_search_qos(entry->ctrl_info.in_dst_ip); - if (qos != NULL) msg.qos = qos->qos; + if (qos != NULL) + msg.qos = qos->qos; msg_to_mpoad(&msg, client); do_gettimeofday(&(entry->reply_wait)); } @@ -296,11 +306,12 @@ static void refresh_entries(struct mpoa_client *client) do_gettimeofday(&now); read_lock_bh(&client->ingress_lock); - while( entry != NULL ){ - if( entry->entry_state == INGRESS_RESOLVED ){ - if(!(entry->refresh_time)) - entry->refresh_time = (2*(entry->ctrl_info.holding_time))/3; - if( (now.tv_sec - entry->reply_wait.tv_sec) > entry->refresh_time ){ + while (entry != NULL) { + if (entry->entry_state == INGRESS_RESOLVED) { + if (!(entry->refresh_time)) + entry->refresh_time = (2 * (entry->ctrl_info.holding_time))/3; + if ((now.tv_sec - entry->reply_wait.tv_sec) > + entry->refresh_time) { dprintk("mpoa: mpoa_caches.c: refreshing an entry.\n"); entry->entry_state = INGRESS_REFRESHING; @@ -314,21 +325,22 @@ static void refresh_entries(struct mpoa_client *client) static void in_destroy_cache(struct mpoa_client *mpc) { write_lock_irq(&mpc->ingress_lock); - while(mpc->in_cache != NULL) + while (mpc->in_cache != NULL) mpc->in_ops->remove_entry(mpc->in_cache, mpc); write_unlock_irq(&mpc->ingress_lock); return; } -static eg_cache_entry *eg_cache_get_by_cache_id(__be32 cache_id, struct mpoa_client *mpc) +static eg_cache_entry *eg_cache_get_by_cache_id(__be32 cache_id, + struct mpoa_client *mpc) { eg_cache_entry *entry; read_lock_irq(&mpc->egress_lock); entry = mpc->eg_cache; - while(entry != NULL){ - if(entry->ctrl_info.cache_id == cache_id){ + while (entry != NULL) { + if (entry->ctrl_info.cache_id == cache_id) { atomic_inc(&entry->use); read_unlock_irq(&mpc->egress_lock); return entry; @@ -348,7 +360,7 @@ static eg_cache_entry *eg_cache_get_by_tag(__be32 tag, struct mpoa_client *mpc) read_lock_irqsave(&mpc->egress_lock, flags); entry = mpc->eg_cache; - while (entry != NULL){ + while (entry != NULL) { if (entry->ctrl_info.tag == tag) { atomic_inc(&entry->use); read_unlock_irqrestore(&mpc->egress_lock, flags); @@ -362,14 +374,15 @@ static eg_cache_entry *eg_cache_get_by_tag(__be32 tag, struct mpoa_client *mpc) } /* This can be called from any context since it saves CPU flags */ -static eg_cache_entry *eg_cache_get_by_vcc(struct atm_vcc *vcc, struct mpoa_client *mpc) +static eg_cache_entry *eg_cache_get_by_vcc(struct atm_vcc *vcc, + struct mpoa_client *mpc) { unsigned long flags; eg_cache_entry *entry; read_lock_irqsave(&mpc->egress_lock, flags); entry = mpc->eg_cache; - while (entry != NULL){ + while (entry != NULL) { if (entry->shortcut == vcc) { atomic_inc(&entry->use); read_unlock_irqrestore(&mpc->egress_lock, flags); @@ -382,14 +395,15 @@ static eg_cache_entry *eg_cache_get_by_vcc(struct atm_vcc *vcc, struct mpoa_clie return NULL; } -static eg_cache_entry *eg_cache_get_by_src_ip(__be32 ipaddr, struct mpoa_client *mpc) +static eg_cache_entry *eg_cache_get_by_src_ip(__be32 ipaddr, + struct mpoa_client *mpc) { eg_cache_entry *entry; read_lock_irq(&mpc->egress_lock); entry = mpc->eg_cache; - while(entry != NULL){ - if(entry->latest_ip_addr == ipaddr) { + while (entry != NULL) { + if (entry->latest_ip_addr == ipaddr) { atomic_inc(&entry->use); read_unlock_irq(&mpc->egress_lock); return entry; @@ -429,9 +443,9 @@ static void 
eg_cache_remove_entry(eg_cache_entry *entry, if (entry->next != NULL) entry->next->prev = entry->prev; client->eg_ops->put(entry); - if(client->in_cache == NULL && client->eg_cache == NULL){ + if (client->in_cache == NULL && client->eg_cache == NULL) { msg.type = STOP_KEEP_ALIVE_SM; - msg_to_mpoad(&msg,client); + msg_to_mpoad(&msg, client); } /* Check if the ingress side still uses this VCC */ @@ -447,12 +461,13 @@ static void eg_cache_remove_entry(eg_cache_entry *entry, return; } -static eg_cache_entry *eg_cache_add_entry(struct k_message *msg, struct mpoa_client *client) +static eg_cache_entry *eg_cache_add_entry(struct k_message *msg, + struct mpoa_client *client) { eg_cache_entry *entry = kzalloc(sizeof(eg_cache_entry), GFP_KERNEL); if (entry == NULL) { - printk("mpoa: mpoa_caches.c: new_eg_cache_entry: out of memory\n"); + pr_info("out of memory\n"); return NULL; } @@ -472,7 +487,8 @@ static eg_cache_entry *eg_cache_add_entry(struct k_message *msg, struct mpoa_cli entry->ctrl_info = msg->content.eg_info; do_gettimeofday(&(entry->tv)); entry->entry_state = EGRESS_RESOLVED; - dprintk("mpoa: mpoa_caches.c: new_eg_cache_entry cache_id %lu\n", ntohl(entry->ctrl_info.cache_id)); + dprintk("mpoa: mpoa_caches.c: new_eg_cache_entry cache_id %u\n", + ntohl(entry->ctrl_info.cache_id)); dprintk("mpoa: mpoa_caches.c: mps_ip = %pI4\n", &entry->ctrl_info.mps_ip); atomic_inc(&entry->use); @@ -483,7 +499,7 @@ static eg_cache_entry *eg_cache_add_entry(struct k_message *msg, struct mpoa_cli return entry; } -static void update_eg_cache_entry(eg_cache_entry * entry, uint16_t holding_time) +static void update_eg_cache_entry(eg_cache_entry *entry, uint16_t holding_time) { do_gettimeofday(&(entry->tv)); entry->entry_state = EGRESS_RESOLVED; @@ -502,13 +518,14 @@ static void clear_expired(struct mpoa_client *client) write_lock_irq(&client->egress_lock); entry = client->eg_cache; - while(entry != NULL){ + while (entry != NULL) { next_entry = entry->next; - if((now.tv_sec - entry->tv.tv_sec) - > entry->ctrl_info.holding_time){ + if ((now.tv_sec - entry->tv.tv_sec) + > entry->ctrl_info.holding_time) { msg.type = SND_EGRESS_PURGE; msg.content.eg_info = entry->ctrl_info; - dprintk("mpoa: mpoa_caches.c: egress_cache: holding time expired, cache_id = %lu.\n",ntohl(entry->ctrl_info.cache_id)); + dprintk("mpoa: mpoa_caches.c: egress_cache: holding time expired, cache_id = %u.\n", + ntohl(entry->ctrl_info.cache_id)); msg_to_mpoad(&msg, client); client->eg_ops->remove_entry(entry, client); } @@ -522,7 +539,7 @@ static void clear_expired(struct mpoa_client *client) static void eg_destroy_cache(struct mpoa_client *mpc) { write_lock_irq(&mpc->egress_lock); - while(mpc->eg_cache != NULL) + while (mpc->eg_cache != NULL) mpc->eg_ops->remove_entry(mpc->eg_cache, mpc); write_unlock_irq(&mpc->egress_lock); @@ -530,7 +547,6 @@ static void eg_destroy_cache(struct mpoa_client *mpc) } - static struct in_cache_ops ingress_ops = { in_cache_add_entry, /* add_entry */ in_cache_get, /* get */ -- cgit v1.2.2 From f1e100491e0e696b19f37f2b0cf728a5f2bc56e7 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 26 Jan 2010 11:40:11 +0000 Subject: net/atm/mpoa_proc.c: checkpatch cleanups Convert #include Signed-off-by: David S. 
Miller --- net/atm/mpoa_proc.c | 64 ++++++++++++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c index 0603ab478cf5..aae4a87c22ac 100644 --- a/net/atm/mpoa_proc.c +++ b/net/atm/mpoa_proc.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include "mpc.h" @@ -21,9 +21,10 @@ */ #if 1 -#define dprintk printk /* debug */ +#define dprintk(format, args...) printk(KERN_DEBUG format, ##args) /* debug */ #else -#define dprintk(format,args...) +#define dprintk(format, args...) \ + do { if (0) printk(KERN_DEBUG format, ##args); } while (0) #endif #define STAT_FILE_NAME "mpc" /* Our statistic file's name */ @@ -52,42 +53,37 @@ static const struct file_operations mpc_file_operations = { /* * Returns the state of an ingress cache entry as a string */ -static const char *ingress_state_string(int state){ - switch(state) { +static const char *ingress_state_string(int state) +{ + switch (state) { case INGRESS_RESOLVING: return "resolving "; - break; case INGRESS_RESOLVED: return "resolved "; - break; case INGRESS_INVALID: return "invalid "; - break; case INGRESS_REFRESHING: return "refreshing "; - break; - default: - return ""; } + + return ""; } /* * Returns the state of an egress cache entry as a string */ -static const char *egress_state_string(int state){ - switch(state) { +static const char *egress_state_string(int state) +{ + switch (state) { case EGRESS_RESOLVED: return "resolved "; - break; case EGRESS_PURGE: return "purge "; - break; case EGRESS_INVALID: return "invalid "; - break; - default: - return ""; } + + return ""; } /* @@ -124,7 +120,6 @@ static void mpc_stop(struct seq_file *m, void *v) static int mpc_show(struct seq_file *m, void *v) { struct mpoa_client *mpc = v; - unsigned char *temp; int i; in_cache_entry *in_entry; eg_cache_entry *eg_entry; @@ -141,15 +136,17 @@ static int mpc_show(struct seq_file *m, void *v) do_gettimeofday(&now); for (in_entry = mpc->in_cache; in_entry; in_entry = in_entry->next) { - temp = (unsigned char *)&in_entry->ctrl_info.in_dst_ip; - sprintf(ip_string,"%d.%d.%d.%d", temp[0], temp[1], temp[2], temp[3]); + sprintf(ip_string, "%pI4", &in_entry->ctrl_info.in_dst_ip); seq_printf(m, "%-16s%s%-14lu%-12u", - ip_string, - ingress_state_string(in_entry->entry_state), - in_entry->ctrl_info.holding_time-(now.tv_sec-in_entry->tv.tv_sec), - in_entry->packets_fwded); + ip_string, + ingress_state_string(in_entry->entry_state), + in_entry->ctrl_info.holding_time - + (now.tv_sec-in_entry->tv.tv_sec), + in_entry->packets_fwded); if (in_entry->shortcut) - seq_printf(m, " %-3d %-3d",in_entry->shortcut->vpi,in_entry->shortcut->vci); + seq_printf(m, " %-3d %-3d", + in_entry->shortcut->vpi, + in_entry->shortcut->vci); seq_printf(m, "\n"); } @@ -157,21 +154,23 @@ static int mpc_show(struct seq_file *m, void *v) seq_printf(m, "Egress Entries:\nIngress MPC ATM addr\nCache-id State Holding time Packets recvd Latest IP addr VPI VCI\n"); for (eg_entry = mpc->eg_cache; eg_entry; eg_entry = eg_entry->next) { unsigned char *p = eg_entry->ctrl_info.in_MPC_data_ATM_addr; - for(i = 0; i < ATM_ESA_LEN; i++) + for (i = 0; i < ATM_ESA_LEN; i++) seq_printf(m, "%02x", p[i]); seq_printf(m, "\n%-16lu%s%-14lu%-15u", (unsigned long)ntohl(eg_entry->ctrl_info.cache_id), egress_state_string(eg_entry->entry_state), - (eg_entry->ctrl_info.holding_time-(now.tv_sec-eg_entry->tv.tv_sec)), + (eg_entry->ctrl_info.holding_time - + (now.tv_sec-eg_entry->tv.tv_sec)), 
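The %d.%d.%d.%d to %pI4 conversions in mpc_show() above use the kernel's extended vsprintf format for IPv4 addresses: pass a pointer to the __be32 and the core prints the dotted quad, which is what lets the patch drop the temporary byte pointer. In isolation (demo_print_ip() is an invented helper):

static void demo_print_ip(struct seq_file *m, __be32 ip)
{
	char buf[16];	/* "255.255.255.255" plus NUL */

	/* old: unsigned char *t = (unsigned char *)&ip;
	 *      sprintf(buf, "%d.%d.%d.%d", t[0], t[1], t[2], t[3]);
	 */
	sprintf(buf, "%pI4", &ip);	/* note: takes a pointer to the address */
	seq_printf(m, "%-16s", buf);
}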
eg_entry->packets_rcvd); /* latest IP address */ - temp = (unsigned char *)&eg_entry->latest_ip_addr; - sprintf(ip_string, "%d.%d.%d.%d", temp[0], temp[1], temp[2], temp[3]); + sprintf(ip_string, "%pI4", &eg_entry->latest_ip_addr); seq_printf(m, "%-16s", ip_string); if (eg_entry->shortcut) - seq_printf(m, " %-3d %-3d",eg_entry->shortcut->vpi,eg_entry->shortcut->vci); + seq_printf(m, " %-3d %-3d", + eg_entry->shortcut->vpi, + eg_entry->shortcut->vci); seq_printf(m, "\n"); } seq_printf(m, "\n"); @@ -290,10 +289,9 @@ int mpc_proc_init(void) */ void mpc_proc_clean(void) { - remove_proc_entry(STAT_FILE_NAME,atm_proc_root); + remove_proc_entry(STAT_FILE_NAME, atm_proc_root); } - #endif /* CONFIG_PROC_FS */ -- cgit v1.2.2 From d81219db6add0a176c37d6fe4e1c050778de9d2f Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 26 Jan 2010 11:40:12 +0000 Subject: net/atm/pppoatm.c: checkpatch cleanups Move embedded assigns out of tests Move trailing statements to new lines Move labels to column 1 Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/atm/pppoatm.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c index 62db6d71dbab..400839273c67 100644 --- a/net/atm/pppoatm.c +++ b/net/atm/pppoatm.c @@ -176,7 +176,8 @@ static void pppoatm_push(struct atm_vcc *atmvcc, struct sk_buff *skb) } ppp_input(&pvcc->chan, skb); return; - error: + +error: kfree_skb(skb); ppp_input_error(&pvcc->chan, 0); } @@ -209,7 +210,8 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb) goto nospace; } kfree_skb(skb); - if ((skb = n) == NULL) + skb = n; + if (skb == NULL) return DROP_PACKET; } else if (!atm_may_send(pvcc->atmvcc, skb->truesize)) goto nospace; @@ -231,7 +233,7 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb) skb, ATM_SKB(skb)->vcc, ATM_SKB(skb)->vcc->dev); return ATM_SKB(skb)->vcc->send(ATM_SKB(skb)->vcc, skb) ? DROP_PACKET : 1; - nospace: +nospace: /* * We don't have space to send this SKB now, but we might have * already applied SC_COMP_PROT compression, so may need to undo @@ -290,7 +292,8 @@ static int pppoatm_assign_vcc(struct atm_vcc *atmvcc, void __user *arg) (be.encaps == e_vc ? 0 : LLC_LEN); pvcc->wakeup_tasklet = tasklet_proto; pvcc->wakeup_tasklet.data = (unsigned long) &pvcc->chan; - if ((err = ppp_register_channel(&pvcc->chan)) != 0) { + err = ppp_register_channel(&pvcc->chan); + if (err != 0) { kfree(pvcc); return err; } -- cgit v1.2.2 From 07367adbe5f18b6b7a7476094181ff99cd90cb7e Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 26 Jan 2010 11:40:13 +0000 Subject: net/atm/proc.c: checkpatch cleanups Convert #include Signed-off-by: David S. 
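Two of the pppoatm.c changes just above deserve a standalone illustration (demo_register() and the fail label are invented; ppp_register_channel() is the real helper the driver calls): pulling the assignment out of the conditional so the test reads as a test, and leaving goto labels unindented, which checkpatch flags otherwise.

static int demo_register(struct ppp_channel *chan)
{
	int err;

	/* old: if ((err = ppp_register_channel(chan)) != 0) ... */
	err = ppp_register_channel(chan);
	if (err != 0)
		goto fail;
	return 0;

fail:					/* label starts in column 1 */
	return err;
}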
Miller --- net/atm/proc.c | 81 +++++++++++++++++++++++++++++++--------------------------- 1 file changed, 44 insertions(+), 37 deletions(-) (limited to 'net') diff --git a/net/atm/proc.c b/net/atm/proc.c index ab8419a324b6..476779d845eb 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -24,15 +24,15 @@ #include /* for __init */ #include #include -#include +#include +#include /* for HZ */ #include -#include /* for HZ */ #include "resources.h" #include "common.h" /* atm_proc_init prototype */ #include "signaling.h" /* to get sigd - ugly too */ -static ssize_t proc_dev_atm_read(struct file *file,char __user *buf,size_t count, - loff_t *pos); +static ssize_t proc_dev_atm_read(struct file *file, char __user *buf, + size_t count, loff_t *pos); static const struct file_operations proc_atm_dev_ops = { .owner = THIS_MODULE, @@ -43,9 +43,9 @@ static void add_stats(struct seq_file *seq, const char *aal, const struct k_atm_aal_stats *stats) { seq_printf(seq, "%s ( %d %d %d %d %d )", aal, - atomic_read(&stats->tx),atomic_read(&stats->tx_err), - atomic_read(&stats->rx),atomic_read(&stats->rx_err), - atomic_read(&stats->rx_drop)); + atomic_read(&stats->tx), atomic_read(&stats->tx_err), + atomic_read(&stats->rx), atomic_read(&stats->rx_err), + atomic_read(&stats->rx_drop)); } static void atm_dev_info(struct seq_file *seq, const struct atm_dev *dev) @@ -151,8 +151,8 @@ static void *vcc_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void pvc_info(struct seq_file *seq, struct atm_vcc *vcc) { - static const char *const class_name[] = - {"off","UBR","CBR","VBR","ABR"}; + static const char *const class_name[] = { + "off", "UBR", "CBR", "VBR", "ABR"}; static const char *const aal_name[] = { "---", "1", "2", "3/4", /* 0- 3 */ "???", "5", "???", "???", /* 4- 7 */ @@ -160,11 +160,12 @@ static void pvc_info(struct seq_file *seq, struct atm_vcc *vcc) "???", "0", "???", "???"}; /* 12-15 */ seq_printf(seq, "%3d %3d %5d %-3s %7d %-5s %7d %-6s", - vcc->dev->number,vcc->vpi,vcc->vci, - vcc->qos.aal >= ARRAY_SIZE(aal_name) ? "err" : - aal_name[vcc->qos.aal],vcc->qos.rxtp.min_pcr, - class_name[vcc->qos.rxtp.traffic_class],vcc->qos.txtp.min_pcr, - class_name[vcc->qos.txtp.traffic_class]); + vcc->dev->number, vcc->vpi, vcc->vci, + vcc->qos.aal >= ARRAY_SIZE(aal_name) ? 
"err" : + aal_name[vcc->qos.aal], vcc->qos.rxtp.min_pcr, + class_name[vcc->qos.rxtp.traffic_class], + vcc->qos.txtp.min_pcr, + class_name[vcc->qos.txtp.traffic_class]); if (test_bit(ATM_VF_IS_CLIP, &vcc->flags)) { struct clip_vcc *clip_vcc = CLIP_VCC(vcc); struct net_device *dev; @@ -195,19 +196,20 @@ static void vcc_info(struct seq_file *seq, struct atm_vcc *vcc) seq_printf(seq, "%3d %3d %5d ", vcc->dev->number, vcc->vpi, vcc->vci); switch (sk->sk_family) { - case AF_ATMPVC: - seq_printf(seq, "PVC"); - break; - case AF_ATMSVC: - seq_printf(seq, "SVC"); - break; - default: - seq_printf(seq, "%3d", sk->sk_family); + case AF_ATMPVC: + seq_printf(seq, "PVC"); + break; + case AF_ATMSVC: + seq_printf(seq, "SVC"); + break; + default: + seq_printf(seq, "%3d", sk->sk_family); } - seq_printf(seq, " %04lx %5d %7d/%7d %7d/%7d [%d]\n", vcc->flags, sk->sk_err, - sk_wmem_alloc_get(sk), sk->sk_sndbuf, - sk_rmem_alloc_get(sk), sk->sk_rcvbuf, - atomic_read(&sk->sk_refcnt)); + seq_printf(seq, " %04lx %5d %7d/%7d %7d/%7d [%d]\n", + vcc->flags, sk->sk_err, + sk_wmem_alloc_get(sk), sk->sk_sndbuf, + sk_rmem_alloc_get(sk), sk->sk_rcvbuf, + atomic_read(&sk->sk_refcnt)); } static void svc_info(struct seq_file *seq, struct atm_vcc *vcc) @@ -376,32 +378,35 @@ static ssize_t proc_dev_atm_read(struct file *file, char __user *buf, unsigned long page; int length; - if (count == 0) return 0; + if (count == 0) + return 0; page = get_zeroed_page(GFP_KERNEL); - if (!page) return -ENOMEM; + if (!page) + return -ENOMEM; dev = PDE(file->f_path.dentry->d_inode)->data; if (!dev->ops->proc_read) length = -EINVAL; else { - length = dev->ops->proc_read(dev,pos,(char *) page); - if (length > count) length = -EINVAL; + length = dev->ops->proc_read(dev, pos, (char *)page); + if (length > count) + length = -EINVAL; } if (length >= 0) { - if (copy_to_user(buf,(char *) page,length)) length = -EFAULT; + if (copy_to_user(buf, (char *)page, length)) + length = -EFAULT; (*pos)++; } free_page(page); return length; } - struct proc_dir_entry *atm_proc_root; EXPORT_SYMBOL(atm_proc_root); int atm_proc_dev_register(struct atm_dev *dev) { - int digits,num; + int digits, num; int error; /* No proc info */ @@ -410,26 +415,28 @@ int atm_proc_dev_register(struct atm_dev *dev) error = -ENOMEM; digits = 0; - for (num = dev->number; num; num /= 10) digits++; - if (!digits) digits++; + for (num = dev->number; num; num /= 10) + digits++; + if (!digits) + digits++; dev->proc_name = kmalloc(strlen(dev->type) + digits + 2, GFP_KERNEL); if (!dev->proc_name) goto err_out; - sprintf(dev->proc_name,"%s:%d",dev->type, dev->number); + sprintf(dev->proc_name, "%s:%d", dev->type, dev->number); dev->proc_entry = proc_create_data(dev->proc_name, 0, atm_proc_root, &proc_atm_dev_ops, dev); if (!dev->proc_entry) goto err_free_name; return 0; + err_free_name: kfree(dev->proc_name); err_out: return error; } - void atm_proc_dev_deregister(struct atm_dev *dev) { if (!dev->ops->proc_read) -- cgit v1.2.2 From 6b6dd498f409463635c4f29a3a49585bc7e5d43e Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 26 Jan 2010 11:40:14 +0000 Subject: net/atm/pvc.c: checkpatch cleanups Spacing cleanups Mostly 80 column wrapped. Move trailing statements to new lines Signed-off-by: Joe Perches Signed-off-by: David S. 
Miller --- net/atm/pvc.c | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/atm/pvc.c b/net/atm/pvc.c index 8d74e62b0d79..437ee70c5e62 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -17,32 +17,35 @@ #include "common.h" /* common for PVCs and SVCs */ -static int pvc_shutdown(struct socket *sock,int how) +static int pvc_shutdown(struct socket *sock, int how) { return 0; } - -static int pvc_bind(struct socket *sock,struct sockaddr *sockaddr, - int sockaddr_len) +static int pvc_bind(struct socket *sock, struct sockaddr *sockaddr, + int sockaddr_len) { struct sock *sk = sock->sk; struct sockaddr_atmpvc *addr; struct atm_vcc *vcc; int error; - if (sockaddr_len != sizeof(struct sockaddr_atmpvc)) return -EINVAL; - addr = (struct sockaddr_atmpvc *) sockaddr; - if (addr->sap_family != AF_ATMPVC) return -EAFNOSUPPORT; + if (sockaddr_len != sizeof(struct sockaddr_atmpvc)) + return -EINVAL; + addr = (struct sockaddr_atmpvc *)sockaddr; + if (addr->sap_family != AF_ATMPVC) + return -EAFNOSUPPORT; lock_sock(sk); vcc = ATM_SD(sock); if (!test_bit(ATM_VF_HASQOS, &vcc->flags)) { error = -EBADFD; goto out; } - if (test_bit(ATM_VF_PARTIAL,&vcc->flags)) { - if (vcc->vpi != ATM_VPI_UNSPEC) addr->sap_addr.vpi = vcc->vpi; - if (vcc->vci != ATM_VCI_UNSPEC) addr->sap_addr.vci = vcc->vci; + if (test_bit(ATM_VF_PARTIAL, &vcc->flags)) { + if (vcc->vpi != ATM_VPI_UNSPEC) + addr->sap_addr.vpi = vcc->vpi; + if (vcc->vci != ATM_VCI_UNSPEC) + addr->sap_addr.vci = vcc->vci; } error = vcc_connect(sock, addr->sap_addr.itf, addr->sap_addr.vpi, addr->sap_addr.vci); @@ -51,11 +54,10 @@ out: return error; } - -static int pvc_connect(struct socket *sock,struct sockaddr *sockaddr, - int sockaddr_len,int flags) +static int pvc_connect(struct socket *sock, struct sockaddr *sockaddr, + int sockaddr_len, int flags) { - return pvc_bind(sock,sockaddr,sockaddr_len); + return pvc_bind(sock, sockaddr, sockaddr_len); } static int pvc_setsockopt(struct socket *sock, int level, int optname, @@ -70,7 +72,6 @@ static int pvc_setsockopt(struct socket *sock, int level, int optname, return error; } - static int pvc_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { @@ -83,16 +84,16 @@ static int pvc_getsockopt(struct socket *sock, int level, int optname, return error; } - -static int pvc_getname(struct socket *sock,struct sockaddr *sockaddr, - int *sockaddr_len,int peer) +static int pvc_getname(struct socket *sock, struct sockaddr *sockaddr, + int *sockaddr_len, int peer) { struct sockaddr_atmpvc *addr; struct atm_vcc *vcc = ATM_SD(sock); - if (!vcc->dev || !test_bit(ATM_VF_ADDR,&vcc->flags)) return -ENOTCONN; + if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags)) + return -ENOTCONN; *sockaddr_len = sizeof(struct sockaddr_atmpvc); - addr = (struct sockaddr_atmpvc *) sockaddr; + addr = (struct sockaddr_atmpvc *)sockaddr; addr->sap_family = AF_ATMPVC; addr->sap_addr.itf = vcc->dev->number; addr->sap_addr.vpi = vcc->vpi; @@ -100,7 +101,6 @@ static int pvc_getname(struct socket *sock,struct sockaddr *sockaddr, return 0; } - static const struct proto_ops pvc_proto_ops = { .family = PF_ATMPVC, .owner = THIS_MODULE, @@ -137,7 +137,6 @@ static int pvc_create(struct net *net, struct socket *sock, int protocol, return vcc_create(net, sock, protocol, PF_ATMPVC); } - static const struct net_proto_family pvc_family_ops = { .family = PF_ATMPVC, .create = pvc_create, -- cgit v1.2.2 From fa61f0cac8b91e671eb0ba27a4972c7e72a909a6 Mon 
Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 26 Jan 2010 11:40:15 +0000 Subject: net/atm/raw.c: checkpatch cleanups Spacing cleanups Mostly 80 column wrapped. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/atm/raw.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/atm/raw.c b/net/atm/raw.c index fb8a9497653c..d0c4bd047dc4 100644 --- a/net/atm/raw.c +++ b/net/atm/raw.c @@ -18,7 +18,7 @@ * SKB == NULL indicates that the link is being closed */ -static void atm_push_raw(struct atm_vcc *vcc,struct sk_buff *skb) +static void atm_push_raw(struct atm_vcc *vcc, struct sk_buff *skb) { if (skb) { struct sock *sk = sk_atm(vcc); @@ -28,8 +28,7 @@ static void atm_push_raw(struct atm_vcc *vcc,struct sk_buff *skb) } } - -static void atm_pop_raw(struct atm_vcc *vcc,struct sk_buff *skb) +static void atm_pop_raw(struct atm_vcc *vcc, struct sk_buff *skb) { struct sock *sk = sk_atm(vcc); @@ -40,24 +39,22 @@ static void atm_pop_raw(struct atm_vcc *vcc,struct sk_buff *skb) sk->sk_write_space(sk); } - -static int atm_send_aal0(struct atm_vcc *vcc,struct sk_buff *skb) +static int atm_send_aal0(struct atm_vcc *vcc, struct sk_buff *skb) { /* * Note that if vpi/vci are _ANY or _UNSPEC the below will * still work */ if (!capable(CAP_NET_ADMIN) && - (((u32 *) skb->data)[0] & (ATM_HDR_VPI_MASK | ATM_HDR_VCI_MASK)) != - ((vcc->vpi << ATM_HDR_VPI_SHIFT) | (vcc->vci << ATM_HDR_VCI_SHIFT))) - { + (((u32 *)skb->data)[0] & (ATM_HDR_VPI_MASK | ATM_HDR_VCI_MASK)) != + ((vcc->vpi << ATM_HDR_VPI_SHIFT) | + (vcc->vci << ATM_HDR_VCI_SHIFT))) { kfree_skb(skb); return -EADDRNOTAVAIL; } - return vcc->dev->ops->send(vcc,skb); + return vcc->dev->ops->send(vcc, skb); } - int atm_init_aal0(struct atm_vcc *vcc) { vcc->push = atm_push_raw; @@ -67,7 +64,6 @@ int atm_init_aal0(struct atm_vcc *vcc) return 0; } - int atm_init_aal34(struct atm_vcc *vcc) { vcc->push = atm_push_raw; @@ -77,7 +73,6 @@ int atm_init_aal34(struct atm_vcc *vcc) return 0; } - int atm_init_aal5(struct atm_vcc *vcc) { vcc->push = atm_push_raw; @@ -86,6 +81,4 @@ int atm_init_aal5(struct atm_vcc *vcc) vcc->send = vcc->dev->ops->send; return 0; } - - EXPORT_SYMBOL(atm_init_aal5); -- cgit v1.2.2 From 07b54c9ad4804bdd546d3db0889b0db381c726e3 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 26 Jan 2010 11:40:16 +0000 Subject: net/atm/resources.c: checkpatch cleanups Spacing cleanups Mostly 80 column wrapped. Move embedded assigns out of tests Move trailing statements to new lines switch/case cleanups Signed-off-by: Joe Perches Signed-off-by: David S. 
Miller --- net/atm/resources.c | 390 ++++++++++++++++++++++++++-------------------------- 1 file changed, 192 insertions(+), 198 deletions(-) (limited to 'net') diff --git a/net/atm/resources.c b/net/atm/resources.c index 0d4c0ee090db..447ed89205d8 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -71,7 +71,7 @@ struct atm_dev *atm_dev_lookup(int number) mutex_unlock(&atm_dev_mutex); return dev; } - +EXPORT_SYMBOL(atm_dev_lookup); struct atm_dev *atm_dev_register(const char *type, const struct atmdev_ops *ops, int number, unsigned long *flags) @@ -85,7 +85,8 @@ struct atm_dev *atm_dev_register(const char *type, const struct atmdev_ops *ops, } mutex_lock(&atm_dev_mutex); if (number != -1) { - if ((inuse = __atm_dev_lookup(number))) { + inuse = __atm_dev_lookup(number); + if (inuse) { atm_dev_put(inuse); mutex_unlock(&atm_dev_mutex); kfree(dev); @@ -130,7 +131,7 @@ out_fail: dev = NULL; goto out; } - +EXPORT_SYMBOL(atm_dev_register); void atm_dev_deregister(struct atm_dev *dev) { @@ -152,7 +153,7 @@ void atm_dev_deregister(struct atm_dev *dev) atm_dev_put(dev); } - +EXPORT_SYMBOL(atm_dev_deregister); static void copy_aal_stats(struct k_atm_aal_stats *from, struct atm_aal_stats *to) @@ -162,7 +163,6 @@ static void copy_aal_stats(struct k_atm_aal_stats *from, #undef __HANDLE_ITEM } - static void subtract_aal_stats(struct k_atm_aal_stats *from, struct atm_aal_stats *to) { @@ -171,8 +171,8 @@ static void subtract_aal_stats(struct k_atm_aal_stats *from, #undef __HANDLE_ITEM } - -static int fetch_stats(struct atm_dev *dev, struct atm_dev_stats __user *arg, int zero) +static int fetch_stats(struct atm_dev *dev, struct atm_dev_stats __user *arg, + int zero) { struct atm_dev_stats tmp; int error = 0; @@ -190,7 +190,6 @@ static int fetch_stats(struct atm_dev *dev, struct atm_dev_stats __user *arg, in return error ? -EFAULT : 0; } - int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat) { void __user *buf; @@ -206,50 +205,49 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat) #endif switch (cmd) { - case ATM_GETNAMES: - - if (compat) { + case ATM_GETNAMES: + if (compat) { #ifdef CONFIG_COMPAT - struct compat_atm_iobuf __user *ciobuf = arg; - compat_uptr_t cbuf; - iobuf_len = &ciobuf->length; - if (get_user(cbuf, &ciobuf->buffer)) - return -EFAULT; - buf = compat_ptr(cbuf); + struct compat_atm_iobuf __user *ciobuf = arg; + compat_uptr_t cbuf; + iobuf_len = &ciobuf->length; + if (get_user(cbuf, &ciobuf->buffer)) + return -EFAULT; + buf = compat_ptr(cbuf); #endif - } else { - struct atm_iobuf __user *iobuf = arg; - iobuf_len = &iobuf->length; - if (get_user(buf, &iobuf->buffer)) - return -EFAULT; - } - if (get_user(len, iobuf_len)) + } else { + struct atm_iobuf __user *iobuf = arg; + iobuf_len = &iobuf->length; + if (get_user(buf, &iobuf->buffer)) return -EFAULT; - mutex_lock(&atm_dev_mutex); - list_for_each(p, &atm_devs) - size += sizeof(int); - if (size > len) { - mutex_unlock(&atm_dev_mutex); - return -E2BIG; - } - tmp_buf = kmalloc(size, GFP_ATOMIC); - if (!tmp_buf) { - mutex_unlock(&atm_dev_mutex); - return -ENOMEM; - } - tmp_p = tmp_buf; - list_for_each(p, &atm_devs) { - dev = list_entry(p, struct atm_dev, dev_list); - *tmp_p++ = dev->number; - } + } + if (get_user(len, iobuf_len)) + return -EFAULT; + mutex_lock(&atm_dev_mutex); + list_for_each(p, &atm_devs) + size += sizeof(int); + if (size > len) { mutex_unlock(&atm_dev_mutex); - error = ((copy_to_user(buf, tmp_buf, size)) || - put_user(size, iobuf_len)) - ? 
-EFAULT : 0; - kfree(tmp_buf); - return error; - default: - break; + return -E2BIG; + } + tmp_buf = kmalloc(size, GFP_ATOMIC); + if (!tmp_buf) { + mutex_unlock(&atm_dev_mutex); + return -ENOMEM; + } + tmp_p = tmp_buf; + list_for_each(p, &atm_devs) { + dev = list_entry(p, struct atm_dev, dev_list); + *tmp_p++ = dev->number; + } + mutex_unlock(&atm_dev_mutex); + error = ((copy_to_user(buf, tmp_buf, size)) || + put_user(size, iobuf_len)) + ? -EFAULT : 0; + kfree(tmp_buf); + return error; + default: + break; } if (compat) { @@ -278,166 +276,167 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat) if (get_user(number, &sioc->number)) return -EFAULT; } - if (!(dev = try_then_request_module(atm_dev_lookup(number), - "atm-device-%d", number))) + + dev = try_then_request_module(atm_dev_lookup(number), "atm-device-%d", + number); + if (!dev) return -ENODEV; switch (cmd) { - case ATM_GETTYPE: - size = strlen(dev->type) + 1; - if (copy_to_user(buf, dev->type, size)) { - error = -EFAULT; - goto done; - } - break; - case ATM_GETESI: - size = ESI_LEN; - if (copy_to_user(buf, dev->esi, size)) { - error = -EFAULT; - goto done; - } - break; - case ATM_SETESI: - { - int i; - - for (i = 0; i < ESI_LEN; i++) - if (dev->esi[i]) { - error = -EEXIST; - goto done; - } - } - /* fall through */ - case ATM_SETESIF: - { - unsigned char esi[ESI_LEN]; - - if (!capable(CAP_NET_ADMIN)) { - error = -EPERM; - goto done; - } - if (copy_from_user(esi, buf, ESI_LEN)) { - error = -EFAULT; - goto done; - } - memcpy(dev->esi, esi, ESI_LEN); - error = ESI_LEN; - goto done; - } - case ATM_GETSTATZ: - if (!capable(CAP_NET_ADMIN)) { - error = -EPERM; - goto done; - } - /* fall through */ - case ATM_GETSTAT: - size = sizeof(struct atm_dev_stats); - error = fetch_stats(dev, buf, cmd == ATM_GETSTATZ); - if (error) - goto done; - break; - case ATM_GETCIRANGE: - size = sizeof(struct atm_cirange); - if (copy_to_user(buf, &dev->ci_range, size)) { - error = -EFAULT; - goto done; - } - break; - case ATM_GETLINKRATE: - size = sizeof(int); - if (copy_to_user(buf, &dev->link_rate, size)) { - error = -EFAULT; - goto done; - } - break; - case ATM_RSTADDR: - if (!capable(CAP_NET_ADMIN)) { - error = -EPERM; - goto done; - } - atm_reset_addr(dev, ATM_ADDR_LOCAL); - break; - case ATM_ADDADDR: - case ATM_DELADDR: - case ATM_ADDLECSADDR: - case ATM_DELLECSADDR: - if (!capable(CAP_NET_ADMIN)) { - error = -EPERM; - goto done; - } - { - struct sockaddr_atmsvc addr; - - if (copy_from_user(&addr, buf, sizeof(addr))) { - error = -EFAULT; - goto done; - } - if (cmd == ATM_ADDADDR || cmd == ATM_ADDLECSADDR) - error = atm_add_addr(dev, &addr, - (cmd == ATM_ADDADDR ? - ATM_ADDR_LOCAL : ATM_ADDR_LECS)); - else - error = atm_del_addr(dev, &addr, - (cmd == ATM_DELADDR ? - ATM_ADDR_LOCAL : ATM_ADDR_LECS)); + case ATM_GETTYPE: + size = strlen(dev->type) + 1; + if (copy_to_user(buf, dev->type, size)) { + error = -EFAULT; + goto done; + } + break; + case ATM_GETESI: + size = ESI_LEN; + if (copy_to_user(buf, dev->esi, size)) { + error = -EFAULT; + goto done; + } + break; + case ATM_SETESI: + { + int i; + + for (i = 0; i < ESI_LEN; i++) + if (dev->esi[i]) { + error = -EEXIST; goto done; } - case ATM_GETADDR: - case ATM_GETLECSADDR: - error = atm_get_addr(dev, buf, len, - (cmd == ATM_GETADDR ? 
+ } + /* fall through */ + case ATM_SETESIF: + { + unsigned char esi[ESI_LEN]; + + if (!capable(CAP_NET_ADMIN)) { + error = -EPERM; + goto done; + } + if (copy_from_user(esi, buf, ESI_LEN)) { + error = -EFAULT; + goto done; + } + memcpy(dev->esi, esi, ESI_LEN); + error = ESI_LEN; + goto done; + } + case ATM_GETSTATZ: + if (!capable(CAP_NET_ADMIN)) { + error = -EPERM; + goto done; + } + /* fall through */ + case ATM_GETSTAT: + size = sizeof(struct atm_dev_stats); + error = fetch_stats(dev, buf, cmd == ATM_GETSTATZ); + if (error) + goto done; + break; + case ATM_GETCIRANGE: + size = sizeof(struct atm_cirange); + if (copy_to_user(buf, &dev->ci_range, size)) { + error = -EFAULT; + goto done; + } + break; + case ATM_GETLINKRATE: + size = sizeof(int); + if (copy_to_user(buf, &dev->link_rate, size)) { + error = -EFAULT; + goto done; + } + break; + case ATM_RSTADDR: + if (!capable(CAP_NET_ADMIN)) { + error = -EPERM; + goto done; + } + atm_reset_addr(dev, ATM_ADDR_LOCAL); + break; + case ATM_ADDADDR: + case ATM_DELADDR: + case ATM_ADDLECSADDR: + case ATM_DELLECSADDR: + { + struct sockaddr_atmsvc addr; + + if (!capable(CAP_NET_ADMIN)) { + error = -EPERM; + goto done; + } + + if (copy_from_user(&addr, buf, sizeof(addr))) { + error = -EFAULT; + goto done; + } + if (cmd == ATM_ADDADDR || cmd == ATM_ADDLECSADDR) + error = atm_add_addr(dev, &addr, + (cmd == ATM_ADDADDR ? ATM_ADDR_LOCAL : ATM_ADDR_LECS)); - if (error < 0) - goto done; - size = error; - /* may return 0, but later on size == 0 means "don't - write the length" */ - error = put_user(size, sioc_len) - ? -EFAULT : 0; + else + error = atm_del_addr(dev, &addr, + (cmd == ATM_DELADDR ? + ATM_ADDR_LOCAL : ATM_ADDR_LECS)); + goto done; + } + case ATM_GETADDR: + case ATM_GETLECSADDR: + error = atm_get_addr(dev, buf, len, + (cmd == ATM_GETADDR ? + ATM_ADDR_LOCAL : ATM_ADDR_LECS)); + if (error < 0) + goto done; + size = error; + /* may return 0, but later on size == 0 means "don't + write the length" */ + error = put_user(size, sioc_len) ? -EFAULT : 0; + goto done; + case ATM_SETLOOP: + if (__ATM_LM_XTRMT((int) (unsigned long) buf) && + __ATM_LM_XTLOC((int) (unsigned long) buf) > + __ATM_LM_XTRMT((int) (unsigned long) buf)) { + error = -EINVAL; goto done; - case ATM_SETLOOP: - if (__ATM_LM_XTRMT((int) (unsigned long) buf) && - __ATM_LM_XTLOC((int) (unsigned long) buf) > - __ATM_LM_XTRMT((int) (unsigned long) buf)) { + } + /* fall through */ + case ATM_SETCIRANGE: + case SONET_GETSTATZ: + case SONET_SETDIAG: + case SONET_CLRDIAG: + case SONET_SETFRAMING: + if (!capable(CAP_NET_ADMIN)) { + error = -EPERM; + goto done; + } + /* fall through */ + default: + if (compat) { +#ifdef CONFIG_COMPAT + if (!dev->ops->compat_ioctl) { error = -EINVAL; goto done; } - /* fall through */ - case ATM_SETCIRANGE: - case SONET_GETSTATZ: - case SONET_SETDIAG: - case SONET_CLRDIAG: - case SONET_SETFRAMING: - if (!capable(CAP_NET_ADMIN)) { - error = -EPERM; - goto done; - } - /* fall through */ - default: - if (compat) { -#ifdef CONFIG_COMPAT - if (!dev->ops->compat_ioctl) { - error = -EINVAL; - goto done; - } - size = dev->ops->compat_ioctl(dev, cmd, buf); + size = dev->ops->compat_ioctl(dev, cmd, buf); #endif - } else { - if (!dev->ops->ioctl) { - error = -EINVAL; - goto done; - } - size = dev->ops->ioctl(dev, cmd, buf); - } - if (size < 0) { - error = (size == -ENOIOCTLCMD ? -EINVAL : size); + } else { + if (!dev->ops->ioctl) { + error = -EINVAL; goto done; } + size = dev->ops->ioctl(dev, cmd, buf); + } + if (size < 0) { + error = (size == -ENOIOCTLCMD ? 
-EINVAL : size); + goto done; + } } if (size) - error = put_user(size, sioc_len) - ? -EFAULT : 0; + error = put_user(size, sioc_len) ? -EFAULT : 0; else error = 0; done: @@ -445,7 +444,7 @@ done: return error; } -static __inline__ void *dev_get_idx(loff_t left) +static inline void *dev_get_idx(loff_t left) { struct list_head *p; @@ -474,8 +473,3 @@ void *atm_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) ? atm_devs.next : ((struct list_head *)v)->next; return (v == &atm_devs) ? NULL : v; } - - -EXPORT_SYMBOL(atm_dev_register); -EXPORT_SYMBOL(atm_dev_deregister); -EXPORT_SYMBOL(atm_dev_lookup); -- cgit v1.2.2 From 0ec96e656fcd4a8ea8cb0e92f90ab4bb6f79cada Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 26 Jan 2010 11:40:17 +0000 Subject: net/atm/signaling.c: checkpatch cleanups Mostly 80 column wrapped. Move embedded assigns out of tests Move trailing statements to new lines switch/case cleanups Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/atm/signaling.c | 202 ++++++++++++++++++++++++++-------------------------- 1 file changed, 101 insertions(+), 101 deletions(-) (limited to 'net') diff --git a/net/atm/signaling.c b/net/atm/signaling.c index 28df4edf9ca4..ad1d28ae512b 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -18,7 +18,6 @@ #include "resources.h" #include "signaling.h" - #undef WAIT_FOR_DEMON /* #define this if system calls on SVC sockets should block until the demon runs. Danger: may cause nasty hangs if the demon @@ -29,20 +28,19 @@ struct atm_vcc *sigd = NULL; static DECLARE_WAIT_QUEUE_HEAD(sigd_sleep); #endif - static void sigd_put_skb(struct sk_buff *skb) { #ifdef WAIT_FOR_DEMON - DECLARE_WAITQUEUE(wait,current); + DECLARE_WAITQUEUE(wait, current); - add_wait_queue(&sigd_sleep,&wait); + add_wait_queue(&sigd_sleep, &wait); while (!sigd) { set_current_state(TASK_UNINTERRUPTIBLE); pr_debug("atmsvc: waiting for signaling daemon...\n"); schedule(); } current->state = TASK_RUNNING; - remove_wait_queue(&sigd_sleep,&wait); + remove_wait_queue(&sigd_sleep, &wait); #else if (!sigd) { pr_debug("atmsvc: no signaling daemon\n"); @@ -50,39 +48,39 @@ static void sigd_put_skb(struct sk_buff *skb) return; } #endif - atm_force_charge(sigd,skb->truesize); - skb_queue_tail(&sk_atm(sigd)->sk_receive_queue,skb); + atm_force_charge(sigd, skb->truesize); + skb_queue_tail(&sk_atm(sigd)->sk_receive_queue, skb); sk_atm(sigd)->sk_data_ready(sk_atm(sigd), skb->len); } - -static void modify_qos(struct atm_vcc *vcc,struct atmsvc_msg *msg) +static void modify_qos(struct atm_vcc *vcc, struct atmsvc_msg *msg) { struct sk_buff *skb; - if (test_bit(ATM_VF_RELEASED,&vcc->flags) || - !test_bit(ATM_VF_READY,&vcc->flags)) + if (test_bit(ATM_VF_RELEASED, &vcc->flags) || + !test_bit(ATM_VF_READY, &vcc->flags)) return; msg->type = as_error; - if (!vcc->dev->ops->change_qos) msg->reply = -EOPNOTSUPP; + if (!vcc->dev->ops->change_qos) + msg->reply = -EOPNOTSUPP; else { /* should lock VCC */ - msg->reply = vcc->dev->ops->change_qos(vcc,&msg->qos, - msg->reply); - if (!msg->reply) msg->type = as_okay; + msg->reply = vcc->dev->ops->change_qos(vcc, &msg->qos, + msg->reply); + if (!msg->reply) + msg->type = as_okay; } /* * Should probably just turn around the old skb. But the, the buffer * space accounting needs to follow the change too. Maybe later. 
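Most of the churn in the resources.c hunks above, and in the signaling.c ones that follow, is re-indentation to the kernel's switch style: case labels sit at the same column as the switch keyword, one indent level less than their statements, and intentional fall-throughs are commented. In miniature (the values and strings are made up):

static const char *demo_family_name(int family)
{
	switch (family) {
	case 0:				/* same column as "switch" */
		return "pvc";
	case 1:
		/* fall through */
	case 2:
		return "svc";
	default:
		return "unknown";
	}
}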
*/ - while (!(skb = alloc_skb(sizeof(struct atmsvc_msg),GFP_KERNEL))) + while (!(skb = alloc_skb(sizeof(struct atmsvc_msg), GFP_KERNEL))) schedule(); - *(struct atmsvc_msg *) skb_put(skb,sizeof(struct atmsvc_msg)) = *msg; + *(struct atmsvc_msg *)skb_put(skb, sizeof(struct atmsvc_msg)) = *msg; sigd_put_skb(skb); } - -static int sigd_send(struct atm_vcc *vcc,struct sk_buff *skb) +static int sigd_send(struct atm_vcc *vcc, struct sk_buff *skb) { struct atmsvc_msg *msg; struct atm_vcc *session_vcc; @@ -95,63 +93,64 @@ static int sigd_send(struct atm_vcc *vcc,struct sk_buff *skb) sk = sk_atm(vcc); switch (msg->type) { - case as_okay: - sk->sk_err = -msg->reply; - clear_bit(ATM_VF_WAITING, &vcc->flags); - if (!*vcc->local.sas_addr.prv && - !*vcc->local.sas_addr.pub) { - vcc->local.sas_family = AF_ATMSVC; - memcpy(vcc->local.sas_addr.prv, - msg->local.sas_addr.prv,ATM_ESA_LEN); - memcpy(vcc->local.sas_addr.pub, - msg->local.sas_addr.pub,ATM_E164_LEN+1); - } - session_vcc = vcc->session ? vcc->session : vcc; - if (session_vcc->vpi || session_vcc->vci) break; - session_vcc->itf = msg->pvc.sap_addr.itf; - session_vcc->vpi = msg->pvc.sap_addr.vpi; - session_vcc->vci = msg->pvc.sap_addr.vci; - if (session_vcc->vpi || session_vcc->vci) - session_vcc->qos = msg->qos; - break; - case as_error: - clear_bit(ATM_VF_REGIS,&vcc->flags); - clear_bit(ATM_VF_READY,&vcc->flags); - sk->sk_err = -msg->reply; - clear_bit(ATM_VF_WAITING, &vcc->flags); + case as_okay: + sk->sk_err = -msg->reply; + clear_bit(ATM_VF_WAITING, &vcc->flags); + if (!*vcc->local.sas_addr.prv && !*vcc->local.sas_addr.pub) { + vcc->local.sas_family = AF_ATMSVC; + memcpy(vcc->local.sas_addr.prv, + msg->local.sas_addr.prv, ATM_ESA_LEN); + memcpy(vcc->local.sas_addr.pub, + msg->local.sas_addr.pub, ATM_E164_LEN + 1); + } + session_vcc = vcc->session ? 
vcc->session : vcc; + if (session_vcc->vpi || session_vcc->vci) break; - case as_indicate: - vcc = *(struct atm_vcc **) &msg->listen_vcc; - sk = sk_atm(vcc); - pr_debug("as_indicate!!!\n"); - lock_sock(sk); - if (sk_acceptq_is_full(sk)) { - sigd_enq(NULL,as_reject,vcc,NULL,NULL); - dev_kfree_skb(skb); - goto as_indicate_complete; - } - sk->sk_ack_backlog++; - skb_queue_tail(&sk->sk_receive_queue, skb); - pr_debug("waking sk->sk_sleep 0x%p\n", sk->sk_sleep); - sk->sk_state_change(sk); + session_vcc->itf = msg->pvc.sap_addr.itf; + session_vcc->vpi = msg->pvc.sap_addr.vpi; + session_vcc->vci = msg->pvc.sap_addr.vci; + if (session_vcc->vpi || session_vcc->vci) + session_vcc->qos = msg->qos; + break; + case as_error: + clear_bit(ATM_VF_REGIS, &vcc->flags); + clear_bit(ATM_VF_READY, &vcc->flags); + sk->sk_err = -msg->reply; + clear_bit(ATM_VF_WAITING, &vcc->flags); + break; + case as_indicate: + vcc = *(struct atm_vcc **)&msg->listen_vcc; + sk = sk_atm(vcc); + pr_debug("as_indicate!!!\n"); + lock_sock(sk); + if (sk_acceptq_is_full(sk)) { + sigd_enq(NULL, as_reject, vcc, NULL, NULL); + dev_kfree_skb(skb); + goto as_indicate_complete; + } + sk->sk_ack_backlog++; + skb_queue_tail(&sk->sk_receive_queue, skb); + pr_debug("waking sk->sk_sleep 0x%p\n", sk->sk_sleep); + sk->sk_state_change(sk); as_indicate_complete: - release_sock(sk); - return 0; - case as_close: - set_bit(ATM_VF_RELEASED,&vcc->flags); - vcc_release_async(vcc, msg->reply); - goto out; - case as_modify: - modify_qos(vcc,msg); - break; - case as_addparty: - case as_dropparty: - sk->sk_err_soft = msg->reply; /* < 0 failure, otherwise ep_ref */ - clear_bit(ATM_VF_WAITING, &vcc->flags); - break; - default: - pr_alert("bad message type %d\n", (int)msg->type); - return -EINVAL; + release_sock(sk); + return 0; + case as_close: + set_bit(ATM_VF_RELEASED, &vcc->flags); + vcc_release_async(vcc, msg->reply); + goto out; + case as_modify: + modify_qos(vcc, msg); + break; + case as_addparty: + case as_dropparty: + sk->sk_err_soft = msg->reply; + /* < 0 failure, otherwise ep_ref */ + clear_bit(ATM_VF_WAITING, &vcc->flags); + break; + default: + pr_alert("bad message type %d\n", (int)msg->type); + return -EINVAL; } sk->sk_state_change(sk); out: @@ -159,48 +158,52 @@ out: return 0; } - -void sigd_enq2(struct atm_vcc *vcc,enum atmsvc_msg_type type, - struct atm_vcc *listen_vcc,const struct sockaddr_atmpvc *pvc, - const struct sockaddr_atmsvc *svc,const struct atm_qos *qos,int reply) +void sigd_enq2(struct atm_vcc *vcc, enum atmsvc_msg_type type, + struct atm_vcc *listen_vcc, const struct sockaddr_atmpvc *pvc, + const struct sockaddr_atmsvc *svc, const struct atm_qos *qos, + int reply) { struct sk_buff *skb; struct atmsvc_msg *msg; static unsigned session = 0; pr_debug("%d (0x%p)\n", (int)type, vcc); - while (!(skb = alloc_skb(sizeof(struct atmsvc_msg),GFP_KERNEL))) + while (!(skb = alloc_skb(sizeof(struct atmsvc_msg), GFP_KERNEL))) schedule(); - msg = (struct atmsvc_msg *) skb_put(skb,sizeof(struct atmsvc_msg)); - memset(msg,0,sizeof(*msg)); + msg = (struct atmsvc_msg *)skb_put(skb, sizeof(struct atmsvc_msg)); + memset(msg, 0, sizeof(*msg)); msg->type = type; *(struct atm_vcc **) &msg->vcc = vcc; *(struct atm_vcc **) &msg->listen_vcc = listen_vcc; msg->reply = reply; - if (qos) msg->qos = *qos; - if (vcc) msg->sap = vcc->sap; - if (svc) msg->svc = *svc; - if (vcc) msg->local = vcc->local; - if (pvc) msg->pvc = *pvc; + if (qos) + msg->qos = *qos; + if (vcc) + msg->sap = vcc->sap; + if (svc) + msg->svc = *svc; + if (vcc) + msg->local = vcc->local; + if 
(pvc) + msg->pvc = *pvc; if (vcc) { if (type == as_connect && test_bit(ATM_VF_SESSION, &vcc->flags)) msg->session = ++session; /* every new pmp connect gets the next session number */ } sigd_put_skb(skb); - if (vcc) set_bit(ATM_VF_REGIS,&vcc->flags); + if (vcc) + set_bit(ATM_VF_REGIS, &vcc->flags); } - -void sigd_enq(struct atm_vcc *vcc,enum atmsvc_msg_type type, - struct atm_vcc *listen_vcc,const struct sockaddr_atmpvc *pvc, - const struct sockaddr_atmsvc *svc) +void sigd_enq(struct atm_vcc *vcc, enum atmsvc_msg_type type, + struct atm_vcc *listen_vcc, const struct sockaddr_atmpvc *pvc, + const struct sockaddr_atmsvc *svc) { - sigd_enq2(vcc,type,listen_vcc,pvc,svc,vcc ? &vcc->qos : NULL,0); + sigd_enq2(vcc, type, listen_vcc, pvc, svc, vcc ? &vcc->qos : NULL, 0); /* other ISP applications may use "reply" */ } - static void purge_vcc(struct atm_vcc *vcc) { if (sk_atm(vcc)->sk_family == PF_ATMSVC && @@ -211,7 +214,6 @@ static void purge_vcc(struct atm_vcc *vcc) } } - static void sigd_close(struct atm_vcc *vcc) { struct hlist_node *node; @@ -225,7 +227,7 @@ static void sigd_close(struct atm_vcc *vcc) skb_queue_purge(&sk_atm(vcc)->sk_receive_queue); read_lock(&vcc_sklist_lock); - for(i = 0; i < VCC_HTABLE_SIZE; ++i) { + for (i = 0; i < VCC_HTABLE_SIZE; ++i) { struct hlist_head *head = &vcc_hash[i]; sk_for_each(s, node, head) { @@ -237,13 +239,11 @@ static void sigd_close(struct atm_vcc *vcc) read_unlock(&vcc_sklist_lock); } - static struct atmdev_ops sigd_dev_ops = { .close = sigd_close, .send = sigd_send }; - static struct atm_dev sigd_dev = { .ops = &sigd_dev_ops, .type = "sig", @@ -251,16 +251,16 @@ static struct atm_dev sigd_dev = { .lock = __SPIN_LOCK_UNLOCKED(sigd_dev.lock) }; - int sigd_attach(struct atm_vcc *vcc) { - if (sigd) return -EADDRINUSE; + if (sigd) + return -EADDRINUSE; pr_debug("\n"); sigd = vcc; vcc->dev = &sigd_dev; vcc_insert_socket(sk_atm(vcc)); - set_bit(ATM_VF_META,&vcc->flags); - set_bit(ATM_VF_READY,&vcc->flags); + set_bit(ATM_VF_META, &vcc->flags); + set_bit(ATM_VF_READY, &vcc->flags); #ifdef WAIT_FOR_DEMON wake_up(&sigd_sleep); #endif -- cgit v1.2.2 From b7d9371befd6fa3eb0ad9d574860095642bf70ed Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 26 Jan 2010 11:40:18 +0000 Subject: net/atm/svc.c: checkpatch cleanups Convert #include Signed-off-by: David S. 
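The svc.c patch that follows mostly re-wraps one sleeping idiom that nearly every function there repeats; stripped to its shape (demo_wait_on_flag() is invented, the calls are the real wait-queue API used in the diff, and sk->sk_sleep is still a plain wait_queue_head_t pointer in this kernel):

static void demo_wait_on_flag(struct sock *sk, unsigned long *flags, int bit)
{
	DEFINE_WAIT(wait);

	prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE);
	while (test_bit(bit, flags)) {
		schedule();
		prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE);
	}
	finish_wait(sk->sk_sleep, &wait);
}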
Miller --- net/atm/svc.c | 249 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 125 insertions(+), 124 deletions(-) (limited to 'net') diff --git a/net/atm/svc.c b/net/atm/svc.c index 251ddbc42e4b..3ba9a45a51ac 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -19,14 +19,15 @@ #include #include #include /* for sock_no_* */ -#include +#include #include "resources.h" #include "common.h" /* common for PVCs and SVCs */ #include "signaling.h" #include "addr.h" -static int svc_create(struct net *net, struct socket *sock, int protocol, int kern); +static int svc_create(struct net *net, struct socket *sock, int protocol, + int kern); /* * Note: since all this is still nicely synchronized with the signaling demon, @@ -35,25 +36,25 @@ static int svc_create(struct net *net, struct socket *sock, int protocol, int ke */ -static int svc_shutdown(struct socket *sock,int how) +static int svc_shutdown(struct socket *sock, int how) { return 0; } - static void svc_disconnect(struct atm_vcc *vcc) { DEFINE_WAIT(wait); struct sk_buff *skb; struct sock *sk = sk_atm(vcc); - pr_debug("%p\n",vcc); - if (test_bit(ATM_VF_REGIS,&vcc->flags)) { + pr_debug("%p\n", vcc); + if (test_bit(ATM_VF_REGIS, &vcc->flags)) { prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); - sigd_enq(vcc,as_close,NULL,NULL,NULL); - while (!test_bit(ATM_VF_RELEASED,&vcc->flags) && sigd) { + sigd_enq(vcc, as_close, NULL, NULL, NULL); + while (!test_bit(ATM_VF_RELEASED, &vcc->flags) && sigd) { schedule(); - prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(sk->sk_sleep, &wait, + TASK_UNINTERRUPTIBLE); } finish_wait(sk->sk_sleep, &wait); } @@ -62,35 +63,35 @@ static void svc_disconnect(struct atm_vcc *vcc) while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { atm_return(vcc, skb->truesize); pr_debug("LISTEN REL\n"); - sigd_enq2(NULL,as_reject,vcc,NULL,NULL,&vcc->qos,0); + sigd_enq2(NULL, as_reject, vcc, NULL, NULL, &vcc->qos, 0); dev_kfree_skb(skb); } clear_bit(ATM_VF_REGIS, &vcc->flags); /* ... 
may retry later */ } - static int svc_release(struct socket *sock) { struct sock *sk = sock->sk; struct atm_vcc *vcc; - if (sk) { + if (sk) { vcc = ATM_SD(sock); pr_debug("%p\n", vcc); clear_bit(ATM_VF_READY, &vcc->flags); - /* VCC pointer is used as a reference, so we must not free it - (thereby subjecting it to re-use) before all pending connections - are closed */ + /* + * VCC pointer is used as a reference, + * so we must not free it (thereby subjecting it to re-use) + * before all pending connections are closed + */ svc_disconnect(vcc); vcc_release(sock); } return 0; } - -static int svc_bind(struct socket *sock,struct sockaddr *sockaddr, - int sockaddr_len) +static int svc_bind(struct socket *sock, struct sockaddr *sockaddr, + int sockaddr_len) { DEFINE_WAIT(wait); struct sock *sk = sock->sk; @@ -115,38 +116,37 @@ static int svc_bind(struct socket *sock,struct sockaddr *sockaddr, error = -EAFNOSUPPORT; goto out; } - clear_bit(ATM_VF_BOUND,&vcc->flags); + clear_bit(ATM_VF_BOUND, &vcc->flags); /* failing rebind will kill old binding */ /* @@@ check memory (de)allocation on rebind */ - if (!test_bit(ATM_VF_HASQOS,&vcc->flags)) { + if (!test_bit(ATM_VF_HASQOS, &vcc->flags)) { error = -EBADFD; goto out; } vcc->local = *addr; set_bit(ATM_VF_WAITING, &vcc->flags); prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); - sigd_enq(vcc,as_bind,NULL,NULL,&vcc->local); + sigd_enq(vcc, as_bind, NULL, NULL, &vcc->local); while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { schedule(); prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); } finish_wait(sk->sk_sleep, &wait); - clear_bit(ATM_VF_REGIS,&vcc->flags); /* doesn't count */ + clear_bit(ATM_VF_REGIS, &vcc->flags); /* doesn't count */ if (!sigd) { error = -EUNATCH; goto out; } if (!sk->sk_err) - set_bit(ATM_VF_BOUND,&vcc->flags); + set_bit(ATM_VF_BOUND, &vcc->flags); error = -sk->sk_err; out: release_sock(sk); return error; } - -static int svc_connect(struct socket *sock,struct sockaddr *sockaddr, - int sockaddr_len,int flags) +static int svc_connect(struct socket *sock, struct sockaddr *sockaddr, + int sockaddr_len, int flags) { DEFINE_WAIT(wait); struct sock *sk = sock->sk; @@ -154,7 +154,7 @@ static int svc_connect(struct socket *sock,struct sockaddr *sockaddr, struct atm_vcc *vcc = ATM_SD(sock); int error; - pr_debug("%p\n",vcc); + pr_debug("%p\n", vcc); lock_sock(sk); if (sockaddr_len != sizeof(struct sockaddr_atmsvc)) { error = -EINVAL; @@ -202,7 +202,7 @@ static int svc_connect(struct socket *sock,struct sockaddr *sockaddr, vcc->remote = *addr; set_bit(ATM_VF_WAITING, &vcc->flags); prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); - sigd_enq(vcc,as_connect,NULL,NULL,&vcc->remote); + sigd_enq(vcc, as_connect, NULL, NULL, &vcc->remote); if (flags & O_NONBLOCK) { finish_wait(sk->sk_sleep, &wait); sock->state = SS_CONNECTING; @@ -213,7 +213,8 @@ static int svc_connect(struct socket *sock,struct sockaddr *sockaddr, while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { schedule(); if (!signal_pending(current)) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk->sk_sleep, &wait, + TASK_INTERRUPTIBLE); continue; } pr_debug("*ABORT*\n"); @@ -229,20 +230,22 @@ static int svc_connect(struct socket *sock,struct sockaddr *sockaddr, * Kernel <--okay---- Demon * Kernel <--close--- Demon */ - sigd_enq(vcc,as_close,NULL,NULL,NULL); + sigd_enq(vcc, as_close, NULL, NULL, NULL); while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + 
prepare_to_wait(sk->sk_sleep, &wait, + TASK_INTERRUPTIBLE); schedule(); } if (!sk->sk_err) - while (!test_bit(ATM_VF_RELEASED,&vcc->flags) - && sigd) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + while (!test_bit(ATM_VF_RELEASED, &vcc->flags) && + sigd) { + prepare_to_wait(sk->sk_sleep, &wait, + TASK_INTERRUPTIBLE); schedule(); } - clear_bit(ATM_VF_REGIS,&vcc->flags); - clear_bit(ATM_VF_RELEASED,&vcc->flags); - clear_bit(ATM_VF_CLOSE,&vcc->flags); + clear_bit(ATM_VF_REGIS, &vcc->flags); + clear_bit(ATM_VF_RELEASED, &vcc->flags); + clear_bit(ATM_VF_CLOSE, &vcc->flags); /* we're gone now but may connect later */ error = -EINTR; break; @@ -270,17 +273,17 @@ static int svc_connect(struct socket *sock,struct sockaddr *sockaddr, /* * #endif */ - if (!(error = vcc_connect(sock, vcc->itf, vcc->vpi, vcc->vci))) + error = vcc_connect(sock, vcc->itf, vcc->vpi, vcc->vci); + if (!error) sock->state = SS_CONNECTED; else - (void) svc_disconnect(vcc); + (void)svc_disconnect(vcc); out: release_sock(sk); return error; } - -static int svc_listen(struct socket *sock,int backlog) +static int svc_listen(struct socket *sock, int backlog) { DEFINE_WAIT(wait); struct sock *sk = sock->sk; @@ -290,17 +293,17 @@ static int svc_listen(struct socket *sock,int backlog) pr_debug("%p\n", vcc); lock_sock(sk); /* let server handle listen on unbound sockets */ - if (test_bit(ATM_VF_SESSION,&vcc->flags)) { + if (test_bit(ATM_VF_SESSION, &vcc->flags)) { error = -EINVAL; goto out; } if (test_bit(ATM_VF_LISTEN, &vcc->flags)) { error = -EADDRINUSE; goto out; - } + } set_bit(ATM_VF_WAITING, &vcc->flags); prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); - sigd_enq(vcc,as_listen,NULL,NULL,&vcc->local); + sigd_enq(vcc, as_listen, NULL, NULL, &vcc->local); while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { schedule(); prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); @@ -310,7 +313,7 @@ static int svc_listen(struct socket *sock,int backlog) error = -EUNATCH; goto out; } - set_bit(ATM_VF_LISTEN,&vcc->flags); + set_bit(ATM_VF_LISTEN, &vcc->flags); vcc_insert_socket(sk); sk->sk_max_ack_backlog = backlog > 0 ? 
backlog : ATM_BACKLOG_DEFAULT; error = -sk->sk_err; @@ -319,8 +322,7 @@ out: return error; } - -static int svc_accept(struct socket *sock,struct socket *newsock,int flags) +static int svc_accept(struct socket *sock, struct socket *newsock, int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; @@ -344,8 +346,9 @@ static int svc_accept(struct socket *sock,struct socket *newsock,int flags) prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); while (!(skb = skb_dequeue(&sk->sk_receive_queue)) && sigd) { - if (test_bit(ATM_VF_RELEASED,&old_vcc->flags)) break; - if (test_bit(ATM_VF_CLOSE,&old_vcc->flags)) { + if (test_bit(ATM_VF_RELEASED, &old_vcc->flags)) + break; + if (test_bit(ATM_VF_CLOSE, &old_vcc->flags)) { error = -sk->sk_err; break; } @@ -360,7 +363,8 @@ static int svc_accept(struct socket *sock,struct socket *newsock,int flags) error = -ERESTARTSYS; break; } - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk->sk_sleep, &wait, + TASK_INTERRUPTIBLE); } finish_wait(sk->sk_sleep, &wait); if (error) @@ -369,31 +373,34 @@ static int svc_accept(struct socket *sock,struct socket *newsock,int flags) error = -EUNATCH; goto out; } - msg = (struct atmsvc_msg *) skb->data; + msg = (struct atmsvc_msg *)skb->data; new_vcc->qos = msg->qos; - set_bit(ATM_VF_HASQOS,&new_vcc->flags); + set_bit(ATM_VF_HASQOS, &new_vcc->flags); new_vcc->remote = msg->svc; new_vcc->local = msg->local; new_vcc->sap = msg->sap; error = vcc_connect(newsock, msg->pvc.sap_addr.itf, - msg->pvc.sap_addr.vpi, msg->pvc.sap_addr.vci); + msg->pvc.sap_addr.vpi, + msg->pvc.sap_addr.vci); dev_kfree_skb(skb); sk->sk_ack_backlog--; if (error) { - sigd_enq2(NULL,as_reject,old_vcc,NULL,NULL, - &old_vcc->qos,error); + sigd_enq2(NULL, as_reject, old_vcc, NULL, NULL, + &old_vcc->qos, error); error = error == -EAGAIN ? -EBUSY : error; goto out; } /* wait should be short, so we ignore the non-blocking flag */ set_bit(ATM_VF_WAITING, &new_vcc->flags); - prepare_to_wait(sk_atm(new_vcc)->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); - sigd_enq(new_vcc,as_accept,old_vcc,NULL,NULL); + prepare_to_wait(sk_atm(new_vcc)->sk_sleep, &wait, + TASK_UNINTERRUPTIBLE); + sigd_enq(new_vcc, as_accept, old_vcc, NULL, NULL); while (test_bit(ATM_VF_WAITING, &new_vcc->flags) && sigd) { release_sock(sk); schedule(); lock_sock(sk); - prepare_to_wait(sk_atm(new_vcc)->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(sk_atm(new_vcc)->sk_sleep, &wait, + TASK_UNINTERRUPTIBLE); } finish_wait(sk_atm(new_vcc)->sk_sleep, &wait); if (!sigd) { @@ -413,39 +420,37 @@ out: return error; } - -static int svc_getname(struct socket *sock,struct sockaddr *sockaddr, - int *sockaddr_len,int peer) +static int svc_getname(struct socket *sock, struct sockaddr *sockaddr, + int *sockaddr_len, int peer) { struct sockaddr_atmsvc *addr; *sockaddr_len = sizeof(struct sockaddr_atmsvc); addr = (struct sockaddr_atmsvc *) sockaddr; - memcpy(addr,peer ? &ATM_SD(sock)->remote : &ATM_SD(sock)->local, - sizeof(struct sockaddr_atmsvc)); + memcpy(addr, peer ? 
&ATM_SD(sock)->remote : &ATM_SD(sock)->local, + sizeof(struct sockaddr_atmsvc)); return 0; } - -int svc_change_qos(struct atm_vcc *vcc,struct atm_qos *qos) +int svc_change_qos(struct atm_vcc *vcc, struct atm_qos *qos) { struct sock *sk = sk_atm(vcc); DEFINE_WAIT(wait); set_bit(ATM_VF_WAITING, &vcc->flags); prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); - sigd_enq2(vcc,as_modify,NULL,NULL,&vcc->local,qos,0); + sigd_enq2(vcc, as_modify, NULL, NULL, &vcc->local, qos, 0); while (test_bit(ATM_VF_WAITING, &vcc->flags) && !test_bit(ATM_VF_RELEASED, &vcc->flags) && sigd) { schedule(); prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); } finish_wait(sk->sk_sleep, &wait); - if (!sigd) return -EUNATCH; + if (!sigd) + return -EUNATCH; return -sk->sk_err; } - static int svc_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) { @@ -455,37 +460,35 @@ static int svc_setsockopt(struct socket *sock, int level, int optname, lock_sock(sk); switch (optname) { - case SO_ATMSAP: - if (level != SOL_ATM || optlen != sizeof(struct atm_sap)) { - error = -EINVAL; - goto out; - } - if (copy_from_user(&vcc->sap, optval, optlen)) { - error = -EFAULT; - goto out; - } - set_bit(ATM_VF_HASSAP, &vcc->flags); - break; - case SO_MULTIPOINT: - if (level != SOL_ATM || optlen != sizeof(int)) { - error = -EINVAL; - goto out; - } - if (get_user(value, (int __user *) optval)) { - error = -EFAULT; - goto out; - } - if (value == 1) { - set_bit(ATM_VF_SESSION, &vcc->flags); - } else if (value == 0) { - clear_bit(ATM_VF_SESSION, &vcc->flags); - } else { - error = -EINVAL; - } - break; - default: - error = vcc_setsockopt(sock, level, optname, - optval, optlen); + case SO_ATMSAP: + if (level != SOL_ATM || optlen != sizeof(struct atm_sap)) { + error = -EINVAL; + goto out; + } + if (copy_from_user(&vcc->sap, optval, optlen)) { + error = -EFAULT; + goto out; + } + set_bit(ATM_VF_HASSAP, &vcc->flags); + break; + case SO_MULTIPOINT: + if (level != SOL_ATM || optlen != sizeof(int)) { + error = -EINVAL; + goto out; + } + if (get_user(value, (int __user *)optval)) { + error = -EFAULT; + goto out; + } + if (value == 1) + set_bit(ATM_VF_SESSION, &vcc->flags); + else if (value == 0) + clear_bit(ATM_VF_SESSION, &vcc->flags); + else + error = -EINVAL; + break; + default: + error = vcc_setsockopt(sock, level, optname, optval, optlen); } out: @@ -493,9 +496,8 @@ out: return error; } - -static int svc_getsockopt(struct socket *sock,int level,int optname, - char __user *optval,int __user *optlen) +static int svc_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; int error = 0, len; @@ -522,7 +524,6 @@ out: return error; } - static int svc_addparty(struct socket *sock, struct sockaddr *sockaddr, int sockaddr_len, int flags) { @@ -553,7 +554,6 @@ out: return error; } - static int svc_dropparty(struct socket *sock, int ep_ref) { DEFINE_WAIT(wait); @@ -580,7 +580,6 @@ out: return error; } - static int svc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { int error, ep_ref; @@ -588,29 +587,31 @@ static int svc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct atm_vcc *vcc = ATM_SD(sock); switch (cmd) { - case ATM_ADDPARTY: - if (!test_bit(ATM_VF_SESSION, &vcc->flags)) - return -EINVAL; - if (copy_from_user(&sa, (void __user *) arg, sizeof(sa))) - return -EFAULT; - error = svc_addparty(sock, (struct sockaddr *) &sa, sizeof(sa), 0); - break; - case ATM_DROPPARTY: - if 
(!test_bit(ATM_VF_SESSION, &vcc->flags)) - return -EINVAL; - if (copy_from_user(&ep_ref, (void __user *) arg, sizeof(int))) - return -EFAULT; - error = svc_dropparty(sock, ep_ref); - break; - default: - error = vcc_ioctl(sock, cmd, arg); + case ATM_ADDPARTY: + if (!test_bit(ATM_VF_SESSION, &vcc->flags)) + return -EINVAL; + if (copy_from_user(&sa, (void __user *) arg, sizeof(sa))) + return -EFAULT; + error = svc_addparty(sock, (struct sockaddr *)&sa, sizeof(sa), + 0); + break; + case ATM_DROPPARTY: + if (!test_bit(ATM_VF_SESSION, &vcc->flags)) + return -EINVAL; + if (copy_from_user(&ep_ref, (void __user *) arg, sizeof(int))) + return -EFAULT; + error = svc_dropparty(sock, ep_ref); + break; + default: + error = vcc_ioctl(sock, cmd, arg); } return error; } #ifdef CONFIG_COMPAT -static int svc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +static int svc_compat_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg) { /* The definition of ATM_ADDPARTY uses the size of struct atm_iobuf. But actually it takes a struct sockaddr_atmsvc, which doesn't need @@ -661,13 +662,13 @@ static int svc_create(struct net *net, struct socket *sock, int protocol, sock->ops = &svc_proto_ops; error = vcc_create(net, sock, protocol, AF_ATMSVC); - if (error) return error; + if (error) + return error; ATM_SD(sock)->local.sas_family = AF_ATMSVC; ATM_SD(sock)->remote.sas_family = AF_ATMSVC; return 0; } - static const struct net_proto_family svc_family_ops = { .family = PF_ATMSVC, .create = svc_create, -- cgit v1.2.2 From b4c84ec0fd9ec2297b796443051554d062007ba3 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 26 Jan 2010 11:40:19 +0000 Subject: net/atm/lec.c: Add __lec_arp_check_expire and other cleanups Reduce indentation in lec_arp_check_expire Indent a case label Remove != NULL logical tests from while ((skb = foo())) assign and tests Signed-off-by: Joe Perches Signed-off-by: David S. 
Miller --- net/atm/lec.c | 153 ++++++++++++++++++++++++++++++---------------------------- 1 file changed, 80 insertions(+), 73 deletions(-) (limited to 'net') diff --git a/net/atm/lec.c b/net/atm/lec.c index aefd278d6af6..5da5753157f9 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -285,7 +285,6 @@ static netdev_tx_t lec_start_xmit(struct sk_buff *skb, /* Make sure we have room for lec_id */ if (skb_headroom(skb) < 2) { - pr_debug("reallocating skb\n"); skb2 = skb_realloc_headroom(skb, LEC_HEADER_LEN); kfree_skb(skb); @@ -508,34 +507,31 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) break; case l_should_bridge: #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) - { - pr_debug("%s: bridge zeppelin asks about %pM\n", - dev->name, mesg->content.proxy.mac_addr); + { + pr_debug("%s: bridge zeppelin asks about %pM\n", + dev->name, mesg->content.proxy.mac_addr); - if (br_fdb_test_addr_hook == NULL) - break; + if (br_fdb_test_addr_hook == NULL) + break; - if (br_fdb_test_addr_hook(dev, - mesg->content.proxy.mac_addr)) { - /* hit from bridge table, send LE_ARP_RESPONSE */ - struct sk_buff *skb2; - struct sock *sk; - - pr_debug("%s: entry found, responding to zeppelin\n", - dev->name); - skb2 = alloc_skb(sizeof(struct atmlec_msg), - GFP_ATOMIC); - if (skb2 == NULL) - break; - skb2->len = sizeof(struct atmlec_msg); - skb_copy_to_linear_data(skb2, mesg, - sizeof(*mesg)); - atm_force_charge(priv->lecd, skb2->truesize); - sk = sk_atm(priv->lecd); - skb_queue_tail(&sk->sk_receive_queue, skb2); - sk->sk_data_ready(sk, skb2->len); - } + if (br_fdb_test_addr_hook(dev, mesg->content.proxy.mac_addr)) { + /* hit from bridge table, send LE_ARP_RESPONSE */ + struct sk_buff *skb2; + struct sock *sk; + + pr_debug("%s: entry found, responding to zeppelin\n", + dev->name); + skb2 = alloc_skb(sizeof(struct atmlec_msg), GFP_ATOMIC); + if (skb2 == NULL) + break; + skb2->len = sizeof(struct atmlec_msg); + skb_copy_to_linear_data(skb2, mesg, sizeof(*mesg)); + atm_force_charge(priv->lecd, skb2->truesize); + sk = sk_atm(priv->lecd); + skb_queue_tail(&sk->sk_receive_queue, skb2); + sk->sk_data_ready(sk, skb2->len); } + } #endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */ break; default: @@ -561,7 +557,7 @@ static void lec_atm_close(struct atm_vcc *vcc) if (skb_peek(&sk_atm(vcc)->sk_receive_queue)) pr_info("%s closing with messages pending\n", dev->name); - while ((skb = skb_dequeue(&sk_atm(vcc)->sk_receive_queue)) != NULL) { + while ((skb = skb_dequeue(&sk_atm(vcc)->sk_receive_queue))) { atm_return(vcc, skb->truesize); dev_kfree_skb(skb); } @@ -1748,6 +1744,50 @@ static void lec_arp_expire_vcc(unsigned long data) lec_arp_put(to_remove); } +static bool __lec_arp_check_expire(struct lec_arp_table *entry, + unsigned long now, + struct lec_priv *priv) +{ + unsigned long time_to_check; + + if ((entry->flags) & LEC_REMOTE_FLAG && priv->topology_change) + time_to_check = priv->forward_delay_time; + else + time_to_check = priv->aging_time; + + pr_debug("About to expire: %lx - %lx > %lx\n", + now, entry->last_used, time_to_check); + if (time_after(now, entry->last_used + time_to_check) && + !(entry->flags & LEC_PERMANENT_FLAG) && + !(entry->mac_addr[0] & 0x01)) { /* LANE2: 7.1.20 */ + /* Remove entry */ + pr_debug("Entry timed out\n"); + lec_arp_remove(priv, entry); + lec_arp_put(entry); + } else { + /* Something else */ + if ((entry->status == ESI_VC_PENDING || + entry->status == ESI_ARP_PENDING) && + time_after_eq(now, entry->timestamp + + priv->max_unknown_frame_time)) { + 
entry->timestamp = jiffies; + entry->packets_flooded = 0; + if (entry->status == ESI_VC_PENDING) + send_to_lecd(priv, l_svc_setup, + entry->mac_addr, + entry->atm_addr, + NULL); + } + if (entry->status == ESI_FLUSH_PENDING && + time_after_eq(now, entry->timestamp + + priv->path_switching_delay)) { + lec_arp_hold(entry); + return true; + } + } + + return false; +} /* * Expire entries. * 1. Re-set timer @@ -1772,7 +1812,6 @@ static void lec_arp_check_expire(struct work_struct *work) struct hlist_node *node, *next; struct lec_arp_table *entry; unsigned long now; - unsigned long time_to_check; int i; pr_debug("%p\n", priv); @@ -1782,51 +1821,19 @@ restart: for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_tables[i], next) { - if ((entry->flags) & LEC_REMOTE_FLAG && - priv->topology_change) - time_to_check = priv->forward_delay_time; - else - time_to_check = priv->aging_time; - - pr_debug("About to expire: %lx - %lx > %lx\n", - now, entry->last_used, time_to_check); - if (time_after(now, entry->last_used + time_to_check) && - !(entry->flags & LEC_PERMANENT_FLAG) && - !(entry->mac_addr[0] & 0x01)) { /* LANE2: 7.1.20 */ - /* Remove entry */ - pr_debug("Entry timed out\n"); - lec_arp_remove(priv, entry); + if (__lec_arp_check_expire(entry, now, priv)) { + struct sk_buff *skb; + struct atm_vcc *vcc = entry->vcc; + + spin_unlock_irqrestore(&priv->lec_arp_lock, + flags); + while ((skb = skb_dequeue(&entry->tx_wait))) + lec_send(vcc, skb); + entry->last_used = jiffies; + entry->status = ESI_FORWARD_DIRECT; lec_arp_put(entry); - } else { - /* Something else */ - if ((entry->status == ESI_VC_PENDING || - entry->status == ESI_ARP_PENDING) && - time_after_eq(now, - entry->timestamp + - priv->max_unknown_frame_time)) { - entry->timestamp = jiffies; - entry->packets_flooded = 0; - if (entry->status == ESI_VC_PENDING) - send_to_lecd(priv, l_svc_setup, - entry->mac_addr, - entry->atm_addr, - NULL); - } - if (entry->status == ESI_FLUSH_PENDING && - time_after_eq(now, entry->timestamp + - priv->path_switching_delay)) { - struct sk_buff *skb; - struct atm_vcc *vcc = entry->vcc; - - lec_arp_hold(entry); - spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - while ((skb = skb_dequeue(&entry->tx_wait)) != NULL) - lec_send(vcc, skb); - entry->last_used = jiffies; - entry->status = ESI_FORWARD_DIRECT; - lec_arp_put(entry); - goto restart; - } + + goto restart; } } } @@ -2237,7 +2244,7 @@ restart: lec_arp_hold(entry); spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - while ((skb = skb_dequeue(&entry->tx_wait)) != NULL) + while ((skb = skb_dequeue(&entry->tx_wait))) lec_send(vcc, skb); entry->last_used = jiffies; entry->status = ESI_FORWARD_DIRECT; -- cgit v1.2.2 From b50c2ea72a8ed6bc2a954019b6feb6ca41fce07e Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 26 Jan 2010 11:40:20 +0000 Subject: net/atm: Cleanup dprint/ddprintk #defines and uses Move "mpoa:%s: ", __func__/__FILE__ to #defines Remove mpoa __func__/__FILE__ from dprintk uses Add and use #define dprint_cont where appropriate Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/atm/mpc.c | 153 +++++++++++++++++++++++++------------------------- net/atm/mpoa_caches.c | 50 +++++++++-------- net/atm/mpoa_proc.c | 28 ++++++--- 3 files changed, 124 insertions(+), 107 deletions(-) (limited to 'net') diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 55dba22e44b3..a6521c8aa88b 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -38,21 +38,31 @@ */ #if 0 -#define dprintk(format, args...) 
printk(KERN_DEBUG format, ##args) /* debug */ +#define dprintk(format, args...) \ + printk(KERN_DEBUG "mpoa:%s: " format, __func__, ##args) +#define dprintk_cont(format, args...) printk(KERN_CONT format, ##args) #else -#define dprintk(format, args...) \ - do { if (0) printk(KERN_DEBUG format, ##args); } while (0) +#define dprintk(format, args...) \ + do { if (0) \ + printk(KERN_DEBUG "mpoa:%s: " format, __func__, ##args);\ + } while (0) +#define dprintk_cont(format, args...) \ + do { if (0) printk(KERN_CONT format, ##args); } while (0) #endif #if 0 -#define ddprintk printk(KERN_DEBUG format, ##args) /* more debug */ +#define ddprintk(format, args...) \ + printk(KERN_DEBUG "mpoa:%s: " format, __func__, ##args) +#define ddprintk_cont(format, args...) printk(KERN_CONT format, ##args) #else -#define ddprintk(format, args...) \ - do { if (0) printk(KERN_DEBUG format, ##args); } while (0) +#define ddprintk(format, args...) \ + do { if (0) \ + printk(KERN_DEBUG "mpoa:%s: " format, __func__, ##args);\ + } while (0) +#define ddprintk_cont(format, args...) \ + do { if (0) printk(KERN_CONT format, ##args); } while (0) #endif - - #define MPOA_TAG_LEN 4 /* mpc_daemon -> kernel */ @@ -302,7 +312,7 @@ static struct mpoa_client *alloc_mpc(void) static void start_mpc(struct mpoa_client *mpc, struct net_device *dev) { - dprintk("mpoa: (%s) start_mpc:\n", mpc->dev->name); + dprintk("(%s)\n", mpc->dev->name); if (!dev->netdev_ops) pr_info("(%s) not starting\n", dev->name); else { @@ -316,14 +326,14 @@ static void start_mpc(struct mpoa_client *mpc, struct net_device *dev) static void stop_mpc(struct mpoa_client *mpc) { struct net_device *dev = mpc->dev; - dprintk("mpoa: (%s) stop_mpc:", mpc->dev->name); + dprintk("(%s)", mpc->dev->name); /* Lets not nullify lec device's dev->hard_start_xmit */ if (dev->netdev_ops != &mpc->new_ops) { - dprintk(" mpc already stopped, not fatal\n"); + dprintk_cont(" mpc already stopped, not fatal\n"); return; } - dprintk("\n"); + dprintk_cont("\n"); dev->netdev_ops = mpc->old_ops; mpc->old_ops = NULL; @@ -371,7 +381,7 @@ static void lane2_assoc_ind(struct net_device *dev, const u8 *mac_addr, struct mpoa_client *mpc; mpoa_device_type = number_of_mps_macs = 0; /* silence gcc */ - dprintk("mpoa: (%s) lane2_assoc_ind: received TLV(s), ", dev->name); + dprintk("(%s) received TLV(s), ", dev->name); dprintk("total length of all TLVs %d\n", sizeoftlvs); mpc = find_mpc_by_lec(dev); /* Sampo-Fix: moved here from below */ if (mpc == NULL) { @@ -402,7 +412,7 @@ static void lane2_assoc_ind(struct net_device *dev, const u8 *mac_addr, } mpoa_device_type = *tlvs++; number_of_mps_macs = *tlvs++; - dprintk("mpoa: (%s) MPOA device type '%s', ", + dprintk("(%s) MPOA device type '%s', ", dev->name, mpoa_device_type_string(mpoa_device_type)); if (mpoa_device_type == MPS_AND_MPC && length < (42 + number_of_mps_macs*ETH_ALEN)) { /* :) */ @@ -417,7 +427,7 @@ static void lane2_assoc_ind(struct net_device *dev, const u8 *mac_addr, } if (mpoa_device_type != MPS && mpoa_device_type != MPS_AND_MPC) { - dprintk("ignoring non-MPS device\n"); + dprintk("ignoring non-MPS device "); if (mpoa_device_type == MPC) tlvs += 20; continue; /* we are only interested in MPSs */ @@ -427,7 +437,8 @@ static void lane2_assoc_ind(struct net_device *dev, const u8 *mac_addr, pr_info("(%s) MPS_AND_MPC has zero MACs\n", dev->name); continue; /* someone should read the spec */ } - dprintk("this MPS has %d MAC addresses\n", number_of_mps_macs); + dprintk_cont("this MPS has %d MAC addresses\n", + number_of_mps_macs); /* * ok, now we 
can go and tell our daemon @@ -498,7 +509,7 @@ static int send_via_shortcut(struct sk_buff *skb, struct mpoa_client *mpc) iph = (struct iphdr *)buff; ipaddr = iph->daddr; - ddprintk("mpoa: (%s) send_via_shortcut: ipaddr 0x%x\n", + ddprintk("(%s) ipaddr 0x%x\n", mpc->dev->name, ipaddr); entry = mpc->in_ops->get(ipaddr, mpc); @@ -510,17 +521,17 @@ static int send_via_shortcut(struct sk_buff *skb, struct mpoa_client *mpc) } /* threshold not exceeded or VCC not ready */ if (mpc->in_ops->cache_hit(entry, mpc) != OPEN) { - ddprintk("mpoa: (%s) send_via_shortcut: cache_hit: returns != OPEN\n", + ddprintk("(%s) cache_hit: returns != OPEN\n", mpc->dev->name); mpc->in_ops->put(entry); return 1; } - ddprintk("mpoa: (%s) send_via_shortcut: using shortcut\n", + ddprintk("(%s) using shortcut\n", mpc->dev->name); /* MPOA spec A.1.4, MPOA client must decrement IP ttl at least by one */ if (iph->ttl <= 1) { - ddprintk("mpoa: (%s) send_via_shortcut: IP ttl = %u, using LANE\n", + ddprintk("(%s) IP ttl = %u, using LANE\n", mpc->dev->name, iph->ttl); mpc->in_ops->put(entry); return 1; @@ -530,7 +541,7 @@ static int send_via_shortcut(struct sk_buff *skb, struct mpoa_client *mpc) iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); if (entry->ctrl_info.tag != 0) { - ddprintk("mpoa: (%s) send_via_shortcut: adding tag 0x%x\n", + ddprintk("(%s) adding tag 0x%x\n", mpc->dev->name, entry->ctrl_info.tag); tagged_llc_snap_hdr.tag = entry->ctrl_info.tag; skb_pull(skb, ETH_HLEN); /* get rid of Eth header */ @@ -655,25 +666,23 @@ static void mpc_vcc_close(struct atm_vcc *vcc, struct net_device *dev) return; } - dprintk("mpoa: (%s) mpc_vcc_close:\n", dev->name); + dprintk("(%s)\n", dev->name); in_entry = mpc->in_ops->get_by_vcc(vcc, mpc); if (in_entry) { - dprintk("mpoa: (%s) mpc_vcc_close: ingress SVC closed ip = %pI4\n", + dprintk("(%s) ingress SVC closed ip = %pI4\n", mpc->dev->name, &in_entry->ctrl_info.in_dst_ip); in_entry->shortcut = NULL; mpc->in_ops->put(in_entry); } eg_entry = mpc->eg_ops->get_by_vcc(vcc, mpc); if (eg_entry) { - dprintk("mpoa: (%s) mpc_vcc_close: egress SVC closed\n", - mpc->dev->name); + dprintk("(%s) egress SVC closed\n", mpc->dev->name); eg_entry->shortcut = NULL; mpc->eg_ops->put(eg_entry); } if (in_entry == NULL && eg_entry == NULL) - dprintk("mpoa: (%s) mpc_vcc_close: unused vcc closed\n", - dev->name); + dprintk("(%s) unused vcc closed\n", dev->name); return; } @@ -687,10 +696,9 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb) __be32 tag; char *tmp; - ddprintk("mpoa: (%s) mpc_push:\n", dev->name); + ddprintk("(%s)\n", dev->name); if (skb == NULL) { - dprintk("mpoa: (%s) mpc_push: null skb, closing VCC\n", - dev->name); + dprintk("(%s) null skb, closing VCC\n", dev->name); mpc_vcc_close(vcc, dev); return; } @@ -700,8 +708,7 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb) sizeof(struct llc_snap_hdr)) == 0) { struct sock *sk = sk_atm(vcc); - dprintk("mpoa: (%s) mpc_push: control packet arrived\n", - dev->name); + dprintk("(%s) control packet arrived\n", dev->name); /* Pass control packets to daemon */ skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk, skb->len); @@ -719,8 +726,7 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb) if (memcmp(skb->data, &llc_snap_mpoa_data_tagged, sizeof(struct llc_snap_hdr)) == 0) { /* MPOA tagged data */ - ddprintk("mpoa: (%s) mpc_push: tagged data packet arrived\n", - dev->name); + ddprintk("(%s) tagged data packet arrived\n", dev->name); } else if (memcmp(skb->data, &llc_snap_mpoa_data, 
sizeof(struct llc_snap_hdr)) == 0) { /* MPOA data */ @@ -813,8 +819,7 @@ static int atm_mpoa_mpoad_attach(struct atm_vcc *vcc, int arg) mpc = find_mpc_by_itfnum(arg); if (mpc == NULL) { - dprintk("mpoa: mpoad_attach: allocating new mpc for itf %d\n", - arg); + dprintk("allocating new mpc for itf %d\n", arg); mpc = alloc_mpc(); if (mpc == NULL) return -ENOMEM; @@ -923,55 +928,54 @@ static int msg_from_mpoad(struct atm_vcc *vcc, struct sk_buff *skb) pr_info("no mpc found\n"); return 0; } - dprintk("mpoa: (%s) msg_from_mpoad:", - (mpc->dev) ? mpc->dev->name : ""); + dprintk("(%s)", mpc->dev ? mpc->dev->name : ""); switch (mesg->type) { case MPOA_RES_REPLY_RCVD: - dprintk(" mpoa_res_reply_rcvd\n"); + dprintk_cont("mpoa_res_reply_rcvd\n"); MPOA_res_reply_rcvd(mesg, mpc); break; case MPOA_TRIGGER_RCVD: - dprintk(" mpoa_trigger_rcvd\n"); + dprintk_cont("mpoa_trigger_rcvd\n"); MPOA_trigger_rcvd(mesg, mpc); break; case INGRESS_PURGE_RCVD: - dprintk(" nhrp_purge_rcvd\n"); + dprintk_cont("nhrp_purge_rcvd\n"); ingress_purge_rcvd(mesg, mpc); break; case EGRESS_PURGE_RCVD: - dprintk(" egress_purge_reply_rcvd\n"); + dprintk_cont("egress_purge_reply_rcvd\n"); egress_purge_rcvd(mesg, mpc); break; case MPS_DEATH: - dprintk(" mps_death\n"); + dprintk_cont("mps_death\n"); mps_death(mesg, mpc); break; case CACHE_IMPOS_RCVD: - dprintk(" cache_impos_rcvd\n"); + dprintk_cont("cache_impos_rcvd\n"); MPOA_cache_impos_rcvd(mesg, mpc); break; case SET_MPC_CTRL_ADDR: - dprintk(" set_mpc_ctrl_addr\n"); + dprintk_cont("set_mpc_ctrl_addr\n"); set_mpc_ctrl_addr_rcvd(mesg, mpc); break; case SET_MPS_MAC_ADDR: - dprintk(" set_mps_mac_addr\n"); + dprintk_cont("set_mps_mac_addr\n"); set_mps_mac_addr_rcvd(mesg, mpc); break; case CLEAN_UP_AND_EXIT: - dprintk(" clean_up_and_exit\n"); + dprintk_cont("clean_up_and_exit\n"); clean_up(mesg, mpc, DIE); break; case RELOAD: - dprintk(" reload\n"); + dprintk_cont("reload\n"); clean_up(mesg, mpc, RELOAD); break; case SET_MPC_PARAMS: - dprintk(" set_mpc_params\n"); + dprintk_cont("set_mpc_params\n"); mpc->parameters = mesg->content.params; break; default: - dprintk(" unknown message %d\n", mesg->type); + dprintk_cont("unknown message %d\n", mesg->type); break; } kfree_skb(skb); @@ -1027,8 +1031,7 @@ static int mpoa_event_listener(struct notifier_block *mpoa_notifier, priv->lane2_ops->associate_indicator = lane2_assoc_ind; mpc = find_mpc_by_itfnum(priv->itfnum); if (mpc == NULL) { - dprintk("mpoa: mpoa_event_listener: allocating new mpc for %s\n", - dev->name); + dprintk("allocating new mpc for %s\n", dev->name); mpc = alloc_mpc(); if (mpc == NULL) { pr_info("no new mpc"); @@ -1038,14 +1041,14 @@ static int mpoa_event_listener(struct notifier_block *mpoa_notifier, mpc->dev_num = priv->itfnum; mpc->dev = dev; dev_hold(dev); - dprintk("mpoa: (%s) was initialized\n", dev->name); + dprintk("(%s) was initialized\n", dev->name); break; case NETDEV_UNREGISTER: /* the lec device was deallocated */ mpc = find_mpc_by_lec(dev); if (mpc == NULL) break; - dprintk("mpoa: device (%s) was deallocated\n", dev->name); + dprintk("device (%s) was deallocated\n", dev->name); stop_mpc(mpc); dev_put(mpc->dev); mpc->dev = NULL; @@ -1143,7 +1146,7 @@ static void check_qos_and_open_shortcut(struct k_message *msg, entry->shortcut = eg_entry->shortcut; } if (entry->shortcut) { - dprintk("mpoa: (%s) using egress SVC to reach %pI4\n", + dprintk("(%s) using egress SVC to reach %pI4\n", client->dev->name, &dst_ip); client->eg_ops->put(eg_entry); return; @@ -1170,16 +1173,16 @@ static void MPOA_res_reply_rcvd(struct 
k_message *msg, struct mpoa_client *mpc) __be32 dst_ip = msg->content.in_info.in_dst_ip; in_cache_entry *entry = mpc->in_ops->get(dst_ip, mpc); - dprintk("mpoa: (%s) MPOA_res_reply_rcvd: ip %pI4\n", + dprintk("(%s) ip %pI4\n", mpc->dev->name, &dst_ip); - ddprintk("mpoa: (%s) MPOA_res_reply_rcvd() entry = %p", + ddprintk("(%s) entry = %p", mpc->dev->name, entry); if (entry == NULL) { pr_info("(%s) ARGH, received res. reply for an entry that doesn't exist.\n", mpc->dev->name); return; } - ddprintk(" entry_state = %d ", entry->entry_state); + ddprintk_cont(" entry_state = %d ", entry->entry_state); if (entry->entry_state == INGRESS_RESOLVED) { pr_info("(%s) RESOLVED entry!\n", mpc->dev->name); @@ -1191,7 +1194,7 @@ static void MPOA_res_reply_rcvd(struct k_message *msg, struct mpoa_client *mpc) do_gettimeofday(&(entry->tv)); do_gettimeofday(&(entry->reply_wait)); /* Used in refreshing func from now on */ entry->refresh_time = 0; - ddprintk("entry->shortcut = %p\n", entry->shortcut); + ddprintk_cont("entry->shortcut = %p\n", entry->shortcut); if (entry->entry_state == INGRESS_RESOLVING && entry->shortcut != NULL) { @@ -1228,7 +1231,7 @@ static void ingress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc) } do { - dprintk("mpoa: (%s) ingress_purge_rcvd: removing an ingress entry, ip = %pI4\n", + dprintk("(%s) removing an ingress entry, ip = %pI4\n", mpc->dev->name, &dst_ip); write_lock_bh(&mpc->ingress_lock); mpc->in_ops->remove_entry(entry, mpc); @@ -1246,7 +1249,7 @@ static void egress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc) eg_cache_entry *entry = mpc->eg_ops->get_by_cache_id(cache_id, mpc); if (entry == NULL) { - dprintk("mpoa: (%s) egress_purge_rcvd: purge for a non-existing entry\n", + dprintk("(%s) purge for a non-existing entry\n", mpc->dev->name); return; } @@ -1266,7 +1269,7 @@ static void purge_egress_shortcut(struct atm_vcc *vcc, eg_cache_entry *entry) struct k_message *purge_msg; struct sk_buff *skb; - dprintk("mpoa: purge_egress_shortcut: entering\n"); + dprintk("entering\n"); if (vcc == NULL) { pr_info("vcc == NULL\n"); return; @@ -1290,7 +1293,7 @@ static void purge_egress_shortcut(struct atm_vcc *vcc, eg_cache_entry *entry) sk = sk_atm(vcc); skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk, skb->len); - dprintk("mpoa: purge_egress_shortcut: exiting:\n"); + dprintk("exiting\n"); return; } @@ -1303,7 +1306,7 @@ static void mps_death(struct k_message *msg, struct mpoa_client *mpc) { eg_cache_entry *entry; - dprintk("mpoa: (%s) mps_death:\n", mpc->dev->name); + dprintk("(%s)\n", mpc->dev->name); if (memcmp(msg->MPS_ctrl, mpc->mps_ctrl_addr, ATM_ESA_LEN)) { pr_info("(%s) wrong MPS\n", mpc->dev->name); @@ -1332,7 +1335,7 @@ static void MPOA_cache_impos_rcvd(struct k_message *msg, eg_cache_entry *entry = mpc->eg_ops->get_by_cache_id(msg->content.eg_info.cache_id, mpc); holding_time = msg->content.eg_info.holding_time; - dprintk("mpoa: (%s) MPOA_cache_impos_rcvd: entry = %p, holding_time = %u\n", + dprintk("(%s) entry = %p, holding_time = %u\n", mpc->dev->name, entry, holding_time); if (entry == NULL && holding_time) { entry = mpc->eg_ops->add_entry(msg, mpc); @@ -1369,11 +1372,11 @@ static void set_mpc_ctrl_addr_rcvd(struct k_message *mesg, memcpy(&tlv[7], mesg->MPS_ctrl, ATM_ESA_LEN); /* MPC ctrl ATM addr */ memcpy(mpc->our_ctrl_addr, mesg->MPS_ctrl, ATM_ESA_LEN); - dprintk("mpoa: (%s) setting MPC ctrl ATM address to ", - (mpc->dev) ? mpc->dev->name : ""); + dprintk("(%s) setting MPC ctrl ATM address to", + mpc->dev ? 
mpc->dev->name : ""); for (i = 7; i < sizeof(tlv); i++) - dprintk("%02x ", tlv[i]); - dprintk("\n"); + dprintk_cont(" %02x", tlv[i]); + dprintk_cont("\n"); if (mpc->dev) { priv = netdev_priv(mpc->dev); @@ -1424,7 +1427,7 @@ static void clean_up(struct k_message *msg, struct mpoa_client *mpc, int action) entry = mpc->eg_cache; while (entry != NULL) { msg->content.eg_info = entry->ctrl_info; - dprintk("mpoa: cache_id %u\n", entry->ctrl_info.cache_id); + dprintk("cache_id %u\n", entry->ctrl_info.cache_id); msg_to_mpoad(msg, mpc); entry = entry->next; } @@ -1537,15 +1540,15 @@ static void __exit atm_mpoa_cleanup(void) if (priv->lane2_ops != NULL) priv->lane2_ops->associate_indicator = NULL; } - ddprintk("mpoa: cleanup_module: about to clear caches\n"); + ddprintk("about to clear caches\n"); mpc->in_ops->destroy_cache(mpc); mpc->eg_ops->destroy_cache(mpc); - ddprintk("mpoa: cleanup_module: caches cleared\n"); + ddprintk("caches cleared\n"); kfree(mpc->mps_macs); memset(mpc, 0, sizeof(struct mpoa_client)); - ddprintk("mpoa: cleanup_module: about to kfree %p\n", mpc); + ddprintk("about to kfree %p\n", mpc); kfree(mpc); - ddprintk("mpoa: cleanup_module: next mpc is at %p\n", tmp); + ddprintk("next mpc is at %p\n", tmp); mpc = tmp; } @@ -1553,7 +1556,7 @@ static void __exit atm_mpoa_cleanup(void) qos_head = NULL; while (qos != NULL) { nextqos = qos->next; - dprintk("mpoa: cleanup_module: freeing qos entry %p\n", qos); + dprintk("freeing qos entry %p\n", qos); kfree(qos); qos = nextqos; } diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c index de21cc66feb4..4c141810eb6d 100644 --- a/net/atm/mpoa_caches.c +++ b/net/atm/mpoa_caches.c @@ -11,17 +11,23 @@ */ #if 0 -#define dprintk(format, args...) printk(KERN_DEBUG format, ##args) /* debug */ +#define dprintk(format, args...) \ + printk(KERN_DEBUG "mpoa:%s: " format, __FILE__, ##args) /* debug */ #else -#define dprintk(format, args...) \ - do { if (0) printk(KERN_DEBUG format, ##args); } while (0) +#define dprintk(format, args...) \ + do { if (0) \ + printk(KERN_DEBUG "mpoa:%s: " format, __FILE__, ##args);\ + } while (0) #endif #if 0 -#define ddprintk printk(KERN_DEBUG format, ##args) /* more debug */ +#define ddprintk(format, args...) \ + printk(KERN_DEBUG "mpoa:%s: " format, __FILE__, ##args) /* debug */ #else -#define ddprintk(format, args...) \ - do { if (0) printk(KERN_DEBUG format, ##args); } while (0) +#define ddprintk(format, args...) 
\ + do { if (0) \ + printk(KERN_DEBUG "mpoa:%s: " format, __FILE__, ##args);\ + } while (0) #endif static in_cache_entry *in_cache_get(__be32 dst_ip, @@ -96,11 +102,10 @@ static in_cache_entry *in_cache_add_entry(__be32 dst_ip, return NULL; } - dprintk("mpoa: mpoa_caches.c: adding an ingress entry, ip = %pI4\n", - &dst_ip); + dprintk("adding an ingress entry, ip = %pI4\n", &dst_ip); atomic_set(&entry->use, 1); - dprintk("mpoa: mpoa_caches.c: new_in_cache_entry: about to lock\n"); + dprintk("new_in_cache_entry: about to lock\n"); write_lock_bh(&client->ingress_lock); entry->next = client->in_cache; entry->prev = NULL; @@ -118,7 +123,7 @@ static in_cache_entry *in_cache_add_entry(__be32 dst_ip, atomic_inc(&entry->use); write_unlock_bh(&client->ingress_lock); - dprintk("mpoa: mpoa_caches.c: new_in_cache_entry: unlocked\n"); + dprintk("new_in_cache_entry: unlocked\n"); return entry; } @@ -154,7 +159,7 @@ static int cache_hit(in_cache_entry *entry, struct mpoa_client *mpc) if (entry->count > mpc->parameters.mpc_p1 && entry->entry_state == INGRESS_INVALID) { - dprintk("mpoa: (%s) mpoa_caches.c: threshold exceeded for ip %pI4, sending MPOA res req\n", + dprintk("(%s) threshold exceeded for ip %pI4, sending MPOA res req\n", mpc->dev->name, &entry->ctrl_info.in_dst_ip); entry->entry_state = INGRESS_RESOLVING; msg.type = SND_MPOA_RES_RQST; @@ -190,7 +195,7 @@ static void in_cache_remove_entry(in_cache_entry *entry, struct k_message msg; vcc = entry->shortcut; - dprintk("mpoa: mpoa_caches.c: removing an ingress entry, ip = %pI4\n", + dprintk("removing an ingress entry, ip = %pI4\n", &entry->ctrl_info.in_dst_ip); if (entry->prev != NULL) @@ -235,7 +240,7 @@ static void clear_count_and_expired(struct mpoa_client *client) next_entry = entry->next; if ((now.tv_sec - entry->tv.tv_sec) > entry->ctrl_info.holding_time) { - dprintk("mpoa: mpoa_caches.c: holding time expired, ip = %pI4\n", + dprintk("holding time expired, ip = %pI4\n", &entry->ctrl_info.in_dst_ip); client->in_ops->remove_entry(entry, client); } @@ -302,7 +307,7 @@ static void refresh_entries(struct mpoa_client *client) struct timeval now; struct in_cache_entry *entry = client->in_cache; - ddprintk("mpoa: mpoa_caches.c: refresh_entries\n"); + ddprintk("refresh_entries\n"); do_gettimeofday(&now); read_lock_bh(&client->ingress_lock); @@ -312,7 +317,7 @@ static void refresh_entries(struct mpoa_client *client) entry->refresh_time = (2 * (entry->ctrl_info.holding_time))/3; if ((now.tv_sec - entry->reply_wait.tv_sec) > entry->refresh_time) { - dprintk("mpoa: mpoa_caches.c: refreshing an entry.\n"); + dprintk("refreshing an entry.\n"); entry->entry_state = INGRESS_REFRESHING; } @@ -435,7 +440,7 @@ static void eg_cache_remove_entry(eg_cache_entry *entry, struct k_message msg; vcc = entry->shortcut; - dprintk("mpoa: mpoa_caches.c: removing an egress entry.\n"); + dprintk("removing an egress entry.\n"); if (entry->prev != NULL) entry->prev->next = entry->next; else @@ -471,11 +476,11 @@ static eg_cache_entry *eg_cache_add_entry(struct k_message *msg, return NULL; } - dprintk("mpoa: mpoa_caches.c: adding an egress entry, ip = %pI4, this should be our IP\n", + dprintk("adding an egress entry, ip = %pI4, this should be our IP\n", &msg->content.eg_info.eg_dst_ip); atomic_set(&entry->use, 1); - dprintk("mpoa: mpoa_caches.c: new_eg_cache_entry: about to lock\n"); + dprintk("new_eg_cache_entry: about to lock\n"); write_lock_irq(&client->egress_lock); entry->next = client->eg_cache; entry->prev = NULL; @@ -487,14 +492,13 @@ static eg_cache_entry 
*eg_cache_add_entry(struct k_message *msg, entry->ctrl_info = msg->content.eg_info; do_gettimeofday(&(entry->tv)); entry->entry_state = EGRESS_RESOLVED; - dprintk("mpoa: mpoa_caches.c: new_eg_cache_entry cache_id %u\n", + dprintk("new_eg_cache_entry cache_id %u\n", ntohl(entry->ctrl_info.cache_id)); - dprintk("mpoa: mpoa_caches.c: mps_ip = %pI4\n", - &entry->ctrl_info.mps_ip); + dprintk("mps_ip = %pI4\n", &entry->ctrl_info.mps_ip); atomic_inc(&entry->use); write_unlock_irq(&client->egress_lock); - dprintk("mpoa: mpoa_caches.c: new_eg_cache_entry: unlocked\n"); + dprintk("new_eg_cache_entry: unlocked\n"); return entry; } @@ -524,7 +528,7 @@ static void clear_expired(struct mpoa_client *client) > entry->ctrl_info.holding_time) { msg.type = SND_EGRESS_PURGE; msg.content.eg_info = entry->ctrl_info; - dprintk("mpoa: mpoa_caches.c: egress_cache: holding time expired, cache_id = %u.\n", + dprintk("egress_cache: holding time expired, cache_id = %u.\n", ntohl(entry->ctrl_info.cache_id)); msg_to_mpoad(&msg, client); client->eg_ops->remove_entry(entry, client); diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c index aae4a87c22ac..b9bdb98427e4 100644 --- a/net/atm/mpoa_proc.c +++ b/net/atm/mpoa_proc.c @@ -21,10 +21,23 @@ */ #if 1 -#define dprintk(format, args...) printk(KERN_DEBUG format, ##args) /* debug */ +#define dprintk(format, args...) \ + printk(KERN_DEBUG "mpoa:%s: " format, __FILE__, ##args) /* debug */ #else -#define dprintk(format, args...) \ - do { if (0) printk(KERN_DEBUG format, ##args); } while (0) +#define dprintk(format, args...) \ + do { if (0) \ + printk(KERN_DEBUG "mpoa:%s: " format, __FILE__, ##args);\ + } while (0) +#endif + +#if 0 +#define ddprintk(format, args...) \ + printk(KERN_DEBUG "mpoa:%s: " format, __FILE__, ##args) /* debug */ +#else +#define ddprintk(format, args...) \ + do { if (0) \ + printk(KERN_DEBUG "mpoa:%s: " format, __FILE__, ##args);\ + } while (0) #endif #define STAT_FILE_NAME "mpc" /* Our statistic file's name */ @@ -258,12 +271,9 @@ static int parse_qos(const char *buff) qos.rxtp.max_pcr = rx_pcr; qos.rxtp.max_sdu = rx_sdu; qos.aal = ATM_AAL5; - dprintk("mpoa: mpoa_proc.c: parse_qos(): setting qos paramameters to tx=%d,%d rx=%d,%d\n", - qos.txtp.max_pcr, - qos.txtp.max_sdu, - qos.rxtp.max_pcr, - qos.rxtp.max_sdu - ); + dprintk("parse_qos(): setting qos paramameters to tx=%d,%d rx=%d,%d\n", + qos.txtp.max_pcr, qos.txtp.max_sdu, + qos.rxtp.max_pcr, qos.rxtp.max_sdu); atm_mpoa_add_qos(ipaddr, &qos); return 1; -- cgit v1.2.2 From 54233261d5aa2926f080b67ac22f508c9f15e690 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Wed, 27 Jan 2010 09:44:48 -0500 Subject: cfg80211: fix wext-compat for setting rate to 'auto' Signed-off-by: John W. Linville --- net/wireless/wext-compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 966d2f01beac..b17eeae448d5 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1214,7 +1214,7 @@ int cfg80211_wext_siwrate(struct net_device *dev, memset(&mask, 0, sizeof(mask)); fixed = 0; - maxrate = 0; + maxrate = (u32)-1; if (rate->value < 0) { /* nothing */ -- cgit v1.2.2 From e924960dacdf85d118a98c7262edf2f99c3015cf Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 25 Jan 2010 10:28:21 +0000 Subject: netns xfrm: fixup xfrm6_tunnel error propagation Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- net/ipv6/xfrm6_tunnel.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 438831d33593..23fb1002124c 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -353,13 +353,19 @@ static struct xfrm6_tunnel xfrm46_tunnel_handler = { static int __init xfrm6_tunnel_init(void) { - if (xfrm_register_type(&xfrm6_tunnel_type, AF_INET6) < 0) + int rv; + + rv = xfrm_register_type(&xfrm6_tunnel_type, AF_INET6); + if (rv < 0) goto err; - if (xfrm6_tunnel_register(&xfrm6_tunnel_handler, AF_INET6)) + rv = xfrm6_tunnel_register(&xfrm6_tunnel_handler, AF_INET6); + if (rv < 0) goto unreg; - if (xfrm6_tunnel_register(&xfrm46_tunnel_handler, AF_INET)) + rv = xfrm6_tunnel_register(&xfrm46_tunnel_handler, AF_INET); + if (rv < 0) goto dereg6; - if (xfrm6_tunnel_spi_init() < 0) + rv = xfrm6_tunnel_spi_init(); + if (rv < 0) goto dereg46; return 0; @@ -370,7 +376,7 @@ dereg6: unreg: xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6); err: - return -EAGAIN; + return rv; } static void __exit xfrm6_tunnel_fini(void) -- cgit v1.2.2 From a1664773907a2b69e2a3019598dcbeffa6bc724b Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 25 Jan 2010 10:37:54 +0000 Subject: netns xfrm: xfrm6_tunnel in netns I'm not sure about rcu stuff near kmem cache destruction: * checks for non-empty hashes look bogus; they're done _before_ rcu_barrier() * unregistering netns ops is done before kmem_cache destroy (as it should), and unregistering involves rcu barriers by itself So it looks like nothing should be done. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv6/ipcomp6.c | 4 +- net/ipv6/xfrm6_tunnel.c | 140 ++++++++++++++++++++++++++++-------------------- 2 files changed, 85 insertions(+), 59 deletions(-) (limited to 'net') diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 2f2a5ca2c878..1d1faf757c9a 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -81,7 +81,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) goto out; t->id.proto = IPPROTO_IPV6; - t->id.spi = xfrm6_tunnel_alloc_spi((xfrm_address_t *)&x->props.saddr); + t->id.spi = xfrm6_tunnel_alloc_spi(&init_net, (xfrm_address_t *)&x->props.saddr); if (!t->id.spi) goto error; @@ -112,7 +112,7 @@ static int ipcomp6_tunnel_attach(struct xfrm_state *x) struct xfrm_state *t = NULL; __be32 spi; - spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&x->props.saddr); + spi = xfrm6_tunnel_spi_lookup(&init_net, (xfrm_address_t *)&x->props.saddr); if (spi) t = xfrm_state_lookup(&init_net, (xfrm_address_t *)&x->id.daddr, spi, IPPROTO_IPV6, AF_INET6); diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 23fb1002124c..d6f9aeec69f7 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -30,6 +30,25 @@ #include #include #include +#include + +#define XFRM6_TUNNEL_SPI_BYADDR_HSIZE 256 +#define XFRM6_TUNNEL_SPI_BYSPI_HSIZE 256 + +#define XFRM6_TUNNEL_SPI_MIN 1 +#define XFRM6_TUNNEL_SPI_MAX 0xffffffff + +struct xfrm6_tunnel_net { + struct hlist_head spi_byaddr[XFRM6_TUNNEL_SPI_BYADDR_HSIZE]; + struct hlist_head spi_byspi[XFRM6_TUNNEL_SPI_BYSPI_HSIZE]; + u32 spi; +}; + +static int xfrm6_tunnel_net_id __read_mostly; +static inline struct xfrm6_tunnel_net *xfrm6_tunnel_pernet(struct net *net) +{ + return net_generic(net, xfrm6_tunnel_net_id); +} /* * xfrm_tunnel_spi things are for allocating unique id ("spi") @@ -46,19 +65,8 @@ struct xfrm6_tunnel_spi { static
DEFINE_SPINLOCK(xfrm6_tunnel_spi_lock); -static u32 xfrm6_tunnel_spi; - -#define XFRM6_TUNNEL_SPI_MIN 1 -#define XFRM6_TUNNEL_SPI_MAX 0xffffffff - static struct kmem_cache *xfrm6_tunnel_spi_kmem __read_mostly; -#define XFRM6_TUNNEL_SPI_BYADDR_HSIZE 256 -#define XFRM6_TUNNEL_SPI_BYSPI_HSIZE 256 - -static struct hlist_head xfrm6_tunnel_spi_byaddr[XFRM6_TUNNEL_SPI_BYADDR_HSIZE]; -static struct hlist_head xfrm6_tunnel_spi_byspi[XFRM6_TUNNEL_SPI_BYSPI_HSIZE]; - static inline unsigned xfrm6_tunnel_spi_hash_byaddr(xfrm_address_t *addr) { unsigned h; @@ -77,49 +85,30 @@ static inline unsigned xfrm6_tunnel_spi_hash_byspi(u32 spi) } -static int xfrm6_tunnel_spi_init(void) +static int __init xfrm6_tunnel_spi_init(void) { - int i; - - xfrm6_tunnel_spi = 0; xfrm6_tunnel_spi_kmem = kmem_cache_create("xfrm6_tunnel_spi", sizeof(struct xfrm6_tunnel_spi), 0, SLAB_HWCACHE_ALIGN, NULL); if (!xfrm6_tunnel_spi_kmem) return -ENOMEM; - - for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) - INIT_HLIST_HEAD(&xfrm6_tunnel_spi_byaddr[i]); - for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++) - INIT_HLIST_HEAD(&xfrm6_tunnel_spi_byspi[i]); return 0; } static void xfrm6_tunnel_spi_fini(void) { - int i; - - for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) { - if (!hlist_empty(&xfrm6_tunnel_spi_byaddr[i])) - return; - } - for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++) { - if (!hlist_empty(&xfrm6_tunnel_spi_byspi[i])) - return; - } - rcu_barrier(); kmem_cache_destroy(xfrm6_tunnel_spi_kmem); - xfrm6_tunnel_spi_kmem = NULL; } -static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr) +static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(struct net *net, xfrm_address_t *saddr) { + struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); struct xfrm6_tunnel_spi *x6spi; struct hlist_node *pos; hlist_for_each_entry_rcu(x6spi, pos, - &xfrm6_tunnel_spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], + &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], list_byaddr) { if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) return x6spi; @@ -128,13 +117,13 @@ static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr) return NULL; } -__be32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr) +__be32 xfrm6_tunnel_spi_lookup(struct net *net, xfrm_address_t *saddr) { struct xfrm6_tunnel_spi *x6spi; u32 spi; rcu_read_lock_bh(); - x6spi = __xfrm6_tunnel_spi_lookup(saddr); + x6spi = __xfrm6_tunnel_spi_lookup(net, saddr); spi = x6spi ? 
x6spi->spi : 0; rcu_read_unlock_bh(); return htonl(spi); @@ -142,14 +131,15 @@ __be32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr) EXPORT_SYMBOL(xfrm6_tunnel_spi_lookup); -static int __xfrm6_tunnel_spi_check(u32 spi) +static int __xfrm6_tunnel_spi_check(struct net *net, u32 spi) { + struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); struct xfrm6_tunnel_spi *x6spi; int index = xfrm6_tunnel_spi_hash_byspi(spi); struct hlist_node *pos; hlist_for_each_entry(x6spi, pos, - &xfrm6_tunnel_spi_byspi[index], + &xfrm6_tn->spi_byspi[index], list_byspi) { if (x6spi->spi == spi) return -1; @@ -157,32 +147,33 @@ static int __xfrm6_tunnel_spi_check(u32 spi) return index; } -static u32 __xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr) +static u32 __xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr) { + struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); u32 spi; struct xfrm6_tunnel_spi *x6spi; int index; - if (xfrm6_tunnel_spi < XFRM6_TUNNEL_SPI_MIN || - xfrm6_tunnel_spi >= XFRM6_TUNNEL_SPI_MAX) - xfrm6_tunnel_spi = XFRM6_TUNNEL_SPI_MIN; + if (xfrm6_tn->spi < XFRM6_TUNNEL_SPI_MIN || + xfrm6_tn->spi >= XFRM6_TUNNEL_SPI_MAX) + xfrm6_tn->spi = XFRM6_TUNNEL_SPI_MIN; else - xfrm6_tunnel_spi++; + xfrm6_tn->spi++; - for (spi = xfrm6_tunnel_spi; spi <= XFRM6_TUNNEL_SPI_MAX; spi++) { - index = __xfrm6_tunnel_spi_check(spi); + for (spi = xfrm6_tn->spi; spi <= XFRM6_TUNNEL_SPI_MAX; spi++) { + index = __xfrm6_tunnel_spi_check(net, spi); if (index >= 0) goto alloc_spi; } - for (spi = XFRM6_TUNNEL_SPI_MIN; spi < xfrm6_tunnel_spi; spi++) { - index = __xfrm6_tunnel_spi_check(spi); + for (spi = XFRM6_TUNNEL_SPI_MIN; spi < xfrm6_tn->spi; spi++) { + index = __xfrm6_tunnel_spi_check(net, spi); if (index >= 0) goto alloc_spi; } spi = 0; goto out; alloc_spi: - xfrm6_tunnel_spi = spi; + xfrm6_tn->spi = spi; x6spi = kmem_cache_alloc(xfrm6_tunnel_spi_kmem, GFP_ATOMIC); if (!x6spi) goto out; @@ -192,26 +183,26 @@ alloc_spi: x6spi->spi = spi; atomic_set(&x6spi->refcnt, 1); - hlist_add_head_rcu(&x6spi->list_byspi, &xfrm6_tunnel_spi_byspi[index]); + hlist_add_head_rcu(&x6spi->list_byspi, &xfrm6_tn->spi_byspi[index]); index = xfrm6_tunnel_spi_hash_byaddr(saddr); - hlist_add_head_rcu(&x6spi->list_byaddr, &xfrm6_tunnel_spi_byaddr[index]); + hlist_add_head_rcu(&x6spi->list_byaddr, &xfrm6_tn->spi_byaddr[index]); out: return spi; } -__be32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr) +__be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr) { struct xfrm6_tunnel_spi *x6spi; u32 spi; spin_lock_bh(&xfrm6_tunnel_spi_lock); - x6spi = __xfrm6_tunnel_spi_lookup(saddr); + x6spi = __xfrm6_tunnel_spi_lookup(net, saddr); if (x6spi) { atomic_inc(&x6spi->refcnt); spi = x6spi->spi; } else - spi = __xfrm6_tunnel_alloc_spi(saddr); + spi = __xfrm6_tunnel_alloc_spi(net, saddr); spin_unlock_bh(&xfrm6_tunnel_spi_lock); return htonl(spi); @@ -225,15 +216,16 @@ static void x6spi_destroy_rcu(struct rcu_head *head) container_of(head, struct xfrm6_tunnel_spi, rcu_head)); } -void xfrm6_tunnel_free_spi(xfrm_address_t *saddr) +void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr) { + struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); struct xfrm6_tunnel_spi *x6spi; struct hlist_node *pos, *n; spin_lock_bh(&xfrm6_tunnel_spi_lock); hlist_for_each_entry_safe(x6spi, pos, n, - &xfrm6_tunnel_spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], + &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], list_byaddr) { if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) { @@ -263,10 +255,11 @@ static 
int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_tunnel_rcv(struct sk_buff *skb) { + struct net *net = dev_net(skb->dev); struct ipv6hdr *iph = ipv6_hdr(skb); __be32 spi; - spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr); + spi = xfrm6_tunnel_spi_lookup(net, (xfrm_address_t *)&iph->saddr); return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi) > 0 ? : 0; } @@ -326,7 +319,9 @@ static int xfrm6_tunnel_init_state(struct xfrm_state *x) static void xfrm6_tunnel_destroy(struct xfrm_state *x) { - xfrm6_tunnel_free_spi((xfrm_address_t *)&x->props.saddr); + struct net *net = xs_net(x); + + xfrm6_tunnel_free_spi(net, (xfrm_address_t *)&x->props.saddr); } static const struct xfrm_type xfrm6_tunnel_type = { @@ -351,6 +346,31 @@ static struct xfrm6_tunnel xfrm46_tunnel_handler = { .priority = 2, }; +static int __net_init xfrm6_tunnel_net_init(struct net *net) +{ + struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); + unsigned int i; + + for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) + INIT_HLIST_HEAD(&xfrm6_tn->spi_byaddr[i]); + for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++) + INIT_HLIST_HEAD(&xfrm6_tn->spi_byspi[i]); + xfrm6_tn->spi = 0; + + return 0; +} + +static void __net_exit xfrm6_tunnel_net_exit(struct net *net) +{ +} + +static struct pernet_operations xfrm6_tunnel_net_ops = { + .init = xfrm6_tunnel_net_init, + .exit = xfrm6_tunnel_net_exit, + .id = &xfrm6_tunnel_net_id, + .size = sizeof(struct xfrm6_tunnel_net), +}; + static int __init xfrm6_tunnel_init(void) { int rv; @@ -367,8 +387,13 @@ static int __init xfrm6_tunnel_init(void) rv = xfrm6_tunnel_spi_init(); if (rv < 0) goto dereg46; + rv = register_pernet_subsys(&xfrm6_tunnel_net_ops); + if (rv < 0) + goto deregspi; return 0; +deregspi: + xfrm6_tunnel_spi_fini(); dereg46: xfrm6_tunnel_deregister(&xfrm46_tunnel_handler, AF_INET); dereg6: @@ -381,6 +406,7 @@ err: static void __exit xfrm6_tunnel_fini(void) { + unregister_pernet_subsys(&xfrm6_tunnel_net_ops); xfrm6_tunnel_spi_fini(); xfrm6_tunnel_deregister(&xfrm46_tunnel_handler, AF_INET); xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6); -- cgit v1.2.2 From a92df2545402c1a08e7a158f4477a52dea0eeeed Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 25 Jan 2010 10:38:34 +0000 Subject: netns xfrm: ipcomp support Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- net/ipv4/ipcomp.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 38fbf04150ae..b55a0c3df82f 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -25,6 +25,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) { + struct net *net = dev_net(skb->dev); __be32 spi; struct iphdr *iph = (struct iphdr *)skb->data; struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2)); @@ -35,7 +36,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) return; spi = htonl(ntohs(ipch->cpi)); - x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, + x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET); if (!x) return; @@ -47,9 +48,10 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) /* We always hold one tunnel user reference to indicate a tunnel */ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) { + struct net *net = xs_net(x); struct xfrm_state *t; - t = xfrm_state_alloc(&init_net); + t = xfrm_state_alloc(net); if (t == NULL) goto out; @@ -82,10 +84,11 @@ error: */ static int ipcomp_tunnel_attach(struct xfrm_state *x) { + struct net *net = xs_net(x); int err = 0; struct xfrm_state *t; - t = xfrm_state_lookup(&init_net, (xfrm_address_t *)&x->id.daddr.a4, + t = xfrm_state_lookup(net, (xfrm_address_t *)&x->id.daddr.a4, x->props.saddr.a4, IPPROTO_IPIP, AF_INET); if (!t) { t = ipcomp_tunnel_create(x); -- cgit v1.2.2 From d74340d31bf1dbeb00acadddd8697666528a7846 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 25 Jan 2010 10:39:09 +0000 Subject: netns xfrm: ipcomp6 support Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv6/ipcomp6.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 1d1faf757c9a..a9fbb151bb79 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -53,6 +53,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { + struct net *net = dev_net(skb->dev); __be32 spi; struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; struct ip_comp_hdr *ipcomph = @@ -63,7 +64,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return; spi = htonl(ntohs(ipcomph->cpi)); - x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6); + x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6); if (!x) return; @@ -74,14 +75,15 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) { + struct net *net = xs_net(x); struct xfrm_state *t = NULL; - t = xfrm_state_alloc(&init_net); + t = xfrm_state_alloc(net); if (!t) goto out; t->id.proto = IPPROTO_IPV6; - t->id.spi = xfrm6_tunnel_alloc_spi(&init_net, (xfrm_address_t *)&x->props.saddr); + t->id.spi = xfrm6_tunnel_alloc_spi(net, (xfrm_address_t *)&x->props.saddr); if (!t->id.spi) goto error; @@ -108,13 +110,14 @@ error: static int ipcomp6_tunnel_attach(struct xfrm_state *x) { + struct net *net = xs_net(x); int err = 0; struct xfrm_state *t = NULL; __be32 spi; - spi = xfrm6_tunnel_spi_lookup(&init_net, (xfrm_address_t *)&x->props.saddr); + spi = xfrm6_tunnel_spi_lookup(net, (xfrm_address_t *)&x->props.saddr); if (spi) - t = xfrm_state_lookup(&init_net, (xfrm_address_t *)&x->id.daddr, + t = 
xfrm_state_lookup(net, (xfrm_address_t *)&x->id.daddr, spi, IPPROTO_IPV6, AF_INET6); if (!t) { t = ipcomp6_tunnel_create(x); -- cgit v1.2.2 From 57dbb2d83d100ea601c54fe129bfde0678db5dee Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Sun, 24 Jan 2010 12:30:59 +0000 Subject: sched: add head drop fifo queue This adds an additional queuing strategy, called pfifo_head_drop, to remove the oldest skb in the case of an overflow within the queue - the head element - instead of the last skb (tail). Removing the oldest skb in congested situations is useful for sensor network environments, where newer packets carry the more relevant information. Reviewed-by: Florian Westphal Acked-by: Patrick McHardy Signed-off-by: Hagen Paul Pfeifer Signed-off-by: David S. Miller --- net/sched/sch_api.c | 1 + net/sched/sch_fifo.c | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) (limited to 'net') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 75fd1c672c61..6cd491013b50 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1707,6 +1707,7 @@ static int __init pktsched_init(void) { register_qdisc(&pfifo_qdisc_ops); register_qdisc(&bfifo_qdisc_ops); + register_qdisc(&pfifo_head_drop_qdisc_ops); register_qdisc(&mq_qdisc_ops); proc_net_fops_create(&init_net, "psched", 0, &psched_fops); diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 69188e8358b4..4b0a6cc44c77 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -43,6 +43,26 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) return qdisc_reshape_fail(skb, sch); } +static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc* sch) +{ + struct sk_buff *skb_head; + struct fifo_sched_data *q = qdisc_priv(sch); + + if (likely(skb_queue_len(&sch->q) < q->limit)) + return qdisc_enqueue_tail(skb, sch); + + /* queue full, remove one skb to fulfill the limit */ + skb_head = qdisc_dequeue_head(sch); + sch->bstats.bytes -= qdisc_pkt_len(skb_head); + sch->bstats.packets--; + sch->qstats.drops++; + kfree_skb(skb_head); + + qdisc_enqueue_tail(skb, sch); + + return NET_XMIT_CN; +} + static int fifo_init(struct Qdisc *sch, struct nlattr *opt) { struct fifo_sched_data *q = qdisc_priv(sch); @@ -108,6 +128,20 @@ struct Qdisc_ops bfifo_qdisc_ops __read_mostly = { }; EXPORT_SYMBOL(bfifo_qdisc_ops); +struct Qdisc_ops pfifo_head_drop_qdisc_ops __read_mostly = { + .id = "pfifo_head_drop", + .priv_size = sizeof(struct fifo_sched_data), + .enqueue = pfifo_tail_enqueue, + .dequeue = qdisc_dequeue_head, + .peek = qdisc_peek_head, + .drop = qdisc_queue_drop_head, + .init = fifo_init, + .reset = qdisc_reset_queue, + .change = fifo_init, + .dump = fifo_dump, + .owner = THIS_MODULE, +}; + /* Pass size change message down to embedded FIFO */ int fifo_set_limit(struct Qdisc *q, unsigned int limit) { -- cgit v1.2.2 From fcafde2e6d81aa7901f9b10e6a097592f0637b79 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 22 Dec 2009 15:58:08 +0200 Subject: Bluetooth: Remove double free of SKB pointer in L2CAP Trivial fix for double free of SKB pointer with kfree_skb to make the code simpler and cleaner. Remove unused variable err. Signed-off-by: Andrei Emeltchenko Acked-by: Gustavo F.
Padovan Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 1120cf14a548..8acc19ed9c4d 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3518,7 +3518,6 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk struct l2cap_pinfo *pi; u16 control, len; u8 tx_seq; - int err; sk = l2cap_get_chan_by_scid(&conn->chan_list, cid); if (!sk) { @@ -3570,13 +3569,11 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk goto drop; if (__is_iframe(control)) - err = l2cap_data_channel_iframe(sk, control, skb); + l2cap_data_channel_iframe(sk, control, skb); else - err = l2cap_data_channel_sframe(sk, control, skb); + l2cap_data_channel_sframe(sk, control, skb); - if (!err) - goto done; - break; + goto done; case L2CAP_MODE_STREAMING: control = get_unaligned_le16(skb->data); @@ -3602,7 +3599,7 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk else pi->expected_tx_seq = tx_seq + 1; - err = l2cap_sar_reassembly_sdu(sk, skb, control); + l2cap_sar_reassembly_sdu(sk, skb, control); goto done; -- cgit v1.2.2 From e420aba331f44de0eed6871441293a6124d566d1 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Wed, 23 Dec 2009 13:07:14 +0200 Subject: Bluetooth: Fix memory leak in L2CAP Move skb_clone after error confition check so it is not going potentially out of the scope. Signed-off-by: Andrei Emeltchenko Acked-by: Gustavo F. Padovan Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 8acc19ed9c4d..400efa26ddba 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1368,7 +1368,6 @@ static int l2cap_ertm_send(struct sock *sk) while ((skb = sk->sk_send_head) && (!l2cap_tx_window_full(sk)) && !(pi->conn_state & L2CAP_CONN_REMOTE_BUSY)) { - tx_skb = skb_clone(skb, GFP_ATOMIC); if (pi->remote_max_tx && bt_cb(skb)->retries == pi->remote_max_tx) { @@ -1376,6 +1375,8 @@ static int l2cap_ertm_send(struct sock *sk) break; } + tx_skb = skb_clone(skb, GFP_ATOMIC); + bt_cb(skb)->retries++; control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE); -- cgit v1.2.2 From 6bf8268f9a91f1065c99503161ebd061492bebe3 Mon Sep 17 00:00:00 2001 From: Bastien Nocera Date: Wed, 20 Jan 2010 12:00:42 +0000 Subject: Bluetooth: Use the control channel for raw HID reports In commit 2da31939a42f7a676a0bc5155d6a0a39ed8451f2, support for Bluetooth hid_output_raw_report was added, but it pushes the data to the interrupt channel instead of the contol one. This patch makes hid_output_raw_report use the control channel instead. Using the interrupt channel was a mistake. 
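For context before the diff: a HIDP control-channel message is a single transaction-header byte (the transaction code OR'd with the report type) followed by the raw report payload, which is exactly what the relocated __hidp_send_ctrl_message() helper below constructs before queuing it on ctrl_transmit. The sketch that follows is a minimal standalone illustration of that framing; the constant values and function name are assumed placeholders for illustration, not copies of the kernel's hidp.h.

/*
 * Illustrative sketch only: build a [1-byte header | report payload]
 * frame as the control-channel path in the diff below does.
 * EX_* values are assumptions for illustration, not kernel constants.
 */
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

#define EX_TRANS_SET_REPORT   0x50	/* assumed transaction code */
#define EX_DATA_RTYPE_FEATURE 0x03	/* assumed report type */

/* Returns a malloc'd frame of size (len + 1), or NULL on failure. */
static uint8_t *build_set_report_frame(const uint8_t *report, size_t len,
				       size_t *frame_len)
{
	uint8_t *frame = malloc(len + 1);

	if (!frame)
		return NULL;

	/* One header byte: transaction type OR'd with the report type. */
	frame[0] = EX_TRANS_SET_REPORT | EX_DATA_RTYPE_FEATURE;
	if (report && len)
		memcpy(frame + 1, report, len);

	*frame_len = len + 1;
	return frame;	/* queued on the control channel, not interrupt */
}
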
Signed-off-by: Bastien Nocera Signed-off-by: Marcel Holtmann --- net/bluetooth/hidp/core.c | 70 ++++++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 18e7f5a43dc4..6cf526d06e21 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -243,6 +243,39 @@ static void hidp_input_report(struct hidp_session *session, struct sk_buff *skb) input_sync(dev); } +static int __hidp_send_ctrl_message(struct hidp_session *session, + unsigned char hdr, unsigned char *data, int size) +{ + struct sk_buff *skb; + + BT_DBG("session %p data %p size %d", session, data, size); + + if (!(skb = alloc_skb(size + 1, GFP_ATOMIC))) { + BT_ERR("Can't allocate memory for new frame"); + return -ENOMEM; + } + + *skb_put(skb, 1) = hdr; + if (data && size > 0) + memcpy(skb_put(skb, size), data, size); + + skb_queue_tail(&session->ctrl_transmit, skb); + + return 0; +} + +static inline int hidp_send_ctrl_message(struct hidp_session *session, + unsigned char hdr, unsigned char *data, int size) +{ + int err; + + err = __hidp_send_ctrl_message(session, hdr, data, size); + + hidp_schedule(session); + + return err; +} + static int hidp_queue_report(struct hidp_session *session, unsigned char *data, int size) { @@ -282,7 +315,9 @@ static int hidp_send_report(struct hidp_session *session, struct hid_report *rep static int hidp_output_raw_report(struct hid_device *hid, unsigned char *data, size_t count) { - if (hidp_queue_report(hid->driver_data, data, count)) + if (hidp_send_ctrl_message(hid->driver_data, + HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_FEATURE, + data, count)) return -ENOMEM; return count; } @@ -307,39 +342,6 @@ static inline void hidp_del_timer(struct hidp_session *session) del_timer(&session->timer); } -static int __hidp_send_ctrl_message(struct hidp_session *session, - unsigned char hdr, unsigned char *data, int size) -{ - struct sk_buff *skb; - - BT_DBG("session %p data %p size %d", session, data, size); - - if (!(skb = alloc_skb(size + 1, GFP_ATOMIC))) { - BT_ERR("Can't allocate memory for new frame"); - return -ENOMEM; - } - - *skb_put(skb, 1) = hdr; - if (data && size > 0) - memcpy(skb_put(skb, size), data, size); - - skb_queue_tail(&session->ctrl_transmit, skb); - - return 0; -} - -static inline int hidp_send_ctrl_message(struct hidp_session *session, - unsigned char hdr, unsigned char *data, int size) -{ - int err; - - err = __hidp_send_ctrl_message(session, hdr, data, size); - - hidp_schedule(session); - - return err; -} - static void hidp_process_handshake(struct hidp_session *session, unsigned char param) { -- cgit v1.2.2 From a2bff2694b02448e1d5873ac010582bc9898021c Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 29 Jan 2010 19:58:56 -0500 Subject: cfg80211: avoid flushing the global workqueue for core reg hints When cfg80211 starts it will send a core regulatory hint. This is sent to the global workqueue but we force processing of it by flushing the global workqueue. The flushing was done since cfg80211 needs last_request to always be populated. Avoid flushing the global workqueue by processing the work required immediately instead of putting it into a linked list and processing it after the flush. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. 
Linville --- net/wireless/reg.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 5f8071de7950..5dcda28b6f04 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1920,14 +1920,12 @@ static int regulatory_hint_core(const char *alpha2) request->alpha2[1] = alpha2[1]; request->initiator = NL80211_REGDOM_SET_BY_CORE; - queue_regulatory_request(request); - /* * This ensures last_request is populated once modules * come swinging in and calling regulatory hints and * wiphy_apply_custom_regulatory(). */ - flush_scheduled_work(); + reg_process_hint(request); return 0; } -- cgit v1.2.2 From 09d989d179d0c679043556dda77c51b41a2dae7e Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 29 Jan 2010 19:58:57 -0500 Subject: cfg80211: add regulatory hint disconnect support This adds a new regulatory hint to be used when we know all devices have been disconnected and idle. This can happen when we suspend, for instance. When we disconnect we can no longer assume the same regulatory rules learned from a country IE or beacon hints are applicable so restore regulatory settings to an initial state. Since driver hints are cached on the wiphy that called the hint, those hints are not reproduced onto cfg80211 as the wiphy will respect its own wiphy->regd regardless. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 157 ++++++++++++++++++++++++++++++++++++++++++++++++++++- net/wireless/reg.h | 18 ++++++ net/wireless/sme.c | 40 ++++++++++++++ 3 files changed, 212 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 5dcda28b6f04..ed89c59bb431 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -134,6 +134,7 @@ static const struct ieee80211_regdomain *cfg80211_world_regdom = &world_regdom; static char *ieee80211_regdom = "00"; +static char user_alpha2[2]; module_param(ieee80211_regdom, charp, 0444); MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); @@ -252,6 +253,27 @@ static bool regdom_changes(const char *alpha2) return true; } +/* + * The NL80211_REGDOM_SET_BY_USER regdom alpha2 is cached, this lets + * you know if a valid regulatory hint with NL80211_REGDOM_SET_BY_USER + * has ever been issued. 
+ */ +static bool is_user_regdom_saved(void) +{ + if (user_alpha2[0] == '9' && user_alpha2[1] == '7') + return false; + + /* This would indicate a mistake on the design */ + if (WARN((!is_world_regdom(user_alpha2) && + !is_an_alpha2(user_alpha2)), + "Unexpected user alpha2: %c%c\n", + user_alpha2[0], + user_alpha2[1])) + return false; + + return true; +} + /** * country_ie_integrity_changes - tells us if the country IE has changed * @checksum: checksum of country IE of fields we are interested in @@ -1646,7 +1668,7 @@ static int ignore_request(struct wiphy *wiphy, switch (pending_request->initiator) { case NL80211_REGDOM_SET_BY_CORE: - return -EINVAL; + return 0; case NL80211_REGDOM_SET_BY_COUNTRY_IE: last_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); @@ -1785,6 +1807,11 @@ new_request: pending_request = NULL; + if (last_request->initiator == NL80211_REGDOM_SET_BY_USER) { + user_alpha2[0] = last_request->alpha2[0]; + user_alpha2[1] = last_request->alpha2[1]; + } + /* When r == REG_INTERSECT we do need to call CRDA */ if (r < 0) { /* @@ -1904,12 +1931,16 @@ static void queue_regulatory_request(struct regulatory_request *request) schedule_work(®_work); } -/* Core regulatory hint -- happens once during cfg80211_init() */ +/* + * Core regulatory hint -- happens during cfg80211_init() + * and when we restore regulatory settings. + */ static int regulatory_hint_core(const char *alpha2) { struct regulatory_request *request; - BUG_ON(last_request); + kfree(last_request); + last_request = NULL; request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); @@ -2107,6 +2138,123 @@ out: mutex_unlock(®_mutex); } +static void restore_alpha2(char *alpha2, bool reset_user) +{ + /* indicates there is no alpha2 to consider for restoration */ + alpha2[0] = '9'; + alpha2[1] = '7'; + + /* The user setting has precedence over the module parameter */ + if (is_user_regdom_saved()) { + /* Unless we're asked to ignore it and reset it */ + if (reset_user) { + REG_DBG_PRINT("cfg80211: Restoring regulatory settings " + "including user preference\n"); + user_alpha2[0] = '9'; + user_alpha2[1] = '7'; + + /* + * If we're ignoring user settings, we still need to + * check the module parameter to ensure we put things + * back as they were for a full restore. + */ + if (!is_world_regdom(ieee80211_regdom)) { + REG_DBG_PRINT("cfg80211: Keeping preference on " + "module parameter ieee80211_regdom: %c%c\n", + ieee80211_regdom[0], + ieee80211_regdom[1]); + alpha2[0] = ieee80211_regdom[0]; + alpha2[1] = ieee80211_regdom[1]; + } + } else { + REG_DBG_PRINT("cfg80211: Restoring regulatory settings " + "while preserving user preference for: %c%c\n", + user_alpha2[0], + user_alpha2[1]); + alpha2[0] = user_alpha2[0]; + alpha2[1] = user_alpha2[1]; + } + } else if (!is_world_regdom(ieee80211_regdom)) { + REG_DBG_PRINT("cfg80211: Keeping preference on " + "module parameter ieee80211_regdom: %c%c\n", + ieee80211_regdom[0], + ieee80211_regdom[1]); + alpha2[0] = ieee80211_regdom[0]; + alpha2[1] = ieee80211_regdom[1]; + } else + REG_DBG_PRINT("cfg80211: Restoring regulatory settings\n"); +} + +/* + * Restoring regulatory settings involves ingoring any + * possibly stale country IE information and user regulatory + * settings if so desired, this includes any beacon hints + * learned as we could have traveled outside to another country + * after disconnection. 
To restore regulatory settings we do + * exactly what we did at bootup: + * + * - send a core regulatory hint + * - send a user regulatory hint if applicable + * + * Device drivers that send a regulatory hint for a specific country + * keep their own regulatory domain on wiphy->regd so that does does + * not need to be remembered. + */ +static void restore_regulatory_settings(bool reset_user) +{ + char alpha2[2]; + struct reg_beacon *reg_beacon, *btmp; + + mutex_lock(&cfg80211_mutex); + mutex_lock(®_mutex); + + reset_regdomains(); + restore_alpha2(alpha2, reset_user); + + /* Clear beacon hints */ + spin_lock_bh(®_pending_beacons_lock); + if (!list_empty(®_pending_beacons)) { + list_for_each_entry_safe(reg_beacon, btmp, + ®_pending_beacons, list) { + list_del(®_beacon->list); + kfree(reg_beacon); + } + } + spin_unlock_bh(®_pending_beacons_lock); + + if (!list_empty(®_beacon_list)) { + list_for_each_entry_safe(reg_beacon, btmp, + ®_beacon_list, list) { + list_del(®_beacon->list); + kfree(reg_beacon); + } + } + + /* First restore to the basic regulatory settings */ + cfg80211_regdomain = cfg80211_world_regdom; + + mutex_unlock(®_mutex); + mutex_unlock(&cfg80211_mutex); + + regulatory_hint_core(cfg80211_regdomain->alpha2); + + /* + * This restores the ieee80211_regdom module parameter + * preference or the last user requested regulatory + * settings, user regulatory settings takes precedence. + */ + if (is_an_alpha2(alpha2)) + regulatory_hint_user(user_alpha2); +} + + +void regulatory_hint_disconnect(void) +{ + REG_DBG_PRINT("cfg80211: All devices are disconnected, going to " + "restore regulatory settings\n"); + restore_regulatory_settings(false); +} + static bool freq_is_chan_12_13_14(u16 freq) { if (freq == ieee80211_channel_to_frequency(12) || @@ -2496,6 +2644,9 @@ int regulatory_init(void) cfg80211_regdomain = cfg80211_world_regdom; + user_alpha2[0] = '9'; + user_alpha2[1] = '7'; + /* We always try to get an update for the static regdomain */ err = regulatory_hint_core(cfg80211_regdomain->alpha2); if (err) { diff --git a/net/wireless/reg.h b/net/wireless/reg.h index 3018508226ab..b26224a9f3bc 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -63,4 +63,22 @@ void regulatory_hint_11d(struct wiphy *wiphy, u8 *country_ie, u8 country_ie_len); +/** + * regulatory_hint_disconnect - informs all devices have been disconneted + * + * Regulotory rules can be enhanced further upon scanning and upon + * connection to an AP. These rules become stale if we disconnect + * and go to another country, whether or not we suspend and resume. + * If we suspend, go to another country and resume we'll automatically + * get disconnected shortly after resuming and things will be reset as well. + * This routine is a helper to restore regulatory settings to how they were + * prior to our first connect attempt. This includes ignoring country IE and + * beacon regulatory hints. The ieee80211_regdom module parameter will always + * be respected but if a user had set the regulatory domain that will take + * precedence. + * + * Must be called from process context. 
+ */ +void regulatory_hint_disconnect(void); + #endif /* __NET_WIRELESS_REG_H */ diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 745c37e7992e..17fde0da1b08 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -34,6 +34,44 @@ struct cfg80211_conn { bool auto_auth, prev_bssid_valid; }; +bool cfg80211_is_all_idle(void) +{ + struct cfg80211_registered_device *rdev; + struct wireless_dev *wdev; + bool is_all_idle = true; + + mutex_lock(&cfg80211_mutex); + + /* + * All devices must be idle as otherwise if you are actively + * scanning some new beacon hints could be learned and would + * count as new regulatory hints. + */ + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + cfg80211_lock_rdev(rdev); + list_for_each_entry(wdev, &rdev->netdev_list, list) { + wdev_lock(wdev); + if (wdev->sme_state != CFG80211_SME_IDLE) + is_all_idle = false; + wdev_unlock(wdev); + } + cfg80211_unlock_rdev(rdev); + } + + mutex_unlock(&cfg80211_mutex); + + return is_all_idle; +} + +static void disconnect_work(struct work_struct *work) +{ + if (!cfg80211_is_all_idle()) + return; + + regulatory_hint_disconnect(); +} + +static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); static int cfg80211_conn_scan(struct wireless_dev *wdev) { @@ -658,6 +696,8 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); wdev->wext.connect.ssid_len = 0; #endif + + schedule_work(&cfg80211_disconnect_work); } void cfg80211_disconnected(struct net_device *dev, u16 reason, -- cgit v1.2.2 From 4c82bf8e5689b1dddd9bcec70efc3b70edef1670 Mon Sep 17 00:00:00 2001 From: Pavel Roskin Date: Sat, 30 Jan 2010 19:55:09 -0500 Subject: mac80211: reduce stack usage in sta_ht_capa_read() The maximal size of the "ht_capa" file is 430 bytes. In most cases, it's much shorter. Use a 512 byte long buffer. 1024 bytes is too much and causes a warning with CONFIG_FRAME_WARN=1024. Signed-off-by: Pavel Roskin Signed-off-by: John W. Linville --- net/mac80211/debugfs_sta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 0d4a759ba72c..84865e7ef13b 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -165,7 +165,7 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf, if (_cond) \ p += scnprintf(p, sizeof(buf)+buf-p, "\t" _str "\n"); \ } while (0) - char buf[1024], *p = buf; + char buf[512], *p = buf; int i; struct sta_info *sta = file->private_data; struct ieee80211_sta_ht_cap *htc = &sta->sta.ht_cap; -- cgit v1.2.2 From e0b20f1c67fc4379fce430ff720969f35e123eed Mon Sep 17 00:00:00 2001 From: Pavel Roskin Date: Sat, 30 Jan 2010 19:55:27 -0500 Subject: mac80211: reduce stack usage in sta_agg_status_read() Use a more compact and readable format for "agg_status" to reduce the stack frame to less than 1024 bytes. Signed-off-by: Pavel Roskin Signed-off-by: John W. 
Linville --- net/mac80211/debugfs_sta.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 84865e7ef13b..d92800bb2d2f 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -120,36 +120,38 @@ STA_OPS(last_seq_ctrl); static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { - char buf[30 + STA_TID_NUM * 70], *p = buf; + char buf[64 + STA_TID_NUM * 40], *p = buf; int i; struct sta_info *sta = file->private_data; spin_lock_bh(&sta->lock); - p += scnprintf(p, sizeof(buf)+buf-p, "next dialog_token is %#02x\n", + p += scnprintf(p, sizeof(buf) + buf - p, "next dialog_token: %#02x\n", sta->ampdu_mlme.dialog_token_allocator + 1); + p += scnprintf(p, sizeof(buf) + buf - p, + "TID\t\tRX\tDTKN\tSSN\t\tTX\tDTKN\tSSN\tpending\n"); for (i = 0; i < STA_TID_NUM; i++) { - p += scnprintf(p, sizeof(buf)+buf-p, "TID %02d:", i); - p += scnprintf(p, sizeof(buf)+buf-p, " RX=%x", + p += scnprintf(p, sizeof(buf) + buf - p, "%02d", i); + p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", sta->ampdu_mlme.tid_state_rx[i]); - p += scnprintf(p, sizeof(buf)+buf-p, "/DTKN=%#.2x", + p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x", sta->ampdu_mlme.tid_state_rx[i] ? sta->ampdu_mlme.tid_rx[i]->dialog_token : 0); - p += scnprintf(p, sizeof(buf)+buf-p, "/SSN=%#.3x", + p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.3x", sta->ampdu_mlme.tid_state_rx[i] ? sta->ampdu_mlme.tid_rx[i]->ssn : 0); - p += scnprintf(p, sizeof(buf)+buf-p, " TX=%x", + p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", sta->ampdu_mlme.tid_state_tx[i]); - p += scnprintf(p, sizeof(buf)+buf-p, "/DTKN=%#.2x", + p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x", sta->ampdu_mlme.tid_state_tx[i] ? sta->ampdu_mlme.tid_tx[i]->dialog_token : 0); - p += scnprintf(p, sizeof(buf)+buf-p, "/SSN=%#.3x", + p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.3x", sta->ampdu_mlme.tid_state_tx[i] ? sta->ampdu_mlme.tid_tx[i]->ssn : 0); - p += scnprintf(p, sizeof(buf)+buf-p, "/pending=%03d", + p += scnprintf(p, sizeof(buf) + buf - p, "\t%03d", sta->ampdu_mlme.tid_state_tx[i] ? skb_queue_len(&sta->ampdu_mlme.tid_tx[i]->pending) : 0); - p += scnprintf(p, sizeof(buf)+buf-p, "\n"); + p += scnprintf(p, sizeof(buf) + buf - p, "\n"); } spin_unlock_bh(&sta->lock); -- cgit v1.2.2 From 4754ffd68bc14de8db01451c49bb07adebe1e422 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 31 Jan 2010 21:50:12 +0100 Subject: mac80211: fix sta lookup for received action frames on an AP VLAN When looking for a matching interface, __ieee80211_rx_handle_packet loops over all active interfaces, looking for matching stations. Because AP VLAN interfaces are not processed as part of this loop, it needs to use sta_info_get_bss instead of sta_info_get in order to find a STA that has been moved to a VLAN. This fixes issues with aggregation setup/teardown. Signed-off-by: Felix Fietkau Signed-off-by: John W. 
Linville --- net/mac80211/rx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 7e0b3e340389..5709307fcb9b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2359,7 +2359,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, continue; } - rx.sta = sta_info_get(prev, hdr->addr2); + rx.sta = sta_info_get_bss(prev, hdr->addr2); rx.flags |= IEEE80211_RX_RA_MATCH; prepares = prepare_for_handlers(prev, &rx, hdr); @@ -2395,7 +2395,7 @@ next: } if (prev) { - rx.sta = sta_info_get(prev, hdr->addr2); + rx.sta = sta_info_get_bss(prev, hdr->addr2); rx.flags |= IEEE80211_RX_RA_MATCH; prepares = prepare_for_handlers(prev, &rx, hdr); -- cgit v1.2.2 From 17ad353b8d9843731258b5d23556667b764939e9 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 31 Jan 2010 21:56:25 +0100 Subject: mac80211: fix monitor mode tx radiotap header handling When an injected frame gets buffered for a powersave STA or filtered and retransmitted, mac80211 attempts to parse the radiotap header again, which doesn't work because it's gone at that point. This patch adds a new flag for checking the availability of a radiotap header, so that it only attempts to parse it once, reusing the tx info on the next call to ieee80211_tx(). This fixes severe issues with rekeying in AP mode. Signed-off-by: Felix Fietkau Cc: stable@kernel.org Signed-off-by: John W. Linville --- net/mac80211/tx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 14c70452c245..e7b1cdc7651b 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1108,7 +1108,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, tx->flags |= IEEE80211_TX_FRAGMENTED; /* process and remove the injection radiotap header */ - if (unlikely(info->flags & IEEE80211_TX_CTL_INJECTED)) { + if (unlikely(info->flags & IEEE80211_TX_INTFL_HAS_RADIOTAP)) { if (!__ieee80211_parse_tx_radiotap(tx, skb)) return TX_DROP; @@ -1117,6 +1117,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, * the radiotap header that was present and pre-filled * 'tx' with tx control information. */ + info->flags &= ~IEEE80211_TX_INTFL_HAS_RADIOTAP; } /* @@ -1499,7 +1500,8 @@ static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, int hdrlen; u16 len_rthdr; - info->flags |= IEEE80211_TX_CTL_INJECTED; + info->flags |= IEEE80211_TX_CTL_INJECTED | + IEEE80211_TX_INTFL_HAS_RADIOTAP; len_rthdr = ieee80211_get_radiotap_len(skb->data); hdr = (struct ieee80211_hdr *)(skb->data + len_rthdr); -- cgit v1.2.2 From b4d57adb727ec7c34020390eeb0eeb9e0a2959bc Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 31 Jan 2010 23:25:24 +0100 Subject: mac80211: fix sta lookup with AP VLAN interfaces and injected frames When injecting frames, mac80211 currently looks for the first AP interface that matches the source address of the injected frame. This breaks when such a frame is directed at a STA that has been moved to a VLAN. This patch fixes it by using sta_info_get_bss instead of sta_info_get, which also finds stations belonging to a VLAN interface of the same BSS as the AP interface. Signed-off-by: Felix Fietkau Signed-off-by: John W. 
Linville --- net/mac80211/tx.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index e7b1cdc7651b..85e382aa894e 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1133,6 +1133,8 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, tx->sta = rcu_dereference(sdata->u.vlan.sta); if (!tx->sta && sdata->dev->ieee80211_ptr->use_4addr) return TX_DROP; + } else if (info->flags & IEEE80211_TX_CTL_INJECTED) { + tx->sta = sta_info_get_bss(sdata, hdr->addr1); } if (!tx->sta) tx->sta = sta_info_get(sdata, hdr->addr1); -- cgit v1.2.2 From 3c384053ce4cb1949f5575c28e30e6ceea8cb39b Mon Sep 17 00:00:00 2001 From: Vasanthakumar Date: Mon, 1 Feb 2010 18:49:07 +0530 Subject: mac80211: Don't call rate control when HW handles it Rate control should not be called to update the tx status when HW does the RC. Signed-off-by: Vasanthakumar Signed-off-by: John W. Linville --- net/mac80211/rate.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 669dddd40521..998cf7a935b6 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -44,6 +44,10 @@ static inline void rate_control_tx_status(struct ieee80211_local *local, struct rate_control_ref *ref = local->rate_ctrl; struct ieee80211_sta *ista = &sta->sta; void *priv_sta = sta->rate_ctrl_priv; + + if (!ref) + return; + ref->ops->tx_status(ref->priv, sband, ista, priv_sta, skb); } -- cgit v1.2.2 From 10a199394b8f9b4c4e0be6e14a61109a7d891b1b Mon Sep 17 00:00:00 2001 From: Simon Arlott Date: Tue, 2 Feb 2010 15:33:38 +0100 Subject: netfilter: xt_TCPMSS: SYN packets are allowed to contain data The TCPMSS target is dropping SYN packets where: 1) There is data, or 2) The data offset makes the TCP header larger than the packet. Both of these result in an error level printk. This printk has been removed. This change avoids dropping SYN packets containing data. If there is also no MSS option (as well as data), one will not be added because of possible complications due to the increased packet size. Signed-off-by: Simon Arlott Signed-off-by: Patrick McHardy --- net/netfilter/xt_TCPMSS.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index eda64c1cb1e5..6f21b4377dbb 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -60,17 +60,9 @@ tcpmss_mangle_packet(struct sk_buff *skb, tcplen = skb->len - tcphoff; tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff); - /* Since it passed flags test in tcp match, we know it is is - not a fragment, and has data >= tcp header length. SYN - packets should not contain data: if they did, then we risk - running over MTU, sending Frag Needed and breaking things - badly. --RR */ - if (tcplen != tcph->doff*4) { - if (net_ratelimit()) - printk(KERN_ERR "xt_TCPMSS: bad length (%u bytes)\n", - skb->len); + /* Header cannot be larger than the packet */ + if (tcplen < tcph->doff*4) return -1; - } if (info->mss == XT_TCPMSS_CLAMP_PMTU) { if (dst_mtu(skb_dst(skb)) <= minlen) { @@ -115,6 +107,12 @@ tcpmss_mangle_packet(struct sk_buff *skb, } } + /* There is data after the header so the option can't be added + without moving it, and doing so may make the SYN packet + itself too large. Accept the packet unmodified instead. */ + if (tcplen > tcph->doff*4) + return 0; + /* * MSS Option not found ?! add it.. 
*/ -- cgit v1.2.2 From 8b64056dacf6ec81986d63dff96fca039fe95f6e Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 2 Feb 2010 07:21:34 -0800 Subject: can: deny filterlist access on non-CAN interfaces In commit 20dd3850bcf860561496827b711fa10fecf6e787 "can: Speed up CAN frame receiption by using ml_priv" the formerly used hlist of receiver lists for each CAN netdevice has been replaced. The hlist content ensured only CAN netdevices to be accessed by the can_rx_(un)register() functions which accidently dropped away together with the hlist receiver implementation. This patch re-introduces the check for CAN netdevices in can_rx_(un)register(). Signed-off-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/can/af_can.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/can/af_can.c b/net/can/af_can.c index bc18b084ffdb..702be5a2c956 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -415,6 +415,9 @@ int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, /* insert new receiver (dev,canid,mask) -> (func,data) */ + if (dev && dev->type != ARPHRD_CAN) + return -ENODEV; + r = kmem_cache_alloc(rcv_cache, GFP_KERNEL); if (!r) return -ENOMEM; @@ -478,6 +481,9 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, struct hlist_node *next; struct dev_rcv_lists *d; + if (dev && dev->type != ARPHRD_CAN) + return; + spin_lock(&can_rcvlists_lock); d = find_dev_rcv_lists(dev); -- cgit v1.2.2 From c85bb41e93184bf5494dde6d8fe5a81b564c84c8 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Tue, 2 Feb 2010 07:32:29 -0800 Subject: igmp: fix ip_mc_sf_allow race [v5] Almost all igmp functions accessing inet->mc_list are protected by rtnl_lock(), but there is one exception which is ip_mc_sf_allow(), so there is a chance of either ip_mc_drop_socket or ip_mc_leave_group remove an entry while ip_mc_sf_allow is running causing a crash. Signed-off-by: Flavio Leitner Signed-off-by: David S. 
Miller --- net/ipv4/igmp.c | 83 ++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 62 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 8f5468393f01..d28363998743 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1799,7 +1799,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) iml->next = inet->mc_list; iml->sflist = NULL; iml->sfmode = MCAST_EXCLUDE; - inet->mc_list = iml; + rcu_assign_pointer(inet->mc_list, iml); ip_mc_inc_group(in_dev, addr); err = 0; done: @@ -1807,24 +1807,46 @@ done: return err; } +static void ip_sf_socklist_reclaim(struct rcu_head *rp) +{ + struct ip_sf_socklist *psf; + + psf = container_of(rp, struct ip_sf_socklist, rcu); + /* sk_omem_alloc should have been decreased by the caller*/ + kfree(psf); +} + static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, struct in_device *in_dev) { + struct ip_sf_socklist *psf = iml->sflist; int err; - if (iml->sflist == NULL) { + if (psf == NULL) { /* any-source empty exclude case */ return ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, iml->sfmode, 0, NULL, 0); } err = ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, - iml->sfmode, iml->sflist->sl_count, - iml->sflist->sl_addr, 0); - sock_kfree_s(sk, iml->sflist, IP_SFLSIZE(iml->sflist->sl_max)); - iml->sflist = NULL; + iml->sfmode, psf->sl_count, psf->sl_addr, 0); + rcu_assign_pointer(iml->sflist, NULL); + /* decrease mem now to avoid the memleak warning */ + atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc); + call_rcu(&psf->rcu, ip_sf_socklist_reclaim); return err; } + +static void ip_mc_socklist_reclaim(struct rcu_head *rp) +{ + struct ip_mc_socklist *iml; + + iml = container_of(rp, struct ip_mc_socklist, rcu); + /* sk_omem_alloc should have been decreased by the caller*/ + kfree(iml); +} + + /* * Ask a socket to leave a group. 
*/ @@ -1854,12 +1876,14 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) (void) ip_mc_leave_src(sk, iml, in_dev); - *imlp = iml->next; + rcu_assign_pointer(*imlp, iml->next); if (in_dev) ip_mc_dec_group(in_dev, group); rtnl_unlock(); - sock_kfree_s(sk, iml, sizeof(*iml)); + /* decrease mem now to avoid the memleak warning */ + atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); + call_rcu(&iml->rcu, ip_mc_socklist_reclaim); return 0; } if (!in_dev) @@ -1974,9 +1998,12 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct if (psl) { for (i=0; isl_count; i++) newpsl->sl_addr[i] = psl->sl_addr[i]; - sock_kfree_s(sk, psl, IP_SFLSIZE(psl->sl_max)); + /* decrease mem now to avoid the memleak warning */ + atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + call_rcu(&psl->rcu, ip_sf_socklist_reclaim); } - pmc->sflist = psl = newpsl; + rcu_assign_pointer(pmc->sflist, newpsl); + psl = newpsl; } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ for (i=0; isl_count; i++) { @@ -2072,11 +2099,13 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) if (psl) { (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, psl->sl_count, psl->sl_addr, 0); - sock_kfree_s(sk, psl, IP_SFLSIZE(psl->sl_max)); + /* decrease mem now to avoid the memleak warning */ + atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + call_rcu(&psl->rcu, ip_sf_socklist_reclaim); } else (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, 0, NULL, 0); - pmc->sflist = newpsl; + rcu_assign_pointer(pmc->sflist, newpsl); pmc->sfmode = msf->imsf_fmode; err = 0; done: @@ -2209,30 +2238,40 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) struct ip_mc_socklist *pmc; struct ip_sf_socklist *psl; int i; + int ret; + ret = 1; if (!ipv4_is_multicast(loc_addr)) - return 1; + goto out; - for (pmc=inet->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for (pmc=rcu_dereference(inet->mc_list); pmc; pmc=rcu_dereference(pmc->next)) { if (pmc->multi.imr_multiaddr.s_addr == loc_addr && pmc->multi.imr_ifindex == dif) break; } + ret = inet->mc_all; if (!pmc) - return inet->mc_all; + goto unlock; psl = pmc->sflist; + ret = (pmc->sfmode == MCAST_EXCLUDE); if (!psl) - return pmc->sfmode == MCAST_EXCLUDE; + goto unlock; for (i=0; isl_count; i++) { if (psl->sl_addr[i] == rmt_addr) break; } + ret = 0; if (pmc->sfmode == MCAST_INCLUDE && i >= psl->sl_count) - return 0; + goto unlock; if (pmc->sfmode == MCAST_EXCLUDE && i < psl->sl_count) - return 0; - return 1; + goto unlock; + ret = 1; +unlock: + rcu_read_unlock(); +out: + return ret; } /* @@ -2251,7 +2290,7 @@ void ip_mc_drop_socket(struct sock *sk) rtnl_lock(); while ((iml = inet->mc_list) != NULL) { struct in_device *in_dev; - inet->mc_list = iml->next; + rcu_assign_pointer(inet->mc_list, iml->next); in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); (void) ip_mc_leave_src(sk, iml, in_dev); @@ -2259,7 +2298,9 @@ void ip_mc_drop_socket(struct sock *sk) ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); in_dev_put(in_dev); } - sock_kfree_s(sk, iml, sizeof(*iml)); + /* decrease mem now to avoid the memleak warning */ + atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); + call_rcu(&iml->rcu, ip_mc_socklist_reclaim); } rtnl_unlock(); } -- cgit v1.2.2 From d1c9ae6d1e7b95cedc8e39e8949e795379a0669e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 2 Feb 2010 11:46:50 -0800 Subject: ipv4: ip_fragment: fix unbalanced rcu_read_unlock() Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv4/ip_fragment.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 9f41bd311754..b59430bc041c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -234,10 +234,9 @@ static void ip_expire(unsigned long arg) /* Send an ICMP "Fragment Reassembly Timeout" message. */ icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); - } - out_rcu_unlock: - rcu_read_unlock(); + rcu_read_unlock(); + } out: spin_unlock(&qp->q.lock); ipq_put(qp); -- cgit v1.2.2 From d0833a6a2dbc169aba3abcc0904e5aea348bb4d3 Mon Sep 17 00:00:00 2001 From: Andriy Tkachuk Date: Tue, 2 Feb 2010 15:58:53 +0200 Subject: lib80211: Cosmetics - make room for MIC/CRC near the actual calculation Signed-off-by: Andriy V. Tkachuk Signed-off-by: John W. Linville --- net/wireless/lib80211_crypt_ccmp.c | 2 +- net/wireless/lib80211_crypt_tkip.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c index 2301dc1edc4c..b7fa31d5fd13 100644 --- a/net/wireless/lib80211_crypt_ccmp.c +++ b/net/wireless/lib80211_crypt_ccmp.c @@ -237,7 +237,6 @@ static int lib80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv) return -1; pos = skb->data + hdr_len + CCMP_HDR_LEN; - mic = skb_put(skb, CCMP_MIC_LEN); hdr = (struct ieee80211_hdr *)skb->data; ccmp_init_blocks(key->tfm, hdr, key->tx_pn, data_len, b0, b, s0); @@ -257,6 +256,7 @@ static int lib80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv) pos += len; } + mic = skb_put(skb, CCMP_MIC_LEN); for (i = 0; i < CCMP_MIC_LEN; i++) mic[i] = b[i] ^ s0[i]; diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c index c36287399d7e..c6a3cae1f85e 100644 --- a/net/wireless/lib80211_crypt_tkip.c +++ b/net/wireless/lib80211_crypt_tkip.c @@ -384,9 +384,8 @@ static int lib80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) if ((lib80211_tkip_hdr(skb, hdr_len, rc4key, 16, priv)) < 0) return -1; - icv = skb_put(skb, 4); - crc = ~crc32_le(~0, pos, len); + icv = skb_put(skb, 4); icv[0] = crc; icv[1] = crc >> 8; icv[2] = crc >> 16; -- cgit v1.2.2 From 299af9d3db0fd3a4994e5e66717ecd276bdd60da Mon Sep 17 00:00:00 2001 From: Andriy Tkachuk Date: Tue, 2 Feb 2010 16:33:53 +0200 Subject: lib80211: Introduce TKIP_HDR_LEN define for code clarity Introduce TKIP_HDR_LEN define for code clarity (in the same way as CCMP_HDR_LEN). Also odd len variable (not used) dropped from lib80211_tkip_hdr(). Signed-off-by: Andriy V. Tkachuk Signed-off-by: John W. 
Linville --- net/wireless/lib80211_crypt_tkip.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c index c6a3cae1f85e..8cbdb32ff316 100644 --- a/net/wireless/lib80211_crypt_tkip.c +++ b/net/wireless/lib80211_crypt_tkip.c @@ -36,6 +36,8 @@ MODULE_AUTHOR("Jouni Malinen"); MODULE_DESCRIPTION("lib80211 crypt: TKIP"); MODULE_LICENSE("GPL"); +#define TKIP_HDR_LEN 8 + struct lib80211_tkip_data { #define TKIP_KEY_LEN 32 u8 key[TKIP_KEY_LEN]; @@ -314,13 +316,12 @@ static int lib80211_tkip_hdr(struct sk_buff *skb, int hdr_len, u8 * rc4key, int keylen, void *priv) { struct lib80211_tkip_data *tkey = priv; - int len; u8 *pos; struct ieee80211_hdr *hdr; hdr = (struct ieee80211_hdr *)skb->data; - if (skb_headroom(skb) < 8 || skb->len < hdr_len) + if (skb_headroom(skb) < TKIP_HDR_LEN || skb->len < hdr_len) return -1; if (rc4key == NULL || keylen < 16) @@ -333,9 +334,8 @@ static int lib80211_tkip_hdr(struct sk_buff *skb, int hdr_len, } tkip_mixing_phase2(rc4key, tkey->key, tkey->tx_ttak, tkey->tx_iv16); - len = skb->len - hdr_len; - pos = skb_push(skb, 8); - memmove(pos, pos + 8, hdr_len); + pos = skb_push(skb, TKIP_HDR_LEN); + memmove(pos, pos + TKIP_HDR_LEN, hdr_len); pos += hdr_len; *pos++ = *rc4key; @@ -353,7 +353,7 @@ static int lib80211_tkip_hdr(struct sk_buff *skb, int hdr_len, tkey->tx_iv32++; } - return 8; + return TKIP_HDR_LEN; } static int lib80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) @@ -433,7 +433,7 @@ static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) return -1; } - if (skb->len < hdr_len + 8 + 4) + if (skb->len < hdr_len + TKIP_HDR_LEN + 4) return -1; pos = skb->data + hdr_len; @@ -461,7 +461,7 @@ static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) } iv16 = (pos[0] << 8) | pos[2]; iv32 = pos[4] | (pos[5] << 8) | (pos[6] << 16) | (pos[7] << 24); - pos += 8; + pos += TKIP_HDR_LEN; if (tkip_replay_check(iv32, iv16, tkey->rx_iv32, tkey->rx_iv16)) { #ifdef CONFIG_LIB80211_DEBUG @@ -522,8 +522,8 @@ static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) tkey->rx_iv16_new = iv16; /* Remove IV and ICV */ - memmove(skb->data + 8, skb->data, hdr_len); - skb_pull(skb, 8); + memmove(skb->data + TKIP_HDR_LEN, skb->data, hdr_len); + skb_pull(skb, TKIP_HDR_LEN); skb_trim(skb, skb->len - 4); return keyidx; -- cgit v1.2.2 From 28aecb9d7728dc26bf03ce7925fe622023a83a2a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 29 Jan 2010 04:05:52 +0000 Subject: xfrm: avoid spinlock in get_acqseq() Use atomic_inc_return() in get_acqseq() to avoid taking a spinlock Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/key/af_key.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 4744b1f6372f..e2aacf0ba013 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3019,12 +3019,11 @@ static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_e static u32 get_acqseq(void) { u32 res; - static u32 acqseq; - static DEFINE_SPINLOCK(acqseq_lock); + static atomic_t acqseq; - spin_lock_bh(&acqseq_lock); - res = (++acqseq ? 
: ++acqseq); - spin_unlock_bh(&acqseq_lock); + do { + res = atomic_inc_return(&acqseq); + } while (!res); return res; } -- cgit v1.2.2 From 2eff25c18c3d332d3c4dd98f2ac9b7114e9771b0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 3 Feb 2010 13:24:54 +0100 Subject: netfilter: xt_hashlimit: fix race condition and simplify locking As noticed by Shin Hong , there is a race between htable_find_get() and htable_put(): htable_put(): htable_find_get(): spin_lock_bh(&hashlimit_lock); atomic_dec_and_test(&hinfo->use) atomic_inc(&hinfo->use) spin_unlock_bh(&hashlimit_lock) return hinfo; spin_lock_bh(&hashlimit_lock); hlist_del(&hinfo->node); spin_unlock_bh(&hashlimit_lock); htable_destroy(hinfo); The entire locking concept is overly complicated, tables are only created/referenced and released in process context, so a single mutex works just fine. Remove the hashinfo_spinlock and atomic reference count and use the mutex to protect table lookups/creation and reference count changes. Signed-off-by: Patrick McHardy --- net/netfilter/xt_hashlimit.c | 52 +++++++++++++++----------------------------- 1 file changed, 18 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index fb7fcb773a3f..017c95966aa8 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -90,7 +90,7 @@ struct dsthash_ent { struct xt_hashlimit_htable { struct hlist_node node; /* global list of all htables */ - atomic_t use; + int use; u_int8_t family; bool rnd_initialized; @@ -109,8 +109,7 @@ struct xt_hashlimit_htable { struct hlist_head hash[0]; /* hashtable itself */ }; -static DEFINE_SPINLOCK(hashlimit_lock); /* protects htables list */ -static DEFINE_MUTEX(hlimit_mutex); /* additional checkentry protection */ +static DEFINE_MUTEX(hashlimit_mutex); /* protects htables list */ static struct kmem_cache *hashlimit_cachep __read_mostly; static inline bool dst_cmp(const struct dsthash_ent *ent, @@ -244,7 +243,7 @@ static int htable_create_v0(struct net *net, struct xt_hashlimit_info *minfo, u_ for (i = 0; i < hinfo->cfg.size; i++) INIT_HLIST_HEAD(&hinfo->hash[i]); - atomic_set(&hinfo->use, 1); + hinfo->use = 1; hinfo->count = 0; hinfo->family = family; hinfo->rnd_initialized = false; @@ -263,9 +262,9 @@ static int htable_create_v0(struct net *net, struct xt_hashlimit_info *minfo, u_ hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval); add_timer(&hinfo->timer); - spin_lock_bh(&hashlimit_lock); + mutex_lock(&hashlimit_mutex); hlist_add_head(&hinfo->node, &hashlimit_net->htables); - spin_unlock_bh(&hashlimit_lock); + mutex_unlock(&hashlimit_mutex); return 0; } @@ -308,7 +307,7 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, for (i = 0; i < hinfo->cfg.size; i++) INIT_HLIST_HEAD(&hinfo->hash[i]); - atomic_set(&hinfo->use, 1); + hinfo->use = 1; hinfo->count = 0; hinfo->family = family; hinfo->rnd_initialized = false; @@ -328,9 +327,9 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval); add_timer(&hinfo->timer); - spin_lock_bh(&hashlimit_lock); + mutex_lock(&hashlimit_mutex); hlist_add_head(&hinfo->node, &hashlimit_net->htables); - spin_unlock_bh(&hashlimit_lock); + mutex_unlock(&hashlimit_mutex); return 0; } @@ -402,27 +401,24 @@ static struct xt_hashlimit_htable *htable_find_get(struct net *net, struct xt_hashlimit_htable *hinfo; struct hlist_node *pos; - spin_lock_bh(&hashlimit_lock); 
hlist_for_each_entry(hinfo, pos, &hashlimit_net->htables, node) { if (!strcmp(name, hinfo->pde->name) && hinfo->family == family) { - atomic_inc(&hinfo->use); - spin_unlock_bh(&hashlimit_lock); + hinfo->use++; return hinfo; } } - spin_unlock_bh(&hashlimit_lock); return NULL; } static void htable_put(struct xt_hashlimit_htable *hinfo) { - if (atomic_dec_and_test(&hinfo->use)) { - spin_lock_bh(&hashlimit_lock); + mutex_lock(&hashlimit_mutex); + if (--hinfo->use == 0) { hlist_del(&hinfo->node); - spin_unlock_bh(&hashlimit_lock); htable_destroy(hinfo); } + mutex_unlock(&hashlimit_mutex); } /* The algorithm used is the Simple Token Bucket Filter (TBF) @@ -710,19 +706,13 @@ static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par) if (r->name[sizeof(r->name) - 1] != '\0') return false; - /* This is the best we've got: We cannot release and re-grab lock, - * since checkentry() is called before x_tables.c grabs xt_mutex. - * We also cannot grab the hashtable spinlock, since htable_create will - * call vmalloc, and that can sleep. And we cannot just re-search - * the list of htable's in htable_create(), since then we would - * create duplicate proc files. -HW */ - mutex_lock(&hlimit_mutex); + mutex_lock(&hashlimit_mutex); r->hinfo = htable_find_get(net, r->name, par->match->family); if (!r->hinfo && htable_create_v0(net, r, par->match->family) != 0) { - mutex_unlock(&hlimit_mutex); + mutex_unlock(&hashlimit_mutex); return false; } - mutex_unlock(&hlimit_mutex); + mutex_unlock(&hashlimit_mutex); return true; } @@ -752,19 +742,13 @@ static bool hashlimit_mt_check(const struct xt_mtchk_param *par) return false; } - /* This is the best we've got: We cannot release and re-grab lock, - * since checkentry() is called before x_tables.c grabs xt_mutex. - * We also cannot grab the hashtable spinlock, since htable_create will - * call vmalloc, and that can sleep. And we cannot just re-search - * the list of htable's in htable_create(), since then we would - * create duplicate proc files. -HW */ - mutex_lock(&hlimit_mutex); + mutex_lock(&hashlimit_mutex); info->hinfo = htable_find_get(net, info->name, par->match->family); if (!info->hinfo && htable_create(net, info, par->match->family) != 0) { - mutex_unlock(&hlimit_mutex); + mutex_unlock(&hashlimit_mutex); return false; } - mutex_unlock(&hlimit_mutex); + mutex_unlock(&hashlimit_mutex); return true; } -- cgit v1.2.2 From 794e68716bab578ae8f8912dc934496d7c7abc90 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 3 Feb 2010 13:41:29 +0100 Subject: netfilter: ctnetlink: only assign helpers for matching protocols Make sure not to assign a helper for a different network or transport layer protocol to a connection. Additionally change expectation deletion by helper to compare the name directly - there might be multiple helper registrations using the same name, currently one of them is chosen in an unpredictable manner and only those expectations are removed. 
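Before the diff, a standalone sketch of the stricter lookup rule this patch introduces: a helper only matches when its name, layer-3 family and layer-4 protocol number all agree with the connection, so two registrations sharing a name can no longer be confused. The struct layout and function below are simplified illustrations, not the kernel's types.

/*
 * Illustrative sketch of matching a helper on (name, l3num, protonum)
 * rather than on the name alone. Types are placeholders.
 */
#include <string.h>
#include <stdint.h>
#include <stddef.h>

struct ex_helper {
	const char *name;
	uint16_t l3num;		/* e.g. AF_INET vs. AF_INET6 */
	uint8_t  protonum;	/* e.g. IPPROTO_TCP vs. IPPROTO_UDP */
};

static const struct ex_helper *
ex_helper_find(const struct ex_helper *tbl, size_t n,
	       const char *name, uint16_t l3num, uint8_t protonum)
{
	size_t i;

	for (i = 0; i < n; i++) {
		/* Matching on the name alone could pick a helper
		 * registered for a different protocol. */
		if (!strcmp(tbl[i].name, name) &&
		    tbl[i].l3num == l3num &&
		    tbl[i].protonum == protonum)
			return &tbl[i];
	}
	return NULL;
}
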
Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_helper.c | 8 +++++--- net/netfilter/nf_conntrack_netlink.c | 23 +++++++++++------------ 2 files changed, 16 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 65c2a7bc3afc..c0e461f466ae 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -65,7 +65,7 @@ __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple) } struct nf_conntrack_helper * -__nf_conntrack_helper_find_byname(const char *name) +__nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum) { struct nf_conntrack_helper *h; struct hlist_node *n; @@ -73,13 +73,15 @@ __nf_conntrack_helper_find_byname(const char *name) for (i = 0; i < nf_ct_helper_hsize; i++) { hlist_for_each_entry_rcu(h, n, &nf_ct_helper_hash[i], hnode) { - if (!strcmp(h->name, name)) + if (!strcmp(h->name, name) && + h->tuple.src.l3num == l3num && + h->tuple.dst.protonum == protonum) return h; } } return NULL; } -EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find_byname); +EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find); struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) { diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 79478dfba27e..16f86d61e5d1 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1001,7 +1001,8 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) return 0; } - helper = __nf_conntrack_helper_find_byname(helpname); + helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), + nf_ct_protonum(ct)); if (helper == NULL) { #ifdef CONFIG_MODULES spin_unlock_bh(&nf_conntrack_lock); @@ -1012,7 +1013,8 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) } spin_lock_bh(&nf_conntrack_lock); - helper = __nf_conntrack_helper_find_byname(helpname); + helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), + nf_ct_protonum(ct)); if (helper) return -EAGAIN; #endif @@ -1211,7 +1213,8 @@ ctnetlink_create_conntrack(struct net *net, if (err < 0) goto err2; - helper = __nf_conntrack_helper_find_byname(helpname); + helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), + nf_ct_protonum(ct)); if (helper == NULL) { rcu_read_unlock(); #ifdef CONFIG_MODULES @@ -1221,7 +1224,9 @@ ctnetlink_create_conntrack(struct net *net, } rcu_read_lock(); - helper = __nf_conntrack_helper_find_byname(helpname); + helper = __nf_conntrack_helper_find(helpname, + nf_ct_l3num(ct), + nf_ct_protonum(ct)); if (helper) { err = -EAGAIN; goto err2; @@ -1714,7 +1719,6 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, struct net *net = sock_net(ctnl); struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; - struct nf_conntrack_helper *h; struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct hlist_node *n, *next; u_int8_t u3 = nfmsg->nfgen_family; @@ -1751,18 +1755,13 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, /* delete all expectations for this helper */ spin_lock_bh(&nf_conntrack_lock); - h = __nf_conntrack_helper_find_byname(name); - if (!h) { - spin_unlock_bh(&nf_conntrack_lock); - return -EOPNOTSUPP; - } for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, n, next, &net->ct.expect_hash[i], hnode) { m_help = nfct_help(exp->master); - if (m_help->helper == h - && del_timer(&exp->timeout)) { + if (!strcmp(m_help->helper->name, name) && + 
del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); nf_ct_expect_put(exp); } -- cgit v1.2.2 From add67461240c1dadc7c8d97e66f8f92b556ca523 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 3 Feb 2010 13:45:12 +0100 Subject: netfilter: add struct net * to target parameters Signed-off-by: Patrick McHardy --- net/bridge/netfilter/ebtables.c | 10 ++++++---- net/ipv4/netfilter/ip_tables.c | 8 +++++--- net/ipv6/netfilter/ip6_tables.c | 8 +++++--- 3 files changed, 16 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 1aa0e4c1f52d..12beb580aa21 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -579,13 +579,14 @@ ebt_cleanup_match(struct ebt_entry_match *m, struct net *net, unsigned int *i) } static inline int -ebt_cleanup_watcher(struct ebt_entry_watcher *w, unsigned int *i) +ebt_cleanup_watcher(struct ebt_entry_watcher *w, struct net *net, unsigned int *i) { struct xt_tgdtor_param par; if (i && (*i)-- == 0) return 1; + par.net = net; par.target = w->u.watcher; par.targinfo = w->data; par.family = NFPROTO_BRIDGE; @@ -606,10 +607,11 @@ ebt_cleanup_entry(struct ebt_entry *e, struct net *net, unsigned int *cnt) /* we're done */ if (cnt && (*cnt)-- == 0) return 1; - EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, NULL); + EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, net, NULL); EBT_MATCH_ITERATE(e, ebt_cleanup_match, net, NULL); t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); + par.net = net; par.target = t->u.target; par.targinfo = t->data; par.family = NFPROTO_BRIDGE; @@ -674,7 +676,7 @@ ebt_check_entry(struct ebt_entry *e, } i = 0; - mtpar.net = net; + mtpar.net = tgpar.net = net; mtpar.table = tgpar.table = name; mtpar.entryinfo = tgpar.entryinfo = e; mtpar.hook_mask = tgpar.hook_mask = hookmask; @@ -730,7 +732,7 @@ ebt_check_entry(struct ebt_entry *e, (*cnt)++; return 0; cleanup_watchers: - EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, &j); + EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, net, &j); cleanup_matches: EBT_MATCH_ITERATE(e, ebt_cleanup_match, net, &i); return ret; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index cfaba0e2e6fc..7fde8f6950d8 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -638,10 +638,11 @@ err: return ret; } -static int check_target(struct ipt_entry *e, const char *name) +static int check_target(struct ipt_entry *e, struct net *net, const char *name) { struct ipt_entry_target *t = ipt_get_target(e); struct xt_tgchk_param par = { + .net = net, .table = name, .entryinfo = e, .target = t->u.kernel.target, @@ -697,7 +698,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, } t->u.kernel.target = target; - ret = check_target(e, name); + ret = check_target(e, net, name); if (ret) goto err; @@ -788,6 +789,7 @@ cleanup_entry(struct ipt_entry *e, struct net *net, unsigned int *i) IPT_MATCH_ITERATE(e, cleanup_match, net, NULL); t = ipt_get_target(e); + par.net = net; par.target = t->u.kernel.target; par.targinfo = t->data; par.family = NFPROTO_IPV4; @@ -1675,7 +1677,7 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name, if (ret) goto cleanup_matches; - ret = check_target(e, name); + ret = check_target(e, net, name); if (ret) goto cleanup_matches; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 9f1d45f2ba8f..0376ed6d5594 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ 
b/net/ipv6/netfilter/ip6_tables.c @@ -669,10 +669,11 @@ err: return ret; } -static int check_target(struct ip6t_entry *e, const char *name) +static int check_target(struct ip6t_entry *e, struct net *net, const char *name) { struct ip6t_entry_target *t = ip6t_get_target(e); struct xt_tgchk_param par = { + .net = net, .table = name, .entryinfo = e, .target = t->u.kernel.target, @@ -729,7 +730,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, } t->u.kernel.target = target; - ret = check_target(e, name); + ret = check_target(e, net, name); if (ret) goto err; @@ -820,6 +821,7 @@ cleanup_entry(struct ip6t_entry *e, struct net *net, unsigned int *i) IP6T_MATCH_ITERATE(e, cleanup_match, net, NULL); t = ip6t_get_target(e); + par.net = net; par.target = t->u.kernel.target; par.targinfo = t->data; par.family = NFPROTO_IPV6; @@ -1710,7 +1712,7 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net, if (ret) goto cleanup_matches; - ret = check_target(e, name); + ret = check_target(e, net, name); if (ret) goto cleanup_matches; -- cgit v1.2.2 From 858b31330054a9ad259feceea0ad1ce5385c47f0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 3 Feb 2010 13:48:53 +0100 Subject: netfilter: nf_conntrack: split up IPCT_STATUS event Split up the IPCT_STATUS event into an IPCT_REPLY event, which is generated when the IPS_SEEN_REPLY bit is set, and an IPCT_ASSURED event, which is generated when the IPS_ASSURED bit is set. In combination with a following patch to support selective event delivery, this can be used for "sparse" conntrack replication: start replicating the conntrack entry after it reached the ASSURED state and that way it's SYN-flood resistant. Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_core.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 6 ++++-- net/netfilter/nf_conntrack_proto_gre.c | 2 +- net/netfilter/nf_conntrack_proto_sctp.c | 2 +- net/netfilter/nf_conntrack_proto_tcp.c | 2 +- net/netfilter/nf_conntrack_proto_udp.c | 2 +- net/netfilter/nf_conntrack_proto_udplite.c | 2 +- 7 files changed, 10 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0e98c3282d42..091ff770eb7b 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -825,7 +825,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, } if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_REPLY, ct); return ret; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 16f86d61e5d1..ff594eb138c1 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1371,7 +1371,8 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, else events = IPCT_NEW; - nf_conntrack_eventmask_report((1 << IPCT_STATUS) | + nf_conntrack_eventmask_report((1 << IPCT_REPLY) | + (1 << IPCT_ASSURED) | (1 << IPCT_HELPER) | (1 << IPCT_PROTOINFO) | (1 << IPCT_NATSEQADJ) | @@ -1396,7 +1397,8 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err == 0) { nf_conntrack_get(&ct->ct_general); spin_unlock_bh(&nf_conntrack_lock); - nf_conntrack_eventmask_report((1 << IPCT_STATUS) | + nf_conntrack_eventmask_report((1 << IPCT_REPLY) | + (1 << IPCT_ASSURED) | (1 << IPCT_HELPER) | (1 << IPCT_PROTOINFO) | (1 << IPCT_NATSEQADJ) | diff --git a/net/netfilter/nf_conntrack_proto_gre.c 
b/net/netfilter/nf_conntrack_proto_gre.c index c99cfba64ddc..d899b1a69940 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -241,7 +241,7 @@ static int gre_packet(struct nf_conn *ct, ct->proto.gre.stream_timeout); /* Also, more likely to be important, and not a probe. */ set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_ASSURED, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, ct->proto.gre.timeout); diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index f9d930f80276..b68ff15ed979 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -377,7 +377,7 @@ static int sctp_packet(struct nf_conn *ct, new_state == SCTP_CONNTRACK_ESTABLISHED) { pr_debug("Setting assured bit\n"); set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_ASSURED, ct); } return NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 3c96437b45ad..ad118053971a 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1045,7 +1045,7 @@ static int tcp_packet(struct nf_conn *ct, after SYN_RECV or a valid answer for a picked up connection. */ set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_ASSURED, ct); } nf_ct_refresh_acct(ct, ctinfo, skb, timeout); diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 5c5518bedb4b..8d38f9a4bed8 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -77,7 +77,7 @@ static int udp_packet(struct nf_conn *ct, nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout_stream); /* Also, more likely to be important, and not a probe */ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_ASSURED, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout); diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 458655bb2106..0b1bc9ba6678 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -75,7 +75,7 @@ static int udplite_packet(struct nf_conn *ct, nf_ct_udplite_timeout_stream); /* Also, more likely to be important, and not a probe */ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_ASSURED, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udplite_timeout); -- cgit v1.2.2 From 0cebe4b4163b6373c9d24c1a192939777bc27e55 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 3 Feb 2010 13:51:51 +0100 Subject: netfilter: ctnetlink: support selective event delivery Add two masks for conntrack end expectation events to struct nf_conntrack_ecache and use them to filter events. Their default value is "all events" when the event sysctl is on and "no events" when it is off. A following patch will add specific initializations. Expectation events depend on the ecache struct of their master conntrack. 
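As a rough illustration of the masking idea described above, the sketch below models a per-entry event mask in plain userspace C; the structure layout, field names and helper are illustrative stand-ins, not the kernel's nf_conntrack_ecache API.

    /* Sketch only: models per-conntrack event filtering, not kernel code. */
    #include <stdio.h>

    struct ecache_sketch {
            unsigned long ctmask;   /* conntrack events this entry delivers */
            unsigned long expmask;  /* expectation events this entry delivers */
    };

    static void report_event(const struct ecache_sketch *e, unsigned long event_bit)
    {
            if (!e || !(event_bit & e->ctmask)) {
                    printf("event 0x%lx filtered\n", event_bit);  /* not delivered */
                    return;
            }
            printf("event 0x%lx delivered\n", event_bit);
    }

    int main(void)
    {
            struct ecache_sketch on  = { .ctmask = ~0UL, .expmask = ~0UL }; /* sysctl on  */
            struct ecache_sketch off = { .ctmask = 0UL,  .expmask = 0UL  }; /* sysctl off */

            report_event(&on,  1UL << 3);
            report_event(&off, 1UL << 3);
            return 0;
    }

With masks like these, an entry can opt into only the events it cares about, which is what makes the "sparse" replication mentioned earlier in this series possible.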
Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_core.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 091ff770eb7b..53b8da6ad6b7 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -648,7 +648,7 @@ init_conntrack(struct net *net, } nf_ct_acct_ext_add(ct, GFP_ATOMIC); - nf_ct_ecache_ext_add(ct, GFP_ATOMIC); + nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); spin_lock_bh(&nf_conntrack_lock); exp = nf_ct_find_expectation(net, tuple); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index ff594eb138c1..f5c0b09e12f1 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1281,7 +1281,7 @@ ctnetlink_create_conntrack(struct net *net, } nf_ct_acct_ext_add(ct, GFP_ATOMIC); - nf_ct_ecache_ext_add(ct, GFP_ATOMIC); + nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) -- cgit v1.2.2 From b2a15a604d379af323645e330638e2cfcc696aff Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 3 Feb 2010 14:13:03 +0100 Subject: netfilter: nf_conntrack: support conntrack templates Support initializing selected parameters of new conntrack entries from a "conntrack template", which is a specially marked conntrack entry attached to the skb. Currently the helper and the event delivery masks can be initialized this way. Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_defrag_ipv4.c | 2 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 2 +- net/netfilter/nf_conntrack_core.c | 50 +++++++++++++++++--------- net/netfilter/nf_conntrack_helper.c | 17 ++++++--- net/netfilter/nf_conntrack_netlink.c | 2 +- 5 files changed, 50 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 331ead3ebd1b..77627fa80561 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -59,7 +59,7 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, #if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE) /* Previously seen (loopback)? Ignore. Do this before fragment check. */ - if (skb->nfct) + if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) return NF_ACCEPT; #endif #endif diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 0956ebabbff2..55ce22e5de49 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -212,7 +212,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum, struct sk_buff *reasm; /* Previously seen (loopback)? */ - if (skb->nfct) + if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) return NF_ACCEPT; reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb)); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 53b8da6ad6b7..471e2a79d26f 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -618,7 +618,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free); /* Allocate a new conntrack: we return -ENOMEM if classification failed due to stress. Otherwise it really is unclassifiable. 
*/ static struct nf_conntrack_tuple_hash * -init_conntrack(struct net *net, +init_conntrack(struct net *net, struct nf_conn *tmpl, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_l3proto *l3proto, struct nf_conntrack_l4proto *l4proto, @@ -628,6 +628,7 @@ init_conntrack(struct net *net, struct nf_conn *ct; struct nf_conn_help *help; struct nf_conntrack_tuple repl_tuple; + struct nf_conntrack_ecache *ecache; struct nf_conntrack_expect *exp; if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) { @@ -648,7 +649,11 @@ init_conntrack(struct net *net, } nf_ct_acct_ext_add(ct, GFP_ATOMIC); - nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); + + ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; + nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0, + ecache ? ecache->expmask : 0, + GFP_ATOMIC); spin_lock_bh(&nf_conntrack_lock); exp = nf_ct_find_expectation(net, tuple); @@ -673,7 +678,7 @@ init_conntrack(struct net *net, nf_conntrack_get(&ct->master->ct_general); NF_CT_STAT_INC(net, expect_new); } else { - __nf_ct_try_assign_helper(ct, GFP_ATOMIC); + __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); NF_CT_STAT_INC(net, new); } @@ -694,7 +699,7 @@ init_conntrack(struct net *net, /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ static inline struct nf_conn * -resolve_normal_ct(struct net *net, +resolve_normal_ct(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, u_int16_t l3num, @@ -718,7 +723,8 @@ resolve_normal_ct(struct net *net, /* look for tuple match */ h = nf_conntrack_find_get(net, &tuple); if (!h) { - h = init_conntrack(net, &tuple, l3proto, l4proto, skb, dataoff); + h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, + skb, dataoff); if (!h) return NULL; if (IS_ERR(h)) @@ -755,7 +761,7 @@ unsigned int nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, struct sk_buff *skb) { - struct nf_conn *ct; + struct nf_conn *ct, *tmpl = NULL; enum ip_conntrack_info ctinfo; struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l4proto *l4proto; @@ -764,10 +770,14 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, int set_reply = 0; int ret; - /* Previously seen (loopback or untracked)? Ignore. */ if (skb->nfct) { - NF_CT_STAT_INC_ATOMIC(net, ignore); - return NF_ACCEPT; + /* Previously seen (loopback or untracked)? Ignore. */ + tmpl = (struct nf_conn *)skb->nfct; + if (!nf_ct_is_template(tmpl)) { + NF_CT_STAT_INC_ATOMIC(net, ignore); + return NF_ACCEPT; + } + skb->nfct = NULL; } /* rcu_read_lock()ed by nf_hook_slow */ @@ -778,7 +788,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, pr_debug("not prepared to track yet or error occured\n"); NF_CT_STAT_INC_ATOMIC(net, error); NF_CT_STAT_INC_ATOMIC(net, invalid); - return -ret; + ret = -ret; + goto out; } l4proto = __nf_ct_l4proto_find(pf, protonum); @@ -791,22 +802,25 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, if (ret <= 0) { NF_CT_STAT_INC_ATOMIC(net, error); NF_CT_STAT_INC_ATOMIC(net, invalid); - return -ret; + ret = -ret; + goto out; } } - ct = resolve_normal_ct(net, skb, dataoff, pf, protonum, + ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, l3proto, l4proto, &set_reply, &ctinfo); if (!ct) { /* Not valid part of a connection */ NF_CT_STAT_INC_ATOMIC(net, invalid); - return NF_ACCEPT; + ret = NF_ACCEPT; + goto out; } if (IS_ERR(ct)) { /* Too stressed to deal. 
*/ NF_CT_STAT_INC_ATOMIC(net, drop); - return NF_DROP; + ret = NF_DROP; + goto out; } NF_CT_ASSERT(skb->nfct); @@ -821,11 +835,15 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, NF_CT_STAT_INC_ATOMIC(net, invalid); if (ret == -NF_DROP) NF_CT_STAT_INC_ATOMIC(net, drop); - return -ret; + ret = -ret; + goto out; } if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_REPLY, ct); +out: + if (tmpl) + nf_ct_put(tmpl); return ret; } @@ -864,7 +882,7 @@ void nf_conntrack_alter_reply(struct nf_conn *ct, return; rcu_read_lock(); - __nf_ct_try_assign_helper(ct, GFP_ATOMIC); + __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index c0e461f466ae..8144b0da5515 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -96,13 +96,22 @@ struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) } EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add); -int __nf_ct_try_assign_helper(struct nf_conn *ct, gfp_t flags) +int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, + gfp_t flags) { + struct nf_conntrack_helper *helper = NULL; + struct nf_conn_help *help; int ret = 0; - struct nf_conntrack_helper *helper; - struct nf_conn_help *help = nfct_help(ct); - helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + if (tmpl != NULL) { + help = nfct_help(tmpl); + if (help != NULL) + helper = help->helper; + } + + help = nfct_help(ct); + if (helper == NULL) + helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); if (helper == NULL) { if (help) rcu_assign_pointer(help->helper, NULL); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index f5c0b09e12f1..09044f9f4b2e 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1249,7 +1249,7 @@ ctnetlink_create_conntrack(struct net *net, } } else { /* try an implicit helper assignation */ - err = __nf_ct_try_assign_helper(ct, GFP_ATOMIC); + err = __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC); if (err < 0) goto err2; } -- cgit v1.2.2 From d4bfa033ed84e0ae446eff445d107ffd5ee78df3 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 29 Jan 2010 15:03:36 +0100 Subject: HID: make raw reports possible for both feature and output reports In commit 2da31939a42 ("Bluetooth: Implement raw output support for HIDP layer"), support for Bluetooth hid_output_raw_report was added, but it pushes the data to the intr socket instead of the ctrl one. This has been fixed by 6bf8268f9a91f1 ("Bluetooth: Use the control channel for raw HID reports") Still, it is necessary to distinguish whether the report in question should be either FEATURE or OUTPUT. For this, we have to extend the generic HID API, so that hid_output_raw_report() callback provides means to specify this value so that it can be passed down to lower level hardware drivers (currently Bluetooth and USB). 
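To make the API change concrete, here is a hedged sketch of the extended callback shape and a caller that now states which report type it wants; the stand-in struct and the numeric defines are illustrative and mirror the usual HID report-type naming rather than being a verbatim kernel excerpt.

    /* Illustrative stand-ins only; not the real struct hid_device. */
    #include <stddef.h>
    #include <errno.h>

    #define HID_OUTPUT_REPORT  1
    #define HID_FEATURE_REPORT 2

    struct hid_dev_sketch {
            void *driver_data;
            int (*output_raw_report)(struct hid_dev_sketch *hid,
                                     unsigned char *data, size_t count,
                                     unsigned char report_type);
    };

    /* A caller can now say whether the bytes go out as FEATURE or OUTPUT. */
    static int send_feature_report(struct hid_dev_sketch *hid,
                                   unsigned char *buf, size_t len)
    {
            if (!hid || !hid->output_raw_report)
                    return -ENODEV;
            return hid->output_raw_report(hid, buf, len, HID_FEATURE_REPORT);
    }

The transport driver (Bluetooth HIDP in the diff below, or usbhid) then maps that report type onto its own message format.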
Based on original patch by Bastien Nocera Acked-by: Marcel Holtmann Signed-off-by: Jiri Kosina --- net/bluetooth/hidp/core.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 6cf526d06e21..37ba153c4cd4 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -313,10 +313,21 @@ static int hidp_send_report(struct hidp_session *session, struct hid_report *rep return hidp_queue_report(session, buf, rsize); } -static int hidp_output_raw_report(struct hid_device *hid, unsigned char *data, size_t count) +static int hidp_output_raw_report(struct hid_device *hid, unsigned char *data, size_t count, + unsigned char report_type) { - if (hidp_send_ctrl_message(hid->driver_data, - HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_FEATURE, + switch (report_type) { + case HID_FEATURE_REPORT: + report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_FEATURE; + break; + case HID_OUTPUT_REPORT: + report_type = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT; + break; + default: + return -EINVAL; + } + + if (hidp_send_ctrl_message(hid->driver_data, report_type, data, count)) return -ENOMEM; return count; -- cgit v1.2.2 From 84f3bb9ae9db90f7fb15d98b55279a58ab1b2363 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 3 Feb 2010 17:17:06 +0100 Subject: netfilter: xtables: add CT target Add a new target for the raw table, which can be used to specify conntrack parameters for specific connections, f.i. the conntrack helper. The target attaches a "template" connection tracking entry to the skb, which is used by the conntrack core when initializing a new conntrack. Signed-off-by: Patrick McHardy --- net/netfilter/Kconfig | 12 +++ net/netfilter/Makefile | 1 + net/netfilter/nf_conntrack_helper.c | 19 +++++ net/netfilter/xt_CT.c | 158 ++++++++++++++++++++++++++++++++++++ 4 files changed, 190 insertions(+) create mode 100644 net/netfilter/xt_CT.c (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 634d14affc8d..4469d45261f4 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -341,6 +341,18 @@ config NETFILTER_XT_TARGET_CONNSECMARK To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_TARGET_CT + tristate '"CT" target support' + depends on NF_CONNTRACK + depends on IP_NF_RAW || IP6_NF_RAW + depends on NETFILTER_ADVANCED + help + This options adds a `CT' target, which allows to specify initial + connection tracking parameters like events to be delivered and + the helper to be used. + + To compile it as a module, choose M here. If unsure, say N. 
+ config NETFILTER_XT_TARGET_DSCP tristate '"DSCP" and "TOS" target support' depends on IP_NF_MANGLE || IP6_NF_MANGLE diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 49f62ee4e9ff..f873644f02f6 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -44,6 +44,7 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o +obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 8144b0da5515..a74a5769877b 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -83,6 +83,25 @@ __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum) } EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find); +struct nf_conntrack_helper * +nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum) +{ + struct nf_conntrack_helper *h; + + h = __nf_conntrack_helper_find(name, l3num, protonum); +#ifdef CONFIG_MODULES + if (h == NULL) { + if (request_module("nfct-helper-%s", name) == 0) + h = __nf_conntrack_helper_find(name, l3num, protonum); + } +#endif + if (h != NULL && !try_module_get(h->me)) + h = NULL; + + return h; +} +EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get); + struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) { struct nf_conn_help *help; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c new file mode 100644 index 000000000000..8183a054256f --- /dev/null +++ b/net/netfilter/xt_CT.c @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2010 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int xt_ct_target(struct sk_buff *skb, + const struct xt_target_param *par) +{ + const struct xt_ct_target_info *info = par->targinfo; + struct nf_conn *ct = info->ct; + + /* Previously seen (loopback)? Ignore. 
*/ + if (skb->nfct != NULL) + return XT_CONTINUE; + + atomic_inc(&ct->ct_general.use); + skb->nfct = &ct->ct_general; + skb->nfctinfo = IP_CT_NEW; + + return XT_CONTINUE; +} + +static u8 xt_ct_find_proto(const struct xt_tgchk_param *par) +{ + if (par->family == AF_INET) { + const struct ipt_entry *e = par->entryinfo; + + if (e->ip.invflags & IPT_INV_PROTO) + return 0; + return e->ip.proto; + } else if (par->family == AF_INET6) { + const struct ip6t_entry *e = par->entryinfo; + + if (e->ipv6.invflags & IP6T_INV_PROTO) + return 0; + return e->ipv6.proto; + } else + return 0; +} + +static bool xt_ct_tg_check(const struct xt_tgchk_param *par) +{ + struct xt_ct_target_info *info = par->targinfo; + struct nf_conntrack_tuple t; + struct nf_conn_help *help; + struct nf_conn *ct; + u8 proto; + + if (info->flags & ~XT_CT_NOTRACK) + return false; + + if (info->flags & XT_CT_NOTRACK) { + ct = &nf_conntrack_untracked; + atomic_inc(&ct->ct_general.use); + goto out; + } + + if (nf_ct_l3proto_try_module_get(par->family) < 0) + goto err1; + + memset(&t, 0, sizeof(t)); + ct = nf_conntrack_alloc(par->net, &t, &t, GFP_KERNEL); + if (IS_ERR(ct)) + goto err2; + + if ((info->ct_events || info->exp_events) && + !nf_ct_ecache_ext_add(ct, info->ct_events, info->exp_events, + GFP_KERNEL)) + goto err3; + + if (info->helper[0]) { + proto = xt_ct_find_proto(par); + if (!proto) + goto err3; + + help = nf_ct_helper_ext_add(ct, GFP_KERNEL); + if (help == NULL) + goto err3; + + help->helper = nf_conntrack_helper_try_module_get(info->helper, + par->family, + proto); + if (help->helper == NULL) + goto err3; + } + + __set_bit(IPS_TEMPLATE_BIT, &ct->status); + __set_bit(IPS_CONFIRMED_BIT, &ct->status); +out: + info->ct = ct; + return true; + +err3: + nf_conntrack_free(ct); +err2: + nf_ct_l3proto_module_put(par->family); +err1: + return false; +} + +static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par) +{ + struct xt_ct_target_info *info = par->targinfo; + struct nf_conn *ct = info->ct; + struct nf_conn_help *help; + + if (ct != &nf_conntrack_untracked) { + help = nfct_help(ct); + if (help) + module_put(help->helper->me); + + nf_ct_l3proto_module_put(par->family); + } + nf_ct_put(info->ct); +} + +static struct xt_target xt_ct_tg __read_mostly = { + .name = "CT", + .family = NFPROTO_UNSPEC, + .targetsize = XT_ALIGN(sizeof(struct xt_ct_target_info)), + .checkentry = xt_ct_tg_check, + .destroy = xt_ct_tg_destroy, + .target = xt_ct_target, + .table = "raw", + .me = THIS_MODULE, +}; + +static int __init xt_ct_tg_init(void) +{ + return xt_register_target(&xt_ct_tg); +} + +static void __exit xt_ct_tg_exit(void) +{ + xt_unregister_target(&xt_ct_tg); +} + +module_init(xt_ct_tg_init); +module_exit(xt_ct_tg_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Xtables: connection tracking target"); +MODULE_ALIAS("ipt_CT"); +MODULE_ALIAS("ip6t_CT"); -- cgit v1.2.2 From 1038a00b458997661bcd0e780a24dc280a8841fc Mon Sep 17 00:00:00 2001 From: Nick Pelly Date: Wed, 3 Feb 2010 11:42:26 -0800 Subject: Bluetooth: Fallback eSCO to SCO on error 0x1a (Unsupported Remote Feature) General Motors carkits that use LGE BT chipsets return this error code when an eSCO is attempted, despite advertising eSCO support. 
2009-08-13 14:41:39.755518 < HCI Command: Setup Synchronous Connection (0x01|0x0028) plen 17 handle 1 voice setting 0x0060 2009-08-13 14:41:39.757563 > HCI Event: Command Status (0x0f) plen 4 Setup Synchronous Connection (0x01|0x0028) status 0x00 ncmd 1 2009-08-13 14:41:39.789484 > HCI Event: Synchronous Connect Complete (0x2c) plen 17 status 0x1a handle 257 bdaddr 00:1E:B2:23:5E:B3 type eSCO Error: Unsupported Remote Feature / Unsupported LMP Feature Signed-off-by: Jaikumar Ganesh Signed-off-by: Nick Pelly Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_event.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 28517bad796c..592da5c909c1 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1699,6 +1699,7 @@ static inline void hci_sync_conn_complete_evt(struct hci_dev *hdev, struct sk_bu break; case 0x1c: /* SCO interval rejected */ + case 0x1a: /* Unsupported Remote Feature */ case 0x1f: /* Unspecified error */ if (conn->out && conn->attempt < 2) { conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) | -- cgit v1.2.2 From 485f1eff73a7b932fd3abb0dfcf804e1a1f59025 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 3 Feb 2010 15:52:18 -0800 Subject: Bluetooth: Fix sleeping function in RFCOMM within invalid context With the commit 9e726b17422bade75fba94e625cd35fd1353e682 the rfcomm_session_put() gets accidentially called from a timeout callback and results in this: BUG: sleeping function called from invalid context at net/core/sock.c:1897 in_atomic(): 1, irqs_disabled(): 0, pid: 0, name: swapper Pid: 0, comm: swapper Tainted: P 2.6.32 #31 Call Trace: [] __might_sleep+0xf8/0xfa [] lock_sock_nested+0x29/0xc4 [] lock_sock+0xb/0xd [l2cap] [] l2cap_sock_shutdown+0x1c/0x76 [l2cap] [] ? clockevents_program_event+0x75/0x7e [] ? tick_dev_program_event+0x37/0xa5 [] l2cap_sock_release+0x27/0x67 [l2cap] [] sock_release+0x1a/0x67 [] rfcomm_session_del+0x34/0x53 [rfcomm] [] rfcomm_session_put+0x14/0x16 [rfcomm] [] rfcomm_session_timeout+0xe/0x1a [rfcomm] [] run_timer_softirq+0x1e2/0x29a [] ? rfcomm_session_timeout+0x0/0x1a [rfcomm] [] __do_softirq+0xfe/0x1c5 [] ? timer_interrupt+0x1a/0x21 [] call_softirq+0x1c/0x28 [] do_softirq+0x33/0x6b [] irq_exit+0x36/0x85 [] do_IRQ+0xa6/0xbd [] ret_from_intr+0x0/0xa [] ? acpi_idle_enter_bm+0x269/0x294 [] ? acpi_idle_enter_bm+0x25f/0x294 [] ? cpuidle_idle_call+0x97/0x107 [] ? cpu_idle+0x53/0xaa [] ? rest_init+0x7a/0x7c [] ? start_kernel+0x389/0x394 [] ? x86_64_start_reservations+0xac/0xb0 [] ? x86_64_start_kernel+0xe4/0xeb To fix this, the rfcomm_session_put() needs to be moved out of rfcomm_session_timeout() into rfcomm_process_sessions(). In that context it is perfectly fine to sleep and disconnect the socket. 
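The shape of the fix is the classic "don't sleep in a timer" pattern: the timer callback only records that the timeout fired, and the later, process-context pass does the work that may block. A minimal userspace analogue of that split (a sketch, not the rfcomm code itself):

    #include <stdio.h>
    #include <stdbool.h>

    struct session_sketch {
            bool timed_out;
            int  refcnt;
    };

    /* Analogue of rfcomm_session_timeout(): must not block. */
    static void session_timeout(struct session_sketch *s)
    {
            s->timed_out = true;            /* just mark it and return */
    }

    /* Analogue of rfcomm_process_sessions(): blocking work is fine here. */
    static void process_sessions(struct session_sketch *s)
    {
            if (s->timed_out) {
                    s->timed_out = false;
                    printf("send DISC (may block)\n");
                    if (--s->refcnt == 0)
                            printf("release session (may block)\n");
            }
    }

    int main(void)
    {
            struct session_sketch s = { .timed_out = false, .refcnt = 1 };

            session_timeout(&s);            /* fires in "atomic" context */
            process_sessions(&s);           /* later, in "process" context */
            return 0;
    }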
Signed-off-by: Marcel Holtmann Tested-by: David John --- net/bluetooth/rfcomm/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index fc5ee3296e22..2b506373957a 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -252,7 +252,6 @@ static void rfcomm_session_timeout(unsigned long arg) BT_DBG("session %p state %ld", s, s->state); set_bit(RFCOMM_TIMED_OUT, &s->flags); - rfcomm_session_put(s); rfcomm_schedule(RFCOMM_SCHED_TIMEO); } @@ -1920,6 +1919,7 @@ static inline void rfcomm_process_sessions(void) if (test_and_clear_bit(RFCOMM_TIMED_OUT, &s->flags)) { s->state = BT_DISCONN; rfcomm_send_disc(s, 0); + rfcomm_session_put(s); continue; } -- cgit v1.2.2 From 6c2718da59613d76013b501bf0f8bcf9d7794b2d Mon Sep 17 00:00:00 2001 From: Nick Pelly Date: Wed, 3 Feb 2010 16:18:36 -0800 Subject: Bluetooth: Do not call rfcomm_session_put() for RFCOMM UA on closed socket When processing a RFCOMM UA frame when the socket is closed and we were not the RFCOMM initiator would cause rfcomm_session_put() to be called twice during rfcomm_process_rx(). This would cause a kernel panic in rfcomm_session_close() then. This could be easily reproduced during disconnect with devices such as Motorola H270 that send RFCOMM UA followed quickly by L2CAP disconnect request. This trace for this looks like: 2009-09-21 17:22:37.788895 < ACL data: handle 1 flags 0x02 dlen 8 L2CAP(d): cid 0x0041 len 4 [psm 3] RFCOMM(s): DISC: cr 0 dlci 20 pf 1 ilen 0 fcs 0x7d 2009-09-21 17:22:37.906204 > HCI Event: Number of Completed Packets (0x13) plen 5 handle 1 packets 1 2009-09-21 17:22:37.933090 > ACL data: handle 1 flags 0x02 dlen 8 L2CAP(d): cid 0x0040 len 4 [psm 3] RFCOMM(s): UA: cr 0 dlci 20 pf 1 ilen 0 fcs 0x57 2009-09-21 17:22:38.636764 < ACL data: handle 1 flags 0x02 dlen 8 L2CAP(d): cid 0x0041 len 4 [psm 3] RFCOMM(s): DISC: cr 0 dlci 0 pf 1 ilen 0 fcs 0x9c 2009-09-21 17:22:38.744125 > HCI Event: Number of Completed Packets (0x13) plen 5 handle 1 packets 1 2009-09-21 17:22:38.763687 > ACL data: handle 1 flags 0x02 dlen 8 L2CAP(d): cid 0x0040 len 4 [psm 3] RFCOMM(s): UA: cr 0 dlci 0 pf 1 ilen 0 fcs 0xb6 2009-09-21 17:22:38.783554 > ACL data: handle 1 flags 0x02 dlen 12 L2CAP(s): Disconn req: dcid 0x0040 scid 0x0041 Avoid calling rfcomm_session_put() twice by skipping this call in rfcomm_recv_ua() if the socket is closed. Signed-off-by: Nick Pelly Signed-off-by: Marcel Holtmann --- net/bluetooth/rfcomm/core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 2b506373957a..89f4a59eb82b 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -1150,7 +1150,11 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci) break; case BT_DISCONN: - rfcomm_session_put(s); + /* When socket is closed and we are not RFCOMM + * initiator rfcomm_process_rx already calls + * rfcomm_session_put() */ + if (s->sock->sk->sk_state != BT_CLOSED) + rfcomm_session_put(s); break; } } -- cgit v1.2.2 From 180211b841b5bf13ab10d19202adab3eb7749f6c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 30 Jan 2010 02:53:27 +0000 Subject: af_key: fix netns ops ordering on module load/unload 1. After sock_register() returns, it's possible to create sockets, even if module still not initialized fully (blame generic module code for that!) 2. 
Consequently, pfkey_create() can be called with pfkey_net_id still not initialized which will BUG_ON in net_generic(): kernel BUG at include/net/netns/generic.h:43! 3. During netns shutdown, netns ops should be unregistered after key manager unregistered because key manager calls can be triggered from xfrm_user module: general protection fault: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC pfkey_broadcast+0x111/0x210 [af_key] pfkey_send_notify+0x16a/0x300 [af_key] km_state_notify+0x41/0x70 xfrm_flush_sa+0x75/0x90 [xfrm_user] 4. Unregister netns ops after socket ops just in case and for symmetry. Reported by Luca Tettamanti. Signed-off-by: Alexey Dobriyan Tested-by: Luca Tettamanti Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/key/af_key.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 76fa6fef6473..539f43bc97db 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3794,9 +3794,9 @@ static struct pernet_operations pfkey_net_ops = { static void __exit ipsec_pfkey_exit(void) { - unregister_pernet_subsys(&pfkey_net_ops); xfrm_unregister_km(&pfkeyv2_mgr); sock_unregister(PF_KEY); + unregister_pernet_subsys(&pfkey_net_ops); proto_unregister(&key_proto); } @@ -3807,21 +3807,22 @@ static int __init ipsec_pfkey_init(void) if (err != 0) goto out; - err = sock_register(&pfkey_family_ops); + err = register_pernet_subsys(&pfkey_net_ops); if (err != 0) goto out_unregister_key_proto; + err = sock_register(&pfkey_family_ops); + if (err != 0) + goto out_unregister_pernet; err = xfrm_register_km(&pfkeyv2_mgr); if (err != 0) goto out_sock_unregister; - err = register_pernet_subsys(&pfkey_net_ops); - if (err != 0) - goto out_xfrm_unregister_km; out: return err; -out_xfrm_unregister_km: - xfrm_unregister_km(&pfkeyv2_mgr); + out_sock_unregister: sock_unregister(PF_KEY); +out_unregister_pernet: + unregister_pernet_subsys(&pfkey_net_ops); out_unregister_key_proto: proto_unregister(&key_proto); goto out; -- cgit v1.2.2 From 974c37e9d88c3e5a3e56eb98cb9c84232eb2bdcb Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 30 Jan 2010 10:05:05 +0000 Subject: netlink: fix for too early rmmod Netlink code does module autoload if protocol userspace is asking for is not ready. However, module can dissapear right after it was autoloaded. Example: modprobe/rmmod stress-testing and xfrm_user.ko providing NETLINK_XFRM. netlink_create() in such situation _will_ create userspace socket and _will_not_ pin module. 
Now if module was removed and we're going to call ->netlink_rcv into nothing: BUG: unable to handle kernel paging request at ffffffffa02f842a ^^^^^^^^^^^^^^^^ modules are loaded near these addresses here IP: [] 0xffffffffa02f842a PGD 161f067 PUD 1623063 PMD baa12067 PTE 0 Oops: 0010 [#1] PREEMPT SMP DEBUG_PAGEALLOC last sysfs file: /sys/devices/pci0000:00/0000:00:1f.2/host0/target0:0:0/0:0:0:0/block/sda/uevent CPU 1 Pid: 11515, comm: ip Not tainted 2.6.33-rc5-netns-00594-gaaa5728-dirty #6 P5E/P5E RIP: 0010:[] [] 0xffffffffa02f842a RSP: 0018:ffff8800baa3db48 EFLAGS: 00010292 RAX: ffff8800baa3dfd8 RBX: ffff8800be353640 RCX: 0000000000000000 RDX: ffffffff81959380 RSI: ffff8800bab7f130 RDI: 0000000000000001 RBP: ffff8800baa3db58 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000000011 R13: ffff8800be353640 R14: ffff8800bcdec240 R15: ffff8800bd488010 FS: 00007f93749656f0(0000) GS:ffff880002300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: ffffffffa02f842a CR3: 00000000ba82b000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process ip (pid: 11515, threadinfo ffff8800baa3c000, task ffff8800bab7eb30) Stack: ffffffff813637c0 ffff8800bd488000 ffff8800baa3dba8 ffffffff8136397d <0> 0000000000000000 ffffffff81344adc 7fffffffffffffff 0000000000000000 <0> ffff8800baa3ded8 ffff8800be353640 ffff8800bcdec240 0000000000000000 Call Trace: [] ? netlink_unicast+0x100/0x2d0 [] netlink_unicast+0x2bd/0x2d0 netlink_unicast_kernel: nlk->netlink_rcv(skb); [] ? memcpy_fromiovec+0x6c/0x90 [] netlink_sendmsg+0x1d3/0x2d0 [] sock_sendmsg+0xbb/0xf0 [] ? __lock_acquire+0x27b/0xa60 [] ? might_fault+0x73/0xd0 [] ? might_fault+0x73/0xd0 [] ? __lock_release+0x82/0x170 [] ? might_fault+0xbe/0xd0 [] ? might_fault+0x73/0xd0 [] ? verify_iovec+0x47/0xd0 [] sys_sendmsg+0x1a9/0x360 [] ? _raw_spin_unlock_irqrestore+0x65/0x70 [] ? trace_hardirqs_on+0xd/0x10 [] ? _raw_spin_unlock_irqrestore+0x42/0x70 [] ? __up_read+0x84/0xb0 [] ? trace_hardirqs_on_caller+0x145/0x190 [] ? trace_hardirqs_on_thunk+0x3a/0x3f [] system_call_fastpath+0x16/0x1b Code: Bad RIP value. RIP [] 0xffffffffa02f842a RSP CR2: ffffffffa02f842a If module was quickly removed after autoloading, return -E. Return -EPROTONOSUPPORT if module was quickly removed after autoloading. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- net/netlink/af_netlink.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index a4957bf2ca60..4c5972ba8c78 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -455,9 +455,14 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, if (nl_table[protocol].registered && try_module_get(nl_table[protocol].module)) module = nl_table[protocol].module; + else + err = -EPROTONOSUPPORT; cb_mutex = nl_table[protocol].cb_mutex; netlink_unlock_table(); + if (err < 0) + goto out; + err = __netlink_create(net, sock, cb_mutex, protocol); if (err < 0) goto out_module; -- cgit v1.2.2 From 8ed030dd0aa400d18c63861c2c6deb7c38f4edde Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 1 Feb 2010 02:12:19 +0000 Subject: dccp: fix bug in cache allocation This fixes a bug introduced in commit de4ef86cfce60d2250111f34f8a084e769f23b16 ("dccp: fix dccp rmmod when kernel configured to use slub", 17 Jan): the vsnprintf used sizeof(slab_name_fmt), which became truncated to 4 bytes, since slab_name_fmt is now a 4-byte pointer and no longer a 32-character array. This lead to error messages such as FATAL: Error inserting dccp: No buffer space available >> kernel: [ 1456.341501] kmem_cache_create: duplicate cache cci generated due to the truncation after the 3rd character. Fixed for the moment by introducing a symbolic constant. Tested to fix the bug. Signed-off-by: Gerrit Renker Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/dccp/ccid.c | 2 +- net/dccp/ccid.h | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index 57dfb9c8c4f2..ff16e9df1969 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -83,7 +83,7 @@ static struct kmem_cache *ccid_kmem_cache_create(int obj_size, char *slab_name_f va_list args; va_start(args, fmt); - vsnprintf(slab_name_fmt, sizeof(slab_name_fmt), fmt, args); + vsnprintf(slab_name_fmt, CCID_SLAB_NAME_LENGTH, fmt, args); va_end(args); slab = kmem_cache_create(slab_name_fmt, sizeof(struct ccid) + obj_size, 0, diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 269958bf7fe9..6df6f8ac9636 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -19,7 +19,9 @@ #include #include -#define CCID_MAX 255 +/* maximum value for a CCID (RFC 4340, 19.5) */ +#define CCID_MAX 255 +#define CCID_SLAB_NAME_LENGTH 32 struct tcp_info; @@ -49,8 +51,8 @@ struct ccid_operations { const char *ccid_name; struct kmem_cache *ccid_hc_rx_slab, *ccid_hc_tx_slab; - char ccid_hc_rx_slab_name[32]; - char ccid_hc_tx_slab_name[32]; + char ccid_hc_rx_slab_name[CCID_SLAB_NAME_LENGTH]; + char ccid_hc_tx_slab_name[CCID_SLAB_NAME_LENGTH]; __u32 ccid_hc_rx_obj_size, ccid_hc_tx_obj_size; /* Interface Routines */ -- cgit v1.2.2 From 1386be55e32a3c5d8ef4a2b243c530a7b664c02c Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 2 Feb 2010 20:16:56 +0000 Subject: dccp: fix auto-loading of dccp(_probe) This fixes commit (38ff3e6bb987ec583268da8eb22628293095d43b) ("dccp_probe: Fix module load dependencies between dccp and dccp_probe", from 15 Jan). It fixes the construction of the first argument of try_then_request_module(), where only valid return codes from the first argument should be returned. What we do now is assign the result of register_jprobe() to ret, without the side effect of the comparison. Acked-by: Gerrit Renker Signed-off-by: Neil Horman Signed-off-by: David S. 
Miller --- net/dccp/probe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/dccp/probe.c b/net/dccp/probe.c index bace1d8cbcfd..f5b3464f1242 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -161,8 +161,8 @@ static __init int dccpprobe_init(void) if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops)) goto err0; - ret = try_then_request_module((register_jprobe(&dccp_send_probe) == 0), - "dccp"); + try_then_request_module((ret = register_jprobe(&dccp_send_probe)) == 0, + "dccp"); if (ret) goto err1; -- cgit v1.2.2 From c390216b3e868b16d8154939f4b6f8c16dbd9a9f Mon Sep 17 00:00:00 2001 From: Nick Pelly Date: Fri, 13 Nov 2009 14:16:32 -0800 Subject: Bluetooth: Enter active mode before establishing a SCO link. When in sniff mode with a long interval time (1.28s) it can take 4+ seconds to establish a SCO link. Fix by requesting active mode before requesting SCO connection. This improves SCO setup time to ~500ms. Bluetooth headsets that use a long interval time, and exhibit the long SCO connection time include Motorola H790, HX1 and H17. They have a CSR 2.1 chipset. Verified this behavior and fix with host Bluetooth chipsets: BCM4329 and TI1271. 2009-10-13 14:17:46.183722 > HCI Event: Mode Change (0x14) plen 6 status 0x00 handle 1 mode 0x02 interval 2048 Mode: Sniff 2009-10-13 14:17:53.436285 < HCI Command: Setup Synchronous Connection (0x01|0x0028) plen 17 handle 1 voice setting 0x0060 2009-10-13 14:17:53.445593 > HCI Event: Command Status (0x0f) plen 4 Setup Synchronous Connection (0x01|0x0028) status 0x00 ncmd 1 2009-10-13 14:17:57.788855 > HCI Event: Synchronous Connect Complete 0x2c) plen 17 status 0x00 handle 257 bdaddr 00:1A:0E:F1:A4:7F type eSCO Air mode: CVSD Signed-off-by: Nick Pelly Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index b7c4224f4e7d..b10e3cdb08f8 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -377,6 +377,9 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 if (acl->state == BT_CONNECTED && (sco->state == BT_OPEN || sco->state == BT_CLOSED)) { + acl->power_save = 1; + hci_conn_enter_active_mode(acl); + if (lmp_esco_capable(hdev)) hci_setup_sync(sco, acl->handle); else -- cgit v1.2.2 From 454debe45c86102528c90c12eb6a99245b773bfe Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Mon, 1 Feb 2010 08:21:34 +0000 Subject: irda: unbalanced lock_kernel in irnet_ppp Add the missing unlock_kernel in one ioctl operation. Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: David S. Miller --- net/irda/irnet/irnet_ppp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 156020d138b5..7c22c126f0ea 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -706,7 +706,8 @@ dev_irnet_ioctl( lock_kernel(); if(ap->ppp_open && !put_user(ppp_unit_number(&ap->chan), (int __user *)argp)) - err = 0; + err = 0; + unlock_kernel(); break; /* All these ioctls can be passed both directly and from ppp_generic, -- cgit v1.2.2 From 3fdde0a1602d20c02a7d66e07ab6718ab8d79b12 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Mon, 1 Feb 2010 08:21:35 +0000 Subject: irda: add missing BKL in irnet_ppp ioctl One ioctl has been forgotten when the BKL was push down into irnet_ppp ioctl function. 
Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: David S. Miller --- net/irda/irnet/irnet_ppp.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 7c22c126f0ea..6b3602de359a 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -698,9 +698,11 @@ dev_irnet_ioctl( /* Query PPP channel and unit number */ case PPPIOCGCHAN: + lock_kernel(); if(ap->ppp_open && !put_user(ppp_channel_index(&ap->chan), (int __user *)argp)) err = 0; + unlock_kernel(); break; case PPPIOCGUNIT: lock_kernel(); -- cgit v1.2.2 From 6884b348ed759184032306c9435a727741a72298 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 2 Feb 2010 12:43:59 +0000 Subject: net/rds: remove uses of NIPQUAD, use %pI4 Signed-off-by: Joe Perches Cc: Andy Grover Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/rds/tcp_connect.c | 7 +++---- net/rds/tcp_listen.c | 6 +++--- net/rds/tcp_send.c | 4 ++-- 3 files changed, 8 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index 211522f9a9a2..056256285987 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -90,8 +90,8 @@ int rds_tcp_conn_connect(struct rds_connection *conn) ret = sock->ops->bind(sock, (struct sockaddr *)&src, sizeof(src)); if (ret) { - rdsdebug("bind failed with %d at address %u.%u.%u.%u\n", - ret, NIPQUAD(conn->c_laddr)); + rdsdebug("bind failed with %d at address %pI4\n", + ret, &conn->c_laddr); goto out; } @@ -108,8 +108,7 @@ int rds_tcp_conn_connect(struct rds_connection *conn) O_NONBLOCK); sock = NULL; - rdsdebug("connect to address %u.%u.%u.%u returned %d\n", - NIPQUAD(conn->c_faddr), ret); + rdsdebug("connect to address %pI4 returned %d\n", &conn->c_faddr, ret); if (ret == -EINPROGRESS) ret = 0; diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 45474a436862..53cb1b54165d 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -66,9 +66,9 @@ static int rds_tcp_accept_one(struct socket *sock) inet = inet_sk(new_sock->sk); - rdsdebug("accepted tcp %u.%u.%u.%u:%u -> %u.%u.%u.%u:%u\n", - NIPQUAD(inet->inet_saddr), ntohs(inet->inet_sport), - NIPQUAD(inet->inet_daddr), ntohs(inet->inet_dport)); + rdsdebug("accepted tcp %pI4:%u -> %pI4:%u\n", + &inet->inet_saddr, ntohs(inet->inet_sport), + &inet->inet_daddr, ntohs(inet->inet_dport)); conn = rds_conn_create(inet->inet_saddr, inet->inet_daddr, &rds_tcp_transport, GFP_KERNEL); diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index ab545e0cd5d6..34fdcc059e54 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -193,9 +193,9 @@ out: rds_tcp_stats_inc(s_tcp_sndbuf_full); ret = 0; } else { - printk(KERN_WARNING "RDS/tcp: send to %u.%u.%u.%u " + printk(KERN_WARNING "RDS/tcp: send to %pI4 " "returned %d, disconnecting and reconnecting\n", - NIPQUAD(conn->c_faddr), ret); + &conn->c_faddr, ret); rds_conn_drop(conn); } } -- cgit v1.2.2 From 8a83a00b0735190384a348156837918271034144 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 30 Jan 2010 12:23:03 +0000 Subject: net: maintain namespace isolation between vlan and real device In the vlan and macvlan drivers, the start_xmit function forwards data to the dev_queue_xmit function for another device, which may potentially belong to a different namespace. To make sure that classification stays within a single namespace, this resets the potentially critical fields. Signed-off-by: Arnd Bergmann Signed-off-by: David S. 
Miller --- net/8021q/vlan_dev.c | 2 +- net/core/dev.c | 35 +++++++++++++++++++++++++++++++---- 2 files changed, 32 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index a9e1f1785614..9e83272fc5b0 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -322,7 +322,7 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, } - skb->dev = vlan_dev_info(dev)->real_dev; + skb_set_dev(skb, vlan_dev_info(dev)->real_dev); len = skb->len; ret = dev_queue_xmit(skb); diff --git a/net/core/dev.c b/net/core/dev.c index 2cba5c521e56..94c1eeed25e5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1448,13 +1448,10 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) if (skb->len > (dev->mtu + dev->hard_header_len)) return NET_RX_DROP; - skb_dst_drop(skb); + skb_set_dev(skb, dev); skb->tstamp.tv64 = 0; skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, dev); - skb->mark = 0; - secpath_reset(skb); - nf_reset(skb); return netif_rx(skb); } EXPORT_SYMBOL_GPL(dev_forward_skb); @@ -1614,6 +1611,36 @@ static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) return false; } +/** + * skb_dev_set -- assign a new device to a buffer + * @skb: buffer for the new device + * @dev: network device + * + * If an skb is owned by a device already, we have to reset + * all data private to the namespace a device belongs to + * before assigning it a new device. + */ +#ifdef CONFIG_NET_NS +void skb_set_dev(struct sk_buff *skb, struct net_device *dev) +{ + skb_dst_drop(skb); + if (skb->dev && !net_eq(dev_net(skb->dev), dev_net(dev))) { + secpath_reset(skb); + nf_reset(skb); + skb_init_secmark(skb); + skb->mark = 0; + skb->priority = 0; + skb->nf_trace = 0; + skb->ipvs_property = 0; +#ifdef CONFIG_NET_SCHED + skb->tc_index = 0; +#endif + } + skb->dev = dev; +} +EXPORT_SYMBOL(skb_set_dev); +#endif /* CONFIG_NET_NS */ + /* * Invalidate hardware checksum when packet is to be mangled, and * complete checksum manually on outgoing path. -- cgit v1.2.2 From 1b3f720bf033fde1fbb6231f9b156b918c5f68d8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 4 Feb 2010 14:00:41 -0800 Subject: pktgen: Fix freezing problem Add missing try_to_freeze() to one of the pktgen_thread_worker() code paths so that it doesn't block suspend/hibernation. Fixes http://bugzilla.kernel.org/show_bug.cgi?id=15006 Signed-off-by: Rafael J. Wysocki Reported-and-tested-by: Ciprian Dorin Craciun Signed-off-by: David S. Miller --- net/core/pktgen.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index de0c2c726420..2e692afdc55d 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3524,6 +3524,7 @@ static int pktgen_thread_worker(void *arg) wait_event_interruptible_timeout(t->queue, t->control != 0, HZ/10); + try_to_freeze(); continue; } -- cgit v1.2.2 From d088dde7b19628f386c486f16cd471f5b5682ca8 Mon Sep 17 00:00:00 2001 From: Christoph Egger Date: Wed, 3 Feb 2010 06:05:54 +0000 Subject: ipv4: obsolete config in kernel source (IP_ROUTE_PERVASIVE) CONFIG_IP_ROUTE_PERVASIVE is missing a corresponding config IP_ROUTE_PERVASIVE somewhere in KConfig (and missing it for ages already) so it looks like some aging artefact no longer needed. Therefor this patch kills of the only remaining reference to that config Item removing the already unrechable code snipet. Signed-off-by: Christoph Egger Signed-off-by: David S. 
Miller --- net/ipv4/fib_semantics.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 96b21011a3e4..1af0ea0fb6a2 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -528,10 +528,6 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, if (nh->nh_gw) { struct fib_result res; -#ifdef CONFIG_IP_ROUTE_PERVASIVE - if (nh->nh_flags&RTNH_F_PERVASIVE) - return 0; -#endif if (nh->nh_flags&RTNH_F_ONLINK) { struct net_device *dev; -- cgit v1.2.2 From bfd5f4a3d605e0f6054df0b59fe0907ff7e696d3 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Thu, 4 Feb 2010 20:24:10 -0800 Subject: packet: Add GSO/csum offload support. This patch adds GSO/checksum offload to af_packet sockets using virtio_net_hdr. Based on Rusty's patch to add this support to tun. It allows GSO/checksum offload to be enabled when using raw socket backend with virtio_net. Adds PACKET_VNET_HDR socket option to prepend virtio_net_hdr in the receive path and process/skip virtio_net_hdr in the send path. This option is only allowed with SOCK_RAW sockets attached to ethernet type devices. v2 updates ---------- Michael's Comments - Perform length check in packet_snd() when GSO is off even when vnet_hdr is present. - Check for SKB_GSO_FCOE type and return -EINVAL - don't allow tx/rx ring when vnet_hdr is enabled. Herbert's Comments - Removed ethernet specific code. - protocol value is assumed to be passed in by the caller. Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- net/packet/af_packet.c | 187 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 176 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 53633c5fdb1d..178e2937bbaa 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -80,6 +80,7 @@ #include #include #include +#include #ifdef CONFIG_INET #include @@ -193,7 +194,8 @@ struct packet_sock { struct mutex pg_vec_lock; unsigned int running:1, /* prot_hook is attached*/ auxdata:1, - origdev:1; + origdev:1, + has_vnet_hdr:1; int ifindex; /* bound device */ __be16 num; struct packet_mclist *mclist; @@ -1056,6 +1058,30 @@ out: } #endif +static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad, + size_t reserve, size_t len, + size_t linear, int noblock, + int *err) +{ + struct sk_buff *skb; + + /* Under a page? Don't bother with paged skb. */ + if (prepad + len < PAGE_SIZE || !linear) + linear = len; + + skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, + err); + if (!skb) + return NULL; + + skb_reserve(skb, reserve); + skb_put(skb, linear); + skb->data_len = len - linear; + skb->len += len - linear; + + return skb; +} + static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) { @@ -1066,14 +1092,17 @@ static int packet_snd(struct socket *sock, __be16 proto; unsigned char *addr; int ifindex, err, reserve = 0; + struct virtio_net_hdr vnet_hdr = { 0 }; + int offset = 0; + int vnet_hdr_len; + struct packet_sock *po = pkt_sk(sk); + unsigned short gso_type = 0; /* * Get and verify the address. 
*/ if (saddr == NULL) { - struct packet_sock *po = pkt_sk(sk); - ifindex = po->ifindex; proto = po->num; addr = NULL; @@ -1100,25 +1129,74 @@ static int packet_snd(struct socket *sock, if (!(dev->flags & IFF_UP)) goto out_unlock; + if (po->has_vnet_hdr) { + vnet_hdr_len = sizeof(vnet_hdr); + + err = -EINVAL; + if (len < vnet_hdr_len) + goto out_unlock; + + len -= vnet_hdr_len; + + err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov, + vnet_hdr_len); + if (err < 0) + goto out_unlock; + + if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && + (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 > + vnet_hdr.hdr_len)) + vnet_hdr.hdr_len = vnet_hdr.csum_start + + vnet_hdr.csum_offset + 2; + + err = -EINVAL; + if (vnet_hdr.hdr_len > len) + goto out_unlock; + + if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) { + switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_TCPV4: + gso_type = SKB_GSO_TCPV4; + break; + case VIRTIO_NET_HDR_GSO_TCPV6: + gso_type = SKB_GSO_TCPV6; + break; + case VIRTIO_NET_HDR_GSO_UDP: + gso_type = SKB_GSO_UDP; + break; + default: + goto out_unlock; + } + + if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN) + gso_type |= SKB_GSO_TCP_ECN; + + if (vnet_hdr.gso_size == 0) + goto out_unlock; + + } + } + err = -EMSGSIZE; - if (len > dev->mtu+reserve) + if (!gso_type && (len > dev->mtu+reserve)) goto out_unlock; - skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev), - msg->msg_flags & MSG_DONTWAIT, &err); + err = -ENOBUFS; + skb = packet_alloc_skb(sk, LL_ALLOCATED_SPACE(dev), + LL_RESERVED_SPACE(dev), len, vnet_hdr.hdr_len, + msg->msg_flags & MSG_DONTWAIT, &err); if (skb == NULL) goto out_unlock; - skb_reserve(skb, LL_RESERVED_SPACE(dev)); - skb_reset_network_header(skb); + skb_set_network_header(skb, reserve); err = -EINVAL; if (sock->type == SOCK_DGRAM && - dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len) < 0) + (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0) goto out_free; /* Returns -EFAULT on error */ - err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); + err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len); if (err) goto out_free; @@ -1127,6 +1205,25 @@ static int packet_snd(struct socket *sock, skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; + if (po->has_vnet_hdr) { + if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { + if (!skb_partial_csum_set(skb, vnet_hdr.csum_start, + vnet_hdr.csum_offset)) { + err = -EINVAL; + goto out_free; + } + } + + skb_shinfo(skb)->gso_size = vnet_hdr.gso_size; + skb_shinfo(skb)->gso_type = gso_type; + + /* Header must be checked, and gso_segs computed. */ + skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; + skb_shinfo(skb)->gso_segs = 0; + + len += vnet_hdr_len; + } + /* * Now send it */ @@ -1420,6 +1517,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, struct sk_buff *skb; int copied, err; struct sockaddr_ll *sll; + int vnet_hdr_len = 0; err = -EINVAL; if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) @@ -1451,6 +1549,48 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, if (skb == NULL) goto out; + if (pkt_sk(sk)->has_vnet_hdr) { + struct virtio_net_hdr vnet_hdr = { 0 }; + + err = -EINVAL; + vnet_hdr_len = sizeof(vnet_hdr); + if ((len -= vnet_hdr_len) < 0) + goto out_free; + + if (skb_is_gso(skb)) { + struct skb_shared_info *sinfo = skb_shinfo(skb); + + /* This is a hint as to how much should be linear. 
*/ + vnet_hdr.hdr_len = skb_headlen(skb); + vnet_hdr.gso_size = sinfo->gso_size; + if (sinfo->gso_type & SKB_GSO_TCPV4) + vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4; + else if (sinfo->gso_type & SKB_GSO_TCPV6) + vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6; + else if (sinfo->gso_type & SKB_GSO_UDP) + vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP; + else if (sinfo->gso_type & SKB_GSO_FCOE) + goto out_free; + else + BUG(); + if (sinfo->gso_type & SKB_GSO_TCP_ECN) + vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN; + } else + vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE; + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + vnet_hdr.csum_start = skb->csum_start - + skb_headroom(skb); + vnet_hdr.csum_offset = skb->csum_offset; + } /* else everything is zero */ + + err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr, + vnet_hdr_len); + if (err < 0) + goto out_free; + } + /* * If the address length field is there to be filled in, we fill * it in now. @@ -1502,7 +1642,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, * Free or return the buffer as appropriate. Again this * hides all the races and re-entrancy issues from us. */ - err = (flags&MSG_TRUNC) ? skb->len : copied; + err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied); out_free: skb_free_datagram(sk, skb); @@ -1740,6 +1880,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen < sizeof(req)) return -EINVAL; + if (pkt_sk(sk)->has_vnet_hdr) + return -EINVAL; if (copy_from_user(&req, optval, sizeof(req))) return -EFAULT; return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING); @@ -1826,6 +1968,22 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv po->origdev = !!val; return 0; } + case PACKET_VNET_HDR: + { + int val; + + if (sock->type != SOCK_RAW) + return -EINVAL; + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) + return -EBUSY; + if (optlen < sizeof(val)) + return -EINVAL; + if (copy_from_user(&val, optval, sizeof(val))) + return -EFAULT; + + po->has_vnet_hdr = !!val; + return 0; + } default: return -ENOPROTOOPT; } @@ -1874,6 +2032,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, len = sizeof(int); val = po->origdev; + data = &val; + break; + case PACKET_VNET_HDR: + if (len > sizeof(int)) + len = sizeof(int); + val = po->has_vnet_hdr; + data = &val; break; #ifdef CONFIG_PACKET_MMAP -- cgit v1.2.2 From 570930fe1ee497de6aafd895dbe80116cb10525f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 4 Feb 2010 20:28:48 -0800 Subject: bridge: Remove unused age_list This patch removes the unused age_list member from the net_bridge structure. Signed-off-by: Herbert Xu Acked-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/bridge/br_if.c | 2 -- net/bridge/br_private.h | 1 - 2 files changed, 3 deletions(-) (limited to 'net') diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 7bc0604069c7..bc2b1badab88 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -206,8 +206,6 @@ static struct net_device *new_bridge_dev(struct net *net, const char *name) br_netfilter_rtable_init(br); - INIT_LIST_HEAD(&br->age_list); - br_stp_timer_init(br); return dev; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 2114e45682ea..1f0c4f44b765 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -93,7 +93,6 @@ struct net_bridge struct net_device *dev; spinlock_t hash_lock; struct hlist_head hash[BR_HASH_SIZE]; - struct list_head age_list; unsigned long feature_mask; #ifdef CONFIG_BRIDGE_NETFILTER struct rtable fake_rtable; -- cgit v1.2.2 From 15c697ce1c5b408c5e20dcdc6aea2968d1125b75 Mon Sep 17 00:00:00 2001 From: Michael Poole Date: Fri, 5 Feb 2010 12:23:43 -0500 Subject: Bluetooth: Keep a copy of each HID device's report descriptor The report descriptor is read by user space (via the Service Discovery Protocol), so it is only available during the ioctl to connect. However, the HID probe function that needs the descriptor might not be called until a specific module is loaded. Keep a copy of the descriptor so it is available for later use. Signed-off-by: Michael Poole Signed-off-by: Marcel Holtmann --- net/bluetooth/hidp/core.c | 49 +++++++++++++++++++++++------------------------ net/bluetooth/hidp/hidp.h | 4 +++- 2 files changed, 27 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 6cf526d06e21..fc6ec1e72652 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -703,29 +703,9 @@ static void hidp_close(struct hid_device *hid) static int hidp_parse(struct hid_device *hid) { struct hidp_session *session = hid->driver_data; - struct hidp_connadd_req *req = session->req; - unsigned char *buf; - int ret; - - buf = kmalloc(req->rd_size, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - if (copy_from_user(buf, req->rd_data, req->rd_size)) { - kfree(buf); - return -EFAULT; - } - - ret = hid_parse_report(session->hid, buf, req->rd_size); - - kfree(buf); - - if (ret) - return ret; - - session->req = NULL; - return 0; + return hid_parse_report(session->hid, session->rd_data, + session->rd_size); } static int hidp_start(struct hid_device *hid) @@ -770,12 +750,24 @@ static int hidp_setup_hid(struct hidp_session *session, bdaddr_t src, dst; int err; + session->rd_data = kzalloc(req->rd_size, GFP_KERNEL); + if (!session->rd_data) + return -ENOMEM; + + if (copy_from_user(session->rd_data, req->rd_data, req->rd_size)) { + err = -EFAULT; + goto fault; + } + session->rd_size = req->rd_size; + hid = hid_allocate_device(); - if (IS_ERR(hid)) - return PTR_ERR(hid); + if (IS_ERR(hid)) { + err = PTR_ERR(hid); + goto fault; + } session->hid = hid; - session->req = req; + hid->driver_data = session; baswap(&src, &bt_sk(session->ctrl_sock->sk)->src); @@ -806,6 +798,10 @@ failed: hid_destroy_device(hid); session->hid = NULL; +fault: + kfree(session->rd_data); + session->rd_data = NULL; + return err; } @@ -900,6 +896,9 @@ unlink: session->hid = NULL; } + kfree(session->rd_data); + session->rd_data = NULL; + purge: skb_queue_purge(&session->ctrl_transmit); skb_queue_purge(&session->intr_transmit); diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h index faf3d74c3586..a4e215d50c10 100644 --- 
a/net/bluetooth/hidp/hidp.h +++ b/net/bluetooth/hidp/hidp.h @@ -154,7 +154,9 @@ struct hidp_session { struct sk_buff_head ctrl_transmit; struct sk_buff_head intr_transmit; - struct hidp_connadd_req *req; + /* Report descriptor */ + __u8 *rd_data; + uint rd_size; }; static inline void hidp_schedule(struct hidp_session *session) -- cgit v1.2.2 From 889b8f964f2f226b7cd5a0a515109e3d8d9d1613 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 5 Feb 2010 16:29:48 -0800 Subject: packet: Kill CONFIG_PACKET_MMAP. Early on this was an experimental facility that few people other than Alexey Kuznetsov played with. Now it's a pretty fundamental thing and as people add more features to AF_PACKET sockets this config option creates ifdef spaghetti. So kill it off. Signed-off-by: David S. Miller --- net/packet/Kconfig | 10 ---------- net/packet/af_packet.c | 29 ----------------------------- 2 files changed, 39 deletions(-) (limited to 'net') diff --git a/net/packet/Kconfig b/net/packet/Kconfig index 34ff93ff894d..0060e3b396b7 100644 --- a/net/packet/Kconfig +++ b/net/packet/Kconfig @@ -14,13 +14,3 @@ config PACKET be called af_packet. If unsure, say Y. - -config PACKET_MMAP - bool "Packet socket: mmapped IO" - depends on PACKET - help - If you say Y here, the Packet protocol driver will use an IO - mechanism that results in faster communication. - - If unsure, say N. - diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 178e2937bbaa..6ecb426bc0cf 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -157,7 +157,6 @@ struct packet_mreq_max { unsigned char mr_address[MAX_ADDR_LEN]; }; -#ifdef CONFIG_PACKET_MMAP static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing, int tx_ring); @@ -177,7 +176,6 @@ struct packet_ring_buffer { struct packet_sock; static int tpacket_snd(struct packet_sock *po, struct msghdr *msg); -#endif static void packet_flush_mclist(struct sock *sk); @@ -185,11 +183,9 @@ struct packet_sock { /* struct sock has to be the first member of packet_sock */ struct sock sk; struct tpacket_stats stats; -#ifdef CONFIG_PACKET_MMAP struct packet_ring_buffer rx_ring; struct packet_ring_buffer tx_ring; int copy_thresh; -#endif spinlock_t bind_lock; struct mutex pg_vec_lock; unsigned int running:1, /* prot_hook is attached*/ @@ -199,13 +195,11 @@ struct packet_sock { int ifindex; /* bound device */ __be16 num; struct packet_mclist *mclist; -#ifdef CONFIG_PACKET_MMAP atomic_t mapped; enum tpacket_versions tp_version; unsigned int tp_hdrlen; unsigned int tp_reserve; unsigned int tp_loss:1; -#endif struct packet_type prot_hook ____cacheline_aligned_in_smp; }; @@ -219,8 +213,6 @@ struct packet_skb_cb { #define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb)) -#ifdef CONFIG_PACKET_MMAP - static void __packet_set_status(struct packet_sock *po, void *frame, int status) { union { @@ -315,8 +307,6 @@ static inline void packet_increment_head(struct packet_ring_buffer *buff) buff->head = buff->head != buff->frame_max ?
buff->head+1 : 0; } -#endif - static inline struct packet_sock *pkt_sk(struct sock *sk) { return (struct packet_sock *)sk; @@ -640,7 +630,6 @@ drop: return 0; } -#ifdef CONFIG_PACKET_MMAP static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { @@ -1056,7 +1045,6 @@ out: mutex_unlock(&po->pg_vec_lock); return err; } -#endif static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad, size_t reserve, size_t len, @@ -1248,13 +1236,11 @@ out: static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len) { -#ifdef CONFIG_PACKET_MMAP struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); if (po->tx_ring.pg_vec) return tpacket_snd(po, msg); else -#endif return packet_snd(sock, msg, len); } @@ -1268,9 +1254,7 @@ static int packet_release(struct socket *sock) struct sock *sk = sock->sk; struct packet_sock *po; struct net *net; -#ifdef CONFIG_PACKET_MMAP struct tpacket_req req; -#endif if (!sk) return 0; @@ -1299,7 +1283,6 @@ static int packet_release(struct socket *sock) packet_flush_mclist(sk); -#ifdef CONFIG_PACKET_MMAP memset(&req, 0, sizeof(req)); if (po->rx_ring.pg_vec) @@ -1307,7 +1290,6 @@ static int packet_release(struct socket *sock) if (po->tx_ring.pg_vec) packet_set_ring(sk, &req, 1, 1); -#endif /* * Now the socket is dead. No more input will appear. @@ -1872,7 +1854,6 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv return ret; } -#ifdef CONFIG_PACKET_MMAP case PACKET_RX_RING: case PACKET_TX_RING: { @@ -1943,7 +1924,6 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv po->tp_loss = !!val; return 0; } -#endif case PACKET_AUXDATA: { int val; @@ -2041,7 +2021,6 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, data = &val; break; -#ifdef CONFIG_PACKET_MMAP case PACKET_VERSION: if (len > sizeof(int)) len = sizeof(int); @@ -2077,7 +2056,6 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, val = po->tp_loss; data = &val; break; -#endif default: return -ENOPROTOOPT; } @@ -2197,11 +2175,6 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, return 0; } -#ifndef CONFIG_PACKET_MMAP -#define packet_mmap sock_no_mmap -#define packet_poll datagram_poll -#else - static unsigned int packet_poll(struct file *file, struct socket *sock, poll_table *wait) { @@ -2483,8 +2456,6 @@ out: mutex_unlock(&po->pg_vec_lock); return err; } -#endif - static const struct proto_ops packet_ops_spkt = { .family = PF_PACKET, -- cgit v1.2.2 From 76780373190d7e8ddfb6fed06aef068e2445c743 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 8 Feb 2010 15:39:16 +0100 Subject: netfilter: fix build failure with CONNTRACK=y NAT=n net/ipv4/netfilter/nf_defrag_ipv4.c: In function 'ipv4_conntrack_defrag': net/ipv4/netfilter/nf_defrag_ipv4.c:62: error: implicit declaration of function 'nf_ct_is_template' Signed-off-by: Florian Westphal Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_defrag_ipv4.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 77627fa80561..f6f46686cbc0 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -17,6 +17,7 @@ #include #include #include +#include /* Returns new sk_buff, or NULL */ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) -- cgit v1.2.2 From 
9edd7ca0a3e3999c260642c92fa008892d82ca6e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 8 Feb 2010 11:16:26 -0800 Subject: netfilter: nf_conntrack: fix memory corruption with multiple namespaces As discovered by Jon Masters , the "untracked" conntrack, which is located in the data section, might be accidentally freed when a new namespace is instantiated while the untracked conntrack is attached to a skb because the reference count is re-initialized. The best fix would be to use a separate untracked conntrack per namespace since it includes a namespace pointer. Unfortunately this is not possible without larger changes since the namespace is not easily available everywhere we need it. For now move the untracked conntrack initialization to the init_net setup function to make sure the reference count is not re-initialized and handle cleanup in the init_net cleanup function to make sure namespaces can exit properly while the untracked conntrack is in use in other namespaces. Cc: stable@kernel.org Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_core.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0e98c3282d42..37e2b88313f2 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1113,6 +1113,10 @@ static void nf_ct_release_dying_list(struct net *net) static void nf_conntrack_cleanup_init_net(void) { + /* wait until all references to nf_conntrack_untracked are dropped */ + while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1) + schedule(); + nf_conntrack_helper_fini(); nf_conntrack_proto_fini(); kmem_cache_destroy(nf_conntrack_cachep); @@ -1127,9 +1131,6 @@ static void nf_conntrack_cleanup_net(struct net *net) schedule(); goto i_see_dead_people; } - /* wait until all references to nf_conntrack_untracked are dropped */ - while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1) - schedule(); nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, nf_conntrack_htable_size); @@ -1288,6 +1289,14 @@ static int nf_conntrack_init_init_net(void) if (ret < 0) goto err_helper; + /* Set up fake conntrack: to never be deleted, not in any hashes */ +#ifdef CONFIG_NET_NS + nf_conntrack_untracked.ct_net = &init_net; +#endif + atomic_set(&nf_conntrack_untracked.ct_general.use, 1); + /* - and look it like as a confirmed connection */ + set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status); + return 0; err_helper: @@ -1333,15 +1342,6 @@ static int nf_conntrack_init_net(struct net *net) if (ret < 0) goto err_ecache; - /* Set up fake conntrack: - - to never be deleted, not in any hashes */ -#ifdef CONFIG_NET_NS - nf_conntrack_untracked.ct_net = &init_net; -#endif - atomic_set(&nf_conntrack_untracked.ct_general.use, 1); - /* - and look it like as a confirmed connection */ - set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status); - return 0; err_ecache: -- cgit v1.2.2 From 5b3501faa8741d50617ce4191c20061c6ef36cb3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 8 Feb 2010 11:16:56 -0800 Subject: netfilter: nf_conntrack: per netns nf_conntrack_cachep nf_conntrack_cachep is currently shared by all netns instances, but because of SLAB_DESTROY_BY_RCU special semantics, this is wrong.
If we use a shared slab cache, one object can instantly fly from one hash table (netns ONE) to another one (netns TWO), and a concurrent reader (doing a lookup in netns ONE, 'finding' an object of netns TWO) can be fooled without notice, because no RCU grace period has to be observed between object freeing and its reuse. We don't have this problem with UDP/TCP slab caches because TCP/UDP hashtables are global to the machine (and each object has a pointer to its netns). If we use per netns conntrack hash tables, we also *must* use per netns conntrack slab caches, to guarantee an object can not escape from one namespace to another one. Signed-off-by: Eric Dumazet [Patrick: added unique slab name allocation] Cc: stable@kernel.org Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_core.c | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 37e2b88313f2..9de4bd4c0dd7 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -63,8 +63,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max); struct nf_conn nf_conntrack_untracked __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_untracked); -static struct kmem_cache *nf_conntrack_cachep __read_mostly; - static int nf_conntrack_hash_rnd_initted; static unsigned int nf_conntrack_hash_rnd; @@ -572,7 +570,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, * Do not use kmem_cache_zalloc(), as this cache uses * SLAB_DESTROY_BY_RCU. */ - ct = kmem_cache_alloc(nf_conntrack_cachep, gfp); + ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp); if (ct == NULL) { pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n"); atomic_dec(&net->ct.count); @@ -611,7 +609,7 @@ void nf_conntrack_free(struct nf_conn *ct) nf_ct_ext_destroy(ct); atomic_dec(&net->ct.count); nf_ct_ext_free(ct); - kmem_cache_free(nf_conntrack_cachep, ct); + kmem_cache_free(net->ct.nf_conntrack_cachep, ct); } EXPORT_SYMBOL_GPL(nf_conntrack_free); @@ -1119,7 +1117,6 @@ static void nf_conntrack_cleanup_init_net(void) nf_conntrack_helper_fini(); nf_conntrack_proto_fini(); - kmem_cache_destroy(nf_conntrack_cachep); } static void nf_conntrack_cleanup_net(struct net *net) @@ -1137,6 +1134,8 @@ static void nf_conntrack_cleanup_net(struct net *net) nf_conntrack_ecache_fini(net); nf_conntrack_acct_fini(net); nf_conntrack_expect_fini(net); + kmem_cache_destroy(net->ct.nf_conntrack_cachep); + kfree(net->ct.slabname); free_percpu(net->ct.stat); } @@ -1272,15 +1271,6 @@ static int nf_conntrack_init_init_net(void) NF_CONNTRACK_VERSION, nf_conntrack_htable_size, nf_conntrack_max); - nf_conntrack_cachep = kmem_cache_create("nf_conntrack", - sizeof(struct nf_conn), - 0, SLAB_DESTROY_BY_RCU, NULL); - if (!nf_conntrack_cachep) { - printk(KERN_ERR "Unable to create nf_conn slab cache\n"); - ret = -ENOMEM; - goto err_cache; - } - ret = nf_conntrack_proto_init(); if (ret < 0) goto err_proto; @@ -1302,8 +1292,6 @@ static int nf_conntrack_init_init_net(void) err_helper: nf_conntrack_proto_fini(); err_proto: - kmem_cache_destroy(nf_conntrack_cachep); -err_cache: return ret; } @@ -1325,6 +1313,21 @@ static int nf_conntrack_init_net(struct net *net) ret = -ENOMEM; goto err_stat; } + + net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net); + if (!net->ct.slabname) { + ret = -ENOMEM; + goto err_slabname; + } + + net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname, + sizeof(struct nf_conn), 0, + SLAB_DESTROY_BY_RCU,
NULL); + if (!net->ct.nf_conntrack_cachep) { + printk(KERN_ERR "Unable to create nf_conn slab cache\n"); + ret = -ENOMEM; + goto err_cache; + } net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, &net->ct.hash_vmalloc, 1); if (!net->ct.hash) { @@ -1352,6 +1355,10 @@ err_expect: nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, nf_conntrack_htable_size); err_hash: + kmem_cache_destroy(net->ct.nf_conntrack_cachep); +err_cache: + kfree(net->ct.slabname); +err_slabname: free_percpu(net->ct.stat); err_stat: return ret; -- cgit v1.2.2 From 13ccdfc2af03e09e60791f7d4bc4ccf53398af7c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 8 Feb 2010 11:17:22 -0800 Subject: netfilter: nf_conntrack: restrict runtime expect hashsize modifications Expectation hashtable size was simply glued to a variable with no code to rehash expectations, so it was a bug to allow writing to it. Make "expect_hashsize" readonly. Signed-off-by: Alexey Dobriyan Cc: stable@kernel.org Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_expect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index fdf5d2a1d9b4..4ad7d1d809af 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -569,7 +569,7 @@ static void exp_proc_remove(struct net *net) #endif /* CONFIG_PROC_FS */ } -module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600); +module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400); int nf_conntrack_expect_init(struct net *net) { -- cgit v1.2.2 From 14c7dbe043d01a83a30633ab6b109ba2ac61d9f7 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 8 Feb 2010 11:17:43 -0800 Subject: netfilter: xtables: compat out of scope fix As per C99 6.2.4(2) when temporary table data goes out of scope, the behaviour is undefined: if (compat) { struct foo tmp; ... 
private = &tmp; } [dereference private] Signed-off-by: Alexey Dobriyan Cc: stable@kernel.org Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/arp_tables.c | 4 ++-- net/ipv4/netfilter/ip_tables.c | 4 ++-- net/ipv6/netfilter/ip6_tables.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 06632762ba5f..90203e1b9187 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -925,10 +925,10 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) if (t && !IS_ERR(t)) { struct arpt_getinfo info; const struct xt_table_info *private = t->private; - #ifdef CONFIG_COMPAT + struct xt_table_info tmp; + if (compat) { - struct xt_table_info tmp; ret = compat_table_info(private, &tmp); xt_compat_flush_offsets(NFPROTO_ARP); private = &tmp; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 572330a552ef..3ce53cf13d5a 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1132,10 +1132,10 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) if (t && !IS_ERR(t)) { struct ipt_getinfo info; const struct xt_table_info *private = t->private; - #ifdef CONFIG_COMPAT + struct xt_table_info tmp; + if (compat) { - struct xt_table_info tmp; ret = compat_table_info(private, &tmp); xt_compat_flush_offsets(AF_INET); private = &tmp; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 480d7f8c9802..8a7e0f52e177 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1164,10 +1164,10 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) if (t && !IS_ERR(t)) { struct ip6t_getinfo info; const struct xt_table_info *private = t->private; - #ifdef CONFIG_COMPAT + struct xt_table_info tmp; + if (compat) { - struct xt_table_info tmp; ret = compat_table_info(private, &tmp); xt_compat_flush_offsets(AF_INET6); private = &tmp; -- cgit v1.2.2 From d696c7bdaa55e2208e56c6f98e6bc1599f34286d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 8 Feb 2010 11:18:07 -0800 Subject: netfilter: nf_conntrack: fix hash resizing with namespaces As noticed by Jon Masters , the conntrack hash size is global and not per namespace, but modifiable at runtime through /sys/module/nf_conntrack/hashsize. Changing the hash size will only resize the hash in the current namespace however, so other namespaces will use an invalid hash size. This can cause crashes when enlarging the hashsize, or false negative lookups when shrinking it. Move the hash size into the per-namespace data and only use the global hash size to initialize the per-namespace value when instantiating a new namespace. Additionally restrict hash resizing to init_net for now as other namespaces are not handled currently. Cc: stable@kernel.org Signed-off-by: Patrick McHardy Signed-off-by: David S.
Miller --- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- .../netfilter/nf_conntrack_l3proto_ipv4_compat.c | 4 +- net/ipv4/netfilter/nf_nat_core.c | 22 ++++----- net/netfilter/nf_conntrack_core.c | 53 ++++++++++++---------- net/netfilter/nf_conntrack_expect.c | 2 +- net/netfilter/nf_conntrack_helper.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 2 +- net/netfilter/nf_conntrack_standalone.c | 7 +-- 8 files changed, 47 insertions(+), 47 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index d171b123a656..d1ea38a7c490 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -210,7 +210,7 @@ static ctl_table ip_ct_sysctl_table[] = { }, { .procname = "ip_conntrack_buckets", - .data = &nf_conntrack_htable_size, + .data = &init_net.ct.htable_size, .maxlen = sizeof(unsigned int), .mode = 0444, .proc_handler = proc_dointvec, diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 8668a3defda6..2fb7b76da94f 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -32,7 +32,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) struct hlist_nulls_node *n; for (st->bucket = 0; - st->bucket < nf_conntrack_htable_size; + st->bucket < net->ct.htable_size; st->bucket++) { n = rcu_dereference(net->ct.hash[st->bucket].first); if (!is_a_nulls(n)) @@ -50,7 +50,7 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, head = rcu_dereference(head->next); while (is_a_nulls(head)) { if (likely(get_nulls_value(head) == st->bucket)) { - if (++st->bucket >= nf_conntrack_htable_size) + if (++st->bucket >= net->ct.htable_size) return NULL; } head = rcu_dereference(net->ct.hash[st->bucket].first); diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index fe1a64479dd0..26066a2327ad 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -35,9 +35,6 @@ static DEFINE_SPINLOCK(nf_nat_lock); static struct nf_conntrack_l3proto *l3proto __read_mostly; -/* Calculated at init based on memory size */ -static unsigned int nf_nat_htable_size __read_mostly; - #define MAX_IP_NAT_PROTO 256 static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] __read_mostly; @@ -72,7 +69,7 @@ EXPORT_SYMBOL_GPL(nf_nat_proto_put); /* We keep an extra hash for each conntrack, for fast searching. */ static inline unsigned int -hash_by_src(const struct nf_conntrack_tuple *tuple) +hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple) { unsigned int hash; @@ -80,7 +77,7 @@ hash_by_src(const struct nf_conntrack_tuple *tuple) hash = jhash_3words((__force u32)tuple->src.u3.ip, (__force u32)tuple->src.u.all, tuple->dst.protonum, 0); - return ((u64)hash * nf_nat_htable_size) >> 32; + return ((u64)hash * net->ipv4.nat_htable_size) >> 32; } /* Is this tuple already taken? 
(not by us) */ @@ -147,7 +144,7 @@ find_appropriate_src(struct net *net, struct nf_conntrack_tuple *result, const struct nf_nat_range *range) { - unsigned int h = hash_by_src(tuple); + unsigned int h = hash_by_src(net, tuple); const struct nf_conn_nat *nat; const struct nf_conn *ct; const struct hlist_node *n; @@ -330,7 +327,7 @@ nf_nat_setup_info(struct nf_conn *ct, if (have_to_hash) { unsigned int srchash; - srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + srchash = hash_by_src(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); spin_lock_bh(&nf_nat_lock); /* nf_conntrack_alter_reply might re-allocate exntension aera */ nat = nfct_nat(ct); @@ -679,8 +676,10 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, static int __net_init nf_nat_net_init(struct net *net) { - net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, - &net->ipv4.nat_vmalloced, 0); + /* Leave them the same for the moment. */ + net->ipv4.nat_htable_size = net->ct.htable_size; + net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, + &net->ipv4.nat_vmalloced, 0); if (!net->ipv4.nat_bysource) return -ENOMEM; return 0; @@ -703,7 +702,7 @@ static void __net_exit nf_nat_net_exit(struct net *net) nf_ct_iterate_cleanup(net, &clean_nat, NULL); synchronize_rcu(); nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced, - nf_nat_htable_size); + net->ipv4.nat_htable_size); } static struct pernet_operations nf_nat_net_ops = { @@ -724,9 +723,6 @@ static int __init nf_nat_init(void) return ret; } - /* Leave them the same for the moment. */ - nf_nat_htable_size = nf_conntrack_htable_size; - ret = register_pernet_subsys(&nf_nat_net_ops); if (ret < 0) goto cleanup_extend; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 9de4bd4c0dd7..4d79e3c1616c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -84,9 +85,10 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, return ((u64)h * size) >> 32; } -static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple) +static inline u_int32_t hash_conntrack(const struct net *net, + const struct nf_conntrack_tuple *tuple) { - return __hash_conntrack(tuple, nf_conntrack_htable_size, + return __hash_conntrack(tuple, net->ct.htable_size, nf_conntrack_hash_rnd); } @@ -294,7 +296,7 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - unsigned int hash = hash_conntrack(tuple); + unsigned int hash = hash_conntrack(net, tuple); /* Disable BHs the entire time since we normally need to disable them * at least once for the stats anyway. 
@@ -364,10 +366,11 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, void nf_conntrack_hash_insert(struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); unsigned int hash, repl_hash; - hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); __nf_conntrack_hash_insert(ct, hash, repl_hash); } @@ -395,8 +398,8 @@ __nf_conntrack_confirm(struct sk_buff *skb) if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) return NF_ACCEPT; - hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); /* We're not in hash table, and we refuse to set up related connections for unconfirmed conns. But packet copies and @@ -466,7 +469,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, struct net *net = nf_ct_net(ignored_conntrack); struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - unsigned int hash = hash_conntrack(tuple); + unsigned int hash = hash_conntrack(net, tuple); /* Disable BHs the entire time since we need to disable them at * least once for the stats anyway. @@ -501,7 +504,7 @@ static noinline int early_drop(struct net *net, unsigned int hash) int dropped = 0; rcu_read_lock(); - for (i = 0; i < nf_conntrack_htable_size; i++) { + for (i = 0; i < net->ct.htable_size; i++) { hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) { tmp = nf_ct_tuplehash_to_ctrack(h); @@ -521,7 +524,7 @@ static noinline int early_drop(struct net *net, unsigned int hash) if (cnt >= NF_CT_EVICTION_RANGE) break; - hash = (hash + 1) % nf_conntrack_htable_size; + hash = (hash + 1) % net->ct.htable_size; } rcu_read_unlock(); @@ -555,7 +558,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, if (nf_conntrack_max && unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { - unsigned int hash = hash_conntrack(orig); + unsigned int hash = hash_conntrack(net, orig); if (!early_drop(net, hash)) { atomic_dec(&net->ct.count); if (net_ratelimit()) @@ -1012,7 +1015,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), struct hlist_nulls_node *n; spin_lock_bh(&nf_conntrack_lock); - for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { + for (; *bucket < net->ct.htable_size; (*bucket)++) { hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (iter(ct, data)) @@ -1130,7 +1133,7 @@ static void nf_conntrack_cleanup_net(struct net *net) } nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, - nf_conntrack_htable_size); + net->ct.htable_size); nf_conntrack_ecache_fini(net); nf_conntrack_acct_fini(net); nf_conntrack_expect_fini(net); @@ -1190,10 +1193,12 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) { int i, bucket, vmalloced, old_vmalloced; unsigned int hashsize, old_size; - int rnd; struct hlist_nulls_head *hash, *old_hash; struct nf_conntrack_tuple_hash *h; + if (current->nsproxy->net_ns != &init_net) + return -EOPNOTSUPP; + /* On boot, we can set this without any fancy locking. 
*/ if (!nf_conntrack_htable_size) return param_set_uint(val, kp); @@ -1206,33 +1211,29 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) if (!hash) return -ENOMEM; - /* We have to rehahs for the new table anyway, so we also can - * use a newrandom seed */ - get_random_bytes(&rnd, sizeof(rnd)); - /* Lookups in the old hash might happen in parallel, which means we * might get false negatives during connection lookup. New connections * created because of a false negative won't make it into the hash * though since that required taking the lock. */ spin_lock_bh(&nf_conntrack_lock); - for (i = 0; i < nf_conntrack_htable_size; i++) { + for (i = 0; i < init_net.ct.htable_size; i++) { while (!hlist_nulls_empty(&init_net.ct.hash[i])) { h = hlist_nulls_entry(init_net.ct.hash[i].first, struct nf_conntrack_tuple_hash, hnnode); hlist_nulls_del_rcu(&h->hnnode); - bucket = __hash_conntrack(&h->tuple, hashsize, rnd); + bucket = __hash_conntrack(&h->tuple, hashsize, + nf_conntrack_hash_rnd); hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]); } } - old_size = nf_conntrack_htable_size; + old_size = init_net.ct.htable_size; old_vmalloced = init_net.ct.hash_vmalloc; old_hash = init_net.ct.hash; - nf_conntrack_htable_size = hashsize; + init_net.ct.htable_size = nf_conntrack_htable_size = hashsize; init_net.ct.hash_vmalloc = vmalloced; init_net.ct.hash = hash; - nf_conntrack_hash_rnd = rnd; spin_unlock_bh(&nf_conntrack_lock); nf_ct_free_hashtable(old_hash, old_vmalloced, old_size); @@ -1328,7 +1329,9 @@ static int nf_conntrack_init_net(struct net *net) ret = -ENOMEM; goto err_cache; } - net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, + + net->ct.htable_size = nf_conntrack_htable_size; + net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, &net->ct.hash_vmalloc, 1); if (!net->ct.hash) { ret = -ENOMEM; @@ -1353,7 +1356,7 @@ err_acct: nf_conntrack_expect_fini(net); err_expect: nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, - nf_conntrack_htable_size); + net->ct.htable_size); err_hash: kmem_cache_destroy(net->ct.nf_conntrack_cachep); err_cache: diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 4ad7d1d809af..2f25ff610982 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -577,7 +577,7 @@ int nf_conntrack_expect_init(struct net *net) if (net_eq(net, &init_net)) { if (!nf_ct_expect_hsize) { - nf_ct_expect_hsize = nf_conntrack_htable_size / 256; + nf_ct_expect_hsize = net->ct.htable_size / 256; if (!nf_ct_expect_hsize) nf_ct_expect_hsize = 1; } diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 65c2a7bc3afc..4b1a56bd074c 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -192,7 +192,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, /* Get rid of expecteds, set helpers to NULL. 
*/ hlist_nulls_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode) unhelp(h, me); - for (i = 0; i < nf_conntrack_htable_size; i++) { + for (i = 0; i < net->ct.htable_size; i++) { hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) unhelp(h, me); } } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 42f21c01a93e..0ffe689dfe97 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -594,7 +594,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); last = (struct nf_conn *)cb->args[1]; - for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { + for (; cb->args[0] < init_net.ct.htable_size; cb->args[0]++) { restart: hlist_nulls_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]], hnnode) { diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 028aba667ef7..e310f1561bb2 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -51,7 +51,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) struct hlist_nulls_node *n; for (st->bucket = 0; - st->bucket < nf_conntrack_htable_size; + st->bucket < net->ct.htable_size; st->bucket++) { n = rcu_dereference(net->ct.hash[st->bucket].first); if (!is_a_nulls(n)) @@ -69,7 +69,7 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, head = rcu_dereference(head->next); while (is_a_nulls(head)) { if (likely(get_nulls_value(head) == st->bucket)) { - if (++st->bucket >= nf_conntrack_htable_size) + if (++st->bucket >= net->ct.htable_size) return NULL; } head = rcu_dereference(net->ct.hash[st->bucket].first); @@ -355,7 +355,7 @@ static ctl_table nf_ct_sysctl_table[] = { }, { .procname = "nf_conntrack_buckets", - .data = &nf_conntrack_htable_size, + .data = &init_net.ct.htable_size, .maxlen = sizeof(unsigned int), .mode = 0444, .proc_handler = proc_dointvec, @@ -421,6 +421,7 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) goto out_kmemdup; table[1].data = &net->ct.count; + table[2].data = &net->ct.htable_size; table[3].data = &net->ct.sysctl_checksum; table[4].data = &net->ct.sysctl_log_invalid; -- cgit v1.2.2 From 562ada612058133a5483c68a73605f3c5f42fffe Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Fri, 15 Jan 2010 18:54:03 -0600 Subject: net/9p: fix virtio transport to correctly update status on connect The 9p virtio transport was not updating its connection status correctly, preventing it from being able to mount the server. Signed-off-by: Eric Van Hensbergen --- net/9p/trans_virtio.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index ea1e3daabefe..67c4bc704c5a 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -311,6 +311,7 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args) } client->trans = (void *)chan; + client->status = Connected; chan->client = client; return 0; -- cgit v1.2.2 From 349d3bb878d71978650a0634b5445af3c1cc1cd8 Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Fri, 15 Jan 2010 19:01:10 -0600 Subject: net/9p: fail when user specifies a transport which we can't find If the user specifies a transport and we can't find it, we fell back to the default transport silently. This patch will make the code complain more loudly and return an error code.
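As an aside (this paragraph and the sketch below are editorial illustration, not part of the original commit message): the change follows the usual "look the value up by name and treat a miss as a hard error" pattern instead of silently falling back to a default. A minimal, self-contained sketch of that pattern in plain C, using made-up names rather than the actual net/9p code, might look like:

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    struct transport { const char *name; };

    static const struct transport transports[] = {
        { "tcp" }, { "unix" }, { "virtio" },
    };

    /* Return the named transport, or NULL if it is not registered. */
    static const struct transport *get_transport_by_name(const char *name)
    {
        size_t i;

        for (i = 0; i < sizeof(transports) / sizeof(transports[0]); i++)
            if (strcmp(transports[i].name, name) == 0)
                return &transports[i];
        return NULL;
    }

    /* Complain and fail instead of quietly picking a default transport. */
    static int pick_transport(const char *name, const struct transport **out)
    {
        const struct transport *t = get_transport_by_name(name);

        if (!t) {
            fprintf(stderr, "could not find requested transport: %s\n", name);
            return -EINVAL;
        }
        *out = t;
        return 0;
    }

    int main(void)
    {
        const struct transport *t;

        /* "rdma" is not in the table above, so this prints the error and fails. */
        return pick_transport("rdma", &t) ? 1 : 0;
    }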
Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 8af95b2dddd6..90a2eb926d19 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -108,6 +108,13 @@ static int parse_opts(char *opts, struct p9_client *clnt) break; case Opt_trans: clnt->trans_mod = v9fs_get_trans_by_name(&args[0]); + if(clnt->trans_mod == NULL) { + P9_DPRINTK(P9_DEBUG_ERROR, + "Could not find request transport: %s\n", + (char *) &args[0]); + ret = -EINVAL; + goto free_and_return; + } break; case Opt_legacy: clnt->dotu = 0; @@ -117,6 +124,7 @@ static int parse_opts(char *opts, struct p9_client *clnt) } } +free_and_return: kfree(options); return ret; } -- cgit v1.2.2 From 9d6939dac77102b09396ee0b89392ec7639612a7 Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Fri, 15 Jan 2010 19:01:56 -0600 Subject: net/9p: fix statsize inside twstat stat structures contain a size prefix. In our twstat messages we were including the size of the size prefix in the prefix, which is not what the protocol wants, and Inferno servers would complain. Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 90a2eb926d19..a2e2d61b903b 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1222,10 +1222,11 @@ static int p9_client_statsize(struct p9_wstat *wst, int optional) { int ret; + /* NOTE: size shouldn't include its own length */ /* size[2] type[2] dev[4] qid[13] */ /* mode[4] atime[4] mtime[4] length[8]*/ /* name[s] uid[s] gid[s] muid[s] */ - ret = 2+2+4+13+4+4+4+8+2+2+2+2; + ret = 2+4+13+4+4+4+8+2+2+2+2; if (wst->name) ret += strlen(wst->name); @@ -1266,7 +1267,7 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) wst->name, wst->uid, wst->gid, wst->muid, wst->extension, wst->n_uid, wst->n_gid, wst->n_muid); - req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, wst->size, wst); + req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, wst->size+2, wst); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; -- cgit v1.2.2 From 0da780c269957783d341fc3559e6b4c9912af7b4 Mon Sep 17 00:00:00 2001 From: Benoit Papillault Date: Fri, 5 Feb 2010 01:21:03 +0100 Subject: mac80211: Fix probe request filtering in IBSS mode We only reply to a probe request if either the requested SSID is the broadcast SSID or if the requested SSID matches our own SSID. This latter case was not properly handled since we were replying to a different SSID with the same length as our own SSID. Signed-off-by: Benoit Papillault Cc: stable@kernel.org Signed-off-by: John W.
Linville --- net/mac80211/ibss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 1f2db647bb5c..22f0c2aa7a89 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -647,7 +647,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, } if (pos[1] != 0 && (pos[1] != ifibss->ssid_len || - !memcmp(pos + 2, ifibss->ssid, ifibss->ssid_len))) { + memcmp(pos + 2, ifibss->ssid, ifibss->ssid_len))) { /* Ignore ProbeReq for foreign SSID */ return; } -- cgit v1.2.2 From c0ce77b8323c1a0d4eeef97caf16c0ea971222a9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 3 Feb 2010 10:22:31 +0100 Subject: mac80211: fix deferred hardware scan requests Reinette found the reason for the warnings that happened occasionally when a hw-offloaded scan finished; her description of the problem: mac80211 will defer the handling of scan requests if it is busy with management work at the time. The scan requests are deferred and run after the work has completed. When this occurs there are currently two problems. * The scan request for hardware scan is not fully populated with the band and channels to scan not initialized. * When the scan is queued the state is not correctly updated to reflect that a scan is in progress. The problem here is that when the driver completes the scan and calls ieee80211_scan_completed() a warning will be triggered since mac80211 was not aware that a scan was in progress. The reason is that the queued scan work will start the hw scan right away when the hw_scan_req struct has already been allocated. However, in the first pass it will not have been filled, which happens at the same time as setting the bits. To fix this, simply move the allocation after the pending work test as well, so that the first iteration of the scan work will call __ieee80211_start_scan() even in the hardware scan case. Bug-identified-by: Reinette Chatre Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/scan.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index f934c9620b73..bc17cf7d68db 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -439,6 +439,16 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, if (local->scan_req) return -EBUSY; + if (req != local->int_scan_req && + sdata->vif.type == NL80211_IFTYPE_STATION && + !list_empty(&ifmgd->work_list)) { + /* actually wait for the work it's doing to finish/time out */ + set_bit(IEEE80211_STA_REQ_SCAN, &ifmgd->request); + local->scan_req = req; + local->scan_sdata = sdata; + return 0; + } + if (local->ops->hw_scan) { u8 *ies; @@ -463,14 +473,6 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, local->scan_req = req; local->scan_sdata = sdata; - if (req != local->int_scan_req && - sdata->vif.type == NL80211_IFTYPE_STATION && - !list_empty(&ifmgd->work_list)) { - /* actually wait for the work it's doing to finish/time out */ - set_bit(IEEE80211_STA_REQ_SCAN, &ifmgd->request); - return 0; - } - if (local->ops->hw_scan) __set_bit(SCAN_HW_SCANNING, &local->scanning); else -- cgit v1.2.2 From 33e5a2f776e331dc8a4379b6efb660d38f182d96 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 3 Feb 2010 10:24:30 +0100 Subject: wireless: update radiotap parser Upstream radiotap has adopted the namespace proposal David Young made and I then took care of, for which I had adapted the radiotap parser as a library outside the kernel. This brings the in-kernel parser up to speed. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/tx.c | 9 +- net/wireless/radiotap.c | 305 +++++++++++++++++++++++++++++++----------------- 2 files changed, 203 insertions(+), 111 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 85e382aa894e..e392820a4c33 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1010,7 +1010,8 @@ static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, (struct ieee80211_radiotap_header *) skb->data; struct ieee80211_supported_band *sband; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len); + int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len, + NULL); sband = tx->local->hw.wiphy->bands[tx->channel->band]; @@ -1046,7 +1047,7 @@ static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, * because it will be recomputed and added * on transmission */ - if (skb->len < (iterator.max_length + FCS_LEN)) + if (skb->len < (iterator._max_length + FCS_LEN)) return false; skb_trim(skb, skb->len - FCS_LEN); @@ -1073,10 +1074,10 @@ static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, /* * remove the radiotap header - * iterator->max_length was sanity-checked against + * iterator->_max_length was sanity-checked against * skb->len by iterator init */ - skb_pull(skb, iterator.max_length); + skb_pull(skb, iterator._max_length); return true; } diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c index f591871a7b4f..1332c445d1c7 100644 --- a/net/wireless/radiotap.c +++ b/net/wireless/radiotap.c @@ -2,6 +2,16 @@ * Radiotap parser * * Copyright 2007 Andy Green + * Copyright 2009 Johannes Berg + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software 
Foundation. + * + * Alternatively, this software may be distributed under the terms of BSD + * license. + * + * See COPYING for more details. */ #include @@ -10,6 +20,35 @@ /* function prototypes and related defs are in include/net/cfg80211.h */ +static const struct radiotap_align_size rtap_namespace_sizes[] = { + [IEEE80211_RADIOTAP_TSFT] = { .align = 8, .size = 8, }, + [IEEE80211_RADIOTAP_FLAGS] = { .align = 1, .size = 1, }, + [IEEE80211_RADIOTAP_RATE] = { .align = 1, .size = 1, }, + [IEEE80211_RADIOTAP_CHANNEL] = { .align = 2, .size = 4, }, + [IEEE80211_RADIOTAP_FHSS] = { .align = 2, .size = 2, }, + [IEEE80211_RADIOTAP_DBM_ANTSIGNAL] = { .align = 1, .size = 1, }, + [IEEE80211_RADIOTAP_DBM_ANTNOISE] = { .align = 1, .size = 1, }, + [IEEE80211_RADIOTAP_LOCK_QUALITY] = { .align = 2, .size = 2, }, + [IEEE80211_RADIOTAP_TX_ATTENUATION] = { .align = 2, .size = 2, }, + [IEEE80211_RADIOTAP_DB_TX_ATTENUATION] = { .align = 2, .size = 2, }, + [IEEE80211_RADIOTAP_DBM_TX_POWER] = { .align = 1, .size = 1, }, + [IEEE80211_RADIOTAP_ANTENNA] = { .align = 1, .size = 1, }, + [IEEE80211_RADIOTAP_DB_ANTSIGNAL] = { .align = 1, .size = 1, }, + [IEEE80211_RADIOTAP_DB_ANTNOISE] = { .align = 1, .size = 1, }, + [IEEE80211_RADIOTAP_RX_FLAGS] = { .align = 2, .size = 2, }, + [IEEE80211_RADIOTAP_TX_FLAGS] = { .align = 2, .size = 2, }, + [IEEE80211_RADIOTAP_RTS_RETRIES] = { .align = 1, .size = 1, }, + [IEEE80211_RADIOTAP_DATA_RETRIES] = { .align = 1, .size = 1, }, + /* + * add more here as they are defined in radiotap.h + */ +}; + +static const struct ieee80211_radiotap_namespace radiotap_ns = { + .n_bits = sizeof(rtap_namespace_sizes) / sizeof(rtap_namespace_sizes[0]), + .align_size = rtap_namespace_sizes, +}; + /** * ieee80211_radiotap_iterator_init - radiotap parser iterator initialization * @iterator: radiotap_iterator to initialize @@ -50,9 +89,9 @@ */ int ieee80211_radiotap_iterator_init( - struct ieee80211_radiotap_iterator *iterator, - struct ieee80211_radiotap_header *radiotap_header, - int max_length) + struct ieee80211_radiotap_iterator *iterator, + struct ieee80211_radiotap_header *radiotap_header, + int max_length, const struct ieee80211_radiotap_vendor_namespaces *vns) { /* Linux only supports version 0 radiotap format */ if (radiotap_header->it_version) @@ -62,19 +101,24 @@ int ieee80211_radiotap_iterator_init( if (max_length < get_unaligned_le16(&radiotap_header->it_len)) return -EINVAL; - iterator->rtheader = radiotap_header; - iterator->max_length = get_unaligned_le16(&radiotap_header->it_len); - iterator->arg_index = 0; - iterator->bitmap_shifter = get_unaligned_le32(&radiotap_header->it_present); - iterator->arg = (u8 *)radiotap_header + sizeof(*radiotap_header); - iterator->this_arg = NULL; + iterator->_rtheader = radiotap_header; + iterator->_max_length = get_unaligned_le16(&radiotap_header->it_len); + iterator->_arg_index = 0; + iterator->_bitmap_shifter = get_unaligned_le32(&radiotap_header->it_present); + iterator->_arg = (uint8_t *)radiotap_header + sizeof(*radiotap_header); + iterator->_reset_on_ext = 0; + iterator->_next_bitmap = &radiotap_header->it_present; + iterator->_next_bitmap++; + iterator->_vns = vns; + iterator->current_namespace = &radiotap_ns; + iterator->is_radiotap_ns = 1; /* find payload start allowing for extended bitmap(s) */ - if (unlikely(iterator->bitmap_shifter & (1<arg) & - (1 << IEEE80211_RADIOTAP_EXT)) { - iterator->arg += sizeof(u32); + if (iterator->_bitmap_shifter & (1<_arg) & + (1 << IEEE80211_RADIOTAP_EXT)) { + iterator->_arg += sizeof(uint32_t); /* * check for 
insanity where the present bitmaps @@ -82,12 +126,13 @@ int ieee80211_radiotap_iterator_init( * stated radiotap header length */ - if (((ulong)iterator->arg - - (ulong)iterator->rtheader) > iterator->max_length) + if ((unsigned long)iterator->_arg - + (unsigned long)iterator->_rtheader > + (unsigned long)iterator->_max_length) return -EINVAL; } - iterator->arg += sizeof(u32); + iterator->_arg += sizeof(uint32_t); /* * no need to check again for blowing past stated radiotap @@ -96,12 +141,36 @@ int ieee80211_radiotap_iterator_init( */ } + iterator->this_arg = iterator->_arg; + /* we are all initialized happily */ return 0; } EXPORT_SYMBOL(ieee80211_radiotap_iterator_init); +static void find_ns(struct ieee80211_radiotap_iterator *iterator, + uint32_t oui, uint8_t subns) +{ + int i; + + iterator->current_namespace = NULL; + + if (!iterator->_vns) + return; + + for (i = 0; i < iterator->_vns->n_ns; i++) { + if (iterator->_vns->ns[i].oui != oui) + continue; + if (iterator->_vns->ns[i].subns != subns) + continue; + + iterator->current_namespace = &iterator->_vns->ns[i]; + break; + } +} + + /** * ieee80211_radiotap_iterator_next - return next radiotap parser iterator arg @@ -127,99 +196,80 @@ EXPORT_SYMBOL(ieee80211_radiotap_iterator_init); */ int ieee80211_radiotap_iterator_next( - struct ieee80211_radiotap_iterator *iterator) + struct ieee80211_radiotap_iterator *iterator) { - - /* - * small length lookup table for all radiotap types we heard of - * starting from b0 in the bitmap, so we can walk the payload - * area of the radiotap header - * - * There is a requirement to pad args, so that args - * of a given length must begin at a boundary of that length - * -- but note that compound args are allowed (eg, 2 x u16 - * for IEEE80211_RADIOTAP_CHANNEL) so total arg length is not - * a reliable indicator of alignment requirement. - * - * upper nybble: content alignment for arg - * lower nybble: content length for arg - */ - - static const u8 rt_sizes[] = { - [IEEE80211_RADIOTAP_TSFT] = 0x88, - [IEEE80211_RADIOTAP_FLAGS] = 0x11, - [IEEE80211_RADIOTAP_RATE] = 0x11, - [IEEE80211_RADIOTAP_CHANNEL] = 0x24, - [IEEE80211_RADIOTAP_FHSS] = 0x22, - [IEEE80211_RADIOTAP_DBM_ANTSIGNAL] = 0x11, - [IEEE80211_RADIOTAP_DBM_ANTNOISE] = 0x11, - [IEEE80211_RADIOTAP_LOCK_QUALITY] = 0x22, - [IEEE80211_RADIOTAP_TX_ATTENUATION] = 0x22, - [IEEE80211_RADIOTAP_DB_TX_ATTENUATION] = 0x22, - [IEEE80211_RADIOTAP_DBM_TX_POWER] = 0x11, - [IEEE80211_RADIOTAP_ANTENNA] = 0x11, - [IEEE80211_RADIOTAP_DB_ANTSIGNAL] = 0x11, - [IEEE80211_RADIOTAP_DB_ANTNOISE] = 0x11, - [IEEE80211_RADIOTAP_RX_FLAGS] = 0x22, - [IEEE80211_RADIOTAP_TX_FLAGS] = 0x22, - [IEEE80211_RADIOTAP_RTS_RETRIES] = 0x11, - [IEEE80211_RADIOTAP_DATA_RETRIES] = 0x11, - /* - * add more here as they are defined in - * include/net/ieee80211_radiotap.h - */ - }; - - /* - * for every radiotap entry we can at - * least skip (by knowing the length)... 
- */ - - while (iterator->arg_index < sizeof(rt_sizes)) { + while (1) { int hit = 0; - int pad; + int pad, align, size, subns, vnslen; + uint32_t oui; - if (!(iterator->bitmap_shifter & 1)) + /* if no more EXT bits, that's it */ + if ((iterator->_arg_index % 32) == IEEE80211_RADIOTAP_EXT && + !(iterator->_bitmap_shifter & 1)) + return -ENOENT; + + if (!(iterator->_bitmap_shifter & 1)) goto next_entry; /* arg not present */ + /* get alignment/size of data */ + switch (iterator->_arg_index % 32) { + case IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE: + case IEEE80211_RADIOTAP_EXT: + align = 1; + size = 0; + break; + case IEEE80211_RADIOTAP_VENDOR_NAMESPACE: + align = 2; + size = 6; + break; + default: + if (!iterator->current_namespace || + iterator->_arg_index >= iterator->current_namespace->n_bits) { + if (iterator->current_namespace == &radiotap_ns) + return -ENOENT; + align = 0; + } else { + align = iterator->current_namespace->align_size[iterator->_arg_index].align; + size = iterator->current_namespace->align_size[iterator->_arg_index].size; + } + if (!align) { + /* skip all subsequent data */ + iterator->_arg = iterator->_next_ns_data; + /* give up on this namespace */ + iterator->current_namespace = NULL; + goto next_entry; + } + break; + } + /* * arg is present, account for alignment padding - * 8-bit args can be at any alignment - * 16-bit args must start on 16-bit boundary - * 32-bit args must start on 32-bit boundary - * 64-bit args must start on 64-bit boundary * - * note that total arg size can differ from alignment of - * elements inside arg, so we use upper nybble of length - * table to base alignment on - * - * also note: these alignments are ** relative to the - * start of the radiotap header **. There is no guarantee + * Note that these alignments are relative to the start + * of the radiotap header. There is no guarantee * that the radiotap header itself is aligned on any * kind of boundary. * - * the above is why get_unaligned() is used to dereference - * multibyte elements from the radiotap area + * The above is why get_unaligned() is used to dereference + * multibyte elements from the radiotap area. */ - pad = (((ulong)iterator->arg) - - ((ulong)iterator->rtheader)) & - ((rt_sizes[iterator->arg_index] >> 4) - 1); + pad = ((unsigned long)iterator->_arg - + (unsigned long)iterator->_rtheader) & (align - 1); if (pad) - iterator->arg += - (rt_sizes[iterator->arg_index] >> 4) - pad; + iterator->_arg += align - pad; /* * this is what we will return to user, but we need to * move on first so next call has something fresh to test */ - iterator->this_arg_index = iterator->arg_index; - iterator->this_arg = iterator->arg; - hit = 1; + iterator->this_arg_index = iterator->_arg_index; + iterator->this_arg = iterator->_arg; + iterator->this_arg_size = size; /* internally move on the size of this arg */ - iterator->arg += rt_sizes[iterator->arg_index] & 0x0f; + iterator->_arg += size; /* * check for insanity where we are given a bitmap that @@ -228,32 +278,73 @@ int ieee80211_radiotap_iterator_next( * max_length on the last arg, never exceeding it. 
*/ - if (((ulong)iterator->arg - (ulong)iterator->rtheader) > - iterator->max_length) + if ((unsigned long)iterator->_arg - + (unsigned long)iterator->_rtheader > + (unsigned long)iterator->_max_length) return -EINVAL; - next_entry: - iterator->arg_index++; - if (unlikely((iterator->arg_index & 31) == 0)) { - /* completed current u32 bitmap */ - if (iterator->bitmap_shifter & 1) { - /* b31 was set, there is more */ - /* move to next u32 bitmap */ - iterator->bitmap_shifter = - get_unaligned_le32(iterator->next_bitmap); - iterator->next_bitmap++; - } else - /* no more bitmaps: end */ - iterator->arg_index = sizeof(rt_sizes); - } else /* just try the next bit */ - iterator->bitmap_shifter >>= 1; + /* these special ones are valid in each bitmap word */ + switch (iterator->_arg_index % 32) { + case IEEE80211_RADIOTAP_VENDOR_NAMESPACE: + iterator->_bitmap_shifter >>= 1; + iterator->_arg_index++; + + iterator->_reset_on_ext = 1; + + vnslen = get_unaligned_le16(iterator->this_arg + 4); + iterator->_next_ns_data = iterator->_arg + vnslen; + oui = (*iterator->this_arg << 16) | + (*(iterator->this_arg + 1) << 8) | + *(iterator->this_arg + 2); + subns = *(iterator->this_arg + 3); + + find_ns(iterator, oui, subns); + + iterator->is_radiotap_ns = 0; + /* allow parsers to show this information */ + iterator->this_arg_index = + IEEE80211_RADIOTAP_VENDOR_NAMESPACE; + iterator->this_arg_size += vnslen; + if ((unsigned long)iterator->this_arg + + iterator->this_arg_size - + (unsigned long)iterator->_rtheader > + (unsigned long)(unsigned long)iterator->_max_length) + return -EINVAL; + hit = 1; + break; + case IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE: + iterator->_bitmap_shifter >>= 1; + iterator->_arg_index++; + + iterator->_reset_on_ext = 1; + iterator->current_namespace = &radiotap_ns; + iterator->is_radiotap_ns = 1; + break; + case IEEE80211_RADIOTAP_EXT: + /* + * bit 31 was set, there is more + * -- move to next u32 bitmap + */ + iterator->_bitmap_shifter = + get_unaligned_le32(iterator->_next_bitmap); + iterator->_next_bitmap++; + if (iterator->_reset_on_ext) + iterator->_arg_index = 0; + else + iterator->_arg_index++; + iterator->_reset_on_ext = 0; + break; + default: + /* we've got a hit! */ + hit = 1; + next_entry: + iterator->_bitmap_shifter >>= 1; + iterator->_arg_index++; + } /* if we found a valid arg earlier, return it now */ if (hit) return 0; } - - /* we don't know how to handle any more args, we're done */ - return -ENOENT; } EXPORT_SYMBOL(ieee80211_radiotap_iterator_next); -- cgit v1.2.2 From 070bb5477fb4029131aad4941d7aaf0093db0c38 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 3 Feb 2010 13:57:46 +0100 Subject: mac80211: don't probe if we have probe response We can now easily determine whether we already have probe response information for the BSS we are asked to connect to, in which case there's little point in probing the BSS again. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/mlme.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 86c6ad1b058d..f437284830ef 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -27,10 +27,6 @@ #include "rate.h" #include "led.h" -#define IEEE80211_AUTH_TIMEOUT (HZ / 5) -#define IEEE80211_AUTH_MAX_TRIES 3 -#define IEEE80211_ASSOC_TIMEOUT (HZ / 5) -#define IEEE80211_ASSOC_MAX_TRIES 3 #define IEEE80211_MAX_PROBE_TRIES 5 /* @@ -1844,7 +1840,11 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, wk->probe_auth.algorithm = auth_alg; wk->probe_auth.privacy = req->bss->capability & WLAN_CAPABILITY_PRIVACY; - wk->type = IEEE80211_WORK_DIRECT_PROBE; + /* if we already have a probe, don't probe again */ + if (req->bss->proberesp_ies) + wk->type = IEEE80211_WORK_AUTH; + else + wk->type = IEEE80211_WORK_DIRECT_PROBE; wk->chan = req->bss->channel; wk->sdata = sdata; wk->done = ieee80211_probe_auth_done; -- cgit v1.2.2 From 34e895075e21be3e21e71d6317440d1ee7969ad0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 3 Feb 2010 13:59:58 +0100 Subject: mac80211: allow station add/remove to sleep Many drivers would like to sleep during station addition and removal, and currently have a high complexity there from not being able to. This introduces two new callbacks sta_add() and sta_remove() that drivers can implement instead of using sta_notify() and that can sleep, and the new sta_add() callback is also allowed to fail. The reason we didn't do this previously is that the IBSS code wants to insert stations from the RX path, which is a tasklet, so cannot sleep. This patch will keep the station allocation in that path, but moves adding the station to the driver out of line. Since the addition can now fail, we can have IBSS peer structs the driver rejected -- in that case we still talk to the station but never tell the driver about it in the control.sta pointer. If there will ever be a driver that has a low limit on the number of stations and that cannot talk to any stations that are not known to it, we need to come up with a new strategy of handling larger IBSSs, maybe quicker expiry or rejecting peers. Signed-off-by: Johannes Berg Signed-off-by: John W.
Linville --- net/mac80211/cfg.c | 23 +- net/mac80211/driver-ops.h | 34 +++ net/mac80211/driver-trace.h | 52 ++++ net/mac80211/ibss.c | 22 +- net/mac80211/ieee80211_i.h | 18 +- net/mac80211/mesh_plink.c | 17 +- net/mac80211/mlme.c | 14 +- net/mac80211/pm.c | 10 +- net/mac80211/rx.c | 4 +- net/mac80211/sta_info.c | 731 ++++++++++++++++++++++---------------------- net/mac80211/sta_info.h | 32 +- net/mac80211/tx.c | 2 +- net/mac80211/util.c | 16 +- 13 files changed, 511 insertions(+), 464 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index facf233843e0..a362523d8eb7 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -747,9 +747,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, layer2_update = sdata->vif.type == NL80211_IFTYPE_AP_VLAN || sdata->vif.type == NL80211_IFTYPE_AP; - rcu_read_lock(); - - err = sta_info_insert(sta); + err = sta_info_insert_rcu(sta); if (err) { rcu_read_unlock(); return err; @@ -768,26 +766,13 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; - struct sta_info *sta; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (mac) { - rcu_read_lock(); - - sta = sta_info_get_bss(sdata, mac); - if (!sta) { - rcu_read_unlock(); - return -ENOENT; - } - - sta_info_unlink(&sta); - rcu_read_unlock(); - - sta_info_destroy(sta); - } else - sta_info_flush(local, sdata); + if (mac) + return sta_info_destroy_addr_bss(sdata, mac); + sta_info_flush(local, sdata); return 0; } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 6c31f38ac7f5..855e85b55061 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -243,6 +243,40 @@ static inline void drv_sta_notify(struct ieee80211_local *local, trace_drv_sta_notify(local, sdata, cmd, sta); } +static inline int drv_sta_add(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta) +{ + int ret = 0; + + might_sleep(); + + if (local->ops->sta_add) + ret = local->ops->sta_add(&local->hw, &sdata->vif, sta); + else if (local->ops->sta_notify) + local->ops->sta_notify(&local->hw, &sdata->vif, + STA_NOTIFY_ADD, sta); + + trace_drv_sta_add(local, sdata, sta, ret); + + return ret; +} + +static inline void drv_sta_remove(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta) +{ + might_sleep(); + + if (local->ops->sta_remove) + local->ops->sta_remove(&local->hw, &sdata->vif, sta); + else if (local->ops->sta_notify) + local->ops->sta_notify(&local->hw, &sdata->vif, + STA_NOTIFY_REMOVE, sta); + + trace_drv_sta_remove(local, sdata, sta); +} + static inline int drv_conf_tx(struct ieee80211_local *local, u16 queue, const struct ieee80211_tx_queue_params *params) { diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 502424b2538a..c984910bf275 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -545,6 +545,58 @@ TRACE_EVENT(drv_sta_notify, ) ); +TRACE_EVENT(drv_sta_add, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, int ret), + + TP_ARGS(local, sdata, sta, ret), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + STA_ENTRY + __field(int, ret) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + STA_ASSIGN; + __entry->ret = ret; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " ret:%d", + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, 
__entry->ret + ) +); + +TRACE_EVENT(drv_sta_remove, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta), + + TP_ARGS(local, sdata, sta), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + STA_ENTRY + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + STA_ASSIGN; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT, + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG + ) +); + TRACE_EVENT(drv_conf_tx, TP_PROTO(struct ieee80211_local *local, u16 queue, const struct ieee80211_tx_queue_params *params, diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 85c4ba14c77d..f3e942486749 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -275,10 +275,12 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, (unsigned long long) supp_rates, (unsigned long long) sta->sta.supp_rates[band]); #endif - } else - ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates); - - rcu_read_unlock(); + rcu_read_unlock(); + } else { + rcu_read_unlock(); + ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, + supp_rates, GFP_KERNEL); + } } bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems, @@ -368,7 +370,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, sdata->name, mgmt->bssid); #endif ieee80211_sta_join_ibss(sdata, bss); - ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates); + ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, + supp_rates, GFP_KERNEL); } put_bss: @@ -381,7 +384,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, * must be callable in atomic context. */ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, - u8 *bssid,u8 *addr, u32 supp_rates) + u8 *bssid,u8 *addr, u32 supp_rates, + gfp_t gfp) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_local *local = sdata->local; @@ -410,7 +414,7 @@ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, wiphy_name(local->hw.wiphy), addr, sdata->name); #endif - sta = sta_info_alloc(sdata, addr, GFP_ATOMIC); + sta = sta_info_alloc(sdata, addr, gfp); if (!sta) return NULL; @@ -422,9 +426,9 @@ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, rate_control_rate_init(sta); + /* If it fails, maybe we raced another insertion? */ if (sta_info_insert(sta)) - return NULL; - + return sta_info_get(sdata, addr); return sta; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 3067fbd69d63..a5911191f224 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -688,15 +688,18 @@ struct ieee80211_local { /* Station data */ /* - * The lock only protects the list, hash, timer and counter - * against manipulation, reads are done in RCU. Additionally, - * the lock protects each BSS's TIM bitmap. + * The mutex only protects the list and counter, + * reads are done in RCU. + * Additionally, the lock protects the hash table, + * the pending list and each BSS's TIM bitmap. 
*/ + struct mutex sta_mtx; spinlock_t sta_lock; unsigned long num_sta; - struct list_head sta_list; + struct list_head sta_list, sta_pending_list; struct sta_info *sta_hash[STA_HASH_SIZE]; struct timer_list sta_cleanup; + struct work_struct sta_finish_work; int sta_generation; struct sk_buff_head pending[IEEE80211_MAX_QUEUES]; @@ -770,10 +773,6 @@ struct ieee80211_local { assoc_led_name[32], radio_led_name[32]; #endif -#ifdef CONFIG_MAC80211_DEBUGFS - struct work_struct sta_debugfs_add; -#endif - #ifdef CONFIG_MAC80211_DEBUG_COUNTERS /* TX/RX handler statistics */ unsigned int tx_handlers_drop; @@ -985,7 +984,8 @@ void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata); ieee80211_rx_result ieee80211_ibss_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, - u8 *bssid, u8 *addr, u32 supp_rates); + u8 *bssid, u8 *addr, u32 supp_rates, + gfp_t gfp); int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, struct cfg80211_ibss_params *params); int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata); diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 7985e5150898..bc4e20e57ff5 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -102,7 +102,7 @@ static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata, if (local->num_sta >= MESH_MAX_PLINKS) return NULL; - sta = sta_info_alloc(sdata, hw_addr, GFP_ATOMIC); + sta = sta_info_alloc(sdata, hw_addr, GFP_KERNEL); if (!sta) return NULL; @@ -236,12 +236,12 @@ void mesh_neighbour_update(u8 *hw_addr, u32 rates, struct ieee80211_sub_if_data sta = sta_info_get(sdata, hw_addr); if (!sta) { + rcu_read_unlock(); + sta = mesh_plink_alloc(sdata, hw_addr, rates); - if (!sta) { - rcu_read_unlock(); + if (!sta) return; - } - if (sta_info_insert(sta)) { + if (sta_info_insert_rcu(sta)) { rcu_read_unlock(); return; } @@ -485,9 +485,11 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m } else if (!sta) { /* ftype == PLINK_OPEN */ u32 rates; + + rcu_read_unlock(); + if (!mesh_plink_free_count(sdata)) { mpl_dbg("Mesh plink error: no more free plinks\n"); - rcu_read_unlock(); return; } @@ -495,10 +497,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m sta = mesh_plink_alloc(sdata, mgmt->sa, rates); if (!sta) { mpl_dbg("Mesh plink error: plink table full\n"); - rcu_read_unlock(); return; } - if (sta_info_insert(sta)) { + if (sta_info_insert_rcu(sta)) { rcu_read_unlock(); return; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f437284830ef..ac9429e8d72b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -822,19 +822,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata) changed |= BSS_CHANGED_BSSID; ieee80211_bss_info_change_notify(sdata, changed); - rcu_read_lock(); - - sta = sta_info_get(sdata, bssid); - if (!sta) { - rcu_read_unlock(); - return; - } - - sta_info_unlink(&sta); - - rcu_read_unlock(); - - sta_info_destroy(sta); + sta_info_destroy_addr(sdata, bssid); } void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 47f818959ad7..0e64484e861c 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -11,7 +11,6 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_sub_if_data *sdata; struct sta_info *sta; - unsigned long flags; 
ieee80211_scan_cancel(local); @@ -55,22 +54,21 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) rcu_read_unlock(); /* remove STAs */ - spin_lock_irqsave(&local->sta_lock, flags); + mutex_lock(&local->sta_mtx); list_for_each_entry(sta, &local->sta_list, list) { - if (local->ops->sta_notify) { + if (sta->uploaded) { sdata = sta->sdata; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) sdata = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); - drv_sta_notify(local, sdata, STA_NOTIFY_REMOVE, - &sta->sta); + drv_sta_remove(local, sdata, &sta->sta); } mesh_plink_quiesce(sta); } - spin_unlock_irqrestore(&local->sta_lock, flags); + mutex_unlock(&local->sta_mtx); /* remove all interfaces */ list_for_each_entry(sdata, &local->interfaces, list) { diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 5709307fcb9b..01dba7618397 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2244,8 +2244,8 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, rate_idx = 0; /* TODO: HT rates */ else rate_idx = status->rate_idx; - rx->sta = ieee80211_ibss_add_sta(sdata, bssid, hdr->addr2, - BIT(rate_idx)); + rx->sta = ieee80211_ibss_add_sta(sdata, bssid, + hdr->addr2, BIT(rate_idx), GFP_ATOMIC); } break; case NL80211_IFTYPE_MESH_POINT: diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index f735826f055c..211c475f73c6 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -32,49 +32,33 @@ * for faster lookup and a list for iteration. They are managed using * RCU, i.e. access to the list and hash table is protected by RCU. * - * Upon allocating a STA info structure with sta_info_alloc(), the caller owns - * that structure. It must then either destroy it using sta_info_destroy() - * (which is pretty useless) or insert it into the hash table using - * sta_info_insert() which demotes the reference from ownership to a regular - * RCU-protected reference; if the function is called without protection by an - * RCU critical section the reference is instantly invalidated. Note that the - * caller may not do much with the STA info before inserting it, in particular, - * it may not start any mesh peer link management or add encryption keys. + * Upon allocating a STA info structure with sta_info_alloc(), the caller + * owns that structure. It must then insert it into the hash table using + * either sta_info_insert() or sta_info_insert_rcu(); only in the latter + * case (which acquires an rcu read section but must not be called from + * within one) will the pointer still be valid after the call. Note that + * the caller may not do much with the STA info before inserting it, in + * particular, it may not start any mesh peer link management or add + * encryption keys. * * When the insertion fails (sta_info_insert()) returns non-zero), the * structure will have been freed by sta_info_insert()! * - * sta entries are added by mac80211 when you establish a link with a + * Station entries are added by mac80211 when you establish a link with a * peer. This means different things for the different type of interfaces * we support. For a regular station this mean we add the AP sta when we * receive an assocation response from the AP. For IBSS this occurs when - * we receive a probe response or a beacon from target IBSS network. For - * WDS we add the sta for the peer imediately upon device open. When using - * AP mode we add stations for each respective station upon request from - * userspace through nl80211. + * get to know about a peer on the same IBSS. 
For WDS we add the sta for + * the peer imediately upon device open. When using AP mode we add stations + * for each respective station upon request from userspace through nl80211. * - * Because there are debugfs entries for each station, and adding those - * must be able to sleep, it is also possible to "pin" a station entry, - * that means it can be removed from the hash table but not be freed. - * See the comment in __sta_info_unlink() for more information, this is - * an internal capability only. + * In order to remove a STA info structure, various sta_info_destroy_*() + * calls are available. * - * In order to remove a STA info structure, the caller needs to first - * unlink it (sta_info_unlink()) from the list and hash tables and - * then destroy it; sta_info_destroy() will wait for an RCU grace period - * to elapse before actually freeing it. Due to the pinning and the - * possibility of multiple callers trying to remove the same STA info at - * the same time, sta_info_unlink() can clear the STA info pointer it is - * passed to indicate that the STA info is owned by somebody else now. - * - * If sta_info_unlink() did not clear the pointer then the caller owns - * the STA info structure now and is responsible of destroying it with - * a call to sta_info_destroy(). - * - * In all other cases, there is no concept of ownership on a STA entry, - * each structure is owned by the global hash table/list until it is - * removed. All users of the structure need to be RCU protected so that - * the structure won't be freed before they are done using it. + * There is no concept of ownership on a STA entry, each structure is + * owned by the global hash table/list until it is removed. All users of + * the structure need to be RCU protected so that the structure won't be + * freed before they are done using it. */ /* Caller must hold local->sta_lock */ @@ -185,101 +169,6 @@ static void __sta_info_free(struct ieee80211_local *local, kfree(sta); } -void sta_info_destroy(struct sta_info *sta) -{ - struct ieee80211_local *local; - struct sk_buff *skb; - int i; - - might_sleep(); - - if (!sta) - return; - - local = sta->local; - - cancel_work_sync(&sta->drv_unblock_wk); - - rate_control_remove_sta_debugfs(sta); - ieee80211_sta_debugfs_remove(sta); - -#ifdef CONFIG_MAC80211_MESH - if (ieee80211_vif_is_mesh(&sta->sdata->vif)) - mesh_plink_deactivate(sta); -#endif - - /* - * We have only unlinked the key, and actually destroying it - * may mean it is removed from hardware which requires that - * the key->sta pointer is still valid, so flush the key todo - * list here. - * - * ieee80211_key_todo() will synchronize_rcu() so after this - * nothing can reference this sta struct any more. - */ - ieee80211_key_todo(); - -#ifdef CONFIG_MAC80211_MESH - if (ieee80211_vif_is_mesh(&sta->sdata->vif)) - del_timer_sync(&sta->plink_timer); -#endif - - while ((skb = skb_dequeue(&sta->ps_tx_buf)) != NULL) { - local->total_ps_buffered--; - dev_kfree_skb_any(skb); - } - - while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) - dev_kfree_skb_any(skb); - - for (i = 0; i < STA_TID_NUM; i++) { - struct tid_ampdu_rx *tid_rx; - struct tid_ampdu_tx *tid_tx; - - spin_lock_bh(&sta->lock); - tid_rx = sta->ampdu_mlme.tid_rx[i]; - /* Make sure timer won't free the tid_rx struct, see below */ - if (tid_rx) - tid_rx->shutdown = true; - - spin_unlock_bh(&sta->lock); - - /* - * Outside spinlock - shutdown is true now so that the timer - * won't free tid_rx, we have to do that now. 
Can't let the - * timer do it because we have to sync the timer outside the - * lock that it takes itself. - */ - if (tid_rx) { - del_timer_sync(&tid_rx->session_timer); - kfree(tid_rx); - } - - /* - * No need to do such complications for TX agg sessions, the - * path leading to freeing the tid_tx struct goes via a call - * from the driver, and thus needs to look up the sta struct - * again, which cannot be found when we get here. Hence, we - * just need to delete the timer and free the aggregation - * info; we won't be telling the peer about it then but that - * doesn't matter if we're not talking to it again anyway. - */ - tid_tx = sta->ampdu_mlme.tid_tx[i]; - if (tid_tx) { - del_timer_sync(&tid_tx->addba_resp_timer); - /* - * STA removed while aggregation session being - * started? Bit odd, but purge frames anyway. - */ - skb_queue_purge(&tid_tx->pending); - kfree(tid_tx); - } - } - - __sta_info_free(local, sta); -} - - /* Caller must hold local->sta_lock */ static void sta_info_hash_add(struct ieee80211_local *local, struct sta_info *sta) @@ -376,7 +265,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, return sta; } -int sta_info_insert(struct sta_info *sta) +static int sta_info_finish_insert(struct sta_info *sta, bool async) { struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; @@ -384,6 +273,91 @@ int sta_info_insert(struct sta_info *sta) unsigned long flags; int err = 0; + WARN_ON(!mutex_is_locked(&local->sta_mtx)); + + /* notify driver */ + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata = container_of(sdata->bss, + struct ieee80211_sub_if_data, + u.ap); + err = drv_sta_add(local, sdata, &sta->sta); + if (err) { + if (!async) + return err; + printk(KERN_DEBUG "%s: failed to add IBSS STA %pM to driver (%d)" + " - keeping it anyway.\n", + sdata->name, sta->sta.addr, err); + } else { + sta->uploaded = true; +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + if (async) + printk(KERN_DEBUG "%s: Finished adding IBSS STA %pM\n", + wiphy_name(local->hw.wiphy), sta->sta.addr); +#endif + } + + sdata = sta->sdata; + + if (!async) { + local->num_sta++; + local->sta_generation++; + smp_mb(); + + /* make the station visible */ + spin_lock_irqsave(&local->sta_lock, flags); + sta_info_hash_add(local, sta); + spin_unlock_irqrestore(&local->sta_lock, flags); + } + + list_add(&sta->list, &local->sta_list); + + ieee80211_sta_debugfs_add(sta); + rate_control_add_sta_debugfs(sta); + + sinfo.filled = 0; + sinfo.generation = local->sta_generation; + cfg80211_new_sta(sdata->dev, sta->sta.addr, &sinfo, GFP_KERNEL); + + + return 0; +} + +static void sta_info_finish_pending(struct ieee80211_local *local) +{ + struct sta_info *sta; + unsigned long flags; + + spin_lock_irqsave(&local->sta_lock, flags); + while (!list_empty(&local->sta_pending_list)) { + sta = list_first_entry(&local->sta_pending_list, + struct sta_info, list); + list_del(&sta->list); + spin_unlock_irqrestore(&local->sta_lock, flags); + + sta_info_finish_insert(sta, true); + + spin_lock_irqsave(&local->sta_lock, flags); + } + spin_unlock_irqrestore(&local->sta_lock, flags); +} + +static void sta_info_finish_work(struct work_struct *work) +{ + struct ieee80211_local *local = + container_of(work, struct ieee80211_local, sta_finish_work); + + mutex_lock(&local->sta_mtx); + sta_info_finish_pending(local); + mutex_unlock(&local->sta_mtx); +} + +int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU) +{ + struct ieee80211_local *local = sta->local; + struct ieee80211_sub_if_data 
*sdata = sta->sdata; + unsigned long flags; + int err = 0; + /* * Can't be a WARN_ON because it can be triggered through a race: * something inserts a STA (on one CPU) without holding the RTNL @@ -391,36 +365,87 @@ int sta_info_insert(struct sta_info *sta) */ if (unlikely(!ieee80211_sdata_running(sdata))) { err = -ENETDOWN; + rcu_read_lock(); goto out_free; } if (WARN_ON(compare_ether_addr(sta->sta.addr, sdata->vif.addr) == 0 || is_multicast_ether_addr(sta->sta.addr))) { err = -EINVAL; + rcu_read_lock(); goto out_free; } + /* + * In ad-hoc mode, we sometimes need to insert stations + * from tasklet context from the RX path. To avoid races, + * always do so in that case -- see the comment below. + */ + if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { + spin_lock_irqsave(&local->sta_lock, flags); + /* check if STA exists already */ + if (sta_info_get_bss(sdata, sta->sta.addr)) { + spin_unlock_irqrestore(&local->sta_lock, flags); + rcu_read_lock(); + err = -EEXIST; + goto out_free; + } + + local->num_sta++; + local->sta_generation++; + smp_mb(); + sta_info_hash_add(local, sta); + + list_add_tail(&sta->list, &local->sta_pending_list); + + rcu_read_lock(); + spin_unlock_irqrestore(&local->sta_lock, flags); + +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + printk(KERN_DEBUG "%s: Added IBSS STA %pM\n", + wiphy_name(local->hw.wiphy), sta->sta.addr); +#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + + ieee80211_queue_work(&local->hw, &local->sta_finish_work); + + return 0; + } + + /* + * On first glance, this will look racy, because the code + * below this point, which inserts a station with sleeping, + * unlocks the sta_lock between checking existence in the + * hash table and inserting into it. + * + * However, it is not racy against itself because it keeps + * the mutex locked. It still seems to race against the + * above code that atomically inserts the station... That, + * however, is not true because the above code can only + * be invoked for IBSS interfaces, and the below code will + * not be -- and the two do not race against each other as + * the hash table also keys off the interface. + */ + + might_sleep(); + + mutex_lock(&local->sta_mtx); + spin_lock_irqsave(&local->sta_lock, flags); /* check if STA exists already */ - if (sta_info_get(sdata, sta->sta.addr)) { + if (sta_info_get_bss(sdata, sta->sta.addr)) { spin_unlock_irqrestore(&local->sta_lock, flags); + rcu_read_lock(); err = -EEXIST; goto out_free; } - list_add(&sta->list, &local->sta_list); - local->sta_generation++; - local->num_sta++; - sta_info_hash_add(local, sta); - /* notify driver */ - if (local->ops->sta_notify) { - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - sdata = container_of(sdata->bss, - struct ieee80211_sub_if_data, - u.ap); + spin_unlock_irqrestore(&local->sta_lock, flags); - drv_sta_notify(local, sdata, STA_NOTIFY_ADD, &sta->sta); - sdata = sta->sdata; + err = sta_info_finish_insert(sta, false); + if (err) { + mutex_unlock(&local->sta_mtx); + rcu_read_lock(); + goto out_free; } #ifdef CONFIG_MAC80211_VERBOSE_DEBUG @@ -428,22 +453,9 @@ int sta_info_insert(struct sta_info *sta) wiphy_name(local->hw.wiphy), sta->sta.addr); #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - spin_unlock_irqrestore(&local->sta_lock, flags); - - sinfo.filled = 0; - sinfo.generation = local->sta_generation; - cfg80211_new_sta(sdata->dev, sta->sta.addr, &sinfo, GFP_ATOMIC); - -#ifdef CONFIG_MAC80211_DEBUGFS - /* - * Debugfs entry adding might sleep, so schedule process - * context task for adding entry for STAs that do not yet - * have one. 
- * NOTE: due to auto-freeing semantics this may only be done - * if the insertion is successful! - */ - schedule_work(&local->sta_debugfs_add); -#endif + /* move reference to rcu-protected */ + rcu_read_lock(); + mutex_unlock(&local->sta_mtx); if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_accept_plinks_update(sdata); @@ -455,6 +467,15 @@ int sta_info_insert(struct sta_info *sta) return err; } +int sta_info_insert(struct sta_info *sta) +{ + int err = sta_info_insert_rcu(sta); + + rcu_read_unlock(); + + return err; +} + static inline void __bss_tim_set(struct ieee80211_if_ap *bss, u16 aid) { /* @@ -523,108 +544,6 @@ void sta_info_clear_tim_bit(struct sta_info *sta) spin_unlock_irqrestore(&sta->local->sta_lock, flags); } -static void __sta_info_unlink(struct sta_info **sta) -{ - struct ieee80211_local *local = (*sta)->local; - struct ieee80211_sub_if_data *sdata = (*sta)->sdata; - /* - * pull caller's reference if we're already gone. - */ - if (sta_info_hash_del(local, *sta)) { - *sta = NULL; - return; - } - - if ((*sta)->key) { - ieee80211_key_free((*sta)->key); - WARN_ON((*sta)->key); - } - - list_del(&(*sta)->list); - (*sta)->dead = true; - - if (test_and_clear_sta_flags(*sta, - WLAN_STA_PS_STA | WLAN_STA_PS_DRIVER)) { - BUG_ON(!sdata->bss); - - atomic_dec(&sdata->bss->num_sta_ps); - __sta_info_clear_tim_bit(sdata->bss, *sta); - } - - local->num_sta--; - local->sta_generation++; - - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - rcu_assign_pointer(sdata->u.vlan.sta, NULL); - - if (local->ops->sta_notify) { - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - sdata = container_of(sdata->bss, - struct ieee80211_sub_if_data, - u.ap); - - drv_sta_notify(local, sdata, STA_NOTIFY_REMOVE, - &(*sta)->sta); - sdata = (*sta)->sdata; - } - - if (ieee80211_vif_is_mesh(&sdata->vif)) { - mesh_accept_plinks_update(sdata); -#ifdef CONFIG_MAC80211_MESH - del_timer(&(*sta)->plink_timer); -#endif - } - -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: Removed STA %pM\n", - wiphy_name(local->hw.wiphy), (*sta)->sta.addr); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - - /* - * Finally, pull caller's reference if the STA is pinned by the - * task that is adding the debugfs entries. In that case, we - * leave the STA "to be freed". - * - * The rules are not trivial, but not too complex either: - * (1) pin_status is only modified under the sta_lock - * (2) STAs may only be pinned under the RTNL so that - * sta_info_flush() is guaranteed to actually destroy - * all STAs that are active for a given interface, this - * is required for correctness because otherwise we - * could notify a driver that an interface is going - * away and only after that (!) notify it about a STA - * on that interface going away. - * (3) sta_info_debugfs_add_work() will set the status - * to PINNED when it found an item that needs a new - * debugfs directory created. In that case, that item - * must not be freed although all *RCU* users are done - * with it. Hence, we tell the caller of _unlink() - * that the item is already gone (as can happen when - * two tasks try to unlink/destroy at the same time) - * (4) We set the pin_status to DESTROY here when we - * find such an item. - * (5) sta_info_debugfs_add_work() will reset the pin_status - * from PINNED to NORMAL when it is done with the item, - * but will check for DESTROY before resetting it in - * which case it will free the item. 
- */ - if ((*sta)->pin_status == STA_INFO_PIN_STAT_PINNED) { - (*sta)->pin_status = STA_INFO_PIN_STAT_DESTROY; - *sta = NULL; - return; - } -} - -void sta_info_unlink(struct sta_info **sta) -{ - struct ieee80211_local *local = (*sta)->local; - unsigned long flags; - - spin_lock_irqsave(&local->sta_lock, flags); - __sta_info_unlink(sta); - spin_unlock_irqrestore(&local->sta_lock, flags); -} - static int sta_info_buffer_expired(struct sta_info *sta, struct sk_buff *skb) { @@ -681,109 +600,209 @@ static void sta_info_cleanup_expire_buffered(struct ieee80211_local *local, } } - -static void sta_info_cleanup(unsigned long data) +static int __must_check __sta_info_destroy(struct sta_info *sta) { - struct ieee80211_local *local = (struct ieee80211_local *) data; - struct sta_info *sta; + struct ieee80211_local *local; + struct ieee80211_sub_if_data *sdata; + struct sk_buff *skb; + unsigned long flags; + int ret, i; - rcu_read_lock(); - list_for_each_entry_rcu(sta, &local->sta_list, list) - sta_info_cleanup_expire_buffered(local, sta); - rcu_read_unlock(); + might_sleep(); - if (local->quiescing) - return; + if (!sta) + return -ENOENT; - local->sta_cleanup.expires = - round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL); - add_timer(&local->sta_cleanup); -} + local = sta->local; + sdata = sta->sdata; -#ifdef CONFIG_MAC80211_DEBUGFS -/* - * See comment in __sta_info_unlink, - * caller must hold local->sta_lock. - */ -static void __sta_info_pin(struct sta_info *sta) -{ - WARN_ON(sta->pin_status != STA_INFO_PIN_STAT_NORMAL); - sta->pin_status = STA_INFO_PIN_STAT_PINNED; + spin_lock_irqsave(&local->sta_lock, flags); + ret = sta_info_hash_del(local, sta); + /* this might still be the pending list ... which is fine */ + if (!ret) + list_del(&sta->list); + spin_unlock_irqrestore(&local->sta_lock, flags); + if (ret) + return ret; + + if (sta->key) { + ieee80211_key_free(sta->key); + /* + * We have only unlinked the key, and actually destroying it + * may mean it is removed from hardware which requires that + * the key->sta pointer is still valid, so flush the key todo + * list here. + * + * ieee80211_key_todo() will synchronize_rcu() so after this + * nothing can reference this sta struct any more. 
+ */ + ieee80211_key_todo(); + + WARN_ON(sta->key); + } + + sta->dead = true; + + if (test_and_clear_sta_flags(sta, + WLAN_STA_PS_STA | WLAN_STA_PS_DRIVER)) { + BUG_ON(!sdata->bss); + + atomic_dec(&sdata->bss->num_sta_ps); + __sta_info_clear_tim_bit(sdata->bss, sta); + } + + local->num_sta--; + local->sta_generation++; + + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + rcu_assign_pointer(sdata->u.vlan.sta, NULL); + + if (sta->uploaded) { + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata = container_of(sdata->bss, + struct ieee80211_sub_if_data, + u.ap); + drv_sta_remove(local, sdata, &sta->sta); + sdata = sta->sdata; + } + +#ifdef CONFIG_MAC80211_MESH + if (ieee80211_vif_is_mesh(&sdata->vif)) { + mesh_accept_plinks_update(sdata); + del_timer(&sta->plink_timer); + } +#endif + +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + printk(KERN_DEBUG "%s: Removed STA %pM\n", + wiphy_name(local->hw.wiphy), sta->sta.addr); +#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + cancel_work_sync(&sta->drv_unblock_wk); + + rate_control_remove_sta_debugfs(sta); + ieee80211_sta_debugfs_remove(sta); + +#ifdef CONFIG_MAC80211_MESH + if (ieee80211_vif_is_mesh(&sta->sdata->vif)) { + mesh_plink_deactivate(sta); + del_timer_sync(&sta->plink_timer); + } +#endif + + while ((skb = skb_dequeue(&sta->ps_tx_buf)) != NULL) { + local->total_ps_buffered--; + dev_kfree_skb_any(skb); + } + + while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) + dev_kfree_skb_any(skb); + + for (i = 0; i < STA_TID_NUM; i++) { + struct tid_ampdu_rx *tid_rx; + struct tid_ampdu_tx *tid_tx; + + spin_lock_bh(&sta->lock); + tid_rx = sta->ampdu_mlme.tid_rx[i]; + /* Make sure timer won't free the tid_rx struct, see below */ + if (tid_rx) + tid_rx->shutdown = true; + + spin_unlock_bh(&sta->lock); + + /* + * Outside spinlock - shutdown is true now so that the timer + * won't free tid_rx, we have to do that now. Can't let the + * timer do it because we have to sync the timer outside the + * lock that it takes itself. + */ + if (tid_rx) { + del_timer_sync(&tid_rx->session_timer); + kfree(tid_rx); + } + + /* + * No need to do such complications for TX agg sessions, the + * path leading to freeing the tid_tx struct goes via a call + * from the driver, and thus needs to look up the sta struct + * again, which cannot be found when we get here. Hence, we + * just need to delete the timer and free the aggregation + * info; we won't be telling the peer about it then but that + * doesn't matter if we're not talking to it again anyway. + */ + tid_tx = sta->ampdu_mlme.tid_tx[i]; + if (tid_tx) { + del_timer_sync(&tid_tx->addba_resp_timer); + /* + * STA removed while aggregation session being + * started? Bit odd, but purge frames anyway. + */ + skb_queue_purge(&tid_tx->pending); + kfree(tid_tx); + } + } + + __sta_info_free(local, sta); + + return 0; } -/* - * See comment in __sta_info_unlink, returns sta if it - * needs to be destroyed. 
- */ -static struct sta_info *__sta_info_unpin(struct sta_info *sta) +int sta_info_destroy_addr(struct ieee80211_sub_if_data *sdata, const u8 *addr) { - struct sta_info *ret = NULL; - unsigned long flags; + struct sta_info *sta; + int ret; - spin_lock_irqsave(&sta->local->sta_lock, flags); - WARN_ON(sta->pin_status != STA_INFO_PIN_STAT_DESTROY && - sta->pin_status != STA_INFO_PIN_STAT_PINNED); - if (sta->pin_status == STA_INFO_PIN_STAT_DESTROY) - ret = sta; - sta->pin_status = STA_INFO_PIN_STAT_NORMAL; - spin_unlock_irqrestore(&sta->local->sta_lock, flags); + mutex_lock(&sdata->local->sta_mtx); + sta = sta_info_get(sdata, addr); + ret = __sta_info_destroy(sta); + mutex_unlock(&sdata->local->sta_mtx); return ret; } -static void sta_info_debugfs_add_work(struct work_struct *work) +int sta_info_destroy_addr_bss(struct ieee80211_sub_if_data *sdata, + const u8 *addr) { - struct ieee80211_local *local = - container_of(work, struct ieee80211_local, sta_debugfs_add); - struct sta_info *sta, *tmp; - unsigned long flags; + struct sta_info *sta; + int ret; - /* We need to keep the RTNL across the whole pinned status. */ - rtnl_lock(); - while (1) { - sta = NULL; + mutex_lock(&sdata->local->sta_mtx); + sta = sta_info_get_bss(sdata, addr); + ret = __sta_info_destroy(sta); + mutex_unlock(&sdata->local->sta_mtx); - spin_lock_irqsave(&local->sta_lock, flags); - list_for_each_entry(tmp, &local->sta_list, list) { - /* - * debugfs.add_has_run will be set by - * ieee80211_sta_debugfs_add regardless - * of what else it does. - */ - if (!tmp->debugfs.add_has_run) { - sta = tmp; - __sta_info_pin(sta); - break; - } - } - spin_unlock_irqrestore(&local->sta_lock, flags); + return ret; +} - if (!sta) - break; +static void sta_info_cleanup(unsigned long data) +{ + struct ieee80211_local *local = (struct ieee80211_local *) data; + struct sta_info *sta; + + rcu_read_lock(); + list_for_each_entry_rcu(sta, &local->sta_list, list) + sta_info_cleanup_expire_buffered(local, sta); + rcu_read_unlock(); - ieee80211_sta_debugfs_add(sta); - rate_control_add_sta_debugfs(sta); + if (local->quiescing) + return; - sta = __sta_info_unpin(sta); - sta_info_destroy(sta); - } - rtnl_unlock(); + local->sta_cleanup.expires = + round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL); + add_timer(&local->sta_cleanup); } -#endif void sta_info_init(struct ieee80211_local *local) { spin_lock_init(&local->sta_lock); + mutex_init(&local->sta_mtx); INIT_LIST_HEAD(&local->sta_list); + INIT_LIST_HEAD(&local->sta_pending_list); + INIT_WORK(&local->sta_finish_work, sta_info_finish_work); setup_timer(&local->sta_cleanup, sta_info_cleanup, (unsigned long)local); local->sta_cleanup.expires = round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL); - -#ifdef CONFIG_MAC80211_DEBUGFS - INIT_WORK(&local->sta_debugfs_add, sta_info_debugfs_add_work); -#endif } int sta_info_start(struct ieee80211_local *local) @@ -795,16 +814,6 @@ int sta_info_start(struct ieee80211_local *local) void sta_info_stop(struct ieee80211_local *local) { del_timer(&local->sta_cleanup); -#ifdef CONFIG_MAC80211_DEBUGFS - /* - * Make sure the debugfs adding work isn't pending after this - * because we're about to be destroyed. It doesn't matter - * whether it ran or not since we're going to flush all STAs - * anyway. 
- */ - cancel_work_sync(&local->sta_debugfs_add); -#endif - sta_info_flush(local, NULL); } @@ -820,26 +829,19 @@ int sta_info_flush(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { struct sta_info *sta, *tmp; - LIST_HEAD(tmp_list); int ret = 0; - unsigned long flags; might_sleep(); - spin_lock_irqsave(&local->sta_lock, flags); + mutex_lock(&local->sta_mtx); + + sta_info_finish_pending(local); + list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { - if (!sdata || sdata == sta->sdata) { - __sta_info_unlink(&sta); - if (sta) { - list_add_tail(&sta->list, &tmp_list); - ret++; - } - } + if (!sdata || sdata == sta->sdata) + WARN_ON(__sta_info_destroy(sta)); } - spin_unlock_irqrestore(&local->sta_lock, flags); - - list_for_each_entry_safe(sta, tmp, &tmp_list, list) - sta_info_destroy(sta); + mutex_unlock(&local->sta_mtx); return ret; } @@ -849,24 +851,17 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct sta_info *sta, *tmp; - LIST_HEAD(tmp_list); - unsigned long flags; - spin_lock_irqsave(&local->sta_lock, flags); + mutex_lock(&local->sta_mtx); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) if (time_after(jiffies, sta->last_rx + exp_time)) { #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG "%s: expiring inactive STA %pM\n", sdata->name, sta->sta.addr); #endif - __sta_info_unlink(&sta); - if (sta) - list_add(&sta->list, &tmp_list); + WARN_ON(__sta_info_destroy(sta)); } - spin_unlock_irqrestore(&local->sta_lock, flags); - - list_for_each_entry_safe(sta, tmp, &tmp_list, list) - sta_info_destroy(sta); + mutex_unlock(&local->sta_mtx); } struct ieee80211_sta *ieee80211_find_sta_by_hw(struct ieee80211_hw *hw, diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 6f79bba5706e..5ff611a35979 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -162,11 +162,6 @@ struct sta_ampdu_mlme { }; -/* see __sta_info_unlink */ -#define STA_INFO_PIN_STAT_NORMAL 0 -#define STA_INFO_PIN_STAT_PINNED 1 -#define STA_INFO_PIN_STAT_DESTROY 2 - /** * struct sta_info - STA information * @@ -187,7 +182,6 @@ struct sta_ampdu_mlme { * @flaglock: spinlock for flags accesses * @drv_unblock_wk: used for driver PS unblocking * @listen_interval: listen interval of this station, when we're acting as AP - * @pin_status: used internally for pinning a STA struct into memory * @flags: STA flags, see &enum ieee80211_sta_info_flags * @ps_tx_buf: buffer of frames to transmit to this station * when it leaves power saving state @@ -226,6 +220,7 @@ struct sta_ampdu_mlme { * @debugfs: debug filesystem info * @sta: station information we share with the driver * @dead: set to true when sta is unlinked + * @uploaded: set to true when sta is uploaded to the driver */ struct sta_info { /* General information, mostly static */ @@ -245,11 +240,7 @@ struct sta_info { bool dead; - /* - * for use by the internal lifetime management, - * see __sta_info_unlink - */ - u8 pin_status; + bool uploaded; /* * frequently updated, locked with own spinlock (flaglock), @@ -449,18 +440,19 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, * Insert STA info into hash table/list, returns zero or a * -EEXIST if (if the same MAC address is already present). * - * Calling this without RCU protection makes the caller - * relinquish its reference to @sta. + * Calling the non-rcu version makes the caller relinquish, + * the _rcu version calls read_lock_rcu() and must be called + * without it held. 
*/ int sta_info_insert(struct sta_info *sta); -/* - * Unlink a STA info from the hash table/list. - * This can NULL the STA pointer if somebody else - * has already unlinked it. - */ -void sta_info_unlink(struct sta_info **sta); +int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU); +int sta_info_insert_atomic(struct sta_info *sta); + +int sta_info_destroy_addr(struct ieee80211_sub_if_data *sdata, + const u8 *addr); +int sta_info_destroy_addr_bss(struct ieee80211_sub_if_data *sdata, + const u8 *addr); -void sta_info_destroy(struct sta_info *sta); void sta_info_set_tim_bit(struct sta_info *sta); void sta_info_clear_tim_bit(struct sta_info *sta); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index e392820a4c33..cbe53ed4fb0b 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -571,7 +571,7 @@ ieee80211_tx_h_sta(struct ieee80211_tx_data *tx) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); - if (tx->sta) + if (tx->sta && tx->sta->uploaded) info->control.sta = &tx->sta->sta; return TX_CONTINUE; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index ca170b417da6..3af439a85b33 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1082,7 +1082,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) struct ieee80211_hw *hw = &local->hw; struct ieee80211_sub_if_data *sdata; struct sta_info *sta; - unsigned long flags; int res; if (local->suspended) @@ -1116,20 +1115,19 @@ int ieee80211_reconfig(struct ieee80211_local *local) } /* add STAs back */ - if (local->ops->sta_notify) { - spin_lock_irqsave(&local->sta_lock, flags); - list_for_each_entry(sta, &local->sta_list, list) { + mutex_lock(&local->sta_mtx); + list_for_each_entry(sta, &local->sta_list, list) { + if (sta->uploaded) { sdata = sta->sdata; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) sdata = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); - drv_sta_notify(local, sdata, STA_NOTIFY_ADD, - &sta->sta); + WARN_ON(drv_sta_add(local, sdata, &sta->sta)); } - spin_unlock_irqrestore(&local->sta_lock, flags); } + mutex_unlock(&local->sta_mtx); /* Clear Suspend state so that ADDBA requests can be processed */ @@ -1219,10 +1217,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) add_timer(&local->sta_cleanup); - spin_lock_irqsave(&local->sta_lock, flags); + mutex_lock(&local->sta_mtx); list_for_each_entry(sta, &local->sta_list, list) mesh_plink_restart(sta); - spin_unlock_irqrestore(&local->sta_lock, flags); + mutex_unlock(&local->sta_mtx); #else WARN_ON(1); #endif -- cgit v1.2.2 From 74e2bd1fa3ae9695af566ad5a7a288898787b909 Mon Sep 17 00:00:00 2001 From: Wey-Yi Guy Date: Wed, 3 Feb 2010 09:28:55 -0800 Subject: mac80211: tear down all agg queues when restart/reconfig hw When there is a need to restart/reconfig hw, tear down all the aggregation queues and let the mac80211 and driver get in-sync to have the opportunity to re-establish the aggregation queues again. Need to wait until driver re-establish all the station information before tear down the aggregation queues, driver(at least iwlwifi driver) will reject the stop aggregation queue request if station is not ready. But also need to make sure the aggregation queues are tear down before waking up the queues, so mac80211 will not sending frames with aggregation bit set. Signed-off-by: Wey-Yi Guy Signed-off-by: John W. 
Linville --- net/mac80211/util.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 3af439a85b33..c453226f06b2 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1178,6 +1178,14 @@ int ieee80211_reconfig(struct ieee80211_local *local) } } + rcu_read_lock(); + if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { + list_for_each_entry_rcu(sta, &local->sta_list, list) { + ieee80211_sta_tear_down_BA_sessions(sta); + } + } + rcu_read_unlock(); + /* add back keys */ list_for_each_entry(sdata, &local->interfaces, list) if (ieee80211_sdata_running(sdata)) -- cgit v1.2.2 From 199d69f27326858b16449eb1cc1623299db64415 Mon Sep 17 00:00:00 2001 From: Benoit Papillault Date: Thu, 4 Feb 2010 22:00:20 +0100 Subject: mac80211: Added a new debugfs file for reading channel_type This file helps debugging HT channels since it displays if we are on ht20 or ht40+/ht40- Signed-off-by: Benoit Papillault Signed-off-by: John W. Linville --- net/mac80211/debugfs.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'net') diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index b3bc32b62a5a..637929b65ccc 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -250,6 +250,38 @@ static const struct file_operations uapsd_max_sp_len_ops = { .open = mac80211_open_file_generic }; +static ssize_t channel_type_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct ieee80211_local *local = file->private_data; + const char *buf; + + switch (local->hw.conf.channel_type) { + case NL80211_CHAN_NO_HT: + buf = "no ht\n"; + break; + case NL80211_CHAN_HT20: + buf = "ht20\n"; + break; + case NL80211_CHAN_HT40MINUS: + buf = "ht40-\n"; + break; + case NL80211_CHAN_HT40PLUS: + buf = "ht40+\n"; + break; + default: + buf = "???"; + break; + } + + return simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf)); +} + +static const struct file_operations channel_type_ops = { + .read = channel_type_read, + .open = mac80211_open_file_generic +}; + static ssize_t queues_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { @@ -408,6 +440,7 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_ADD(noack); DEBUGFS_ADD(uapsd_queues); DEBUGFS_ADD(uapsd_max_sp_len); + DEBUGFS_ADD(channel_type); statsd = debugfs_create_dir("statistics", phyd); -- cgit v1.2.2 From 19885c4fbd79439efd6b3798bfb73f2f30e27104 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 5 Feb 2010 11:45:06 +0100 Subject: mac80211: fix bss_conf.dtim_period In AP mode, the only mode where the parameter is supposed to be valid, we never assign it! Fix that to allow drivers to avoid parsing the TIM IE for the value. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/cfg.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a362523d8eb7..e1731b7c2523 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -515,6 +515,8 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata, if (old) memcpy(new->tail, old->tail, new_tail_len); + sdata->vif.bss_conf.dtim_period = new->dtim_period; + rcu_assign_pointer(sdata->u.ap.beacon, new); synchronize_rcu(); -- cgit v1.2.2 From 29165e4c8b265a415f1fd1dca947b5d4c72abc47 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 6 Feb 2010 15:20:13 +0100 Subject: mac80211: fix deauth race When userspace requests a deauth while the authentication work is pending in the auth (not probe) state, we do not properly abort the work and then things get confused. Fix that and also improve the checks here to include the correct virtual interface, just in case two virtual interfaces would ever try to connect to the same BSS. Also fix a bug -- need to use list_del_rcu instead of just list_del to free a work item. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ac9429e8d72b..7a7921476582 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1995,12 +1995,18 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, mutex_lock(&local->work_mtx); list_for_each_entry(wk, &local->work_list, list) { - if (wk->type != IEEE80211_WORK_DIRECT_PROBE) + if (wk->sdata != sdata) continue; + + if (wk->type != IEEE80211_WORK_DIRECT_PROBE && + wk->type != IEEE80211_WORK_AUTH) + continue; + if (memcmp(req->bss->bssid, wk->filter_ta, ETH_ALEN)) continue; - not_auth_yet = true; - list_del(&wk->list); + + not_auth_yet = wk->type == IEEE80211_WORK_DIRECT_PROBE; + list_del_rcu(&wk->list); free_work(wk); break; } -- cgit v1.2.2 From 349e6b7289f8a3d3d5d3b859e00b41f27d1211df Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Sun, 7 Feb 2010 10:22:01 +0200 Subject: mac80211: remove get_tx_stats() driver op get_tx_stats() driver operation is not currently used anywhere in mac80211 and there are no plans to use it in the not-so-near future. So it can go without anyone missing it. Signed-off-by: Kalle Valo Acked-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/driver-ops.h | 8 -------- net/mac80211/driver-trace.h | 23 ----------------------- 2 files changed, 31 deletions(-) (limited to 'net') diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 855e85b55061..c3d844093a2f 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -290,14 +290,6 @@ static inline int drv_conf_tx(struct ieee80211_local *local, u16 queue, return ret; } -static inline int drv_get_tx_stats(struct ieee80211_local *local, - struct ieee80211_tx_queue_stats *stats) -{ - int ret = local->ops->get_tx_stats(&local->hw, stats); - trace_drv_get_tx_stats(local, stats, ret); - return ret; -} - static inline u64 drv_get_tsf(struct ieee80211_local *local) { u64 ret = -1ULL; diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index c984910bf275..41baf730a5c7 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -630,29 +630,6 @@ TRACE_EVENT(drv_conf_tx, ) ); -TRACE_EVENT(drv_get_tx_stats, - TP_PROTO(struct ieee80211_local *local, - struct ieee80211_tx_queue_stats *stats, - int ret), - - TP_ARGS(local, stats, ret), - - TP_STRUCT__entry( - LOCAL_ENTRY - __field(int, ret) - ), - - TP_fast_assign( - LOCAL_ASSIGN; - __entry->ret = ret; - ), - - TP_printk( - LOCAL_PR_FMT " ret:%d", - LOCAL_PR_ARG, __entry->ret - ) -); - TRACE_EVENT(drv_get_tsf, TP_PROTO(struct ieee80211_local *local, u64 ret), -- cgit v1.2.2 From 209c671db7a917740ab9873d442b10ae7e369937 Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Sun, 7 Feb 2010 21:47:50 -0500 Subject: mac80211: make rate_control_alloc static rate_control_alloc is not used by anything outside of ieee80211_init_rate_ctrl_alg. Both are in rate.c; there's no reason to make rate_control_alloc visible outside of it. Signed-off-by: Andres Salomon Signed-off-by: John W. Linville --- net/mac80211/rate.c | 2 +- net/mac80211/rate.h | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index c74b7c85403c..99ab24cc9783 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -145,7 +145,7 @@ static const struct file_operations rcname_ops = { }; #endif -struct rate_control_ref *rate_control_alloc(const char *name, +static struct rate_control_ref *rate_control_alloc(const char *name, struct ieee80211_local *local) { struct dentry *debugfsdir = NULL; diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 998cf7a935b6..b6108bca96d4 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -26,10 +26,6 @@ struct rate_control_ref { struct kref kref; }; -/* Get a reference to the rate control algorithm. If `name' is NULL, get the - * first available algorithm. */ -struct rate_control_ref *rate_control_alloc(const char *name, - struct ieee80211_local *local); void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct ieee80211_tx_rate_control *txrc); @@ -116,7 +112,8 @@ static inline void rate_control_remove_sta_debugfs(struct sta_info *sta) #endif } -/* functions for rate control related to a device */ +/* Get a reference to the rate control algorithm. If `name' is NULL, get the + * first available algorithm. 
*/ int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, const char *name); void rate_control_deinitialize(struct ieee80211_local *local); -- cgit v1.2.2 From e15276a4b220c54db665cf46a92bd9ceb9aeb052 Mon Sep 17 00:00:00 2001 From: Vivek Natarajan Date: Mon, 8 Feb 2010 17:47:01 +0530 Subject: mac80211: Reset dynamic ps timer in Rx path. The current mac80211 implementation enables power save if there is no Tx traffic for a specific timeout. Hence, PS is triggered even if there is a continuous Rx only traffic(like UDP) going on. This makes the drivers to wait on the tim bit in the next beacon to awake which leads to redundant sleep-wake cycles. Fix this by restarting the dynamic ps timer on receiving every data packet. Signed-off-by: Vivek Natarajan CC: stable@kernel.org Signed-off-by: John W. Linville --- net/mac80211/rx.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 01dba7618397..c9755f3d986c 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1719,6 +1719,7 @@ static ieee80211_rx_result debug_noinline ieee80211_rx_h_data(struct ieee80211_rx_data *rx) { struct ieee80211_sub_if_data *sdata = rx->sdata; + struct ieee80211_local *local = rx->local; struct net_device *dev = sdata->dev; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; __le16 fc = hdr->frame_control; @@ -1750,6 +1751,13 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) dev->stats.rx_packets++; dev->stats.rx_bytes += rx->skb->len; + if (ieee80211_is_data(hdr->frame_control) && + !is_multicast_ether_addr(hdr->addr1) && + local->hw.conf.dynamic_ps_timeout > 0 && local->ps_sdata) { + mod_timer(&local->dynamic_ps_timer, jiffies + + msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); + } + ieee80211_deliver_skb(rx); return RX_QUEUED; -- cgit v1.2.2 From d8c8a9e36560e9ff4c99279d64ce5dd0e1a33fa6 Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Mon, 8 Feb 2010 16:23:23 -0600 Subject: 9p: fix option parsing Options pointer is being moved before calling kfree() which seems to cause problems. This uses a separate pointer to track and free original allocation. 
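For context, the corrected idiom used throughout this patch looks roughly like the following minimal sketch. It is not code from the patch itself and the helper name is made up; it only shows why a second pointer is needed: strsep() advances the pointer it is given, so the pointer returned by kstrdup() must be kept separately and passed to kfree().

    /*
     * Illustrative sketch only, not part of the patch.
     * 'parse_opts_sketch' is a hypothetical helper name.
     */
    static int parse_opts_sketch(const char *opts)
    {
            char *options, *tmp_options, *p;

            tmp_options = kstrdup(opts, GFP_KERNEL);
            if (!tmp_options)
                    return -ENOMEM;
            options = tmp_options;      /* strsep() will advance 'options' */

            while ((p = strsep(&options, ",")) != NULL) {
                    /* per-token handling elided */
            }

            kfree(tmp_options);         /* NOT kfree(options): it may point past the start */
            return 0;
    }
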
Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen w --- net/9p/client.c | 9 +++++---- net/9p/trans_fd.c | 10 ++++++---- net/9p/trans_rdma.c | 9 +++++---- 3 files changed, 16 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index a2e2d61b903b..cbe066966b3c 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -69,7 +69,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); static int parse_opts(char *opts, struct p9_client *clnt) { - char *options; + char *options, *tmp_options; char *p; substring_t args[MAX_OPT_ARGS]; int option; @@ -81,12 +81,13 @@ static int parse_opts(char *opts, struct p9_client *clnt) if (!opts) return 0; - options = kstrdup(opts, GFP_KERNEL); - if (!options) { + tmp_options = kstrdup(opts, GFP_KERNEL); + if (!tmp_options) { P9_DPRINTK(P9_DEBUG_ERROR, "failed to allocate copy of option string\n"); return -ENOMEM; } + options = tmp_options; while ((p = strsep(&options, ",")) != NULL) { int token; @@ -125,7 +126,7 @@ static int parse_opts(char *opts, struct p9_client *clnt) } free_and_return: - kfree(options); + kfree(tmp_options); return ret; } diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index be1cb909d8c0..31d0b05582a9 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -714,7 +714,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) char *p; substring_t args[MAX_OPT_ARGS]; int option; - char *options; + char *options, *tmp_options; int ret; opts->port = P9_PORT; @@ -724,12 +724,13 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) if (!params) return 0; - options = kstrdup(params, GFP_KERNEL); - if (!options) { + tmp_options = kstrdup(params, GFP_KERNEL); + if (!tmp_options) { P9_DPRINTK(P9_DEBUG_ERROR, "failed to allocate copy of option string\n"); return -ENOMEM; } + options = tmp_options; while ((p = strsep(&options, ",")) != NULL) { int token; @@ -760,7 +761,8 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) continue; } } - kfree(options); + + kfree(tmp_options); return 0; } diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 65cb29db03f8..2c95a89c0f46 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -166,7 +166,7 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts) char *p; substring_t args[MAX_OPT_ARGS]; int option; - char *options; + char *options, *tmp_options; int ret; opts->port = P9_PORT; @@ -177,12 +177,13 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts) if (!params) return 0; - options = kstrdup(params, GFP_KERNEL); - if (!options) { + tmp_options = kstrdup(params, GFP_KERNEL); + if (!tmp_options) { P9_DPRINTK(P9_DEBUG_ERROR, "failed to allocate copy of option string\n"); return -ENOMEM; } + options = tmp_options; while ((p = strsep(&options, ",")) != NULL) { int token; @@ -216,7 +217,7 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts) } /* RQ must be at least as large as the SQ */ opts->rq_depth = max(opts->rq_depth, opts->sq_depth); - kfree(options); + kfree(tmp_options); return 0; } -- cgit v1.2.2 From 2fc1b5dd99f66d93ffc23fd8df82d384c1a354c8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 8 Feb 2010 15:00:39 -0800 Subject: dst: call cond_resched() in dst_gc_task() Kernel bugzilla #15239 On some workloads, it is quite possible to get a huge dst list to process in dst_gc_task(), and trigger soft lockup detection. Fix is to call cond_resched(), as we run in process context. 
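The general pattern, shown in a minimal sketch below, mirrors the loop in dst_gc_task(): any long-running list walk executed in process context should yield periodically so the soft-lockup watchdog is not triggered on huge lists. The helper name is hypothetical; the per-entry work is elided.

    /*
     * Illustrative sketch only, not part of the patch.
     * Assumes process context, where cond_resched() is allowed.
     */
    static void gc_walk_sketch(struct dst_entry *head)
    {
            struct dst_entry *dst, *next = head;

            while ((dst = next) != NULL) {
                    next = dst->next;
                    cond_resched();     /* yield if needed; we run in process context */
                    /* per-entry GC work elided */
            }
    }
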
Reported-by: Pawel Staszewski Tested-by: Pawel Staszewski Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dst.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/dst.c b/net/core/dst.c index 57bc4d5b8d08..cb1b3488b739 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -79,6 +80,7 @@ loop: while ((dst = next) != NULL) { next = dst->next; prefetch(&next->next); + cond_resched(); if (likely(atomic_read(&dst->__refcnt))) { last->next = dst; last = dst; -- cgit v1.2.2 From fb786100f7c75e154e63d0f5a2982e6d46dfb602 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 8 Feb 2010 11:50:32 +0000 Subject: 9p: Fix the kernel crash on a failed mount The patch fix the crash repoted below [ 15.149907] BUG: unable to handle kernel NULL pointer dereference at 00000001 [ 15.150806] IP: [] p9_virtio_close+0x18/0x24 ..... .... [ 15.150806] Call Trace: [ 15.150806] [] ? p9_client_destroy+0x3f/0x163 [ 15.150806] [] ? p9_client_create+0x25f/0x270 [ 15.150806] [] ? trace_hardirqs_on+0xb/0xd [ 15.150806] [] ? match_token+0x64/0x164 [ 15.150806] [] ? v9fs_session_init+0x2f1/0x3c8 [ 15.150806] [] ? kmem_cache_alloc+0x98/0xb8 [ 15.150806] [] ? trace_hardirqs_on+0xb/0xd [ 15.150806] [] ? v9fs_get_sb+0x47/0x1e8 [ 15.150806] [] ? v9fs_get_sb+0x60/0x1e8 [ 15.150806] [] ? vfs_kern_mount+0x81/0x11a [ 15.150806] [] ? do_kern_mount+0x33/0xbe [ 15.150806] [] ? do_mount+0x654/0x6b3 [ 15.150806] [] ? do_page_fault+0x0/0x284 [ 15.150806] [] ? copy_mount_options+0x73/0xd2 [ 15.150806] [] ? sys_mount+0x61/0x94 [ 15.150806] [] ? syscall_call+0x7/0xb .... [ 15.203562] ---[ end trace 1dd159357709eb4b ]--- [ Signed-off-by: Aneesh Kumar K.V Signed-off-by: Eric Van Hensbergen --- net/9p/trans_virtio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 67c4bc704c5a..cb50f4ae5eef 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -102,7 +102,8 @@ static void p9_virtio_close(struct p9_client *client) struct virtio_chan *chan = client->trans; mutex_lock(&virtio_9p_lock); - chan->inuse = false; + if (chan) + chan->inuse = false; mutex_unlock(&virtio_9p_lock); } -- cgit v1.2.2 From 8781ff9495578dbb74065fae55305110d9f81cb9 Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Mon, 8 Feb 2010 18:18:34 -0600 Subject: 9p: fix p9_client_destroy unconditional calling v9fs_put_trans restructure client create code to handle error cases better and only cleanup initialized portions of the stack. 
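A hedged sketch of the unwinding style the restructured p9_client_create() adopts (the foo_* names are made up for illustration): each failure point jumps to a label that releases only what was already initialized, in reverse order, instead of calling a full destroy path on a half-built object.

	#include <linux/err.h>
	#include <linux/slab.h>

	struct foo { int a, b; };

	static int foo_setup_a(struct foo *f) { f->a = 1; return 0; }	/* hypothetical step 1 */
	static void foo_teardown_a(struct foo *f) { f->a = 0; }
	static int foo_setup_b(struct foo *f) { f->b = 1; return 0; }	/* hypothetical step 2 */

	static struct foo *foo_create(void)
	{
		struct foo *f;
		int err;

		f = kzalloc(sizeof(*f), GFP_KERNEL);
		if (!f)
			return ERR_PTR(-ENOMEM);

		err = foo_setup_a(f);
		if (err)
			goto free_foo;

		err = foo_setup_b(f);
		if (err)
			goto teardown_a;

		return f;

	teardown_a:
		foo_teardown_a(f);
	free_foo:
		kfree(f);
		return ERR_PTR(err);
	}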
Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index cbe066966b3c..09d4f1e2e4a8 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -676,18 +676,12 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) clnt->trans = NULL; spin_lock_init(&clnt->lock); INIT_LIST_HEAD(&clnt->fidlist); - clnt->fidpool = p9_idpool_create(); - if (IS_ERR(clnt->fidpool)) { - err = PTR_ERR(clnt->fidpool); - clnt->fidpool = NULL; - goto error; - } p9_tag_init(clnt); err = parse_opts(options, clnt); if (err < 0) - goto error; + goto free_client; if (!clnt->trans_mod) clnt->trans_mod = v9fs_get_default_trans(); @@ -696,7 +690,14 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) err = -EPROTONOSUPPORT; P9_DPRINTK(P9_DEBUG_ERROR, "No transport defined or default transport\n"); - goto error; + goto free_client; + } + + clnt->fidpool = p9_idpool_create(); + if (IS_ERR(clnt->fidpool)) { + err = PTR_ERR(clnt->fidpool); + clnt->fidpool = NULL; + goto put_trans; } P9_DPRINTK(P9_DEBUG_MUX, "clnt %p trans %p msize %d dotu %d\n", @@ -704,19 +705,25 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) err = clnt->trans_mod->create(clnt, dev_name, options); if (err) - goto error; + goto destroy_fidpool; if ((clnt->msize+P9_IOHDRSZ) > clnt->trans_mod->maxsize) clnt->msize = clnt->trans_mod->maxsize-P9_IOHDRSZ; err = p9_client_version(clnt); if (err) - goto error; + goto close_trans; return clnt; -error: - p9_client_destroy(clnt); +close_trans: + clnt->trans_mod->close(clnt); +destroy_fidpool: + p9_idpool_destroy(clnt->fidpool); +put_trans: + v9fs_put_trans(clnt->trans_mod); +free_client: + kfree(clnt); return ERR_PTR(err); } EXPORT_SYMBOL(p9_client_create); -- cgit v1.2.2 From d4ae20b3799e0b6fa0d832a645a422da9f239868 Mon Sep 17 00:00:00 2001 From: Jan Luebbe Date: Mon, 8 Feb 2010 22:41:44 -0800 Subject: net/sched: Fix module name in Kconfig The action modules have been prefixed with 'act_', but the Kconfig description was not changed. Signed-off-by: Jan Luebbe Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/Kconfig | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 929218a47620..21f9c7678aa3 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -433,7 +433,7 @@ config NET_ACT_POLICE module. To compile this code as a module, choose M here: the - module will be called police. + module will be called act_police. config NET_ACT_GACT tristate "Generic actions" @@ -443,7 +443,7 @@ config NET_ACT_GACT accepting packets. To compile this code as a module, choose M here: the - module will be called gact. + module will be called act_gact. config GACT_PROB bool "Probability support" @@ -459,7 +459,7 @@ config NET_ACT_MIRRED other devices. To compile this code as a module, choose M here: the - module will be called mirred. + module will be called act_mirred. config NET_ACT_IPT tristate "IPtables targets" @@ -469,7 +469,7 @@ config NET_ACT_IPT classification. To compile this code as a module, choose M here: the - module will be called ipt. + module will be called act_ipt. config NET_ACT_NAT tristate "Stateless NAT" @@ -479,7 +479,7 @@ config NET_ACT_NAT netfilter for NAT unless you know what you are doing. 
To compile this code as a module, choose M here: the - module will be called nat. + module will be called act_nat. config NET_ACT_PEDIT tristate "Packet Editing" @@ -488,7 +488,7 @@ config NET_ACT_PEDIT Say Y here if you want to mangle the content of packets. To compile this code as a module, choose M here: the - module will be called pedit. + module will be called act_pedit. config NET_ACT_SIMP tristate "Simple Example (Debug)" @@ -502,7 +502,7 @@ config NET_ACT_SIMP If unsure, say N. To compile this code as a module, choose M here: the - module will be called simple. + module will be called act_simple. config NET_ACT_SKBEDIT tristate "SKB Editing" @@ -513,7 +513,7 @@ config NET_ACT_SKBEDIT If unsure, say N. To compile this code as a module, choose M here: the - module will be called skbedit. + module will be called act_skbedit. config NET_CLS_IND bool "Incoming device classification" -- cgit v1.2.2 From 3ad2f3fbb961429d2aa627465ae4829758bc7e07 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Wed, 3 Feb 2010 08:01:28 +0800 Subject: tree-wide: Assorted spelling fixes In particular, several occurances of funny versions of 'success', 'unknown', 'therefore', 'acknowledge', 'argument', 'achieve', 'address', 'beginning', 'desirable', 'separate' and 'necessary' are fixed. Signed-off-by: Daniel Mack Cc: Joe Perches Cc: Junio C Hamano Signed-off-by: Jiri Kosina --- net/ipv4/tcp_timer.c | 2 +- net/mac80211/mesh_plink.c | 2 +- net/netfilter/nf_conntrack_sip.c | 4 ++-- net/netfilter/xt_hashlimit.c | 2 +- net/sctp/sm_sideeffect.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 8816a20c2597..aff48d657181 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -133,7 +133,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) } /* This function calculates a "timeout" which is equivalent to the timeout of a - * TCP connection after "boundary" unsucessful, exponentially backed-off + * TCP connection after "boundary" unsuccessful, exponentially backed-off * retransmissions with an initial RTO of TCP_RTO_MIN. */ static bool retransmits_timed_out(struct sock *sk, diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 0f7c6e6a4248..54e4c8bb23e7 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -743,7 +743,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m break; default: /* should not get here, PLINK_BLOCKED is dealt with at the - * beggining of the function + * beginning of the function */ spin_unlock_bh(&sta->lock); break; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 023966b569bf..fbe94adee7ac 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -276,7 +276,7 @@ EXPORT_SYMBOL_GPL(ct_sip_parse_request); * tabs, spaces and continuation lines, which are treated as a single whitespace * character. * - * Some headers may appear multiple times. A comma seperated list of values is + * Some headers may appear multiple times. A comma separated list of values is * equivalent to multiple headers. 
*/ static const struct sip_header ct_sip_hdrs[] = { @@ -412,7 +412,7 @@ int ct_sip_get_header(const struct nf_conn *ct, const char *dptr, } EXPORT_SYMBOL_GPL(ct_sip_get_header); -/* Get next header field in a list of comma seperated values */ +/* Get next header field in a list of comma separated values */ static int ct_sip_next_header(const struct nf_conn *ct, const char *dptr, unsigned int dataoff, unsigned int datalen, enum sip_header_types type, diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index dd16e404424f..cbaac92dad59 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -1,6 +1,6 @@ /* * xt_hashlimit - Netfilter module to limit the number of packets per time - * seperately for each hashbucket (sourceip/sourceport/dstip/dstport) + * separately for each hashbucket (sourceip/sourceport/dstip/dstport) * * (C) 2003-2004 by Harald Welte * Copyright © CC Computer Consultants GmbH, 2007 - 2008 diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 4e4ca65cd320..500886bda9b4 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -475,7 +475,7 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, * used to provide an upper bound to this doubling operation. * * Special Case: the first HB doesn't trigger exponential backoff. - * The first unacknowleged HB triggers it. We do this with a flag + * The first unacknowledged HB triggers it. We do this with a flag * that indicates that we have an outstanding HB. */ if (!is_hb || transport->hb_sent) { -- cgit v1.2.2 From 375177bf35efc08e1bd37bbda4cc0c8cc4db8500 Mon Sep 17 00:00:00 2001 From: Vivek Natarajan Date: Tue, 9 Feb 2010 14:50:28 +0530 Subject: mac80211: Retry null data frame for power save. Even if the null data frame is not acked by the AP, mac80211 goes into power save. This might lead to loss of frames from the AP. Prevent this by restarting dynamic_ps_timer when ack is not received for null data frames. Cc: Johannes Berg Signed-off-by: Vivek Natarajan Signed-off-by: John W. 
Linville --- net/mac80211/ieee80211_i.h | 1 + net/mac80211/mlme.c | 20 +++++++++++++++----- net/mac80211/status.c | 17 +++++++++++++++-- 3 files changed, 31 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index a5911191f224..9dd98b674cbc 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -316,6 +316,7 @@ enum ieee80211_sta_flags { IEEE80211_STA_CSA_RECEIVED = BIT(5), IEEE80211_STA_MFP_ENABLED = BIT(6), IEEE80211_STA_UAPSD_ENABLED = BIT(7), + IEEE80211_STA_NULLFUNC_ACKED = BIT(8), }; struct ieee80211_if_managed { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 7a7921476582..ee9443dc20ff 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -434,8 +434,11 @@ static void ieee80211_enable_ps(struct ieee80211_local *local, } else { if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) ieee80211_send_nullfunc(local, sdata, 1); - conf->flags |= IEEE80211_CONF_PS; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); + + if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) { + conf->flags |= IEEE80211_CONF_PS; + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); + } } } @@ -541,6 +544,7 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) container_of(work, struct ieee80211_local, dynamic_ps_enable_work); struct ieee80211_sub_if_data *sdata = local->ps_sdata; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; /* can only happen when PS was just disabled anyway */ if (!sdata) @@ -549,11 +553,16 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) if (local->hw.conf.flags & IEEE80211_CONF_PS) return; - if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) + if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) && + (!(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED))) ieee80211_send_nullfunc(local, sdata, 1); - local->hw.conf.flags |= IEEE80211_CONF_PS; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); + if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) || + (ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)) { + ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED; + local->hw.conf.flags |= IEEE80211_CONF_PS; + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); + } } void ieee80211_dynamic_ps_timer(unsigned long data) @@ -1892,6 +1901,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, return -ENOMEM; ifmgd->flags &= ~IEEE80211_STA_DISABLE_11N; + ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED; for (i = 0; i < req->crypto.n_ciphers_pairwise; i++) if (req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_WEP40 || diff --git a/net/mac80211/status.c b/net/mac80211/status.c index e57ad6b1d7ea..ded98730c111 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -188,6 +188,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) rcu_read_lock(); sband = local->hw.wiphy->bands[info->band]; + fc = hdr->frame_control; for_each_sta_info(local, hdr->addr1, sta, tmp) { /* skip wrong virtual interface */ @@ -205,8 +206,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) return; } - fc = hdr->frame_control; - if ((info->flags & IEEE80211_TX_STAT_AMPDU_NO_BACK) && (ieee80211_is_data_qos(fc))) { u16 tid, ssn; @@ -275,6 +274,20 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) local->dot11FailedCount++; } + if (ieee80211_is_nullfunc(fc) && ieee80211_has_pm(fc) && + (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) && + !(info->flags & IEEE80211_TX_CTL_INJECTED) 
&& + local->ps_sdata && !(local->scanning)) { + if (info->flags & IEEE80211_TX_STAT_ACK) { + local->ps_sdata->u.mgd.flags |= + IEEE80211_STA_NULLFUNC_ACKED; + ieee80211_queue_work(&local->hw, + &local->dynamic_ps_enable_work); + } else + mod_timer(&local->dynamic_ps_timer, jiffies + + msecs_to_jiffies(10)); + } + /* this was a transmitted frame, but now we want to reuse it */ skb_orphan(skb); -- cgit v1.2.2 From dc5ddce956660247e004a4b20a26b7d137ab1644 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 7 Jan 2010 09:42:02 -0500 Subject: sunrpc: parse and return errors reported by gssd The kernel currently ignores any error code sent by gssd and always considers it to be -EACCES. In order to better handle the situation of an expired KRB5 TGT, the kernel needs to be able to parse and deal with the errors that gssd sends. Aside from -EACCES the only error we care about is -EKEYEXPIRED, which we're using to indicate that the upper layers should retry the call a little later. To maintain backward compatibility with older gssd's, any error other than -EKEYEXPIRED is interpreted as -EACCES. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index f7a7f8380e38..0cfccc2a0297 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -206,8 +206,14 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct ctx->gc_win = window_size; /* gssd signals an error by passing ctx->gc_win = 0: */ if (ctx->gc_win == 0) { - /* in which case, p points to an error code which we ignore */ - p = ERR_PTR(-EACCES); + /* + * in which case, p points to an error code. Anything other + * than -EKEYEXPIRED gets converted to -EACCES. + */ + p = simple_get_bytes(p, end, &ret, sizeof(ret)); + if (!IS_ERR(p)) + p = (ret == -EKEYEXPIRED) ? ERR_PTR(-EKEYEXPIRED) : + ERR_PTR(-EACCES); goto err; } /* copy the opaque wire context */ @@ -646,6 +652,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) err = PTR_ERR(p); switch (err) { case -EACCES: + case -EKEYEXPIRED: gss_msg->msg.errno = err; err = mlen; break; -- cgit v1.2.2 From f1a89a118299df9e5afbaaedf83e5709a28632b6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 12 Jan 2010 17:41:10 -0500 Subject: SUNRPC: Deprecate support for site-local addresses RFC 3879 "formally deprecates" site-local IPv6 addresses. We interpret that to mean that the scope ID is ignored for all but link-local addresses. 
Signed-off-by: Chuck Lever Acked-by: Jeff Layton Signed-off-by: Trond Myklebust --- net/sunrpc/addr.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index 6dcdd2517819..0756c5da12f4 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -71,8 +71,7 @@ static size_t rpc_ntop6(const struct sockaddr *sap, if (unlikely(len == 0)) return len; - if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) && - !(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_SITELOCAL)) + if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) return len; rc = snprintf(scopebuf, sizeof(scopebuf), "%c%u", @@ -165,8 +164,7 @@ static int rpc_parse_scope_id(const char *buf, const size_t buflen, if (*delim != IPV6_SCOPE_DELIMITER) return 0; - if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) && - !(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_SITELOCAL)) + if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) return 0; len = (buf + buflen) - delim - 1; -- cgit v1.2.2 From 7a88efe9760de3bc75bde61b0a4c56dbb6cf2494 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 12 Jan 2010 17:41:20 -0500 Subject: SUNRPC: Don't display zero scope IDs A zero scope ID means that it wasn't set, so we don't need to append it to presentation format addresses. Signed-off-by: Chuck Lever Acked-by: Jeff Layton Signed-off-by: Trond Myklebust --- net/sunrpc/addr.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index 0756c5da12f4..f845d9d72f73 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -73,6 +73,8 @@ static size_t rpc_ntop6(const struct sockaddr *sap, if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) return len; + if (sin6->sin6_scope_id == 0) + return len; rc = snprintf(scopebuf, sizeof(scopebuf), "%c%u", IPV6_SCOPE_DELIMITER, sin6->sin6_scope_id); -- cgit v1.2.2 From 5a51f13adf7909caec2f8182767485c30e21364e Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Thu, 14 Jan 2010 15:38:31 -0700 Subject: xprtsock.c: make bc_{malloc/free} static xprtsock.c: make bc_{malloc/free} static The server backchannel buf_alloc and buf_free methods should be static since they are not used outside this file. Signed-off-by: H Hartley Sweeten Cc: J. Bruce Fields Cc: Neil Brown Cc: Trond Myklebust Cc: David S. Miller Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 3d739e5d15d8..721bafd95a0f 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2100,7 +2100,7 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) * we allocate pages instead doing a kmalloc like rpc_malloc is because we want * to use the server side send routines. 
*/ -void *bc_malloc(struct rpc_task *task, size_t size) +static void *bc_malloc(struct rpc_task *task, size_t size) { struct page *page; struct rpc_buffer *buf; @@ -2120,7 +2120,7 @@ void *bc_malloc(struct rpc_task *task, size_t size) /* * Free the space allocated in the bc_alloc routine */ -void bc_free(void *buffer) +static void bc_free(void *buffer) { struct rpc_buffer *buf; -- cgit v1.2.2 From ba17686f62db88f6a591121e768a0c83a2a2647d Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 26 Jan 2010 21:24:04 -0500 Subject: nfs41 do not allocate unused back channel pages Signed-off-by: Andy Adamson [Trond.Myklebust@netapp.com: moved definition of svc_is_backchannel() into include/linux/sunrpc/bc_xprt.h.] Signed-off-by: Trond Myklebust --- net/sunrpc/svc.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 538ca433a56c..6dcf8c9c784c 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -506,6 +506,10 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size) { unsigned int pages, arghi; + /* bc_xprt uses fore channel allocated buffers */ + if (svc_is_backchannel(rqstp)) + return 1; + pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply. * We assume one is at most one page */ -- cgit v1.2.2 From 42107f5009da223daa800d6da6904d77297ae829 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 10 Feb 2010 15:03:27 +0100 Subject: netfilter: xtables: symmetric COMPAT_XT_ALIGN definition Rewrite COMPAT_XT_ALIGN in terms of dummy structure hack. Compat counters logically have nothing to do with it. Use ALIGN() macro while I'm at it for same types. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- net/netfilter/x_tables.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index f01955cce314..5c564ff10a3b 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -364,7 +364,7 @@ int xt_check_match(struct xt_mtchk_param *par, * ebt_among is exempt from centralized matchsize checking * because it uses a dynamic-size data set. 
*/ - pr_err("%s_tables: %s match: invalid size %Zu != %u\n", + pr_err("%s_tables: %s match: invalid size %u != %u\n", xt_prefix[par->family], par->match->name, XT_ALIGN(par->match->matchsize), size); return -EINVAL; @@ -514,7 +514,7 @@ int xt_check_target(struct xt_tgchk_param *par, unsigned int size, u_int8_t proto, bool inv_proto) { if (XT_ALIGN(par->target->targetsize) != size) { - pr_err("%s_tables: %s target: invalid size %Zu != %u\n", + pr_err("%s_tables: %s target: invalid size %u != %u\n", xt_prefix[par->family], par->target->name, XT_ALIGN(par->target->targetsize), size); return -EINVAL; -- cgit v1.2.2 From d0b0268fddea3235a8df35e52167c3b206bf2f5a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 10 Feb 2010 15:38:33 +0100 Subject: netfilter: ctnetlink: add missing netlink attribute policies Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_netlink.c | 41 +++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 327c5174440c..5c103b8c7df0 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -708,6 +708,11 @@ ctnetlink_parse_tuple_proto(struct nlattr *attr, return ret; } +static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = { + [CTA_TUPLE_IP] = { .type = NLA_NESTED }, + [CTA_TUPLE_PROTO] = { .type = NLA_NESTED }, +}; + static int ctnetlink_parse_tuple(const struct nlattr * const cda[], struct nf_conntrack_tuple *tuple, @@ -718,7 +723,7 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[], memset(tuple, 0, sizeof(*tuple)); - nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], NULL); + nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy); if (!tb[CTA_TUPLE_IP]) return -EINVAL; @@ -745,12 +750,16 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[], return 0; } +static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = { + [CTA_HELP_NAME] = { .type = NLA_NUL_STRING }, +}; + static inline int ctnetlink_parse_help(const struct nlattr *attr, char **helper_name) { struct nlattr *tb[CTA_HELP_MAX+1]; - nla_parse_nested(tb, CTA_HELP_MAX, attr, NULL); + nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy); if (!tb[CTA_HELP_NAME]) return -EINVAL; @@ -761,11 +770,17 @@ ctnetlink_parse_help(const struct nlattr *attr, char **helper_name) } static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { + [CTA_TUPLE_ORIG] = { .type = NLA_NESTED }, + [CTA_TUPLE_REPLY] = { .type = NLA_NESTED }, [CTA_STATUS] = { .type = NLA_U32 }, + [CTA_PROTOINFO] = { .type = NLA_NESTED }, + [CTA_HELP] = { .type = NLA_NESTED }, + [CTA_NAT_SRC] = { .type = NLA_NESTED }, [CTA_TIMEOUT] = { .type = NLA_U32 }, [CTA_MARK] = { .type = NLA_U32 }, - [CTA_USE] = { .type = NLA_U32 }, [CTA_ID] = { .type = NLA_U32 }, + [CTA_NAT_DST] = { .type = NLA_NESTED }, + [CTA_TUPLE_MASTER] = { .type = NLA_NESTED }, }; static int @@ -1053,6 +1068,12 @@ ctnetlink_change_timeout(struct nf_conn *ct, const struct nlattr * const cda[]) return 0; } +static const struct nla_policy protoinfo_policy[CTA_PROTOINFO_MAX+1] = { + [CTA_PROTOINFO_TCP] = { .type = NLA_NESTED }, + [CTA_PROTOINFO_DCCP] = { .type = NLA_NESTED }, + [CTA_PROTOINFO_SCTP] = { .type = NLA_NESTED }, +}; + static inline int ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[]) { @@ -1061,7 +1082,7 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[] struct nf_conntrack_l4proto 
*l4proto; int err = 0; - nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, NULL); + nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy); rcu_read_lock(); l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); @@ -1073,12 +1094,18 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[] } #ifdef CONFIG_NF_NAT_NEEDED +static const struct nla_policy nat_seq_policy[CTA_NAT_SEQ_MAX+1] = { + [CTA_NAT_SEQ_CORRECTION_POS] = { .type = NLA_U32 }, + [CTA_NAT_SEQ_OFFSET_BEFORE] = { .type = NLA_U32 }, + [CTA_NAT_SEQ_OFFSET_AFTER] = { .type = NLA_U32 }, +}; + static inline int change_nat_seq_adj(struct nf_nat_seq *natseq, const struct nlattr * const attr) { struct nlattr *cda[CTA_NAT_SEQ_MAX+1]; - nla_parse_nested(cda, CTA_NAT_SEQ_MAX, attr, NULL); + nla_parse_nested(cda, CTA_NAT_SEQ_MAX, attr, nat_seq_policy); if (!cda[CTA_NAT_SEQ_CORRECTION_POS]) return -EINVAL; @@ -1648,8 +1675,12 @@ out: } static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { + [CTA_EXPECT_MASTER] = { .type = NLA_NESTED }, + [CTA_EXPECT_TUPLE] = { .type = NLA_NESTED }, + [CTA_EXPECT_MASK] = { .type = NLA_NESTED }, [CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 }, [CTA_EXPECT_ID] = { .type = NLA_U32 }, + [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING }, }; static int -- cgit v1.2.2 From 737535c5cf3524e4bfaa91e22edefd52eccabbce Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 13 Jun 2009 06:46:36 +0200 Subject: netfilter: xtables: compact table hook functions (1/2) This patch combines all the per-hook functions in a given table into a single function. Together with the 2nd patch, further simplifications are possible up to the point of output code reduction. Signed-off-by: Jan Engelhardt --- net/ipv4/netfilter/arptable_filter.c | 30 ++++++-------- net/ipv4/netfilter/iptable_filter.c | 50 ++++++++---------------- net/ipv4/netfilter/iptable_mangle.c | 71 ++++++++++------------------------ net/ipv4/netfilter/iptable_raw.c | 26 +++++-------- net/ipv4/netfilter/iptable_security.c | 50 ++++++++---------------- net/ipv6/netfilter/ip6table_filter.c | 40 ++++++------------- net/ipv6/netfilter/ip6table_mangle.c | 50 ++++++++++-------------- net/ipv6/netfilter/ip6table_raw.c | 26 +++++-------- net/ipv6/netfilter/ip6table_security.c | 41 ++++++-------------- 9 files changed, 126 insertions(+), 258 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 97337601827a..e9d823b149cd 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -53,43 +53,37 @@ static const struct xt_table packet_filter = { }; /* The work comes in here from netfilter.c */ -static unsigned int arpt_in_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int +arptable_filter_hook(unsigned int hook, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return arpt_do_table(skb, hook, in, out, - dev_net(in)->ipv4.arptable_filter); -} + if (hook == NF_ARP_OUT) + return arpt_do_table(skb, hook, in, out, + dev_net(out)->ipv4.arptable_filter); -static unsigned int arpt_out_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ + /* INPUT/FORWARD: */ return arpt_do_table(skb, hook, in, out, - dev_net(out)->ipv4.arptable_filter); + 
dev_net(in)->ipv4.arptable_filter); } static struct nf_hook_ops arpt_ops[] __read_mostly = { { - .hook = arpt_in_hook, + .hook = arptable_filter_hook, .owner = THIS_MODULE, .pf = NFPROTO_ARP, .hooknum = NF_ARP_IN, .priority = NF_IP_PRI_FILTER, }, { - .hook = arpt_out_hook, + .hook = arptable_filter_hook, .owner = THIS_MODULE, .pf = NFPROTO_ARP, .hooknum = NF_ARP_OUT, .priority = NF_IP_PRI_FILTER, }, { - .hook = arpt_in_hook, + .hook = arptable_filter_hook, .owner = THIS_MODULE, .pf = NFPROTO_ARP, .hooknum = NF_ARP_FORWARD, diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index dee90eb8aa47..5369833ad56a 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -60,61 +60,43 @@ static const struct xt_table packet_filter = { .af = NFPROTO_IPV4, }; -/* The work comes in here from netfilter.c. */ static unsigned int -ipt_local_in_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +iptable_filter_hook(unsigned int hook, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ipt_do_table(skb, hook, in, out, - dev_net(in)->ipv4.iptable_filter); -} + if (hook == NF_INET_LOCAL_OUT) { + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) + /* root is playing with raw sockets. */ + return NF_ACCEPT; + + return ipt_do_table(skb, hook, in, out, + dev_net(out)->ipv4.iptable_filter); + } -static unsigned int -ipt_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ + /* LOCAL_IN/FORWARD: */ return ipt_do_table(skb, hook, in, out, dev_net(in)->ipv4.iptable_filter); } -static unsigned int -ipt_local_out_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) - return NF_ACCEPT; - return ipt_do_table(skb, hook, in, out, - dev_net(out)->ipv4.iptable_filter); -} - static struct nf_hook_ops ipt_ops[] __read_mostly = { { - .hook = ipt_local_in_hook, + .hook = iptable_filter_hook, .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_FILTER, }, { - .hook = ipt_hook, + .hook = iptable_filter_hook, .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_FORWARD, .priority = NF_IP_PRI_FILTER, }, { - .hook = ipt_local_out_hook, + .hook = iptable_filter_hook, .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index e07bf242343a..4e699cd275c6 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -71,51 +71,6 @@ static const struct xt_table packet_mangler = { .af = NFPROTO_IPV4, }; -/* The work comes in here from netfilter.c. 
*/ -static unsigned int -ipt_pre_routing_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - return ipt_do_table(skb, hook, in, out, - dev_net(in)->ipv4.iptable_mangle); -} - -static unsigned int -ipt_post_routing_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - return ipt_do_table(skb, hook, in, out, - dev_net(out)->ipv4.iptable_mangle); -} - -static unsigned int -ipt_local_in_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - return ipt_do_table(skb, hook, in, out, - dev_net(in)->ipv4.iptable_mangle); -} - -static unsigned int -ipt_forward_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - return ipt_do_table(skb, hook, in, out, - dev_net(in)->ipv4.iptable_mangle); -} - static unsigned int ipt_local_hook(unsigned int hook, struct sk_buff *skb, @@ -158,37 +113,53 @@ ipt_local_hook(unsigned int hook, return ret; } +/* The work comes in here from netfilter.c. */ +static unsigned int +iptable_mangle_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + if (hook == NF_INET_LOCAL_OUT) + return ipt_local_hook(hook, skb, in, out, okfn); + + /* PREROUTING/INPUT/FORWARD: */ + return ipt_do_table(skb, hook, in, out, + dev_net(in)->ipv4.iptable_mangle); +} + static struct nf_hook_ops ipt_ops[] __read_mostly = { { - .hook = ipt_pre_routing_hook, + .hook = iptable_mangle_hook, .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_MANGLE, }, { - .hook = ipt_local_in_hook, + .hook = iptable_mangle_hook, .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_MANGLE, }, { - .hook = ipt_forward_hook, + .hook = iptable_mangle_hook, .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_FORWARD, .priority = NF_IP_PRI_MANGLE, }, { - .hook = ipt_local_hook, + .hook = iptable_mangle_hook, .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_MANGLE, }, { - .hook = ipt_post_routing_hook, + .hook = iptable_mangle_hook, .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_POST_ROUTING, diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 40f2b9f611a2..2c55575e89f5 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -45,23 +45,15 @@ static const struct xt_table packet_raw = { /* The work comes in here from netfilter.c. 
*/ static unsigned int -ipt_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +iptable_raw_hook(unsigned int hook, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ipt_do_table(skb, hook, in, out, - dev_net(in)->ipv4.iptable_raw); -} + if (hook == NF_INET_PRE_ROUTING) + return ipt_do_table(skb, hook, in, out, + dev_net(in)->ipv4.iptable_raw); -static unsigned int -ipt_local_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ + /* OUTPUT: */ /* root is playing with raw sockets. */ if (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr)) @@ -73,14 +65,14 @@ ipt_local_hook(unsigned int hook, /* 'raw' is the very first table. */ static struct nf_hook_ops ipt_ops[] __read_mostly = { { - .hook = ipt_hook, + .hook = iptable_raw_hook, .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_RAW, .owner = THIS_MODULE, }, { - .hook = ipt_local_hook, + .hook = iptable_raw_hook, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_RAW, diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 7ce2366e4305..1c666bab3269 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -65,59 +65,43 @@ static const struct xt_table security_table = { }; static unsigned int -ipt_local_in_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +iptable_security_hook(unsigned int hook, struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ipt_do_table(skb, hook, in, out, - dev_net(in)->ipv4.iptable_security); -} + if (hook == NF_INET_LOCAL_OUT) { + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) + /* Somebody is playing with raw sockets. */ + return NF_ACCEPT; -static unsigned int -ipt_forward_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - return ipt_do_table(skb, hook, in, out, - dev_net(in)->ipv4.iptable_security); -} + return ipt_do_table(skb, hook, in, out, + dev_net(out)->ipv4.iptable_security); + } -static unsigned int -ipt_local_out_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - /* Somebody is playing with raw sockets. 
*/ - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) - return NF_ACCEPT; + /* INPUT/FORWARD: */ return ipt_do_table(skb, hook, in, out, - dev_net(out)->ipv4.iptable_security); + dev_net(in)->ipv4.iptable_security); } static struct nf_hook_ops ipt_ops[] __read_mostly = { { - .hook = ipt_local_in_hook, + .hook = iptable_security_hook, .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_SECURITY, }, { - .hook = ipt_forward_hook, + .hook = iptable_security_hook, .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_FORWARD, .priority = NF_IP_PRI_SECURITY, }, { - .hook = ipt_local_out_hook, + .hook = iptable_security_hook, .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 33ddfe53e18d..38074e933f67 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -60,54 +60,36 @@ static const struct xt_table packet_filter = { /* The work comes in here from netfilter.c. */ static unsigned int -ip6t_in_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +ip6table_filter_hook(unsigned int hook, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(skb, hook, in, out, - dev_net(in)->ipv6.ip6table_filter); -} - -static unsigned int -ip6t_local_out_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ -#if 0 - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) { - if (net_ratelimit()) - printk("ip6t_hook: happy cracking.\n"); - return NF_ACCEPT; - } -#endif + if (hook == NF_INET_LOCAL_OUT) + return ip6t_do_table(skb, hook, in, out, + dev_net(out)->ipv6.ip6table_filter); + /* INPUT/FORWARD: */ return ip6t_do_table(skb, hook, in, out, - dev_net(out)->ipv6.ip6table_filter); + dev_net(in)->ipv6.ip6table_filter); } static struct nf_hook_ops ip6t_ops[] __read_mostly = { { - .hook = ip6t_in_hook, + .hook = ip6table_filter_hook, .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_FILTER, }, { - .hook = ip6t_in_hook, + .hook = ip6table_filter_hook, .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_FORWARD, .priority = NF_IP6_PRI_FILTER, }, { - .hook = ip6t_local_out_hook, + .hook = ip6table_filter_hook, .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 9bc483f000e5..405ac1f76390 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -64,33 +64,9 @@ static const struct xt_table packet_mangler = { .af = NFPROTO_IPV6, }; -/* The work comes in here from netfilter.c. 
*/ -static unsigned int -ip6t_in_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - return ip6t_do_table(skb, hook, in, out, - dev_net(in)->ipv6.ip6table_mangle); -} - -static unsigned int -ip6t_post_routing_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - return ip6t_do_table(skb, hook, in, out, - dev_net(out)->ipv6.ip6table_mangle); -} - static unsigned int ip6t_local_out_hook(unsigned int hook, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { @@ -119,7 +95,7 @@ ip6t_local_out_hook(unsigned int hook, /* flowlabel and prio (includes version, which shouldn't change either */ flowlabel = *((u_int32_t *)ipv6_hdr(skb)); - ret = ip6t_do_table(skb, hook, in, out, + ret = ip6t_do_table(skb, hook, NULL, out, dev_net(out)->ipv6.ip6table_mangle); if (ret != NF_DROP && ret != NF_STOLEN && @@ -132,37 +108,51 @@ ip6t_local_out_hook(unsigned int hook, return ret; } +/* The work comes in here from netfilter.c. */ +static unsigned int +ip6table_mangle_hook(unsigned int hook, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + if (hook == NF_INET_LOCAL_OUT) + return ip6t_local_out_hook(hook, skb, out, okfn); + + /* INPUT/FORWARD */ + return ip6t_do_table(skb, hook, in, out, + dev_net(in)->ipv6.ip6table_mangle); +} + static struct nf_hook_ops ip6t_ops[] __read_mostly = { { - .hook = ip6t_in_hook, + .hook = ip6table_mangle_hook, .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_MANGLE, }, { - .hook = ip6t_in_hook, + .hook = ip6table_mangle_hook, .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_MANGLE, }, { - .hook = ip6t_in_hook, + .hook = ip6table_mangle_hook, .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_FORWARD, .priority = NF_IP6_PRI_MANGLE, }, { - .hook = ip6t_local_out_hook, + .hook = ip6table_mangle_hook, .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_MANGLE, }, { - .hook = ip6t_post_routing_hook, + .hook = ip6table_mangle_hook, .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 4c90b552e433..985e27cf1e0c 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -44,37 +44,29 @@ static const struct xt_table packet_raw = { /* The work comes in here from netfilter.c. 
*/ static unsigned int -ip6t_pre_routing_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +ip6table_raw_hook(unsigned int hook, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(skb, hook, in, out, - dev_net(in)->ipv6.ip6table_raw); -} + if (hook == NF_INET_PRE_ROUTING) + return ip6t_do_table(skb, hook, in, out, + dev_net(in)->ipv6.ip6table_raw); -static unsigned int -ip6t_local_out_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ + /* OUTPUT: */ return ip6t_do_table(skb, hook, in, out, dev_net(out)->ipv6.ip6table_raw); } static struct nf_hook_ops ip6t_ops[] __read_mostly = { { - .hook = ip6t_pre_routing_hook, + .hook = ip6table_raw_hook, .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_FIRST, .owner = THIS_MODULE, }, { - .hook = ip6t_local_out_hook, + .hook = ip6table_raw_hook, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_FIRST, diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index baa8d4ef3b0a..835858929358 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -64,56 +64,37 @@ static const struct xt_table security_table = { }; static unsigned int -ip6t_local_in_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +ip6table_security_hook(unsigned int hook, struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(skb, hook, in, out, - dev_net(in)->ipv6.ip6table_security); -} + if (hook == NF_INET_LOCAL_OUT) + return ip6t_do_table(skb, hook, in, out, + dev_net(out)->ipv6.ip6table_security); -static unsigned int -ip6t_forward_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ + /* INPUT/FORWARD: */ return ip6t_do_table(skb, hook, in, out, dev_net(in)->ipv6.ip6table_security); } -static unsigned int -ip6t_local_out_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - /* TBD: handle short packets via raw socket */ - return ip6t_do_table(skb, hook, in, out, - dev_net(out)->ipv6.ip6table_security); -} - static struct nf_hook_ops ip6t_ops[] __read_mostly = { { - .hook = ip6t_local_in_hook, + .hook = ip6table_security_hook, .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_SECURITY, }, { - .hook = ip6t_forward_hook, + .hook = ip6table_security_hook, .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_FORWARD, .priority = NF_IP6_PRI_SECURITY, }, { - .hook = ip6t_local_out_hook, + .hook = ip6table_security_hook, .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, -- cgit v1.2.2 From 2b21e051472fdb4680076278b2ccf63ebc1cc3bc Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 13 Jun 2009 06:57:10 +0200 Subject: netfilter: xtables: compact table hook functions (2/2) The calls to ip6t_do_table only show minimal differences, so it seems like a good cleanup to merge them to a single one too. 
Space saving obtained by both patches: 6807725->6807373 ("Total" column from `size -A`.) Signed-off-by: Jan Engelhardt --- net/ipv4/netfilter/arptable_filter.c | 8 ++------ net/ipv4/netfilter/iptable_filter.c | 21 +++++++++------------ net/ipv4/netfilter/iptable_raw.c | 19 +++++++++---------- net/ipv4/netfilter/iptable_security.c | 23 ++++++++++------------- net/ipv6/netfilter/ip6table_filter.c | 8 ++------ net/ipv6/netfilter/ip6table_raw.c | 8 ++------ net/ipv6/netfilter/ip6table_security.c | 8 ++------ 7 files changed, 36 insertions(+), 59 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index e9d823b149cd..deeda9b2cf05 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -58,13 +58,9 @@ arptable_filter_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if (hook == NF_ARP_OUT) - return arpt_do_table(skb, hook, in, out, - dev_net(out)->ipv4.arptable_filter); + const struct net *net = dev_net((in != NULL) ? in : out); - /* INPUT/FORWARD: */ - return arpt_do_table(skb, hook, in, out, - dev_net(in)->ipv4.arptable_filter); + return arpt_do_table(skb, hook, in, out, net->ipv4.arptable_filter); } static struct nf_hook_ops arpt_ops[] __read_mostly = { diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 5369833ad56a..1bfeaae6f624 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -65,19 +65,16 @@ iptable_filter_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if (hook == NF_INET_LOCAL_OUT) { - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) - /* root is playing with raw sockets. */ - return NF_ACCEPT; - - return ipt_do_table(skb, hook, in, out, - dev_net(out)->ipv4.iptable_filter); - } + const struct net *net; + + if (hook == NF_INET_LOCAL_OUT && + (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr))) + /* root is playing with raw sockets. */ + return NF_ACCEPT; - /* LOCAL_IN/FORWARD: */ - return ipt_do_table(skb, hook, in, out, - dev_net(in)->ipv4.iptable_filter); + net = dev_net((in != NULL) ? in : out); + return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_filter); } static struct nf_hook_ops ipt_ops[] __read_mostly = { diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 2c55575e89f5..d16e43777c31 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -49,17 +49,16 @@ iptable_raw_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if (hook == NF_INET_PRE_ROUTING) - return ipt_do_table(skb, hook, in, out, - dev_net(in)->ipv4.iptable_raw); - - /* OUTPUT: */ - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) + const struct net *net; + + if (hook == NF_INET_LOCAL_OUT && + (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr))) + /* root is playing with raw sockets. */ return NF_ACCEPT; - return ipt_do_table(skb, hook, in, out, - dev_net(out)->ipv4.iptable_raw); + + net = dev_net((in != NULL) ? in : out); + return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_raw); } /* 'raw' is the very first table. 
*/ diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 1c666bab3269..324505aaaa73 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -70,19 +70,16 @@ iptable_security_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if (hook == NF_INET_LOCAL_OUT) { - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) - /* Somebody is playing with raw sockets. */ - return NF_ACCEPT; - - return ipt_do_table(skb, hook, in, out, - dev_net(out)->ipv4.iptable_security); - } - - /* INPUT/FORWARD: */ - return ipt_do_table(skb, hook, in, out, - dev_net(in)->ipv4.iptable_security); + const struct net *net; + + if (hook == NF_INET_LOCAL_OUT && + (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr))) + /* Somebody is playing with raw sockets. */ + return NF_ACCEPT; + + net = dev_net((in != NULL) ? in : out); + return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_security); } static struct nf_hook_ops ipt_ops[] __read_mostly = { diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 38074e933f67..866f34ae236b 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -64,13 +64,9 @@ ip6table_filter_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if (hook == NF_INET_LOCAL_OUT) - return ip6t_do_table(skb, hook, in, out, - dev_net(out)->ipv6.ip6table_filter); + const struct net *net = dev_net((in != NULL) ? in : out); - /* INPUT/FORWARD: */ - return ip6t_do_table(skb, hook, in, out, - dev_net(in)->ipv6.ip6table_filter); + return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_filter); } static struct nf_hook_ops ip6t_ops[] __read_mostly = { diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 985e27cf1e0c..5451a36fbc21 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -48,13 +48,9 @@ ip6table_raw_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if (hook == NF_INET_PRE_ROUTING) - return ip6t_do_table(skb, hook, in, out, - dev_net(in)->ipv6.ip6table_raw); + const struct net *net = dev_net((in != NULL) ? in : out); - /* OUTPUT: */ - return ip6t_do_table(skb, hook, in, out, - dev_net(out)->ipv6.ip6table_raw); + return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_raw); } static struct nf_hook_ops ip6t_ops[] __read_mostly = { diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 835858929358..841ea77f5218 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -69,13 +69,9 @@ ip6table_security_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if (hook == NF_INET_LOCAL_OUT) - return ip6t_do_table(skb, hook, in, out, - dev_net(out)->ipv6.ip6table_security); + const struct net *net = dev_net((in != NULL) ? 
in : out); - /* INPUT/FORWARD: */ - return ip6t_do_table(skb, hook, in, out, - dev_net(in)->ipv6.ip6table_security); + return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_security); } static struct nf_hook_ops ip6t_ops[] __read_mostly = { -- cgit v1.2.2 From 2b95efe7f6bb750256a702cc32d33b0cb2cd8223 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 17 Jun 2009 13:57:48 +0200 Subject: netfilter: xtables: use xt_table for hook instantiation The respective xt_table structures already have most of the metadata needed for hook setup. Add a 'priority' field to struct xt_table so that xt_hook_link() can be called with a reduced number of arguments. So should we be having more tables in the future, it comes at no static cost (only runtime, as before) - space saved: 6807373->6806555. Signed-off-by: Jan Engelhardt --- net/ipv4/netfilter/arptable_filter.c | 33 +++++---------------- net/ipv4/netfilter/iptable_filter.c | 33 +++++---------------- net/ipv4/netfilter/iptable_mangle.c | 47 +++++------------------------ net/ipv4/netfilter/iptable_raw.c | 27 +++++------------ net/ipv4/netfilter/iptable_security.c | 33 +++++---------------- net/ipv6/netfilter/ip6table_filter.c | 33 +++++---------------- net/ipv6/netfilter/ip6table_mangle.c | 48 +++++------------------------- net/ipv6/netfilter/ip6table_raw.c | 26 +++++----------- net/ipv6/netfilter/ip6table_security.c | 33 +++++---------------- net/netfilter/x_tables.c | 54 ++++++++++++++++++++++++++++++++++ 10 files changed, 117 insertions(+), 250 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index deeda9b2cf05..b361de0dac4c 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -50,6 +50,7 @@ static const struct xt_table packet_filter = { .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_ARP, + .priority = NF_IP_PRI_FILTER, }; /* The work comes in here from netfilter.c */ @@ -63,29 +64,7 @@ arptable_filter_hook(unsigned int hook, struct sk_buff *skb, return arpt_do_table(skb, hook, in, out, net->ipv4.arptable_filter); } -static struct nf_hook_ops arpt_ops[] __read_mostly = { - { - .hook = arptable_filter_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_ARP, - .hooknum = NF_ARP_IN, - .priority = NF_IP_PRI_FILTER, - }, - { - .hook = arptable_filter_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_ARP, - .hooknum = NF_ARP_OUT, - .priority = NF_IP_PRI_FILTER, - }, - { - .hook = arptable_filter_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_ARP, - .hooknum = NF_ARP_FORWARD, - .priority = NF_IP_PRI_FILTER, - }, -}; +static struct nf_hook_ops *arpfilter_ops __read_mostly; static int __net_init arptable_filter_net_init(struct net *net) { @@ -115,9 +94,11 @@ static int __init arptable_filter_init(void) if (ret < 0) return ret; - ret = nf_register_hooks(arpt_ops, ARRAY_SIZE(arpt_ops)); - if (ret < 0) + arpfilter_ops = xt_hook_link(&packet_filter, arptable_filter_hook); + if (IS_ERR(arpfilter_ops)) { + ret = PTR_ERR(arpfilter_ops); goto cleanup_table; + } return ret; cleanup_table: @@ -127,7 +108,7 @@ cleanup_table: static void __exit arptable_filter_fini(void) { - nf_unregister_hooks(arpt_ops, ARRAY_SIZE(arpt_ops)); + xt_hook_unlink(&packet_filter, arpfilter_ops); unregister_pernet_subsys(&arptable_filter_net_ops); } diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 1bfeaae6f624..c14bb85db1d9 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ 
-58,6 +58,7 @@ static const struct xt_table packet_filter = { .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV4, + .priority = NF_IP_PRI_FILTER, }; static unsigned int @@ -77,29 +78,7 @@ iptable_filter_hook(unsigned int hook, struct sk_buff *skb, return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_filter); } -static struct nf_hook_ops ipt_ops[] __read_mostly = { - { - .hook = iptable_filter_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP_PRI_FILTER, - }, - { - .hook = iptable_filter_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_FORWARD, - .priority = NF_IP_PRI_FILTER, - }, - { - .hook = iptable_filter_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP_PRI_FILTER, - }, -}; +static struct nf_hook_ops *filter_ops __read_mostly; /* Default to forward because I got too much mail already. */ static int forward = NF_ACCEPT; @@ -142,9 +121,11 @@ static int __init iptable_filter_init(void) return ret; /* Register hooks */ - ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); - if (ret < 0) + filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook); + if (IS_ERR(filter_ops)) { + ret = PTR_ERR(filter_ops); goto cleanup_table; + } return ret; @@ -155,7 +136,7 @@ static int __init iptable_filter_init(void) static void __exit iptable_filter_fini(void) { - nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); + xt_hook_unlink(&packet_filter, filter_ops); unregister_pernet_subsys(&iptable_filter_net_ops); } diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 4e699cd275c6..2355a229f8ee 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -69,6 +69,7 @@ static const struct xt_table packet_mangler = { .valid_hooks = MANGLE_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV4, + .priority = NF_IP_PRI_MANGLE, }; static unsigned int @@ -129,43 +130,7 @@ iptable_mangle_hook(unsigned int hook, dev_net(in)->ipv4.iptable_mangle); } -static struct nf_hook_ops ipt_ops[] __read_mostly = { - { - .hook = iptable_mangle_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP_PRI_MANGLE, - }, - { - .hook = iptable_mangle_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP_PRI_MANGLE, - }, - { - .hook = iptable_mangle_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_FORWARD, - .priority = NF_IP_PRI_MANGLE, - }, - { - .hook = iptable_mangle_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP_PRI_MANGLE, - }, - { - .hook = iptable_mangle_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP_PRI_MANGLE, - }, -}; +static struct nf_hook_ops *mangle_ops __read_mostly; static int __net_init iptable_mangle_net_init(struct net *net) { @@ -196,9 +161,11 @@ static int __init iptable_mangle_init(void) return ret; /* Register hooks */ - ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); - if (ret < 0) + mangle_ops = xt_hook_link(&packet_mangler, iptable_mangle_hook); + if (IS_ERR(mangle_ops)) { + ret = PTR_ERR(mangle_ops); goto cleanup_table; + } return ret; @@ -209,7 +176,7 @@ static int __init iptable_mangle_init(void) static void __exit iptable_mangle_fini(void) { - nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); + xt_hook_unlink(&packet_mangler, 
mangle_ops); unregister_pernet_subsys(&iptable_mangle_net_ops); } diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index d16e43777c31..62a99154f14c 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -41,6 +41,7 @@ static const struct xt_table packet_raw = { .valid_hooks = RAW_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV4, + .priority = NF_IP_PRI_RAW, }; /* The work comes in here from netfilter.c. */ @@ -61,23 +62,7 @@ iptable_raw_hook(unsigned int hook, struct sk_buff *skb, return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_raw); } -/* 'raw' is the very first table. */ -static struct nf_hook_ops ipt_ops[] __read_mostly = { - { - .hook = iptable_raw_hook, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP_PRI_RAW, - .owner = THIS_MODULE, - }, - { - .hook = iptable_raw_hook, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP_PRI_RAW, - .owner = THIS_MODULE, - }, -}; +static struct nf_hook_ops *rawtable_ops __read_mostly; static int __net_init iptable_raw_net_init(struct net *net) { @@ -108,9 +93,11 @@ static int __init iptable_raw_init(void) return ret; /* Register hooks */ - ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); - if (ret < 0) + rawtable_ops = xt_hook_link(&packet_raw, iptable_raw_hook); + if (IS_ERR(rawtable_ops)) { + ret = PTR_ERR(rawtable_ops); goto cleanup_table; + } return ret; @@ -121,7 +108,7 @@ static int __init iptable_raw_init(void) static void __exit iptable_raw_fini(void) { - nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); + xt_hook_unlink(&packet_raw, rawtable_ops); unregister_pernet_subsys(&iptable_raw_net_ops); } diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 324505aaaa73..b1bf3ca2c6c7 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -62,6 +62,7 @@ static const struct xt_table security_table = { .valid_hooks = SECURITY_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV4, + .priority = NF_IP_PRI_SECURITY, }; static unsigned int @@ -82,29 +83,7 @@ iptable_security_hook(unsigned int hook, struct sk_buff *skb, return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_security); } -static struct nf_hook_ops ipt_ops[] __read_mostly = { - { - .hook = iptable_security_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP_PRI_SECURITY, - }, - { - .hook = iptable_security_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_FORWARD, - .priority = NF_IP_PRI_SECURITY, - }, - { - .hook = iptable_security_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP_PRI_SECURITY, - }, -}; +static struct nf_hook_ops *sectbl_ops __read_mostly; static int __net_init iptable_security_net_init(struct net *net) { @@ -135,9 +114,11 @@ static int __init iptable_security_init(void) if (ret < 0) return ret; - ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); - if (ret < 0) + sectbl_ops = xt_hook_link(&security_table, iptable_security_hook); + if (IS_ERR(sectbl_ops)) { + ret = PTR_ERR(sectbl_ops); goto cleanup_table; + } return ret; @@ -148,7 +129,7 @@ cleanup_table: static void __exit iptable_security_fini(void) { - nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); + xt_hook_unlink(&security_table, sectbl_ops); unregister_pernet_subsys(&iptable_security_net_ops); } diff --git a/net/ipv6/netfilter/ip6table_filter.c 
b/net/ipv6/netfilter/ip6table_filter.c index 866f34ae236b..6e95d0614ca9 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -56,6 +56,7 @@ static const struct xt_table packet_filter = { .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, + .priority = NF_IP6_PRI_FILTER, }; /* The work comes in here from netfilter.c. */ @@ -69,29 +70,7 @@ ip6table_filter_hook(unsigned int hook, struct sk_buff *skb, return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_filter); } -static struct nf_hook_ops ip6t_ops[] __read_mostly = { - { - .hook = ip6table_filter_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP6_PRI_FILTER, - }, - { - .hook = ip6table_filter_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_FORWARD, - .priority = NF_IP6_PRI_FILTER, - }, - { - .hook = ip6table_filter_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP6_PRI_FILTER, - }, -}; +static struct nf_hook_ops *filter_ops __read_mostly; /* Default to forward because I got too much mail already. */ static int forward = NF_ACCEPT; @@ -134,9 +113,11 @@ static int __init ip6table_filter_init(void) return ret; /* Register hooks */ - ret = nf_register_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); - if (ret < 0) + filter_ops = xt_hook_link(&packet_filter, ip6table_filter_hook); + if (IS_ERR(filter_ops)) { + ret = PTR_ERR(filter_ops); goto cleanup_table; + } return ret; @@ -147,7 +128,7 @@ static int __init ip6table_filter_init(void) static void __exit ip6table_filter_fini(void) { - nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); + xt_hook_unlink(&packet_filter, filter_ops); unregister_pernet_subsys(&ip6table_filter_net_ops); } diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 405ac1f76390..5023ac52ffec 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -62,6 +62,7 @@ static const struct xt_table packet_mangler = { .valid_hooks = MANGLE_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, + .priority = NF_IP6_PRI_MANGLE, }; static unsigned int @@ -122,44 +123,7 @@ ip6table_mangle_hook(unsigned int hook, struct sk_buff *skb, dev_net(in)->ipv6.ip6table_mangle); } -static struct nf_hook_ops ip6t_ops[] __read_mostly = { - { - .hook = ip6table_mangle_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP6_PRI_MANGLE, - }, - { - .hook = ip6table_mangle_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP6_PRI_MANGLE, - }, - { - .hook = ip6table_mangle_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_FORWARD, - .priority = NF_IP6_PRI_MANGLE, - }, - { - .hook = ip6table_mangle_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP6_PRI_MANGLE, - }, - { - .hook = ip6table_mangle_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP6_PRI_MANGLE, - }, -}; - +static struct nf_hook_ops *mangle_ops __read_mostly; static int __net_init ip6table_mangle_net_init(struct net *net) { /* Register table */ @@ -189,9 +153,11 @@ static int __init ip6table_mangle_init(void) return ret; /* Register hooks */ - ret = nf_register_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); - if (ret < 0) + mangle_ops = xt_hook_link(&packet_mangler, ip6table_mangle_hook); + if 
(IS_ERR(mangle_ops)) { + ret = PTR_ERR(mangle_ops); goto cleanup_table; + } return ret; @@ -202,7 +168,7 @@ static int __init ip6table_mangle_init(void) static void __exit ip6table_mangle_fini(void) { - nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); + xt_hook_unlink(&packet_mangler, mangle_ops); unregister_pernet_subsys(&ip6table_mangle_net_ops); } diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 5451a36fbc21..3bfa69511641 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -40,6 +40,7 @@ static const struct xt_table packet_raw = { .valid_hooks = RAW_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, + .priority = NF_IP6_PRI_FIRST, }; /* The work comes in here from netfilter.c. */ @@ -53,22 +54,7 @@ ip6table_raw_hook(unsigned int hook, struct sk_buff *skb, return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_raw); } -static struct nf_hook_ops ip6t_ops[] __read_mostly = { - { - .hook = ip6table_raw_hook, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP6_PRI_FIRST, - .owner = THIS_MODULE, - }, - { - .hook = ip6table_raw_hook, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP6_PRI_FIRST, - .owner = THIS_MODULE, - }, -}; +static struct nf_hook_ops *rawtable_ops __read_mostly; static int __net_init ip6table_raw_net_init(struct net *net) { @@ -99,9 +85,11 @@ static int __init ip6table_raw_init(void) return ret; /* Register hooks */ - ret = nf_register_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); - if (ret < 0) + rawtable_ops = xt_hook_link(&packet_raw, ip6table_raw_hook); + if (IS_ERR(rawtable_ops)) { + ret = PTR_ERR(rawtable_ops); goto cleanup_table; + } return ret; @@ -112,7 +100,7 @@ static int __init ip6table_raw_init(void) static void __exit ip6table_raw_fini(void) { - nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); + xt_hook_unlink(&packet_raw, rawtable_ops); unregister_pernet_subsys(&ip6table_raw_net_ops); } diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 841ea77f5218..dd2200f17a6c 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -61,6 +61,7 @@ static const struct xt_table security_table = { .valid_hooks = SECURITY_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, + .priority = NF_IP6_PRI_SECURITY, }; static unsigned int @@ -74,29 +75,7 @@ ip6table_security_hook(unsigned int hook, struct sk_buff *skb, return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_security); } -static struct nf_hook_ops ip6t_ops[] __read_mostly = { - { - .hook = ip6table_security_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP6_PRI_SECURITY, - }, - { - .hook = ip6table_security_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_FORWARD, - .priority = NF_IP6_PRI_SECURITY, - }, - { - .hook = ip6table_security_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP6_PRI_SECURITY, - }, -}; +static struct nf_hook_ops *sectbl_ops __read_mostly; static int __net_init ip6table_security_net_init(struct net *net) { @@ -127,9 +106,11 @@ static int __init ip6table_security_init(void) if (ret < 0) return ret; - ret = nf_register_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); - if (ret < 0) + sectbl_ops = xt_hook_link(&security_table, ip6table_security_hook); + if (IS_ERR(sectbl_ops)) { + ret = PTR_ERR(sectbl_ops); goto cleanup_table; + } return ret; @@ -140,7 +121,7 
@@ cleanup_table: static void __exit ip6table_security_fini(void) { - nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); + xt_hook_unlink(&security_table, sectbl_ops); unregister_pernet_subsys(&ip6table_security_net_ops); } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index f01955cce314..b51cb0d7234a 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1091,6 +1091,60 @@ static const struct file_operations xt_target_ops = { #endif /* CONFIG_PROC_FS */ +/** + * xt_hook_link - set up hooks for a new table + * @table: table with metadata needed to set up hooks + * @fn: Hook function + * + * This function will take care of creating and registering the necessary + * Netfilter hooks for XT tables. + */ +struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn) +{ + unsigned int hook_mask = table->valid_hooks; + uint8_t i, num_hooks = hweight32(hook_mask); + uint8_t hooknum; + struct nf_hook_ops *ops; + int ret; + + ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL); + if (ops == NULL) + return ERR_PTR(-ENOMEM); + + for (i = 0, hooknum = 0; i < num_hooks && hook_mask != 0; + hook_mask >>= 1, ++hooknum) { + if (!(hook_mask & 1)) + continue; + ops[i].hook = fn; + ops[i].owner = table->me; + ops[i].pf = table->af; + ops[i].hooknum = hooknum; + ops[i].priority = table->priority; + ++i; + } + + ret = nf_register_hooks(ops, num_hooks); + if (ret < 0) { + kfree(ops); + return ERR_PTR(ret); + } + + return ops; +} +EXPORT_SYMBOL_GPL(xt_hook_link); + +/** + * xt_hook_unlink - remove hooks for a table + * @ops: nf_hook_ops array as returned by nf_hook_link + * @hook_mask: the very same mask that was passed to nf_hook_link + */ +void xt_hook_unlink(const struct xt_table *table, struct nf_hook_ops *ops) +{ + nf_unregister_hooks(ops, hweight32(table->valid_hooks)); + kfree(ops); +} +EXPORT_SYMBOL_GPL(xt_hook_unlink); + int xt_proto_init(struct net *net, u_int8_t af) { #ifdef CONFIG_PROC_FS -- cgit v1.2.2 From e3eaa9910b380530cfd2c0670fcd3f627674da8a Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 17 Jun 2009 22:14:54 +0200 Subject: netfilter: xtables: generate initial table on-demand The static initial tables are pretty large, and after the net namespace has been instantiated, they just hang around for nothing. This commit removes them and creates tables on-demand at runtime when needed. Size shrinks by 7735 bytes (x86_64). 
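For illustration only (this sketch is not part of the patch): after this change each table module builds its initial ruleset at runtime in its per-net init hook, registers it, and immediately frees the temporary replace blob, instead of carrying a static __net_initdata table. The names example_table, EXAMPLE_VALID_HOOKS and the iptable_example field below are placeholders; the real modules use their own xt_table definitions and net->ipv4/net->ipv6 members exactly as in the hunks that follow.

	/* hypothetical table, mirroring the pattern used by the real modules */
	static const struct xt_table example_table = {
		.name		= "example",
		.valid_hooks	= EXAMPLE_VALID_HOOKS,	/* placeholder hook mask */
		.me		= THIS_MODULE,
		.af		= NFPROTO_IPV4,
		.priority	= NF_IP_PRI_FILTER,
	};

	static int __net_init example_table_net_init(struct net *net)
	{
		struct ipt_replace *repl;

		/* build the initial ruleset on demand from the xt_table metadata */
		repl = ipt_alloc_initial_table(&example_table);
		if (repl == NULL)
			return -ENOMEM;

		/* ipt_register_table() copies the ruleset, so the temporary
		 * replace structure can be freed right away */
		net->ipv4.iptable_example =		/* hypothetical netns field */
			ipt_register_table(net, &example_table, repl);
		kfree(repl);

		if (IS_ERR(net->ipv4.iptable_example))
			return PTR_ERR(net->ipv4.iptable_example);
		return 0;
	}
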
Signed-off-by: Jan Engelhardt --- net/ipv4/netfilter/arp_tables.c | 7 ++++++ net/ipv4/netfilter/arptable_filter.c | 40 ++++++----------------------- net/ipv4/netfilter/ip_tables.c | 7 ++++++ net/ipv4/netfilter/iptable_filter.c | 46 ++++++++-------------------------- net/ipv4/netfilter/iptable_mangle.c | 46 ++++++---------------------------- net/ipv4/netfilter/iptable_raw.c | 36 ++++++-------------------- net/ipv4/netfilter/iptable_security.c | 39 ++++++---------------------- net/ipv4/netfilter/nf_nat_rule.c | 39 ++++++---------------------- net/ipv6/netfilter/ip6_tables.c | 7 ++++++ net/ipv6/netfilter/ip6table_filter.c | 46 ++++++++-------------------------- net/ipv6/netfilter/ip6table_mangle.c | 45 ++++++--------------------------- net/ipv6/netfilter/ip6table_raw.c | 36 ++++++-------------------- net/ipv6/netfilter/ip6table_security.c | 39 ++++++---------------------- net/netfilter/x_tables.c | 4 ++- net/netfilter/xt_repldata.h | 35 ++++++++++++++++++++++++++ 15 files changed, 138 insertions(+), 334 deletions(-) create mode 100644 net/netfilter/xt_repldata.h (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 90203e1b9187..72723ea1054b 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -27,6 +27,7 @@ #include #include +#include "../../netfilter/xt_repldata.h" MODULE_LICENSE("GPL"); MODULE_AUTHOR("David S. Miller "); @@ -58,6 +59,12 @@ do { \ #define ARP_NF_ASSERT(x) #endif +void *arpt_alloc_initial_table(const struct xt_table *info) +{ + return xt_alloc_initial_table(arpt, ARPT); +} +EXPORT_SYMBOL_GPL(arpt_alloc_initial_table); + static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, const char *hdr_addr, int len) { diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index b361de0dac4c..bfe26f32b930 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -6,6 +6,7 @@ */ #include +#include #include MODULE_LICENSE("GPL"); @@ -15,36 +16,6 @@ MODULE_DESCRIPTION("arptables filter table"); #define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \ (1 << NF_ARP_FORWARD)) -static const struct -{ - struct arpt_replace repl; - struct arpt_standard entries[3]; - struct arpt_error term; -} initial_table __net_initdata = { - .repl = { - .name = "filter", - .valid_hooks = FILTER_VALID_HOOKS, - .num_entries = 4, - .size = sizeof(struct arpt_standard) * 3 + sizeof(struct arpt_error), - .hook_entry = { - [NF_ARP_IN] = 0, - [NF_ARP_OUT] = sizeof(struct arpt_standard), - [NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard), - }, - .underflow = { - [NF_ARP_IN] = 0, - [NF_ARP_OUT] = sizeof(struct arpt_standard), - [NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard), - }, - }, - .entries = { - ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_IN */ - ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_OUT */ - ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_FORWARD */ - }, - .term = ARPT_ERROR_INIT, -}; - static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, @@ -68,9 +39,14 @@ static struct nf_hook_ops *arpfilter_ops __read_mostly; static int __net_init arptable_filter_net_init(struct net *net) { - /* Register table */ + struct arpt_replace *repl; + + repl = arpt_alloc_initial_table(&packet_filter); + if (repl == NULL) + return -ENOMEM; net->ipv4.arptable_filter = - arpt_register_table(net, &packet_filter, &initial_table.repl); + arpt_register_table(net, &packet_filter, repl); + kfree(repl); if 
(IS_ERR(net->ipv4.arptable_filter)) return PTR_ERR(net->ipv4.arptable_filter); return 0; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 5bf7de1527a5..2057b1bb6178 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -28,6 +28,7 @@ #include #include #include +#include "../../netfilter/xt_repldata.h" MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); @@ -66,6 +67,12 @@ do { \ #define inline #endif +void *ipt_alloc_initial_table(const struct xt_table *info) +{ + return xt_alloc_initial_table(ipt, IPT); +} +EXPORT_SYMBOL_GPL(ipt_alloc_initial_table); + /* We keep a set of rules for each CPU, so we can avoid write-locking them in the softirq when updating the counters and therefore diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index c14bb85db1d9..c8dc9800d620 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -23,36 +23,6 @@ MODULE_DESCRIPTION("iptables filter table"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT)) -static struct -{ - struct ipt_replace repl; - struct ipt_standard entries[3]; - struct ipt_error term; -} initial_table __net_initdata = { - .repl = { - .name = "filter", - .valid_hooks = FILTER_VALID_HOOKS, - .num_entries = 4, - .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), - .hook_entry = { - [NF_INET_LOCAL_IN] = 0, - [NF_INET_FORWARD] = sizeof(struct ipt_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, - }, - .underflow = { - [NF_INET_LOCAL_IN] = 0, - [NF_INET_FORWARD] = sizeof(struct ipt_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, - }, - }, - .entries = { - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ - IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - }, - .term = IPT_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, @@ -86,9 +56,18 @@ module_param(forward, bool, 0000); static int __net_init iptable_filter_net_init(struct net *net) { - /* Register table */ + struct ipt_replace *repl; + + repl = ipt_alloc_initial_table(&packet_filter); + if (repl == NULL) + return -ENOMEM; + /* Entry 1 is the FORWARD hook */ + ((struct ipt_standard *)repl->entries)[1].target.verdict = + -forward - 1; + net->ipv4.iptable_filter = - ipt_register_table(net, &packet_filter, &initial_table.repl); + ipt_register_table(net, &packet_filter, repl); + kfree(repl); if (IS_ERR(net->ipv4.iptable_filter)) return PTR_ERR(net->ipv4.iptable_filter); return 0; @@ -113,9 +92,6 @@ static int __init iptable_filter_init(void) return -EINVAL; } - /* Entry 1 is the FORWARD hook */ - initial_table.entries[1].target.verdict = -forward - 1; - ret = register_pernet_subsys(&iptable_filter_net_ops); if (ret < 0) return ret; diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 2355a229f8ee..58d7097baa3d 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -27,43 +27,6 @@ MODULE_DESCRIPTION("iptables mangle table"); (1 << NF_INET_LOCAL_OUT) | \ (1 << NF_INET_POST_ROUTING)) -/* Ouch - five different hooks? Maybe this should be a config option..... 
-- BC */ -static const struct -{ - struct ipt_replace repl; - struct ipt_standard entries[5]; - struct ipt_error term; -} initial_table __net_initdata = { - .repl = { - .name = "mangle", - .valid_hooks = MANGLE_VALID_HOOKS, - .num_entries = 6, - .size = sizeof(struct ipt_standard) * 5 + sizeof(struct ipt_error), - .hook_entry = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_LOCAL_IN] = sizeof(struct ipt_standard), - [NF_INET_FORWARD] = sizeof(struct ipt_standard) * 2, - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 3, - [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard) * 4, - }, - .underflow = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_LOCAL_IN] = sizeof(struct ipt_standard), - [NF_INET_FORWARD] = sizeof(struct ipt_standard) * 2, - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 3, - [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard) * 4, - }, - }, - .entries = { - IPT_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */ - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ - IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - IPT_STANDARD_INIT(NF_ACCEPT), /* POST_ROUTING */ - }, - .term = IPT_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, @@ -134,9 +97,14 @@ static struct nf_hook_ops *mangle_ops __read_mostly; static int __net_init iptable_mangle_net_init(struct net *net) { - /* Register table */ + struct ipt_replace *repl; + + repl = ipt_alloc_initial_table(&packet_mangler); + if (repl == NULL) + return -ENOMEM; net->ipv4.iptable_mangle = - ipt_register_table(net, &packet_mangler, &initial_table.repl); + ipt_register_table(net, &packet_mangler, repl); + kfree(repl); if (IS_ERR(net->ipv4.iptable_mangle)) return PTR_ERR(net->ipv4.iptable_mangle); return 0; diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 62a99154f14c..06fb9d11953c 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -9,33 +9,6 @@ #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) -static const struct -{ - struct ipt_replace repl; - struct ipt_standard entries[2]; - struct ipt_error term; -} initial_table __net_initdata = { - .repl = { - .name = "raw", - .valid_hooks = RAW_VALID_HOOKS, - .num_entries = 3, - .size = sizeof(struct ipt_standard) * 2 + sizeof(struct ipt_error), - .hook_entry = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) - }, - .underflow = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) - }, - }, - .entries = { - IPT_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */ - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - }, - .term = IPT_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, @@ -66,9 +39,14 @@ static struct nf_hook_ops *rawtable_ops __read_mostly; static int __net_init iptable_raw_net_init(struct net *net) { - /* Register table */ + struct ipt_replace *repl; + + repl = ipt_alloc_initial_table(&packet_raw); + if (repl == NULL) + return -ENOMEM; net->ipv4.iptable_raw = - ipt_register_table(net, &packet_raw, &initial_table.repl); + ipt_register_table(net, &packet_raw, repl); + kfree(repl); if (IS_ERR(net->ipv4.iptable_raw)) return PTR_ERR(net->ipv4.iptable_raw); return 0; diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index b1bf3ca2c6c7..cce2f64e6f21 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ 
b/net/ipv4/netfilter/iptable_security.c @@ -27,36 +27,6 @@ MODULE_DESCRIPTION("iptables security table, for MAC rules"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT) -static const struct -{ - struct ipt_replace repl; - struct ipt_standard entries[3]; - struct ipt_error term; -} initial_table __net_initdata = { - .repl = { - .name = "security", - .valid_hooks = SECURITY_VALID_HOOKS, - .num_entries = 4, - .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), - .hook_entry = { - [NF_INET_LOCAL_IN] = 0, - [NF_INET_FORWARD] = sizeof(struct ipt_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, - }, - .underflow = { - [NF_INET_LOCAL_IN] = 0, - [NF_INET_FORWARD] = sizeof(struct ipt_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, - }, - }, - .entries = { - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ - IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - }, - .term = IPT_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, @@ -87,9 +57,14 @@ static struct nf_hook_ops *sectbl_ops __read_mostly; static int __net_init iptable_security_net_init(struct net *net) { - net->ipv4.iptable_security = - ipt_register_table(net, &security_table, &initial_table.repl); + struct ipt_replace *repl; + repl = ipt_alloc_initial_table(&security_table); + if (repl == NULL) + return -ENOMEM; + net->ipv4.iptable_security = + ipt_register_table(net, &security_table, repl); + kfree(repl); if (IS_ERR(net->ipv4.iptable_security)) return PTR_ERR(net->ipv4.iptable_security); diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 85da34fdc755..ab74cc0535e2 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -28,36 +28,6 @@ (1 << NF_INET_POST_ROUTING) | \ (1 << NF_INET_LOCAL_OUT)) -static const struct -{ - struct ipt_replace repl; - struct ipt_standard entries[3]; - struct ipt_error term; -} nat_initial_table __net_initdata = { - .repl = { - .name = "nat", - .valid_hooks = NAT_VALID_HOOKS, - .num_entries = 4, - .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), - .hook_entry = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 - }, - .underflow = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 - }, - }, - .entries = { - IPT_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */ - IPT_STANDARD_INIT(NF_ACCEPT), /* POST_ROUTING */ - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - }, - .term = IPT_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table nat_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, @@ -186,8 +156,13 @@ static struct xt_target ipt_dnat_reg __read_mostly = { static int __net_init nf_nat_rule_net_init(struct net *net) { - net->ipv4.nat_table = ipt_register_table(net, &nat_table, - &nat_initial_table.repl); + struct ipt_replace *repl; + + repl = ipt_alloc_initial_table(&nat_table); + if (repl == NULL) + return -ENOMEM; + net->ipv4.nat_table = ipt_register_table(net, &nat_table, repl); + kfree(repl); if (IS_ERR(net->ipv4.nat_table)) return PTR_ERR(net->ipv4.nat_table); return 0; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 4332f4591482..dcd7825fe7b6 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c 
@@ -29,6 +29,7 @@ #include #include #include +#include "../../netfilter/xt_repldata.h" MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); @@ -67,6 +68,12 @@ do { \ #define inline #endif +void *ip6t_alloc_initial_table(const struct xt_table *info) +{ + return xt_alloc_initial_table(ip6t, IP6T); +} +EXPORT_SYMBOL_GPL(ip6t_alloc_initial_table); + /* We keep a set of rules for each CPU, so we can avoid write-locking them in the softirq when updating the counters and therefore diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 6e95d0614ca9..36b72cafc227 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -21,36 +21,6 @@ MODULE_DESCRIPTION("ip6tables filter table"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT)) -static struct -{ - struct ip6t_replace repl; - struct ip6t_standard entries[3]; - struct ip6t_error term; -} initial_table __net_initdata = { - .repl = { - .name = "filter", - .valid_hooks = FILTER_VALID_HOOKS, - .num_entries = 4, - .size = sizeof(struct ip6t_standard) * 3 + sizeof(struct ip6t_error), - .hook_entry = { - [NF_INET_LOCAL_IN] = 0, - [NF_INET_FORWARD] = sizeof(struct ip6t_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2 - }, - .underflow = { - [NF_INET_LOCAL_IN] = 0, - [NF_INET_FORWARD] = sizeof(struct ip6t_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2 - }, - }, - .entries = { - IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ - IP6T_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ - IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - }, - .term = IP6T_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, @@ -78,9 +48,18 @@ module_param(forward, bool, 0000); static int __net_init ip6table_filter_net_init(struct net *net) { - /* Register table */ + struct ip6t_replace *repl; + + repl = ip6t_alloc_initial_table(&packet_filter); + if (repl == NULL) + return -ENOMEM; + /* Entry 1 is the FORWARD hook */ + ((struct ip6t_standard *)repl->entries)[1].target.verdict = + -forward - 1; + net->ipv6.ip6table_filter = - ip6t_register_table(net, &packet_filter, &initial_table.repl); + ip6t_register_table(net, &packet_filter, repl); + kfree(repl); if (IS_ERR(net->ipv6.ip6table_filter)) return PTR_ERR(net->ipv6.ip6table_filter); return 0; @@ -105,9 +84,6 @@ static int __init ip6table_filter_init(void) return -EINVAL; } - /* Entry 1 is the FORWARD hook */ - initial_table.entries[1].target.verdict = -forward - 1; - ret = register_pernet_subsys(&ip6table_filter_net_ops); if (ret < 0) return ret; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 5023ac52ffec..dc803b7e8e54 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -21,42 +21,6 @@ MODULE_DESCRIPTION("ip6tables mangle table"); (1 << NF_INET_LOCAL_OUT) | \ (1 << NF_INET_POST_ROUTING)) -static const struct -{ - struct ip6t_replace repl; - struct ip6t_standard entries[5]; - struct ip6t_error term; -} initial_table __net_initdata = { - .repl = { - .name = "mangle", - .valid_hooks = MANGLE_VALID_HOOKS, - .num_entries = 6, - .size = sizeof(struct ip6t_standard) * 5 + sizeof(struct ip6t_error), - .hook_entry = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_LOCAL_IN] = sizeof(struct ip6t_standard), - [NF_INET_FORWARD] = sizeof(struct ip6t_standard) * 2, - [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 3, - [NF_INET_POST_ROUTING] = sizeof(struct 
ip6t_standard) * 4, - }, - .underflow = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_LOCAL_IN] = sizeof(struct ip6t_standard), - [NF_INET_FORWARD] = sizeof(struct ip6t_standard) * 2, - [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 3, - [NF_INET_POST_ROUTING] = sizeof(struct ip6t_standard) * 4, - }, - }, - .entries = { - IP6T_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */ - IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ - IP6T_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ - IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - IP6T_STANDARD_INIT(NF_ACCEPT), /* POST_ROUTING */ - }, - .term = IP6T_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, @@ -126,9 +90,14 @@ ip6table_mangle_hook(unsigned int hook, struct sk_buff *skb, static struct nf_hook_ops *mangle_ops __read_mostly; static int __net_init ip6table_mangle_net_init(struct net *net) { - /* Register table */ + struct ip6t_replace *repl; + + repl = ip6t_alloc_initial_table(&packet_mangler); + if (repl == NULL) + return -ENOMEM; net->ipv6.ip6table_mangle = - ip6t_register_table(net, &packet_mangler, &initial_table.repl); + ip6t_register_table(net, &packet_mangler, repl); + kfree(repl); if (IS_ERR(net->ipv6.ip6table_mangle)) return PTR_ERR(net->ipv6.ip6table_mangle); return 0; diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 3bfa69511641..aef31a29de9e 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -8,33 +8,6 @@ #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) -static const struct -{ - struct ip6t_replace repl; - struct ip6t_standard entries[2]; - struct ip6t_error term; -} initial_table __net_initdata = { - .repl = { - .name = "raw", - .valid_hooks = RAW_VALID_HOOKS, - .num_entries = 3, - .size = sizeof(struct ip6t_standard) * 2 + sizeof(struct ip6t_error), - .hook_entry = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) - }, - .underflow = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) - }, - }, - .entries = { - IP6T_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */ - IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - }, - .term = IP6T_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, @@ -58,9 +31,14 @@ static struct nf_hook_ops *rawtable_ops __read_mostly; static int __net_init ip6table_raw_net_init(struct net *net) { - /* Register table */ + struct ip6t_replace *repl; + + repl = ip6t_alloc_initial_table(&packet_raw); + if (repl == NULL) + return -ENOMEM; net->ipv6.ip6table_raw = - ip6t_register_table(net, &packet_raw, &initial_table.repl); + ip6t_register_table(net, &packet_raw, repl); + kfree(repl); if (IS_ERR(net->ipv6.ip6table_raw)) return PTR_ERR(net->ipv6.ip6table_raw); return 0; diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index dd2200f17a6c..0824d865aa9b 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -26,36 +26,6 @@ MODULE_DESCRIPTION("ip6tables security table, for MAC rules"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT) -static const struct -{ - struct ip6t_replace repl; - struct ip6t_standard entries[3]; - struct ip6t_error term; -} initial_table __net_initdata = { - .repl = { - .name = "security", - .valid_hooks = SECURITY_VALID_HOOKS, - .num_entries = 4, - .size = sizeof(struct ip6t_standard) * 3 + 
sizeof(struct ip6t_error), - .hook_entry = { - [NF_INET_LOCAL_IN] = 0, - [NF_INET_FORWARD] = sizeof(struct ip6t_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2, - }, - .underflow = { - [NF_INET_LOCAL_IN] = 0, - [NF_INET_FORWARD] = sizeof(struct ip6t_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2, - }, - }, - .entries = { - IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ - IP6T_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ - IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - }, - .term = IP6T_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, @@ -79,9 +49,14 @@ static struct nf_hook_ops *sectbl_ops __read_mostly; static int __net_init ip6table_security_net_init(struct net *net) { - net->ipv6.ip6table_security = - ip6t_register_table(net, &security_table, &initial_table.repl); + struct ip6t_replace *repl; + repl = ip6t_alloc_initial_table(&security_table); + if (repl == NULL) + return -ENOMEM; + net->ipv6.ip6table_security = + ip6t_register_table(net, &security_table, repl); + kfree(repl); if (IS_ERR(net->ipv6.ip6table_security)) return PTR_ERR(net->ipv6.ip6table_security); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index b51cb0d7234a..dc2e05cb54c0 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -26,7 +26,9 @@ #include #include - +#include +#include +#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte "); diff --git a/net/netfilter/xt_repldata.h b/net/netfilter/xt_repldata.h new file mode 100644 index 000000000000..6efe4e5a81c6 --- /dev/null +++ b/net/netfilter/xt_repldata.h @@ -0,0 +1,35 @@ +/* + * Today's hack: quantum tunneling in structs + * + * 'entries' and 'term' are never anywhere referenced by word in code. In fact, + * they serve as the hanging-off data accessed through repl.data[]. + */ + +#define xt_alloc_initial_table(type, typ2) ({ \ + unsigned int hook_mask = info->valid_hooks; \ + unsigned int nhooks = hweight32(hook_mask); \ + unsigned int bytes = 0, hooknum = 0, i = 0; \ + struct { \ + struct type##_replace repl; \ + struct type##_standard entries[nhooks]; \ + struct type##_error term; \ + } *tbl = kzalloc(sizeof(*tbl), GFP_KERNEL); \ + if (tbl == NULL) \ + return NULL; \ + strncpy(tbl->repl.name, info->name, sizeof(tbl->repl.name)); \ + tbl->term = (struct type##_error)typ2##_ERROR_INIT; \ + tbl->repl.valid_hooks = hook_mask; \ + tbl->repl.num_entries = nhooks + 1; \ + tbl->repl.size = nhooks * sizeof(struct type##_standard) + \ + sizeof(struct type##_error); \ + for (; hook_mask != 0; hook_mask >>= 1, ++hooknum) { \ + if (!(hook_mask & 1)) \ + continue; \ + tbl->repl.hook_entry[hooknum] = bytes; \ + tbl->repl.underflow[hooknum] = bytes; \ + tbl->entries[i++] = (struct type##_standard) \ + typ2##_STANDARD_INIT(NF_ACCEPT); \ + bytes += sizeof(struct type##_standard); \ + } \ + tbl; \ +}) -- cgit v1.2.2 From 32d2e3a149772441a6a146ad96cbae319e27742a Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 8 Feb 2010 23:19:04 +0000 Subject: net: x25: use seq_hlist_foo() helpers Simplify seq_file code. Signed-off-by: Li Zefan Signed-off-by: David S. 
Miller --- net/x25/x25_proc.c | 30 +++--------------------------- 1 file changed, 3 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c index 0a04e62e0e18..c4cd3226a053 100644 --- a/net/x25/x25_proc.c +++ b/net/x25/x25_proc.c @@ -93,40 +93,16 @@ out: return 0; } -static __inline__ struct sock *x25_get_socket_idx(loff_t pos) -{ - struct sock *s; - struct hlist_node *node; - - sk_for_each(s, node, &x25_list) - if (!pos--) - goto found; - s = NULL; -found: - return s; -} - static void *x25_seq_socket_start(struct seq_file *seq, loff_t *pos) __acquires(x25_list_lock) { - loff_t l = *pos; - read_lock_bh(&x25_list_lock); - return l ? x25_get_socket_idx(--l) : SEQ_START_TOKEN; + return seq_hlist_start_head(&x25_list, *pos); } static void *x25_seq_socket_next(struct seq_file *seq, void *v, loff_t *pos) { - struct sock *s; - - ++*pos; - if (v == SEQ_START_TOKEN) { - s = sk_head(&x25_list); - goto out; - } - s = sk_next(v); -out: - return s; + return seq_hlist_next(v, &x25_list, pos); } static void x25_seq_socket_stop(struct seq_file *seq, void *v) @@ -148,7 +124,7 @@ static int x25_seq_socket_show(struct seq_file *seq, void *v) goto out; } - s = v; + s = sk_entry(v); x25 = x25_sk(s); if (!x25->neighbour || (dev = x25->neighbour->dev) == NULL) -- cgit v1.2.2 From b999748acce739675a5e9420f21e54b8cbad9d81 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 8 Feb 2010 23:19:17 +0000 Subject: net: rose: use seq_hlist_foo() helpers Simplify seq_file code. Signed-off-by: Li Zefan Signed-off-by: David S. Miller --- net/rose/af_rose.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 8feb9e5d6623..e90b9b6c16ae 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1404,29 +1404,13 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) static void *rose_info_start(struct seq_file *seq, loff_t *pos) __acquires(rose_list_lock) { - int i; - struct sock *s; - struct hlist_node *node; - spin_lock_bh(&rose_list_lock); - if (*pos == 0) - return SEQ_START_TOKEN; - - i = 1; - sk_for_each(s, node, &rose_list) { - if (i == *pos) - return s; - ++i; - } - return NULL; + return seq_hlist_start_head(&rose_list, *pos); } static void *rose_info_next(struct seq_file *seq, void *v, loff_t *pos) { - ++*pos; - - return (v == SEQ_START_TOKEN) ? sk_head(&rose_list) - : sk_next((struct sock *)v); + return seq_hlist_next(v, &rose_list, pos); } static void rose_info_stop(struct seq_file *seq, void *v) @@ -1444,7 +1428,7 @@ static int rose_info_show(struct seq_file *seq, void *v) "dest_addr dest_call src_addr src_call dev lci neigh st vs vr va t t1 t2 t3 hb idle Snd-Q Rcv-Q inode\n"); else { - struct sock *s = v; + struct sock *s = sk_entry(v); struct rose_sock *rose = rose_sk(s); const char *devname, *callsign; const struct net_device *dev = rose->device; -- cgit v1.2.2 From b7ceabd9b528417973619c5b655bc5b21857ac36 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 8 Feb 2010 23:19:29 +0000 Subject: net: packet: use seq_hlist_foo() helpers Simplify seq_file code. Signed-off-by: Li Zefan Signed-off-by: David S. 
Miller --- net/packet/af_packet.c | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 6ecb426bc0cf..10f7295bcefb 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2510,33 +2510,19 @@ static struct notifier_block packet_netdev_notifier = { }; #ifdef CONFIG_PROC_FS -static inline struct sock *packet_seq_idx(struct net *net, loff_t off) -{ - struct sock *s; - struct hlist_node *node; - - sk_for_each(s, node, &net->packet.sklist) { - if (!off--) - return s; - } - return NULL; -} static void *packet_seq_start(struct seq_file *seq, loff_t *pos) __acquires(seq_file_net(seq)->packet.sklist_lock) { struct net *net = seq_file_net(seq); read_lock(&net->packet.sklist_lock); - return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN; + return seq_hlist_start_head(&net->packet.sklist, *pos); } static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct net *net = seq_file_net(seq); - ++*pos; - return (v == SEQ_START_TOKEN) - ? sk_head(&net->packet.sklist) - : sk_next((struct sock *)v) ; + return seq_hlist_next(v, &net->packet.sklist, pos); } static void packet_seq_stop(struct seq_file *seq, void *v) @@ -2551,7 +2537,7 @@ static int packet_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n"); else { - struct sock *s = v; + struct sock *s = sk_entry(v); const struct packet_sock *po = pkt_sk(s); seq_printf(seq, -- cgit v1.2.2 From 90dd7f5ace558314fa431ae0f59388ed3e5c7695 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 8 Feb 2010 23:19:42 +0000 Subject: net: netrom: use seq_hlist_foo() helpers Simplify seq_file code. Signed-off-by: Li Zefan Signed-off-by: David S. Miller --- net/netrom/af_netrom.c | 21 +++----------------- net/netrom/nr_route.c | 53 +++++++++----------------------------------------- 2 files changed, 12 insertions(+), 62 deletions(-) (limited to 'net') diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 71604c6613b5..a249127020a5 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1267,28 +1267,13 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) static void *nr_info_start(struct seq_file *seq, loff_t *pos) { - struct sock *s; - struct hlist_node *node; - int i = 1; - spin_lock_bh(&nr_list_lock); - if (*pos == 0) - return SEQ_START_TOKEN; - - sk_for_each(s, node, &nr_list) { - if (i == *pos) - return s; - ++i; - } - return NULL; + return seq_hlist_start_head(&nr_list, *pos); } static void *nr_info_next(struct seq_file *seq, void *v, loff_t *pos) { - ++*pos; - - return (v == SEQ_START_TOKEN) ? 
sk_head(&nr_list) - : sk_next((struct sock *)v); + return seq_hlist_next(v, &nr_list, pos); } static void nr_info_stop(struct seq_file *seq, void *v) @@ -1298,7 +1283,7 @@ static void nr_info_stop(struct seq_file *seq, void *v) static int nr_info_show(struct seq_file *seq, void *v) { - struct sock *s = v; + struct sock *s = sk_entry(v); struct net_device *dev; struct nr_sock *nr; const char *devname; diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index e2e2d33cafdf..5cc648012f50 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -863,33 +863,13 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) static void *nr_node_start(struct seq_file *seq, loff_t *pos) { - struct nr_node *nr_node; - struct hlist_node *node; - int i = 1; - spin_lock_bh(&nr_node_list_lock); - if (*pos == 0) - return SEQ_START_TOKEN; - - nr_node_for_each(nr_node, node, &nr_node_list) { - if (i == *pos) - return nr_node; - ++i; - } - - return NULL; + return seq_hlist_start_head(&nr_node_list, *pos); } static void *nr_node_next(struct seq_file *seq, void *v, loff_t *pos) { - struct hlist_node *node; - ++*pos; - - node = (v == SEQ_START_TOKEN) - ? nr_node_list.first - : ((struct nr_node *)v)->node_node.next; - - return hlist_entry(node, struct nr_node, node_node); + return seq_hlist_next(v, &nr_node_list, pos); } static void nr_node_stop(struct seq_file *seq, void *v) @@ -906,7 +886,9 @@ static int nr_node_show(struct seq_file *seq, void *v) seq_puts(seq, "callsign mnemonic w n qual obs neigh qual obs neigh qual obs neigh\n"); else { - struct nr_node *nr_node = v; + struct nr_node *nr_node = hlist_entry(v, struct nr_node, + node_node); + nr_node_lock(nr_node); seq_printf(seq, "%-9s %-7s %d %d", ax2asc(buf, &nr_node->callsign), @@ -949,31 +931,13 @@ const struct file_operations nr_nodes_fops = { static void *nr_neigh_start(struct seq_file *seq, loff_t *pos) { - struct nr_neigh *nr_neigh; - struct hlist_node *node; - int i = 1; - spin_lock_bh(&nr_neigh_list_lock); - if (*pos == 0) - return SEQ_START_TOKEN; - - nr_neigh_for_each(nr_neigh, node, &nr_neigh_list) { - if (i == *pos) - return nr_neigh; - } - return NULL; + return seq_hlist_start_head(&nr_neigh_list, *pos); } static void *nr_neigh_next(struct seq_file *seq, void *v, loff_t *pos) { - struct hlist_node *node; - ++*pos; - - node = (v == SEQ_START_TOKEN) - ? nr_neigh_list.first - : ((struct nr_neigh *)v)->neigh_node.next; - - return hlist_entry(node, struct nr_neigh, neigh_node); + return seq_hlist_next(v, &nr_neigh_list, pos); } static void nr_neigh_stop(struct seq_file *seq, void *v) @@ -989,8 +953,9 @@ static int nr_neigh_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) seq_puts(seq, "addr callsign dev qual lock count failed digipeaters\n"); else { - struct nr_neigh *nr_neigh = v; + struct nr_neigh *nr_neigh; + nr_neigh = hlist_entry(v, struct nr_neigh, neigh_node); seq_printf(seq, "%05d %-9s %-4s %3d %d %3d %3d", nr_neigh->number, ax2asc(buf, &nr_neigh->callsign), -- cgit v1.2.2 From b512f3d841370aedee9a2c24a14a0ab5fc0a02b2 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 8 Feb 2010 23:19:59 +0000 Subject: net: ax25: use seq_hlist_foo() helpers Simplify seq_file code. Signed-off-by: Li Zefan Signed-off-by: David S. 
Miller --- net/ax25/af_ax25.c | 18 +++--------------- net/ax25/ax25_uid.c | 25 ++++--------------------- 2 files changed, 7 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 5588ba69c468..a5beedf43e2d 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1863,25 +1863,13 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) static void *ax25_info_start(struct seq_file *seq, loff_t *pos) __acquires(ax25_list_lock) { - struct ax25_cb *ax25; - struct hlist_node *node; - int i = 0; - spin_lock_bh(&ax25_list_lock); - ax25_for_each(ax25, node, &ax25_list) { - if (i == *pos) - return ax25; - ++i; - } - return NULL; + return seq_hlist_start(&ax25_list, *pos); } static void *ax25_info_next(struct seq_file *seq, void *v, loff_t *pos) { - ++*pos; - - return hlist_entry( ((struct ax25_cb *)v)->ax25_node.next, - struct ax25_cb, ax25_node); + return seq_hlist_next(v, &ax25_list, pos); } static void ax25_info_stop(struct seq_file *seq, void *v) @@ -1892,7 +1880,7 @@ static void ax25_info_stop(struct seq_file *seq, void *v) static int ax25_info_show(struct seq_file *seq, void *v) { - ax25_cb *ax25 = v; + ax25_cb *ax25 = hlist_entry(v, struct ax25_cb, ax25_node); char buf[11]; int k; diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c index 832bcf092a01..9f13f6eefcba 100644 --- a/net/ax25/ax25_uid.c +++ b/net/ax25/ax25_uid.c @@ -146,31 +146,13 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos) __acquires(ax25_uid_lock) { - struct ax25_uid_assoc *pt; - struct hlist_node *node; - int i = 1; - read_lock(&ax25_uid_lock); - - if (*pos == 0) - return SEQ_START_TOKEN; - - ax25_uid_for_each(pt, node, &ax25_uid_list) { - if (i == *pos) - return pt; - ++i; - } - return NULL; + return seq_hlist_start_head(&ax25_uid_list, *pos); } static void *ax25_uid_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - ++*pos; - if (v == SEQ_START_TOKEN) - return ax25_uid_list.first; - else - return hlist_entry(((ax25_uid_assoc *)v)->uid_node.next, - ax25_uid_assoc, uid_node); + return seq_hlist_next(v, &ax25_uid_list, pos); } static void ax25_uid_seq_stop(struct seq_file *seq, void *v) @@ -186,8 +168,9 @@ static int ax25_uid_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) seq_printf(seq, "Policy: %d\n", ax25_uid_policy); else { - struct ax25_uid_assoc *pt = v; + struct ax25_uid_assoc *pt; + pt = hlist_entry(v, struct ax25_uid_assoc, uid_node); seq_printf(seq, "%6d %s\n", pt->uid, ax2asc(buf, &pt->call)); } return 0; -- cgit v1.2.2 From efaffb78d875a155b9f327532c2ddccf28c592b4 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 8 Feb 2010 23:20:15 +0000 Subject: net: appletalk: use seq_hlist_foo() helpers Simplify seq_file code. Signed-off-by: Li Zefan Signed-off-by: David S. 
Miller --- net/appletalk/atalk_proc.c | 30 +++--------------------------- 1 file changed, 3 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c index 80caad1a31a5..6ef0e761e5de 100644 --- a/net/appletalk/atalk_proc.c +++ b/net/appletalk/atalk_proc.c @@ -144,40 +144,16 @@ out: return 0; } -static __inline__ struct sock *atalk_get_socket_idx(loff_t pos) -{ - struct sock *s; - struct hlist_node *node; - - sk_for_each(s, node, &atalk_sockets) - if (!pos--) - goto found; - s = NULL; -found: - return s; -} - static void *atalk_seq_socket_start(struct seq_file *seq, loff_t *pos) __acquires(atalk_sockets_lock) { - loff_t l = *pos; - read_lock_bh(&atalk_sockets_lock); - return l ? atalk_get_socket_idx(--l) : SEQ_START_TOKEN; + return seq_hlist_start_head(&atalk_sockets, *pos); } static void *atalk_seq_socket_next(struct seq_file *seq, void *v, loff_t *pos) { - struct sock *i; - - ++*pos; - if (v == SEQ_START_TOKEN) { - i = sk_head(&atalk_sockets); - goto out; - } - i = sk_next(v); -out: - return i; + return seq_hlist_next(v, &atalk_sockets, pos); } static void atalk_seq_socket_stop(struct seq_file *seq, void *v) @@ -197,7 +173,7 @@ static int atalk_seq_socket_show(struct seq_file *seq, void *v) goto out; } - s = v; + s = sk_entry(v); at = at_sk(s); seq_printf(seq, "%02X %04X:%02X:%02X %04X:%02X:%02X %08X:%08X " -- cgit v1.2.2 From 27b5b8657a2aa761f76e45fa60c20b7bafc249dc Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 8 Feb 2010 23:20:29 +0000 Subject: net: af_key: use seq_hlist_foo() helpers Simplify seq_file code. Signed-off-by: Li Zefan Signed-off-by: David S. Miller --- net/key/af_key.c | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 41dd2cb07ef3..79d2c0f3c334 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3654,9 +3654,8 @@ static const struct net_proto_family pfkey_family_ops = { #ifdef CONFIG_PROC_FS static int pfkey_seq_show(struct seq_file *f, void *v) { - struct sock *s; + struct sock *s = sk_entry(v); - s = (struct sock *)v; if (v == SEQ_START_TOKEN) seq_printf(f ,"sk RefCnt Rmem Wmem User Inode\n"); else @@ -3675,19 +3674,9 @@ static void *pfkey_seq_start(struct seq_file *f, loff_t *ppos) { struct net *net = seq_file_net(f); struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); - struct sock *s; - struct hlist_node *node; - loff_t pos = *ppos; read_lock(&pfkey_table_lock); - if (pos == 0) - return SEQ_START_TOKEN; - - sk_for_each(s, node, &net_pfkey->table) - if (pos-- == 1) - return s; - - return NULL; + return seq_hlist_start_head(&net_pfkey->table, *ppos); } static void *pfkey_seq_next(struct seq_file *f, void *v, loff_t *ppos) @@ -3695,10 +3684,7 @@ static void *pfkey_seq_next(struct seq_file *f, void *v, loff_t *ppos) struct net *net = seq_file_net(f); struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); - ++*ppos; - return (v == SEQ_START_TOKEN) ? - sk_head(&net_pfkey->table) : - sk_next((struct sock *)v); + return seq_hlist_next(v, &net_pfkey->table, ppos); } static void pfkey_seq_stop(struct seq_file *f, void *v) -- cgit v1.2.2 From 4f134204f740d9fbf992843a6615175f5105f49e Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 8 Feb 2010 23:20:42 +0000 Subject: net: x25: use seq_list_foo() helpers Simplify seq_file code. Signed-off-by: Li Zefan Signed-off-by: David S. 
Miller --- net/x25/x25_proc.c | 84 ++++++------------------------------------------------ 1 file changed, 8 insertions(+), 76 deletions(-) (limited to 'net') diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c index c4cd3226a053..7ff373792324 100644 --- a/net/x25/x25_proc.c +++ b/net/x25/x25_proc.c @@ -25,49 +25,17 @@ #include #ifdef CONFIG_PROC_FS -static __inline__ struct x25_route *x25_get_route_idx(loff_t pos) -{ - struct list_head *route_entry; - struct x25_route *rt = NULL; - - list_for_each(route_entry, &x25_route_list) { - rt = list_entry(route_entry, struct x25_route, node); - if (!pos--) - goto found; - } - rt = NULL; -found: - return rt; -} static void *x25_seq_route_start(struct seq_file *seq, loff_t *pos) __acquires(x25_route_list_lock) { - loff_t l = *pos; - read_lock_bh(&x25_route_list_lock); - return l ? x25_get_route_idx(--l) : SEQ_START_TOKEN; + return seq_list_start_head(&x25_route_list, *pos); } static void *x25_seq_route_next(struct seq_file *seq, void *v, loff_t *pos) { - struct x25_route *rt; - - ++*pos; - if (v == SEQ_START_TOKEN) { - rt = NULL; - if (!list_empty(&x25_route_list)) - rt = list_entry(x25_route_list.next, - struct x25_route, node); - goto out; - } - rt = v; - if (rt->node.next != &x25_route_list) - rt = list_entry(rt->node.next, struct x25_route, node); - else - rt = NULL; -out: - return rt; + return seq_list_next(v, &x25_route_list, pos); } static void x25_seq_route_stop(struct seq_file *seq, void *v) @@ -78,9 +46,9 @@ static void x25_seq_route_stop(struct seq_file *seq, void *v) static int x25_seq_route_show(struct seq_file *seq, void *v) { - struct x25_route *rt; + struct x25_route *rt = list_entry(v, struct x25_route, node); - if (v == SEQ_START_TOKEN) { + if (v == &x25_route_list) { seq_puts(seq, "Address Digits Device\n"); goto out; } @@ -146,51 +114,16 @@ out: return 0; } -static __inline__ struct x25_forward *x25_get_forward_idx(loff_t pos) -{ - struct x25_forward *f; - struct list_head *entry; - - list_for_each(entry, &x25_forward_list) { - f = list_entry(entry, struct x25_forward, node); - if (!pos--) - goto found; - } - - f = NULL; -found: - return f; -} - static void *x25_seq_forward_start(struct seq_file *seq, loff_t *pos) __acquires(x25_forward_list_lock) { - loff_t l = *pos; - read_lock_bh(&x25_forward_list_lock); - return l ? 
x25_get_forward_idx(--l) : SEQ_START_TOKEN; + return seq_list_start_head(&x25_forward_list, *pos); } static void *x25_seq_forward_next(struct seq_file *seq, void *v, loff_t *pos) { - struct x25_forward *f; - - ++*pos; - if (v == SEQ_START_TOKEN) { - f = NULL; - if (!list_empty(&x25_forward_list)) - f = list_entry(x25_forward_list.next, - struct x25_forward, node); - goto out; - } - f = v; - if (f->node.next != &x25_forward_list) - f = list_entry(f->node.next, struct x25_forward, node); - else - f = NULL; -out: - return f; - + return seq_list_next(v, &x25_forward_list, pos); } static void x25_seq_forward_stop(struct seq_file *seq, void *v) @@ -201,9 +134,9 @@ static void x25_seq_forward_stop(struct seq_file *seq, void *v) static int x25_seq_forward_show(struct seq_file *seq, void *v) { - struct x25_forward *f; + struct x25_forward *f = list_entry(v, struct x25_forward, node); - if (v == SEQ_START_TOKEN) { + if (v == &x25_forward_list) { seq_printf(seq, "lci dev1 dev2\n"); goto out; } @@ -212,7 +145,6 @@ static int x25_seq_forward_show(struct seq_file *seq, void *v) seq_printf(seq, "%d %-10s %-10s\n", f->lci, f->dev1->name, f->dev2->name); - out: return 0; } -- cgit v1.2.2 From 216437eb8b5adf12ab91e1f0c046ffba22c63431 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 8 Feb 2010 23:21:05 +0000 Subject: net: irda: use seq_list_foo() helpers Simplify seq_file code. Signed-off-by: Li Zefan Signed-off-by: David S. Miller --- net/irda/irlan/irlan_common.c | 28 ++++------------------------ 1 file changed, 4 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c index 315ead3cb926..e486dc89ea59 100644 --- a/net/irda/irlan/irlan_common.c +++ b/net/irda/irlan/irlan_common.c @@ -1128,34 +1128,14 @@ int irlan_extract_param(__u8 *buf, char *name, char *value, __u16 *len) */ static void *irlan_seq_start(struct seq_file *seq, loff_t *pos) { - int i = 1; - struct irlan_cb *self; - rcu_read_lock(); - if (*pos == 0) - return SEQ_START_TOKEN; - - list_for_each_entry(self, &irlans, dev_list) { - if (*pos == i) - return self; - ++i; - } - return NULL; + return seq_list_start_head(&irlans, *pos); } /* Return entry after v, and increment pos */ static void *irlan_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct list_head *nxt; - - ++*pos; - if (v == SEQ_START_TOKEN) - nxt = irlans.next; - else - nxt = ((struct irlan_cb *)v)->dev_list.next; - - return (nxt == &irlans) ? NULL - : list_entry(nxt, struct irlan_cb, dev_list); + return seq_list_next(v, &irlans, pos); } /* End of reading /proc file */ @@ -1170,10 +1150,10 @@ static void irlan_seq_stop(struct seq_file *seq, void *v) */ static int irlan_seq_show(struct seq_file *seq, void *v) { - if (v == SEQ_START_TOKEN) + if (v == &irlans) seq_puts(seq, "IrLAN instances:\n"); else { - struct irlan_cb *self = v; + struct irlan_cb *self = list_entry(v, struct irlan_cb, dev_list); IRDA_ASSERT(self != NULL, return -1;); IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;); -- cgit v1.2.2 From a2b79b414df97a70c33f874b631e06830431d233 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 8 Feb 2010 23:21:22 +0000 Subject: net: ipx: use seq_list_foo() helpers Simplify seq_file code. Signed-off-by: Li Zefan Signed-off-by: David S. 
Miller --- net/ipx/ipx_proc.c | 90 ++++++++---------------------------------------------- 1 file changed, 12 insertions(+), 78 deletions(-) (limited to 'net') diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index 576178482f89..26b5bfcf1d03 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c @@ -13,45 +13,15 @@ #include #include -static __inline__ struct ipx_interface *ipx_get_interface_idx(loff_t pos) -{ - struct ipx_interface *i; - - list_for_each_entry(i, &ipx_interfaces, node) - if (!pos--) - goto out; - i = NULL; -out: - return i; -} - -static struct ipx_interface *ipx_interfaces_next(struct ipx_interface *i) -{ - struct ipx_interface *rc = NULL; - - if (i->node.next != &ipx_interfaces) - rc = list_entry(i->node.next, struct ipx_interface, node); - return rc; -} - static void *ipx_seq_interface_start(struct seq_file *seq, loff_t *pos) { - loff_t l = *pos; - spin_lock_bh(&ipx_interfaces_lock); - return l ? ipx_get_interface_idx(--l) : SEQ_START_TOKEN; + return seq_list_start_head(&ipx_interfaces, *pos); } static void *ipx_seq_interface_next(struct seq_file *seq, void *v, loff_t *pos) { - struct ipx_interface *i; - - ++*pos; - if (v == SEQ_START_TOKEN) - i = ipx_interfaces_head(); - else - i = ipx_interfaces_next(v); - return i; + return seq_list_next(v, &ipx_interfaces, pos); } static void ipx_seq_interface_stop(struct seq_file *seq, void *v) @@ -63,7 +33,7 @@ static int ipx_seq_interface_show(struct seq_file *seq, void *v) { struct ipx_interface *i; - if (v == SEQ_START_TOKEN) { + if (v == &ipx_interfaces) { seq_puts(seq, "Network Node_Address Primary Device " "Frame_Type"); #ifdef IPX_REFCNT_DEBUG @@ -73,7 +43,7 @@ static int ipx_seq_interface_show(struct seq_file *seq, void *v) goto out; } - i = v; + i = list_entry(v, struct ipx_interface, node); seq_printf(seq, "%08lX ", (unsigned long int)ntohl(i->if_netnum)); seq_printf(seq, "%02X%02X%02X%02X%02X%02X ", i->if_node[0], i->if_node[1], i->if_node[2], @@ -89,53 +59,15 @@ out: return 0; } -static struct ipx_route *ipx_routes_head(void) -{ - struct ipx_route *rc = NULL; - - if (!list_empty(&ipx_routes)) - rc = list_entry(ipx_routes.next, struct ipx_route, node); - return rc; -} - -static struct ipx_route *ipx_routes_next(struct ipx_route *r) -{ - struct ipx_route *rc = NULL; - - if (r->node.next != &ipx_routes) - rc = list_entry(r->node.next, struct ipx_route, node); - return rc; -} - -static __inline__ struct ipx_route *ipx_get_route_idx(loff_t pos) -{ - struct ipx_route *r; - - list_for_each_entry(r, &ipx_routes, node) - if (!pos--) - goto out; - r = NULL; -out: - return r; -} - static void *ipx_seq_route_start(struct seq_file *seq, loff_t *pos) { - loff_t l = *pos; read_lock_bh(&ipx_routes_lock); - return l ? 
ipx_get_route_idx(--l) : SEQ_START_TOKEN; + return seq_list_start_head(&ipx_routes, *pos); } static void *ipx_seq_route_next(struct seq_file *seq, void *v, loff_t *pos) { - struct ipx_route *r; - - ++*pos; - if (v == SEQ_START_TOKEN) - r = ipx_routes_head(); - else - r = ipx_routes_next(v); - return r; + return seq_list_next(v, &ipx_routes, pos); } static void ipx_seq_route_stop(struct seq_file *seq, void *v) @@ -147,11 +79,13 @@ static int ipx_seq_route_show(struct seq_file *seq, void *v) { struct ipx_route *rt; - if (v == SEQ_START_TOKEN) { + if (v == &ipx_routes) { seq_puts(seq, "Network Router_Net Router_Node\n"); goto out; } - rt = v; + + rt = list_entry(v, struct ipx_route, node); + seq_printf(seq, "%08lX ", (unsigned long int)ntohl(rt->ir_net)); if (rt->ir_routed) seq_printf(seq, "%08lX %02X%02X%02X%02X%02X%02X\n", @@ -226,9 +160,9 @@ static void *ipx_seq_socket_next(struct seq_file *seq, void *v, loff_t *pos) spin_unlock_bh(&i->if_sklist_lock); sk = NULL; for (;;) { - i = ipx_interfaces_next(i); - if (!i) + if (i->node.next == &ipx_interfaces) break; + i = list_entry(i->node.next, struct ipx_interface, node); spin_lock_bh(&i->if_sklist_lock); if (!hlist_empty(&i->if_sklist)) { sk = sk_head(&i->if_sklist); -- cgit v1.2.2 From 67de792420be2daa1c6fec07ec8552af9ea0bde3 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 8 Feb 2010 23:21:51 +0000 Subject: net: atm: use seq_list_foo() helpers Simplify seq_file code. Signed-off-by: Li Zefan Signed-off-by: David S. Miller --- net/atm/proc.c | 2 +- net/atm/resources.c | 18 ++---------------- 2 files changed, 3 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/atm/proc.c b/net/atm/proc.c index 476779d845eb..7a96b2376bd7 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -238,7 +238,7 @@ static int atm_dev_seq_show(struct seq_file *seq, void *v) "Itf Type ESI/\"MAC\"addr " "AAL(TX,err,RX,err,drop) ... [refcnt]\n"; - if (v == SEQ_START_TOKEN) + if (v == &atm_devs) seq_puts(seq, atm_dev_banner); else { struct atm_dev *dev = list_entry(v, struct atm_dev, dev_list); diff --git a/net/atm/resources.c b/net/atm/resources.c index 447ed89205d8..90082904f20d 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -444,21 +444,10 @@ done: return error; } -static inline void *dev_get_idx(loff_t left) -{ - struct list_head *p; - - list_for_each(p, &atm_devs) { - if (!--left) - break; - } - return (p != &atm_devs) ? p : NULL; -} - void *atm_dev_seq_start(struct seq_file *seq, loff_t *pos) { mutex_lock(&atm_dev_mutex); - return *pos ? dev_get_idx(*pos) : SEQ_START_TOKEN; + return seq_list_start_head(&atm_devs, *pos); } void atm_dev_seq_stop(struct seq_file *seq, void *v) @@ -468,8 +457,5 @@ void atm_dev_seq_stop(struct seq_file *seq, void *v) void *atm_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - ++*pos; - v = (v == SEQ_START_TOKEN) - ? atm_devs.next : ((struct list_head *)v)->next; - return (v == &atm_devs) ? NULL : v; + return seq_list_next(v, &atm_devs, pos); } -- cgit v1.2.2 From 4cad6c7c38930618d77e65af82c1403d63879eee Mon Sep 17 00:00:00 2001 From: Sujith Date: Wed, 10 Feb 2010 14:52:21 +0530 Subject: mac80211: Deny TX BA session requests during disassociation In associated state, when bringing an interface down, existing BA sessions are torn down. When this is in progress, nothing prevents mac80211 from accepting another BA session start request. Use a new station flag to fix this. Signed-off-by: Sujith Acked-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/agg-tx.c | 8 ++++++++ net/mac80211/mlme.c | 4 +++- net/mac80211/sta_info.h | 4 ++++ 3 files changed, 15 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 718fbcff84d2..5538e1b4a697 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -237,6 +237,14 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) sdata->vif.type != NL80211_IFTYPE_AP) return -EINVAL; + if (test_sta_flags(sta, WLAN_STA_DISASSOC)) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Disassociation is in progress. " + "Denying BA session request\n"); +#endif + return -EINVAL; + } + if (test_sta_flags(sta, WLAN_STA_SUSPEND)) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Suspend in progress. " diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ee9443dc20ff..bfc4a5070013 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -797,8 +797,10 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata) rcu_read_lock(); sta = sta_info_get(sdata, bssid); - if (sta) + if (sta) { + set_sta_flags(sta, WLAN_STA_DISASSOC); ieee80211_sta_tear_down_BA_sessions(sta); + } rcu_read_unlock(); changed |= ieee80211_reset_erp_info(sdata); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 5ff611a35979..822d84522937 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -42,6 +42,9 @@ * be in the queues * @WLAN_STA_PSPOLL: Station sent PS-poll while driver was keeping * station in power-save mode, reply when the driver unblocks. + * @WLAN_STA_DISASSOC: Disassociation in progress. + * This is used to reject TX BA session requests when disassociation + * is in progress. */ enum ieee80211_sta_info_flags { WLAN_STA_AUTH = 1<<0, @@ -57,6 +60,7 @@ enum ieee80211_sta_info_flags { WLAN_STA_SUSPEND = 1<<11, WLAN_STA_PS_DRIVER = 1<<12, WLAN_STA_PSPOLL = 1<<13, + WLAN_STA_DISASSOC = 1<<14, }; #define STA_TID_NUM 16 -- cgit v1.2.2 From 598856407d4e20ebb4de01a91a93d89325924d43 Mon Sep 17 00:00:00 2001 From: Damian Lukowski Date: Wed, 10 Feb 2010 18:04:08 -0800 Subject: tcp: fix ICMP-RTO war Make sure, that TCP has a nonzero RTT estimation after three-way handshake. Currently, a listening TCP has a value of 0 for srtt, rttvar and rto right after the three-way handshake is completed with TCP timestamps disabled. This will lead to corrupt RTO recalculation and retransmission flood when RTO is recalculated on backoff reversion as introduced in "Revert RTO on ICMP destination unreachable" (f1ecd5d9e7366609d640ff4040304ea197fbc618). This behaviour can be provoked by connecting to a server which "responds first" (like SMTP) and rejecting every packet after the handshake with dest-unreachable, which will lead to softirq load on the server (up to 30% per socket in some tests). Thanks to Ilpo Jarvinen for providing debug patches and to Denys Fedoryshchenko for reporting and testing. Changes since v3: Removed bad characters in patchfile. Reported-by: Denys Fedoryshchenko Signed-off-by: Damian Lukowski Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 28e029632493..3fddc69ccccc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5783,11 +5783,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, /* tcp_ack considers this ACK as duplicate * and does not calculate rtt. - * Fix it at least with timestamps. 
+ * Force it here. */ - if (tp->rx_opt.saw_tstamp && - tp->rx_opt.rcv_tsecr && !tp->srtt) - tcp_ack_saw_tstamp(sk, 0); + tcp_ack_update_rtt(sk, 0, 0); if (tp->rx_opt.tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; -- cgit v1.2.2 From 15682bc488d4af8c9bb998844a94281025e0a333 Mon Sep 17 00:00:00 2001 From: Peter P Waskiewicz Jr Date: Wed, 10 Feb 2010 20:03:05 -0800 Subject: ethtool: Introduce n-tuple filter programming support This patchset enables the ethtool layer to program n-tuple filters to an underlying device. The idea is to allow capable hardware to have static rules applied that can assist steering flows into appropriate queues. Hardware that is known to support these types of filters today are ixgbe and niu. Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- net/core/dev.c | 5 + net/core/ethtool.c | 329 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 333 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 94c1eeed25e5..ae75f25ac0a5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5419,6 +5419,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, netdev_init_queues(dev); + INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list); + dev->ethtool_ntuple_list.count = 0; INIT_LIST_HEAD(&dev->napi_list); INIT_LIST_HEAD(&dev->unreg_list); INIT_LIST_HEAD(&dev->link_watch_list); @@ -5455,6 +5457,9 @@ void free_netdev(struct net_device *dev) /* Flush device addresses */ dev_addr_flush(dev); + /* Clear ethtool n-tuple list */ + ethtool_ntuple_flush(dev); + list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) netif_napi_del(p); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index d8aee584e8d1..6ec73d3983a3 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -120,7 +120,7 @@ int ethtool_op_set_ufo(struct net_device *dev, u32 data) * NETIF_F_xxx values in include/linux/netdevice.h */ static const u32 flags_dup_features = - ETH_FLAG_LRO; + (ETH_FLAG_LRO | ETH_FLAG_NTUPLE); u32 ethtool_op_get_flags(struct net_device *dev) { @@ -139,9 +139,26 @@ int ethtool_op_set_flags(struct net_device *dev, u32 data) else dev->features &= ~NETIF_F_LRO; + if (data & ETH_FLAG_NTUPLE) + dev->features |= NETIF_F_NTUPLE; + else + dev->features &= ~NETIF_F_NTUPLE; + return 0; } +void ethtool_ntuple_flush(struct net_device *dev) +{ + struct ethtool_rx_ntuple_flow_spec_container *fsc, *f; + + list_for_each_entry_safe(fsc, f, &dev->ethtool_ntuple_list.list, list) { + list_del(&fsc->list); + kfree(fsc); + } + dev->ethtool_ntuple_list.count = 0; +} +EXPORT_SYMBOL(ethtool_ntuple_flush); + /* Handlers for each ethtool command */ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) @@ -266,6 +283,307 @@ err_out: return ret; } +static int __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, + struct ethtool_rx_ntuple_flow_spec *spec) +{ + struct ethtool_rx_ntuple_flow_spec_container *fsc; + + /* don't add filters forever */ + if (list->count >= ETHTOOL_MAX_NTUPLE_LIST_ENTRY) + return 0; + + fsc = kmalloc(sizeof(*fsc), GFP_ATOMIC); + if (!fsc) + return -ENOMEM; + + /* Copy the whole filter over */ + fsc->fs.flow_type = spec->flow_type; + memcpy(&fsc->fs.h_u, &spec->h_u, sizeof(spec->h_u)); + memcpy(&fsc->fs.m_u, &spec->m_u, sizeof(spec->m_u)); + + fsc->fs.vlan_tag = spec->vlan_tag; + fsc->fs.vlan_tag_mask = spec->vlan_tag_mask; + fsc->fs.data = spec->data; + fsc->fs.data_mask = spec->data_mask; + fsc->fs.action = spec->action; + + /* add to the 
list */ + list_add_tail_rcu(&fsc->list, &list->list); + list->count++; + + return 0; +} + +static int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_rx_ntuple cmd; + const struct ethtool_ops *ops = dev->ethtool_ops; + int ret; + + if (!ops->set_rx_ntuple) + return -EOPNOTSUPP; + + if (!(dev->features & NETIF_F_NTUPLE)) + return -EINVAL; + + if (copy_from_user(&cmd, useraddr, sizeof(cmd))) + return -EFAULT; + + ret = ops->set_rx_ntuple(dev, &cmd); + + /* + * Cache filter in dev struct for GET operation only if + * the underlying driver doesn't have its own GET operation, and + * only if the filter was added successfully. + */ + if (!ops->get_rx_ntuple && !ret) + if (__rx_ntuple_filter_add(&dev->ethtool_ntuple_list, &cmd.fs)) + return -ENOMEM; + + return ret; +} + +static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_gstrings gstrings; + const struct ethtool_ops *ops = dev->ethtool_ops; + struct ethtool_rx_ntuple_flow_spec_container *fsc; + u8 *data; + char *p; + int ret, i, num_strings = 0; + + if (!ops->get_sset_count) + return -EOPNOTSUPP; + + if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) + return -EFAULT; + + ret = ops->get_sset_count(dev, gstrings.string_set); + if (ret < 0) + return ret; + + gstrings.len = ret; + + data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); + if (!data) + return -ENOMEM; + + if (ops->get_rx_ntuple) { + /* driver-specific filter grab */ + ret = ops->get_rx_ntuple(dev, gstrings.string_set, data); + goto copy; + } + + /* default ethtool filter grab */ + i = 0; + p = (char *)data; + list_for_each_entry(fsc, &dev->ethtool_ntuple_list.list, list) { + sprintf(p, "Filter %d:\n", i); + p += ETH_GSTRING_LEN; + num_strings++; + + switch (fsc->fs.flow_type) { + case TCP_V4_FLOW: + sprintf(p, "\tFlow Type: TCP\n"); + p += ETH_GSTRING_LEN; + num_strings++; + break; + case UDP_V4_FLOW: + sprintf(p, "\tFlow Type: UDP\n"); + p += ETH_GSTRING_LEN; + num_strings++; + break; + case SCTP_V4_FLOW: + sprintf(p, "\tFlow Type: SCTP\n"); + p += ETH_GSTRING_LEN; + num_strings++; + break; + case AH_ESP_V4_FLOW: + sprintf(p, "\tFlow Type: AH ESP\n"); + p += ETH_GSTRING_LEN; + num_strings++; + break; + case ESP_V4_FLOW: + sprintf(p, "\tFlow Type: ESP\n"); + p += ETH_GSTRING_LEN; + num_strings++; + break; + case IP_USER_FLOW: + sprintf(p, "\tFlow Type: Raw IP\n"); + p += ETH_GSTRING_LEN; + num_strings++; + break; + case IPV4_FLOW: + sprintf(p, "\tFlow Type: IPv4\n"); + p += ETH_GSTRING_LEN; + num_strings++; + break; + default: + sprintf(p, "\tFlow Type: Unknown\n"); + p += ETH_GSTRING_LEN; + num_strings++; + goto unknown_filter; + }; + + /* now the rest of the filters */ + switch (fsc->fs.flow_type) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case SCTP_V4_FLOW: + sprintf(p, "\tSrc IP addr: 0x%x\n", + fsc->fs.h_u.tcp_ip4_spec.ip4src); + p += ETH_GSTRING_LEN; + num_strings++; + sprintf(p, "\tSrc IP mask: 0x%x\n", + fsc->fs.m_u.tcp_ip4_spec.ip4src); + p += ETH_GSTRING_LEN; + num_strings++; + sprintf(p, "\tDest IP addr: 0x%x\n", + fsc->fs.h_u.tcp_ip4_spec.ip4dst); + p += ETH_GSTRING_LEN; + num_strings++; + sprintf(p, "\tDest IP mask: 0x%x\n", + fsc->fs.m_u.tcp_ip4_spec.ip4dst); + p += ETH_GSTRING_LEN; + num_strings++; + sprintf(p, "\tSrc Port: %d, mask: 0x%x\n", + fsc->fs.h_u.tcp_ip4_spec.psrc, + fsc->fs.m_u.tcp_ip4_spec.psrc); + p += ETH_GSTRING_LEN; + num_strings++; + sprintf(p, "\tDest Port: %d, mask: 0x%x\n", + fsc->fs.h_u.tcp_ip4_spec.pdst, + fsc->fs.m_u.tcp_ip4_spec.pdst); + p += 
ETH_GSTRING_LEN; + num_strings++; + sprintf(p, "\tTOS: %d, mask: 0x%x\n", + fsc->fs.h_u.tcp_ip4_spec.tos, + fsc->fs.m_u.tcp_ip4_spec.tos); + p += ETH_GSTRING_LEN; + num_strings++; + break; + case AH_ESP_V4_FLOW: + case ESP_V4_FLOW: + sprintf(p, "\tSrc IP addr: 0x%x\n", + fsc->fs.h_u.ah_ip4_spec.ip4src); + p += ETH_GSTRING_LEN; + num_strings++; + sprintf(p, "\tSrc IP mask: 0x%x\n", + fsc->fs.m_u.ah_ip4_spec.ip4src); + p += ETH_GSTRING_LEN; + num_strings++; + sprintf(p, "\tDest IP addr: 0x%x\n", + fsc->fs.h_u.ah_ip4_spec.ip4dst); + p += ETH_GSTRING_LEN; + num_strings++; + sprintf(p, "\tDest IP mask: 0x%x\n", + fsc->fs.m_u.ah_ip4_spec.ip4dst); + p += ETH_GSTRING_LEN; + num_strings++; + sprintf(p, "\tSPI: %d, mask: 0x%x\n", + fsc->fs.h_u.ah_ip4_spec.spi, + fsc->fs.m_u.ah_ip4_spec.spi); + p += ETH_GSTRING_LEN; + num_strings++; + sprintf(p, "\tTOS: %d, mask: 0x%x\n", + fsc->fs.h_u.ah_ip4_spec.tos, + fsc->fs.m_u.ah_ip4_spec.tos); + p += ETH_GSTRING_LEN; + num_strings++; + break; + case IP_USER_FLOW: + sprintf(p, "\tSrc IP addr: 0x%x\n", + fsc->fs.h_u.raw_ip4_spec.ip4src); + p += ETH_GSTRING_LEN; + num_strings++; + sprintf(p, "\tSrc IP mask: 0x%x\n", + fsc->fs.m_u.raw_ip4_spec.ip4src); + p += ETH_GSTRING_LEN; + num_strings++; + sprintf(p, "\tDest IP addr: 0x%x\n", + fsc->fs.h_u.raw_ip4_spec.ip4dst); + p += ETH_GSTRING_LEN; + num_strings++; + sprintf(p, "\tDest IP mask: 0x%x\n", + fsc->fs.m_u.raw_ip4_spec.ip4dst); + p += ETH_GSTRING_LEN; + num_strings++; + break; + case IPV4_FLOW: + sprintf(p, "\tSrc IP addr: 0x%x\n", + fsc->fs.h_u.usr_ip4_spec.ip4src); + p += ETH_GSTRING_LEN; + num_strings++; + sprintf(p, "\tSrc IP mask: 0x%x\n", + fsc->fs.m_u.usr_ip4_spec.ip4src); + p += ETH_GSTRING_LEN; + num_strings++; + sprintf(p, "\tDest IP addr: 0x%x\n", + fsc->fs.h_u.usr_ip4_spec.ip4dst); + p += ETH_GSTRING_LEN; + num_strings++; + sprintf(p, "\tDest IP mask: 0x%x\n", + fsc->fs.m_u.usr_ip4_spec.ip4dst); + p += ETH_GSTRING_LEN; + num_strings++; + sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n", + fsc->fs.h_u.usr_ip4_spec.l4_4_bytes, + fsc->fs.m_u.usr_ip4_spec.l4_4_bytes); + p += ETH_GSTRING_LEN; + num_strings++; + sprintf(p, "\tTOS: %d, mask: 0x%x\n", + fsc->fs.h_u.usr_ip4_spec.tos, + fsc->fs.m_u.usr_ip4_spec.tos); + p += ETH_GSTRING_LEN; + num_strings++; + sprintf(p, "\tIP Version: %d, mask: 0x%x\n", + fsc->fs.h_u.usr_ip4_spec.ip_ver, + fsc->fs.m_u.usr_ip4_spec.ip_ver); + p += ETH_GSTRING_LEN; + num_strings++; + sprintf(p, "\tProtocol: %d, mask: 0x%x\n", + fsc->fs.h_u.usr_ip4_spec.proto, + fsc->fs.m_u.usr_ip4_spec.proto); + p += ETH_GSTRING_LEN; + num_strings++; + break; + }; + sprintf(p, "\tVLAN: %d, mask: 0x%x\n", + fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask); + p += ETH_GSTRING_LEN; + num_strings++; + sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data); + p += ETH_GSTRING_LEN; + num_strings++; + sprintf(p, "\tUser-defined mask: 0x%Lx\n", fsc->fs.data_mask); + p += ETH_GSTRING_LEN; + num_strings++; + if (fsc->fs.action == ETHTOOL_RXNTUPLE_ACTION_DROP) + sprintf(p, "\tAction: Drop\n"); + else + sprintf(p, "\tAction: Direct to queue %d\n", + fsc->fs.action); + p += ETH_GSTRING_LEN; + num_strings++; +unknown_filter: + i++; + } +copy: + /* indicate to userspace how many strings we actually have */ + gstrings.len = num_strings; + ret = -EFAULT; + if (copy_to_user(useraddr, &gstrings, sizeof(gstrings))) + goto out; + useraddr += sizeof(gstrings); + if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN)) + goto out; + ret = 0; + +out: + kfree(data); + return ret; +} + static int ethtool_get_regs(struct 
net_device *dev, char __user *useraddr) { struct ethtool_regs regs; @@ -313,6 +631,9 @@ static int ethtool_reset(struct net_device *dev, char __user *useraddr) if (copy_from_user(&reset, useraddr, sizeof(reset))) return -EFAULT; + /* Clear ethtool n-tuple list */ + ethtool_ntuple_flush(dev); + ret = dev->ethtool_ops->reset(dev, &reset.data); if (ret) return ret; @@ -1112,6 +1433,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_RESET: rc = ethtool_reset(dev, useraddr); break; + case ETHTOOL_SRXNTUPLE: + rc = ethtool_set_rx_ntuple(dev, useraddr); + break; + case ETHTOOL_GRXNTUPLE: + rc = ethtool_get_rx_ntuple(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } -- cgit v1.2.2 From d1e7a03f4fee4059ee3fa7ce0edb7c48c1a75fcf Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 11 Feb 2010 12:22:28 +0100 Subject: netfilter: ctnetlink: dump expectation helper name Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_netlink.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 5c103b8c7df0..db35edac307b 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1512,6 +1512,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, const struct nf_conntrack_expect *exp) { struct nf_conn *master = exp->master; + struct nf_conntrack_helper *helper; long timeout = (exp->timeout.expires - jiffies) / HZ; if (timeout < 0) @@ -1528,6 +1529,9 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, NLA_PUT_BE32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout)); NLA_PUT_BE32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp)); + helper = rcu_dereference(nfct_help(master)->helper); + if (helper) + NLA_PUT_STRING(skb, CTA_EXPECT_HELP_NAME, helper->name); return 0; -- cgit v1.2.2 From b87921bdf25485afd8f5a5f25e86b5acef32a9cf Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 11 Feb 2010 12:22:48 +0100 Subject: netfilter: nf_conntrack: show helper and class in /proc/net/nf_conntrack_expect Make the output a bit more informative by showing the helper an expectation belongs to and the expectation class. Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_expect.c | 9 +++++++++ net/netfilter/nf_conntrack_sip.c | 3 +++ 2 files changed, 12 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 2f25ff610982..33b85f834c06 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -500,6 +500,7 @@ static void exp_seq_stop(struct seq_file *seq, void *v) static int exp_seq_show(struct seq_file *s, void *v) { struct nf_conntrack_expect *expect; + struct nf_conntrack_helper *helper; struct hlist_node *n = v; char *delim = ""; @@ -525,6 +526,14 @@ static int exp_seq_show(struct seq_file *s, void *v) if (expect->flags & NF_CT_EXPECT_INACTIVE) seq_printf(s, "%sINACTIVE", delim); + helper = rcu_dereference(nfct_help(expect->master)->helper); + if (helper) { + seq_printf(s, "%s%s", expect->flags ? 
" " : "", helper->name); + if (helper->expect_policy[expect->class].name) + seq_printf(s, "/%s", + helper->expect_policy[expect->class].name); + } + return seq_putc(s, '\n'); } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 023966b569bf..419c5cabb332 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1305,14 +1305,17 @@ static char sip_names[MAX_PORTS][2][sizeof("sip-65535")] __read_mostly; static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1] = { [SIP_EXPECT_SIGNALLING] = { + .name = "signalling", .max_expected = 1, .timeout = 3 * 60, }, [SIP_EXPECT_AUDIO] = { + .name = "audio", .max_expected = 2 * IP_CT_DIR_MAX, .timeout = 3 * 60, }, [SIP_EXPECT_VIDEO] = { + .name = "video", .max_expected = 2 * IP_CT_DIR_MAX, .timeout = 3 * 60, }, -- cgit v1.2.2 From 54101f4f3bed87fa968b57f77f8ff3b09fd88bcf Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 11 Feb 2010 12:23:12 +0100 Subject: netfilter: nf_conntrack_sip: fix ct_sip_parse_request() REGISTER request parsing When requests are parsed, the "sip:" part of the SIP URI should be skipped. Usually this doesn't matter because address parsing skips forward until after the username part, but in case REGISTER requests it doesn't contain a username and the address can not be parsed. Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_sip.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 419c5cabb332..0ca2f2b5c2fa 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -236,12 +236,13 @@ int ct_sip_parse_request(const struct nf_conn *ct, return 0; /* Find SIP URI */ - limit -= strlen("sip:"); - for (; dptr < limit; dptr++) { + for (; dptr < limit - strlen("sip:"); dptr++) { if (*dptr == '\r' || *dptr == '\n') return -1; - if (strnicmp(dptr, "sip:", strlen("sip:")) == 0) + if (strnicmp(dptr, "sip:", strlen("sip:")) == 0) { + dptr += strlen("sip:"); break; + } } if (!skp_epaddr_len(ct, dptr, limit, &shift)) return 0; -- cgit v1.2.2 From 3b6b9fab42fe98358d70735cf98d43fc18dc79c9 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 11 Feb 2010 12:23:53 +0100 Subject: netfilter: nf_conntrack_sip: pass data offset to NAT functions When using TCP multiple SIP messages might be present in a single packet. A following patch will parse them by setting the dptr to the beginning of each message. The NAT helper needs to reload the dptr value after mangling the packet however, so it needs to know the offset of the message to the beginning of the packet. 
Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_nat_sip.c | 101 ++++++++++++++++++++------------------- net/netfilter/nf_conntrack_sip.c | 82 ++++++++++++++++--------------- 2 files changed, 95 insertions(+), 88 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 07d61a57613c..2454ea5abb79 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -29,7 +29,7 @@ MODULE_DESCRIPTION("SIP NAT helper"); MODULE_ALIAS("ip_nat_sip"); -static unsigned int mangle_packet(struct sk_buff *skb, +static unsigned int mangle_packet(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int matchoff, unsigned int matchlen, const char *buffer, unsigned int buflen) @@ -42,12 +42,12 @@ static unsigned int mangle_packet(struct sk_buff *skb, return 0; /* Reload data pointer and adjust datalen value */ - *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); + *dptr = skb->data + dataoff; *datalen += buflen - matchlen; return 1; } -static int map_addr(struct sk_buff *skb, +static int map_addr(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int matchoff, unsigned int matchlen, union nf_inet_addr *addr, __be16 port) @@ -76,11 +76,11 @@ static int map_addr(struct sk_buff *skb, buflen = sprintf(buffer, "%pI4:%u", &newaddr, ntohs(newport)); - return mangle_packet(skb, dptr, datalen, matchoff, matchlen, + return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, buffer, buflen); } -static int map_sip_addr(struct sk_buff *skb, +static int map_sip_addr(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, enum sip_header_types type) { @@ -93,16 +93,17 @@ static int map_sip_addr(struct sk_buff *skb, if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, type, NULL, &matchoff, &matchlen, &addr, &port) <= 0) return 1; - return map_addr(skb, dptr, datalen, matchoff, matchlen, &addr, port); + return map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, + &addr, port); } -static unsigned int ip_nat_sip(struct sk_buff *skb, +static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - unsigned int dataoff, matchoff, matchlen; + unsigned int coff, matchoff, matchlen; union nf_inet_addr addr; __be16 port; int request, in_header; @@ -112,7 +113,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, if (ct_sip_parse_request(ct, *dptr, *datalen, &matchoff, &matchlen, &addr, &port) > 0 && - !map_addr(skb, dptr, datalen, matchoff, matchlen, + !map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, &addr, port)) return NF_DROP; request = 1; @@ -138,7 +139,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, goto next; } - if (!map_addr(skb, dptr, datalen, matchoff, matchlen, + if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, &addr, port)) return NF_DROP; @@ -153,8 +154,8 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) { buflen = sprintf(buffer, "%pI4", &ct->tuplehash[!dir].tuple.dst.u3.ip); - if (!mangle_packet(skb, dptr, datalen, poff, plen, - buffer, buflen)) + if (!mangle_packet(skb, dataoff, dptr, datalen, + poff, plen, buffer, buflen)) return NF_DROP; } @@ -167,8 +168,8 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, addr.ip != 
ct->tuplehash[!dir].tuple.src.u3.ip) { buflen = sprintf(buffer, "%pI4", &ct->tuplehash[!dir].tuple.src.u3.ip); - if (!mangle_packet(skb, dptr, datalen, poff, plen, - buffer, buflen)) + if (!mangle_packet(skb, dataoff, dptr, datalen, + poff, plen, buffer, buflen)) return NF_DROP; } @@ -181,27 +182,27 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) { __be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port; buflen = sprintf(buffer, "%u", ntohs(p)); - if (!mangle_packet(skb, dptr, datalen, poff, plen, - buffer, buflen)) + if (!mangle_packet(skb, dataoff, dptr, datalen, + poff, plen, buffer, buflen)) return NF_DROP; } } next: /* Translate Contact headers */ - dataoff = 0; + coff = 0; in_header = 0; - while (ct_sip_parse_header_uri(ct, *dptr, &dataoff, *datalen, + while (ct_sip_parse_header_uri(ct, *dptr, &coff, *datalen, SIP_HDR_CONTACT, &in_header, &matchoff, &matchlen, &addr, &port) > 0) { - if (!map_addr(skb, dptr, datalen, matchoff, matchlen, + if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, &addr, port)) return NF_DROP; } - if (!map_sip_addr(skb, dptr, datalen, SIP_HDR_FROM) || - !map_sip_addr(skb, dptr, datalen, SIP_HDR_TO)) + if (!map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_FROM) || + !map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_TO)) return NF_DROP; return NF_ACCEPT; } @@ -232,7 +233,7 @@ static void ip_nat_sip_expected(struct nf_conn *ct, } } -static unsigned int ip_nat_sip_expect(struct sk_buff *skb, +static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, struct nf_conntrack_expect *exp, unsigned int matchoff, @@ -279,8 +280,8 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, if (exp->tuple.dst.u3.ip != exp->saved_ip || exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) { buflen = sprintf(buffer, "%pI4:%u", &newip, port); - if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen, - buffer, buflen)) + if (!mangle_packet(skb, dataoff, dptr, datalen, + matchoff, matchlen, buffer, buflen)) goto err; } return NF_ACCEPT; @@ -290,7 +291,7 @@ err: return NF_DROP; } -static int mangle_content_len(struct sk_buff *skb, +static int mangle_content_len(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen) { enum ip_conntrack_info ctinfo; @@ -312,12 +313,13 @@ static int mangle_content_len(struct sk_buff *skb, return 0; buflen = sprintf(buffer, "%u", c_len); - return mangle_packet(skb, dptr, datalen, matchoff, matchlen, + return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, buffer, buflen); } -static int mangle_sdp_packet(struct sk_buff *skb, const char **dptr, - unsigned int dataoff, unsigned int *datalen, +static int mangle_sdp_packet(struct sk_buff *skb, unsigned int dataoff, + const char **dptr, unsigned int *datalen, + unsigned int sdpoff, enum sdp_header_types type, enum sdp_header_types term, char *buffer, int buflen) @@ -326,16 +328,16 @@ static int mangle_sdp_packet(struct sk_buff *skb, const char **dptr, struct nf_conn *ct = nf_ct_get(skb, &ctinfo); unsigned int matchlen, matchoff; - if (ct_sip_get_sdp_header(ct, *dptr, dataoff, *datalen, type, term, + if (ct_sip_get_sdp_header(ct, *dptr, sdpoff, *datalen, type, term, &matchoff, &matchlen) <= 0) return -ENOENT; - return mangle_packet(skb, dptr, datalen, matchoff, matchlen, + return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, buffer, buflen) ? 
0 : -EINVAL; } -static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr, - unsigned int dataoff, - unsigned int *datalen, +static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, unsigned int dataoff, + const char **dptr, unsigned int *datalen, + unsigned int sdpoff, enum sdp_header_types type, enum sdp_header_types term, const union nf_inet_addr *addr) @@ -344,16 +346,15 @@ static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr, unsigned int buflen; buflen = sprintf(buffer, "%pI4", &addr->ip); - if (mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term, + if (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, type, term, buffer, buflen)) return 0; - return mangle_content_len(skb, dptr, datalen); + return mangle_content_len(skb, dataoff, dptr, datalen); } -static unsigned int ip_nat_sdp_port(struct sk_buff *skb, - const char **dptr, - unsigned int *datalen, +static unsigned int ip_nat_sdp_port(struct sk_buff *skb, unsigned int dataoff, + const char **dptr, unsigned int *datalen, unsigned int matchoff, unsigned int matchlen, u_int16_t port) @@ -362,16 +363,16 @@ static unsigned int ip_nat_sdp_port(struct sk_buff *skb, unsigned int buflen; buflen = sprintf(buffer, "%u", port); - if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen, + if (!mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, buffer, buflen)) return 0; - return mangle_content_len(skb, dptr, datalen); + return mangle_content_len(skb, dataoff, dptr, datalen); } -static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr, - unsigned int dataoff, - unsigned int *datalen, +static unsigned int ip_nat_sdp_session(struct sk_buff *skb, unsigned int dataoff, + const char **dptr, unsigned int *datalen, + unsigned int sdpoff, const union nf_inet_addr *addr) { char buffer[sizeof("nnn.nnn.nnn.nnn")]; @@ -379,12 +380,12 @@ static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr, /* Mangle session description owner and contact addresses */ buflen = sprintf(buffer, "%pI4", &addr->ip); - if (mangle_sdp_packet(skb, dptr, dataoff, datalen, + if (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA, buffer, buflen)) return 0; - switch (mangle_sdp_packet(skb, dptr, dataoff, datalen, + switch (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA, buffer, buflen)) { case 0: @@ -401,14 +402,13 @@ static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr, return 0; } - return mangle_content_len(skb, dptr, datalen); + return mangle_content_len(skb, dataoff, dptr, datalen); } /* So, this packet has hit the connection tracking matching code. Mangle it, and change the expectation to match the new version. */ -static unsigned int ip_nat_sdp_media(struct sk_buff *skb, - const char **dptr, - unsigned int *datalen, +static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff, + const char **dptr, unsigned int *datalen, struct nf_conntrack_expect *rtp_exp, struct nf_conntrack_expect *rtcp_exp, unsigned int mediaoff, @@ -456,7 +456,8 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb, /* Update media port. 
*/ if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port && - !ip_nat_sdp_port(skb, dptr, datalen, mediaoff, medialen, port)) + !ip_nat_sdp_port(skb, dataoff, dptr, datalen, + mediaoff, medialen, port)) goto err2; return NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 0ca2f2b5c2fa..0ec37d6a2df7 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -50,12 +50,13 @@ module_param(sip_direct_media, int, 0600); MODULE_PARM_DESC(sip_direct_media, "Expect Media streams between signalling " "endpoints only (default 1)"); -unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, +unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sip_hook); unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb, + unsigned int dataoff, const char **dptr, unsigned int *datalen, struct nf_conntrack_expect *exp, @@ -63,17 +64,17 @@ unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb, unsigned int matchlen) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sip_expect_hook); -unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, +unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, unsigned int dataoff, const char **dptr, - unsigned int dataoff, unsigned int *datalen, + unsigned int sdpoff, enum sdp_header_types type, enum sdp_header_types term, const union nf_inet_addr *addr) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sdp_addr_hook); -unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, +unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int matchoff, @@ -82,14 +83,15 @@ unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_nat_sdp_port_hook); unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb, - const char **dptr, unsigned int dataoff, + const char **dptr, unsigned int *datalen, + unsigned int sdpoff, const union nf_inet_addr *addr) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sdp_session_hook); -unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, +unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, struct nf_conntrack_expect *rtp_exp, @@ -729,7 +731,7 @@ static void flush_expectations(struct nf_conn *ct, bool media) spin_unlock_bh(&nf_conntrack_lock); } -static int set_expected_rtp_rtcp(struct sk_buff *skb, +static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, union nf_inet_addr *daddr, __be16 port, enum sip_expectation_classes class, @@ -806,7 +808,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, if (direct_rtp) { nf_nat_sdp_port = rcu_dereference(nf_nat_sdp_port_hook); if (nf_nat_sdp_port && - !nf_nat_sdp_port(skb, dptr, datalen, + !nf_nat_sdp_port(skb, dataoff, dptr, datalen, mediaoff, medialen, ntohs(rtp_port))) goto err1; } @@ -828,7 +830,8 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, nf_nat_sdp_media = rcu_dereference(nf_nat_sdp_media_hook); if (nf_nat_sdp_media && ct->status & IPS_NAT_MASK && !direct_rtp) - ret = nf_nat_sdp_media(skb, dptr, datalen, rtp_exp, rtcp_exp, + ret = nf_nat_sdp_media(skb, dataoff, dptr, datalen, + rtp_exp, rtcp_exp, mediaoff, medialen, daddr); else { if (nf_ct_expect_related(rtp_exp) == 0) { @@ -867,7 +870,7 @@ static const struct sdp_media_type *sdp_media_type(const char *dptr, return NULL; } -static int 
process_sdp(struct sk_buff *skb, +static int process_sdp(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq) { @@ -942,7 +945,7 @@ static int process_sdp(struct sk_buff *skb, else return NF_DROP; - ret = set_expected_rtp_rtcp(skb, dptr, datalen, + ret = set_expected_rtp_rtcp(skb, dataoff, dptr, datalen, &rtp_addr, htons(port), t->class, mediaoff, medialen); if (ret != NF_ACCEPT) @@ -950,8 +953,9 @@ static int process_sdp(struct sk_buff *skb, /* Update media connection address if present */ if (maddr_len && nf_nat_sdp_addr && ct->status & IPS_NAT_MASK) { - ret = nf_nat_sdp_addr(skb, dptr, mediaoff, datalen, - c_hdr, SDP_HDR_MEDIA, &rtp_addr); + ret = nf_nat_sdp_addr(skb, dataoff, dptr, datalen, + mediaoff, c_hdr, SDP_HDR_MEDIA, + &rtp_addr); if (ret != NF_ACCEPT) return ret; } @@ -961,14 +965,15 @@ static int process_sdp(struct sk_buff *skb, /* Update session connection and owner addresses */ nf_nat_sdp_session = rcu_dereference(nf_nat_sdp_session_hook); if (nf_nat_sdp_session && ct->status & IPS_NAT_MASK) - ret = nf_nat_sdp_session(skb, dptr, sdpoff, datalen, &rtp_addr); + ret = nf_nat_sdp_session(skb, dataoff, dptr, datalen, sdpoff, + &rtp_addr); if (ret == NF_ACCEPT && i > 0) help->help.ct_sip_info.invite_cseq = cseq; return ret; } -static int process_invite_response(struct sk_buff *skb, +static int process_invite_response(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq, unsigned int code) { @@ -978,13 +983,13 @@ static int process_invite_response(struct sk_buff *skb, if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) - return process_sdp(skb, dptr, datalen, cseq); + return process_sdp(skb, dataoff, dptr, datalen, cseq); else if (help->help.ct_sip_info.invite_cseq == cseq) flush_expectations(ct, true); return NF_ACCEPT; } -static int process_update_response(struct sk_buff *skb, +static int process_update_response(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq, unsigned int code) { @@ -994,13 +999,13 @@ static int process_update_response(struct sk_buff *skb, if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) - return process_sdp(skb, dptr, datalen, cseq); + return process_sdp(skb, dataoff, dptr, datalen, cseq); else if (help->help.ct_sip_info.invite_cseq == cseq) flush_expectations(ct, true); return NF_ACCEPT; } -static int process_prack_response(struct sk_buff *skb, +static int process_prack_response(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq, unsigned int code) { @@ -1010,13 +1015,13 @@ static int process_prack_response(struct sk_buff *skb, if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) - return process_sdp(skb, dptr, datalen, cseq); + return process_sdp(skb, dataoff, dptr, datalen, cseq); else if (help->help.ct_sip_info.invite_cseq == cseq) flush_expectations(ct, true); return NF_ACCEPT; } -static int process_bye_request(struct sk_buff *skb, +static int process_bye_request(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq) { @@ -1031,7 +1036,7 @@ static int process_bye_request(struct sk_buff *skb, * signalling connections. The expectation is marked inactive and is activated * when receiving a response indicating success from the registrar. 
*/ -static int process_register_request(struct sk_buff *skb, +static int process_register_request(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq) { @@ -1101,7 +1106,7 @@ static int process_register_request(struct sk_buff *skb, nf_nat_sip_expect = rcu_dereference(nf_nat_sip_expect_hook); if (nf_nat_sip_expect && ct->status & IPS_NAT_MASK) - ret = nf_nat_sip_expect(skb, dptr, datalen, exp, + ret = nf_nat_sip_expect(skb, dataoff, dptr, datalen, exp, matchoff, matchlen); else { if (nf_ct_expect_related(exp) != 0) @@ -1117,7 +1122,7 @@ store_cseq: return ret; } -static int process_register_response(struct sk_buff *skb, +static int process_register_response(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq, unsigned int code) { @@ -1127,7 +1132,7 @@ static int process_register_response(struct sk_buff *skb, enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); union nf_inet_addr addr; __be16 port; - unsigned int matchoff, matchlen, dataoff = 0; + unsigned int matchoff, matchlen, coff = 0; unsigned int expires = 0; int in_contact = 0, ret; @@ -1154,7 +1159,7 @@ static int process_register_response(struct sk_buff *skb, while (1) { unsigned int c_expires = expires; - ret = ct_sip_parse_header_uri(ct, *dptr, &dataoff, *datalen, + ret = ct_sip_parse_header_uri(ct, *dptr, &coff, *datalen, SIP_HDR_CONTACT, &in_contact, &matchoff, &matchlen, &addr, &port); @@ -1193,13 +1198,13 @@ static const struct sip_handler sip_handlers[] = { SIP_HANDLER("REGISTER", process_register_request, process_register_response), }; -static int process_sip_response(struct sk_buff *skb, +static int process_sip_response(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - unsigned int matchoff, matchlen; - unsigned int code, cseq, dataoff, i; + unsigned int matchoff, matchlen, matchend; + unsigned int code, cseq, i; if (*datalen < strlen("SIP/2.0 200")) return NF_ACCEPT; @@ -1213,7 +1218,7 @@ static int process_sip_response(struct sk_buff *skb, cseq = simple_strtoul(*dptr + matchoff, NULL, 10); if (!cseq) return NF_DROP; - dataoff = matchoff + matchlen + 1; + matchend = matchoff + matchlen + 1; for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) { const struct sip_handler *handler; @@ -1221,15 +1226,16 @@ static int process_sip_response(struct sk_buff *skb, handler = &sip_handlers[i]; if (handler->response == NULL) continue; - if (*datalen < dataoff + handler->len || - strnicmp(*dptr + dataoff, handler->method, handler->len)) + if (*datalen < matchend + handler->len || + strnicmp(*dptr + matchend, handler->method, handler->len)) continue; - return handler->response(skb, dptr, datalen, cseq, code); + return handler->response(skb, dataoff, dptr, datalen, + cseq, code); } return NF_ACCEPT; } -static int process_sip_request(struct sk_buff *skb, +static int process_sip_request(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen) { enum ip_conntrack_info ctinfo; @@ -1254,7 +1260,7 @@ static int process_sip_request(struct sk_buff *skb, if (!cseq) return NF_DROP; - return handler->request(skb, dptr, datalen, cseq); + return handler->request(skb, dataoff, dptr, datalen, cseq); } return NF_ACCEPT; } @@ -1288,13 +1294,13 @@ static int sip_help(struct sk_buff *skb, return NF_ACCEPT; if (strnicmp(dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0) - ret = process_sip_request(skb, &dptr, &datalen); + ret = 
process_sip_request(skb, dataoff, &dptr, &datalen); else - ret = process_sip_response(skb, &dptr, &datalen); + ret = process_sip_response(skb, dataoff, &dptr, &datalen); if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) { nf_nat_sip = rcu_dereference(nf_nat_sip_hook); - if (nf_nat_sip && !nf_nat_sip(skb, &dptr, &datalen)) + if (nf_nat_sip && !nf_nat_sip(skb, dataoff, &dptr, &datalen)) ret = NF_DROP; } -- cgit v1.2.2 From f5b321bd37fbec9188feb1f721ab46a5ac0b35da Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 11 Feb 2010 12:26:19 +0100 Subject: netfilter: nf_conntrack_sip: add TCP support Add TCP support, which is mandated by RFC3261 for all SIP elements. SIP over TCP is similar to UDP, except that messages are delimited by Content-Length: headers and multiple messages may appear in one packet. Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_nat_sip.c | 2 +- net/netfilter/nf_conntrack_sip.c | 205 +++++++++++++++++++++++++++++++++------ 2 files changed, 177 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 2454ea5abb79..b232e4040dc6 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -122,7 +122,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, /* Translate topmost Via header and parameters */ if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, - SIP_HDR_VIA, NULL, &matchoff, &matchlen, + SIP_HDR_VIA_UDP, NULL, &matchoff, &matchlen, &addr, &port) > 0) { unsigned int matchend, poff, plen, buflen, n; char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 0ec37d6a2df7..1cc75c5a822b 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -287,7 +288,8 @@ static const struct sip_header ct_sip_hdrs[] = { [SIP_HDR_FROM] = SIP_HDR("From", "f", "sip:", skp_epaddr_len), [SIP_HDR_TO] = SIP_HDR("To", "t", "sip:", skp_epaddr_len), [SIP_HDR_CONTACT] = SIP_HDR("Contact", "m", "sip:", skp_epaddr_len), - [SIP_HDR_VIA] = SIP_HDR("Via", "v", "UDP ", epaddr_len), + [SIP_HDR_VIA_UDP] = SIP_HDR("Via", "v", "UDP ", epaddr_len), + [SIP_HDR_VIA_TCP] = SIP_HDR("Via", "v", "TCP ", epaddr_len), [SIP_HDR_EXPIRES] = SIP_HDR("Expires", NULL, NULL, digits_len), [SIP_HDR_CONTENT_LENGTH] = SIP_HDR("Content-Length", "l", NULL, digits_len), }; @@ -519,6 +521,33 @@ int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr, } EXPORT_SYMBOL_GPL(ct_sip_parse_header_uri); +static int ct_sip_parse_param(const struct nf_conn *ct, const char *dptr, + unsigned int dataoff, unsigned int datalen, + const char *name, + unsigned int *matchoff, unsigned int *matchlen) +{ + const char *limit = dptr + datalen; + const char *start; + const char *end; + + limit = ct_sip_header_search(dptr + dataoff, limit, ",", strlen(",")); + if (!limit) + limit = dptr + datalen; + + start = ct_sip_header_search(dptr + dataoff, limit, name, strlen(name)); + if (!start) + return 0; + start += strlen(name); + + end = ct_sip_header_search(start, limit, ";", strlen(";")); + if (!end) + end = limit; + + *matchoff = start - dptr; + *matchlen = end - start; + return 1; +} + /* Parse address from header parameter and return address, offset and length */ int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr, unsigned int dataoff, unsigned int datalen, @@ -577,6 +606,29 @@ int 
ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr, } EXPORT_SYMBOL_GPL(ct_sip_parse_numerical_param); +static int ct_sip_parse_transport(struct nf_conn *ct, const char *dptr, + unsigned int dataoff, unsigned int datalen, + u8 *proto) +{ + unsigned int matchoff, matchlen; + + if (ct_sip_parse_param(ct, dptr, dataoff, datalen, "transport=", + &matchoff, &matchlen)) { + if (!strnicmp(dptr + matchoff, "TCP", strlen("TCP"))) + *proto = IPPROTO_TCP; + else if (!strnicmp(dptr + matchoff, "UDP", strlen("UDP"))) + *proto = IPPROTO_UDP; + else + return 0; + + if (*proto != nf_ct_protonum(ct)) + return 0; + } else + *proto = nf_ct_protonum(ct); + + return 1; +} + /* SDP header parsing: a SDP session description contains an ordered set of * headers, starting with a section containing general session parameters, * optionally followed by multiple media descriptions. @@ -685,7 +737,7 @@ static int ct_sip_parse_sdp_addr(const struct nf_conn *ct, const char *dptr, static int refresh_signalling_expectation(struct nf_conn *ct, union nf_inet_addr *addr, - __be16 port, + u8 proto, __be16 port, unsigned int expires) { struct nf_conn_help *help = nfct_help(ct); @@ -697,6 +749,7 @@ static int refresh_signalling_expectation(struct nf_conn *ct, hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) { if (exp->class != SIP_EXPECT_SIGNALLING || !nf_inet_addr_cmp(&exp->tuple.dst.u3, addr) || + exp->tuple.dst.protonum != proto || exp->tuple.dst.u.udp.port != port) continue; if (!del_timer(&exp->timeout)) @@ -1048,6 +1101,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_expect *exp; union nf_inet_addr *saddr, daddr; __be16 port; + u8 proto; unsigned int expires = 0; int ret; typeof(nf_nat_sip_expect_hook) nf_nat_sip_expect; @@ -1080,6 +1134,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int dataoff, if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, &daddr)) return NF_ACCEPT; + if (ct_sip_parse_transport(ct, *dptr, matchoff + matchlen, *datalen, + &proto) == 0) + return NF_ACCEPT; + if (ct_sip_parse_numerical_param(ct, *dptr, matchoff + matchlen, *datalen, "expires=", NULL, NULL, &expires) < 0) @@ -1099,7 +1157,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int dataoff, saddr = &ct->tuplehash[!dir].tuple.src.u3; nf_ct_expect_init(exp, SIP_EXPECT_SIGNALLING, nf_ct_l3num(ct), - saddr, &daddr, IPPROTO_UDP, NULL, &port); + saddr, &daddr, proto, NULL, &port); exp->timeout.expires = sip_timeout * HZ; exp->helper = nfct_help(ct)->helper; exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE; @@ -1132,6 +1190,7 @@ static int process_register_response(struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); union nf_inet_addr addr; __be16 port; + u8 proto; unsigned int matchoff, matchlen, coff = 0; unsigned int expires = 0; int in_contact = 0, ret; @@ -1172,6 +1231,10 @@ static int process_register_response(struct sk_buff *skb, unsigned int dataoff, if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, &addr)) continue; + if (ct_sip_parse_transport(ct, *dptr, matchoff + matchlen, + *datalen, &proto) == 0) + continue; + ret = ct_sip_parse_numerical_param(ct, *dptr, matchoff + matchlen, *datalen, "expires=", @@ -1180,7 +1243,8 @@ static int process_register_response(struct sk_buff *skb, unsigned int dataoff, return NF_DROP; if (c_expires == 0) break; - if (refresh_signalling_expectation(ct, &addr, port, c_expires)) + if 
(refresh_signalling_expectation(ct, &addr, proto, port, + c_expires)) return NF_ACCEPT; } @@ -1265,50 +1329,123 @@ static int process_sip_request(struct sk_buff *skb, unsigned int dataoff, return NF_ACCEPT; } -static int sip_help(struct sk_buff *skb, - unsigned int protoff, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo) +static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct, + unsigned int dataoff, const char **dptr, + unsigned int *datalen) +{ + typeof(nf_nat_sip_hook) nf_nat_sip; + int ret; + + if (strnicmp(*dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0) + ret = process_sip_request(skb, dataoff, dptr, datalen); + else + ret = process_sip_response(skb, dataoff, dptr, datalen); + + if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) { + nf_nat_sip = rcu_dereference(nf_nat_sip_hook); + if (nf_nat_sip && !nf_nat_sip(skb, dataoff, dptr, datalen)) + ret = NF_DROP; + } + + return ret; +} + +static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, enum ip_conntrack_info ctinfo) { + struct tcphdr *th, _tcph; unsigned int dataoff, datalen; - const char *dptr; + unsigned int matchoff, matchlen, clen; + unsigned int msglen, origlen; + const char *dptr, *end; + s16 diff, tdiff = 0; int ret; - typeof(nf_nat_sip_hook) nf_nat_sip; + + if (ctinfo != IP_CT_ESTABLISHED && + ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) + return NF_ACCEPT; /* No Data ? */ - dataoff = protoff + sizeof(struct udphdr); + th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); + if (th == NULL) + return NF_ACCEPT; + dataoff = protoff + th->doff * 4; if (dataoff >= skb->len) return NF_ACCEPT; nf_ct_refresh(ct, skb, sip_timeout * HZ); - if (!skb_is_nonlinear(skb)) - dptr = skb->data + dataoff; - else { + if (skb_is_nonlinear(skb)) { pr_debug("Copy of skbuff not supported yet.\n"); return NF_ACCEPT; } + dptr = skb->data + dataoff; datalen = skb->len - dataoff; if (datalen < strlen("SIP/2.0 200")) return NF_ACCEPT; - if (strnicmp(dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0) - ret = process_sip_request(skb, dataoff, &dptr, &datalen); - else - ret = process_sip_response(skb, dataoff, &dptr, &datalen); + while (1) { + if (ct_sip_get_header(ct, dptr, 0, datalen, + SIP_HDR_CONTENT_LENGTH, + &matchoff, &matchlen) <= 0) + break; - if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) { - nf_nat_sip = rcu_dereference(nf_nat_sip_hook); - if (nf_nat_sip && !nf_nat_sip(skb, dataoff, &dptr, &datalen)) - ret = NF_DROP; + clen = simple_strtoul(dptr + matchoff, (char **)&end, 10); + if (dptr + matchoff == end) + break; + + if (end + strlen("\r\n\r\n") > dptr + datalen) + break; + if (end[0] != '\r' || end[1] != '\n' || + end[2] != '\r' || end[3] != '\n') + break; + end += strlen("\r\n\r\n") + clen; + + msglen = origlen = end - dptr; + + ret = process_sip_msg(skb, ct, dataoff, &dptr, &msglen); + if (ret != NF_ACCEPT) + break; + diff = msglen - origlen; + tdiff += diff; + + dataoff += msglen; + dptr += msglen; + datalen = datalen + diff - msglen; } return ret; } -static struct nf_conntrack_helper sip[MAX_PORTS][2] __read_mostly; -static char sip_names[MAX_PORTS][2][sizeof("sip-65535")] __read_mostly; +static int sip_help_udp(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, enum ip_conntrack_info ctinfo) +{ + unsigned int dataoff, datalen; + const char *dptr; + + /* No Data ? 
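The Content-Length based framing that sip_help_tcp() above implements can be sketched in plain user-space C: a SIP message ends Content-Length octets after the blank line that terminates its headers, and several messages may share one TCP segment. Simplified sketch (naive header search, no case folding, search not bounded to one message), illustrative only and not part of the patch:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Return the length of the first SIP message in buf, or 0 if incomplete:
 * the message ends Content-Length bytes after the "\r\n\r\n" that closes
 * the header section; several messages may be packed into one segment. */
static size_t sip_msg_len(const char *buf, size_t len)
{
	const char *cl, *eoh;
	size_t clen, hdrlen;

	cl = strstr(buf, "Content-Length:");
	if (!cl)
		return 0;
	clen = strtoul(cl + strlen("Content-Length:"), NULL, 10);

	eoh = strstr(buf, "\r\n\r\n");
	if (!eoh)
		return 0;
	hdrlen = eoh + strlen("\r\n\r\n") - buf;

	return hdrlen + clen <= len ? hdrlen + clen : 0;
}

int main(void)
{
	const char pkt[] =
		"REGISTER sip:example.org SIP/2.0\r\n"
		"Content-Length: 4\r\n"
		"\r\n"
		"abcdINVITE sip:bob@example.org SIP/2.0\r\n"
		"Content-Length: 0\r\n"
		"\r\n";
	size_t off = 0, n;

	while ((n = sip_msg_len(pkt + off, sizeof(pkt) - 1 - off)) > 0) {
		printf("message of %zu bytes at offset %zu\n", n, off);
		off += n;
	}
	return 0;
}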
*/ + dataoff = protoff + sizeof(struct udphdr); + if (dataoff >= skb->len) + return NF_ACCEPT; + + nf_ct_refresh(ct, skb, sip_timeout * HZ); + + if (skb_is_nonlinear(skb)) { + pr_debug("Copy of skbuff not supported yet.\n"); + return NF_ACCEPT; + } + + dptr = skb->data + dataoff; + datalen = skb->len - dataoff; + if (datalen < strlen("SIP/2.0 200")) + return NF_ACCEPT; + + return process_sip_msg(skb, ct, dataoff, &dptr, &datalen); +} + +static struct nf_conntrack_helper sip[MAX_PORTS][4] __read_mostly; +static char sip_names[MAX_PORTS][4][sizeof("sip-65535")] __read_mostly; static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1] = { [SIP_EXPECT_SIGNALLING] = { @@ -1333,7 +1470,7 @@ static void nf_conntrack_sip_fini(void) int i, j; for (i = 0; i < ports_c; i++) { - for (j = 0; j < 2; j++) { + for (j = 0; j < ARRAY_SIZE(sip[i]); j++) { if (sip[i][j].me == NULL) continue; nf_conntrack_helper_unregister(&sip[i][j]); @@ -1353,14 +1490,24 @@ static int __init nf_conntrack_sip_init(void) memset(&sip[i], 0, sizeof(sip[i])); sip[i][0].tuple.src.l3num = AF_INET; - sip[i][1].tuple.src.l3num = AF_INET6; - for (j = 0; j < 2; j++) { - sip[i][j].tuple.dst.protonum = IPPROTO_UDP; + sip[i][0].tuple.dst.protonum = IPPROTO_UDP; + sip[i][0].help = sip_help_udp; + sip[i][1].tuple.src.l3num = AF_INET; + sip[i][1].tuple.dst.protonum = IPPROTO_TCP; + sip[i][1].help = sip_help_tcp; + + sip[i][2].tuple.src.l3num = AF_INET6; + sip[i][2].tuple.dst.protonum = IPPROTO_UDP; + sip[i][2].help = sip_help_udp; + sip[i][3].tuple.src.l3num = AF_INET6; + sip[i][3].tuple.dst.protonum = IPPROTO_TCP; + sip[i][3].help = sip_help_tcp; + + for (j = 0; j < ARRAY_SIZE(sip[i]); j++) { sip[i][j].tuple.src.u.udp.port = htons(ports[i]); sip[i][j].expect_policy = sip_exp_policy; sip[i][j].expect_class_max = SIP_EXPECT_MAX; sip[i][j].me = THIS_MODULE; - sip[i][j].help = sip_help; tmpname = &sip_names[i][j][0]; if (ports[i] == SIP_PORT) -- cgit v1.2.2 From 010c0b9f34a4c567b431f8b49a58b7332ed42e47 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 11 Feb 2010 12:27:09 +0100 Subject: netfilter: nf_nat: support mangling a single TCP packet multiple times nf_nat_mangle_tcp_packet() can currently only handle a single mangling per window because it only maintains two sequence adjustment positions: the one before the last adjustment and the one after. This patch makes sequence number adjustment tracking in nf_nat_mangle_tcp_packet() optional and allows a helper to manually update the offsets after the packet has been fully handled. Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_nat_helper.c | 39 ++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 7f10a6be0191..4b6af4bb1f50 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -141,6 +141,17 @@ static int enlarge_skb(struct sk_buff *skb, unsigned int extra) return 1; } +void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo, + __be32 seq, s16 off) +{ + if (!off) + return; + set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); + adjust_tcp_sequence(ntohl(seq), off, ct, ctinfo); + nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); +} +EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust); + /* Generic function for mangling variable-length address changes inside * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX * command in FTP). 
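The change described above is easier to picture outside the kernel: every in-place rewrite that grows or shrinks a TCP payload contributes a length delta, and rather than registering one sequence adjustment per rewrite, the deltas can be summed and applied once for the whole segment (which is what the SIP helper's tdiff accumulation relies on). A minimal user-space sketch with a hypothetical mangle() helper, not the kernel implementation:

#include <stdio.h>
#include <string.h>

/* Replace [off, off+mlen) in buf with rep (rlen bytes) and return the
 * change in payload length, like a simplified mangle helper would. */
static int mangle(char *buf, size_t *len, size_t cap,
		  size_t off, size_t mlen, const char *rep, size_t rlen)
{
	if (*len - mlen + rlen > cap)
		return 0;
	memmove(buf + off + rlen, buf + off + mlen, *len - off - mlen);
	memcpy(buf + off, rep, rlen);
	*len += rlen - mlen;
	return (int)rlen - (int)mlen;
}

int main(void)
{
	char payload[128] = "c=IN IP4 10.0.0.1\r\nm=audio 1234 RTP\r\n";
	size_t len = strlen(payload);
	int seq_off = 0;

	/* two independent rewrites in the same segment */
	seq_off += mangle(payload, &len, sizeof(payload), 9,
			  strlen("10.0.0.1"), "192.0.2.10", strlen("192.0.2.10"));
	seq_off += mangle(payload, &len, sizeof(payload),
			  len - strlen("1234 RTP\r\n"),
			  strlen("1234"), "30000", strlen("30000"));

	printf("%s", payload);
	printf("apply one sequence adjustment of %+d bytes\n", seq_off);
	return 0;
}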
@@ -149,14 +160,13 @@ static int enlarge_skb(struct sk_buff *skb, unsigned int extra) * skb enlargement, ... * * */ -int -nf_nat_mangle_tcp_packet(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int match_offset, - unsigned int match_len, - const char *rep_buffer, - unsigned int rep_len) +int __nf_nat_mangle_tcp_packet(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int match_offset, + unsigned int match_len, + const char *rep_buffer, + unsigned int rep_len, bool adjust) { struct rtable *rt = skb_rtable(skb); struct iphdr *iph; @@ -202,16 +212,13 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb, inet_proto_csum_replace2(&tcph->check, skb, htons(oldlen), htons(datalen), 1); - if (rep_len != match_len) { - set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); - adjust_tcp_sequence(ntohl(tcph->seq), - (int)rep_len - (int)match_len, - ct, ctinfo); - nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); - } + if (adjust && rep_len != match_len) + nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq, + (int)rep_len - (int)match_len); + return 1; } -EXPORT_SYMBOL(nf_nat_mangle_tcp_packet); +EXPORT_SYMBOL(__nf_nat_mangle_tcp_packet); /* Generic function for mangling variable-length address changes inside * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX -- cgit v1.2.2 From 48f8ac26537c1b7b1a2422f5232f45d06c945348 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 11 Feb 2010 12:29:38 +0100 Subject: netfilter: nf_nat_sip: add TCP support Add support for mangling TCP SIP packets. Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_nat_sip.c | 53 +++++++++++++++++++++++++++++++++++----- net/netfilter/nf_conntrack_sip.c | 10 ++++++++ 2 files changed, 57 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index b232e4040dc6..11b538deaaec 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -1,4 +1,4 @@ -/* SIP extension for UDP NAT alteration. +/* SIP extension for NAT alteration. * * (C) 2005 by Christian Hentschel * based on RR's ip_nat_ftp.c and other modules. 
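The offset conversion done by mangle_packet() in the hunk that follows is easier to read with concrete numbers: conntrack reports match offsets relative to the start of the current SIP message (dataoff), while the NAT mangle helpers expect offsets relative to the start of the transport payload (baseoff), hence matchoff += dataoff - baseoff. A toy calculation with made-up header sizes:

#include <stdio.h>

int main(void)
{
	unsigned int iphdr = 20, tcphdr = 32;	/* example header sizes   */
	unsigned int baseoff = iphdr + tcphdr;	/* start of TCP payload   */
	unsigned int dataoff = baseoff + 500;	/* 2nd SIP msg in segment */
	unsigned int matchoff = 40;		/* offset inside that msg */

	/* same conversion as mangle_packet(): matchoff += dataoff - baseoff */
	printf("payload-relative offset = %u\n", matchoff + (dataoff - baseoff));
	return 0;
}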
@@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -36,10 +37,27 @@ static unsigned int mangle_packet(struct sk_buff *skb, unsigned int dataoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - - if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, matchoff, matchlen, - buffer, buflen)) - return 0; + struct tcphdr *th; + unsigned int baseoff; + + if (nf_ct_protonum(ct) == IPPROTO_TCP) { + th = (struct tcphdr *)(skb->data + ip_hdrlen(skb)); + baseoff = ip_hdrlen(skb) + th->doff * 4; + matchoff += dataoff - baseoff; + + if (!__nf_nat_mangle_tcp_packet(skb, ct, ctinfo, + matchoff, matchlen, + buffer, buflen, false)) + return 0; + } else { + baseoff = ip_hdrlen(skb) + sizeof(struct udphdr); + matchoff += dataoff - baseoff; + + if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, + matchoff, matchlen, + buffer, buflen)) + return 0; + } /* Reload data pointer and adjust datalen value */ *dptr = skb->data + dataoff; @@ -104,6 +122,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned int coff, matchoff, matchlen; + enum sip_header_types hdr; union nf_inet_addr addr; __be16 port; int request, in_header; @@ -120,9 +139,14 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, } else request = 0; + if (nf_ct_protonum(ct) == IPPROTO_TCP) + hdr = SIP_HDR_VIA_TCP; + else + hdr = SIP_HDR_VIA_UDP; + /* Translate topmost Via header and parameters */ if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, - SIP_HDR_VIA_UDP, NULL, &matchoff, &matchlen, + hdr, NULL, &matchoff, &matchlen, &addr, &port) > 0) { unsigned int matchend, poff, plen, buflen, n; char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; @@ -204,9 +228,23 @@ next: if (!map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_FROM) || !map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_TO)) return NF_DROP; + return NF_ACCEPT; } +static void ip_nat_sip_seq_adjust(struct sk_buff *skb, s16 off) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + const struct tcphdr *th; + + if (nf_ct_protonum(ct) != IPPROTO_TCP || off == 0) + return; + + th = (struct tcphdr *)(skb->data + ip_hdrlen(skb)); + nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off); +} + /* Handles expected signalling connections and media streams */ static void ip_nat_sip_expected(struct nf_conn *ct, struct nf_conntrack_expect *exp) @@ -472,6 +510,7 @@ err1: static void __exit nf_nat_sip_fini(void) { rcu_assign_pointer(nf_nat_sip_hook, NULL); + rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, NULL); rcu_assign_pointer(nf_nat_sip_expect_hook, NULL); rcu_assign_pointer(nf_nat_sdp_addr_hook, NULL); rcu_assign_pointer(nf_nat_sdp_port_hook, NULL); @@ -483,12 +522,14 @@ static void __exit nf_nat_sip_fini(void) static int __init nf_nat_sip_init(void) { BUG_ON(nf_nat_sip_hook != NULL); + BUG_ON(nf_nat_sip_seq_adjust_hook != NULL); BUG_ON(nf_nat_sip_expect_hook != NULL); BUG_ON(nf_nat_sdp_addr_hook != NULL); BUG_ON(nf_nat_sdp_port_hook != NULL); BUG_ON(nf_nat_sdp_session_hook != NULL); BUG_ON(nf_nat_sdp_media_hook != NULL); rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip); + rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, ip_nat_sip_seq_adjust); rcu_assign_pointer(nf_nat_sip_expect_hook, ip_nat_sip_expect); rcu_assign_pointer(nf_nat_sdp_addr_hook, ip_nat_sdp_addr); rcu_assign_pointer(nf_nat_sdp_port_hook, ip_nat_sdp_port); diff --git a/net/netfilter/nf_conntrack_sip.c 
b/net/netfilter/nf_conntrack_sip.c index 1cc75c5a822b..3bb3aaff76e9 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -56,6 +56,9 @@ unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int dataoff, unsigned int *datalen) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sip_hook); +void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, s16 off) __read_mostly; +EXPORT_SYMBOL_GPL(nf_nat_sip_seq_adjust_hook); + unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb, unsigned int dataoff, const char **dptr, @@ -1360,6 +1363,7 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, const char *dptr, *end; s16 diff, tdiff = 0; int ret; + typeof(nf_nat_sip_seq_adjust_hook) nf_nat_sip_seq_adjust; if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) @@ -1415,6 +1419,12 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, datalen = datalen + diff - msglen; } + if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) { + nf_nat_sip_seq_adjust = rcu_dereference(nf_nat_sip_seq_adjust_hook); + if (nf_nat_sip_seq_adjust) + nf_nat_sip_seq_adjust(skb, tdiff); + } + return ret; } -- cgit v1.2.2 From 9d288dffe3a276e1f06ba556845c456d696c5a4f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 11 Feb 2010 12:30:21 +0100 Subject: netfilter: nf_conntrack_sip: add T.38 FAX support Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_sip.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 3bb3aaff76e9..fbe8ff5a420a 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -907,6 +907,7 @@ err1: static const struct sdp_media_type sdp_media_types[] = { SDP_MEDIA_TYPE("audio ", SIP_EXPECT_AUDIO), SDP_MEDIA_TYPE("video ", SIP_EXPECT_VIDEO), + SDP_MEDIA_TYPE("image ", SIP_EXPECT_IMAGE), }; static const struct sdp_media_type *sdp_media_type(const char *dptr, @@ -932,7 +933,6 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - struct nf_conn_help *help = nfct_help(ct); unsigned int matchoff, matchlen; unsigned int mediaoff, medialen; unsigned int sdpoff; @@ -1024,9 +1024,6 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff, ret = nf_nat_sdp_session(skb, dataoff, dptr, datalen, sdpoff, &rtp_addr); - if (ret == NF_ACCEPT && i > 0) - help->help.ct_sip_info.invite_cseq = cseq; - return ret; } static int process_invite_response(struct sk_buff *skb, unsigned int dataoff, @@ -1077,6 +1074,22 @@ static int process_prack_response(struct sk_buff *skb, unsigned int dataoff, return NF_ACCEPT; } +static int process_invite_request(struct sk_buff *skb, unsigned int dataoff, + const char **dptr, unsigned int *datalen, + unsigned int cseq) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn_help *help = nfct_help(ct); + unsigned int ret; + + flush_expectations(ct, true); + ret = process_sdp(skb, dataoff, dptr, datalen, cseq); + if (ret == NF_ACCEPT) + help->help.ct_sip_info.invite_cseq = cseq; + return ret; +} + static int process_bye_request(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq) @@ -1257,7 +1270,7 @@ flush: } static const struct sip_handler sip_handlers[] = { - SIP_HANDLER("INVITE", process_sdp, process_invite_response), + 
SIP_HANDLER("INVITE", process_invite_request, process_invite_response), SIP_HANDLER("UPDATE", process_sdp, process_update_response), SIP_HANDLER("ACK", process_sdp, NULL), SIP_HANDLER("PRACK", process_sdp, process_prack_response), @@ -1473,6 +1486,11 @@ static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1 .max_expected = 2 * IP_CT_DIR_MAX, .timeout = 3 * 60, }, + [SIP_EXPECT_IMAGE] = { + .name = "image", + .max_expected = IP_CT_DIR_MAX, + .timeout = 3 * 60, + }, }; static void nf_conntrack_sip_fini(void) -- cgit v1.2.2 From b2907e501945d5d13326a6a7441c13e41cd0e799 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 11 Feb 2010 18:41:35 +0100 Subject: netfilter: xtables: fix mangle tables In POST_ROUTING hook, calling dev_net(in) is going to oops. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/iptable_mangle.c | 4 +++- net/ipv6/netfilter/ip6table_mangle.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 58d7097baa3d..c8333305d631 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -87,7 +87,9 @@ iptable_mangle_hook(unsigned int hook, { if (hook == NF_INET_LOCAL_OUT) return ipt_local_hook(hook, skb, in, out, okfn); - + if (hook == NF_INET_POST_ROUTING) + return ipt_do_table(skb, hook, in, out, + dev_net(out)->ipv4.iptable_mangle); /* PREROUTING/INPUT/FORWARD: */ return ipt_do_table(skb, hook, in, out, dev_net(in)->ipv4.iptable_mangle); diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index dc803b7e8e54..b6216dede52c 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -81,7 +81,9 @@ ip6table_mangle_hook(unsigned int hook, struct sk_buff *skb, { if (hook == NF_INET_LOCAL_OUT) return ip6t_local_out_hook(hook, skb, out, okfn); - + if (hook == NF_INET_POST_ROUTING) + return ip6t_do_table(skb, hook, in, out, + dev_net(out)->ipv6.ip6table_mangle); /* INPUT/FORWARD */ return ip6t_do_table(skb, hook, in, out, dev_net(in)->ipv6.ip6table_mangle); -- cgit v1.2.2 From 8e5574211d96c0552f84c757718475fdb4021be7 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 11 Feb 2010 12:14:23 -0800 Subject: ethtool: Use explicit designated initializers for .cmd Initialize the .cmd member of various ethtool using a designated struct initializer rather. This makes things a teeny bit more robust, although the chance of a struct layout changing is extremely remote, and also makes the code a little easier to read. Signed-off-by: Roland Dreier Signed-off-by: David S. 
Miller --- net/core/ethtool.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 6ec73d3983a3..a1280f643bf4 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -163,7 +163,7 @@ EXPORT_SYMBOL(ethtool_ntuple_flush); static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) { - struct ethtool_cmd cmd = { ETHTOOL_GSET }; + struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET }; int err; if (!dev->ethtool_ops->get_settings) @@ -645,7 +645,7 @@ static int ethtool_reset(struct net_device *dev, char __user *useraddr) static int ethtool_get_wol(struct net_device *dev, char __user *useraddr) { - struct ethtool_wolinfo wol = { ETHTOOL_GWOL }; + struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL }; if (!dev->ethtool_ops->get_wol) return -EOPNOTSUPP; @@ -779,7 +779,7 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr) static int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr) { - struct ethtool_coalesce coalesce = { ETHTOOL_GCOALESCE }; + struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE }; if (!dev->ethtool_ops->get_coalesce) return -EOPNOTSUPP; @@ -806,7 +806,7 @@ static int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr) static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr) { - struct ethtool_ringparam ringparam = { ETHTOOL_GRINGPARAM }; + struct ethtool_ringparam ringparam = { .cmd = ETHTOOL_GRINGPARAM }; if (!dev->ethtool_ops->get_ringparam) return -EOPNOTSUPP; @@ -1160,7 +1160,7 @@ static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr) static int ethtool_get_value(struct net_device *dev, char __user *useraddr, u32 cmd, u32 (*actor)(struct net_device *)) { - struct ethtool_value edata = { cmd }; + struct ethtool_value edata = { .cmd = cmd }; if (!actor) return -EOPNOTSUPP; -- cgit v1.2.2 From ca1c2e2da9637c131436bf6d6ae41b58f5353afe Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 12 Feb 2010 06:25:36 +0100 Subject: netfilter: don't use INIT_RCU_HEAD() call_rcu() will unconditionally reinitialize RCU head anyway. Signed-off-by: Alexey Dobriyan Reviewed-by: Paul E. 
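As a standalone illustration of the ethtool change above: a positional initializer puts the value into whichever member happens to come first, while a designated initializer names the member and keeps working if the struct is ever reordered. Toy struct below, not the real ethtool definitions:

#include <stdio.h>

struct cmd_like {
	unsigned int cmd;	/* must receive the command code */
	unsigned int flags;
};

int main(void)
{
	struct cmd_like a = { 42 };		/* positional: relies on .cmd being first */
	struct cmd_like b = { .cmd = 42 };	/* designated: explicit and self-documenting */

	printf("a.cmd=%u b.cmd=%u\n", a.cmd, b.cmd);
	return 0;
}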
McKenney Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_expect.c | 1 - net/netfilter/nf_conntrack_extend.c | 1 - net/netfilter/nfnetlink_queue.c | 1 - 3 files changed, 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 33b85f834c06..6182fb1b55de 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -232,7 +232,6 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me) new->master = me; atomic_set(&new->use, 1); - INIT_RCU_HEAD(&new->rcu); return new; } EXPORT_SYMBOL_GPL(nf_ct_expect_alloc); diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index fef95be334bd..fdc8fb4ae10f 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -59,7 +59,6 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp) if (!*ext) return NULL; - INIT_RCU_HEAD(&(*ext)->rcu); (*ext)->offset[id] = off; (*ext)->len = len; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 90cf36decea3..7ba4abc405c9 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -112,7 +112,6 @@ instance_create(u_int16_t queue_num, int pid) inst->copy_mode = NFQNL_COPY_NONE; spin_lock_init(&inst->lock); INIT_LIST_HEAD(&inst->queue_list); - INIT_RCU_HEAD(&inst->rcu); if (!try_module_get(THIS_MODULE)) { err = -EAGAIN; -- cgit v1.2.2 From 4cd24eaf0c6ee7f0242e34ee77ec899f255e66b5 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 8 Feb 2010 04:30:35 +0000 Subject: net: use netdev_mc_count and netdev_mc_empty when appropriate This patch replaces dev->mc_count in all drivers (hopefully I didn't miss anything). Used spatch and did small tweaks and conding style changes when it was suitable. Jirka Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/bluetooth/bnep/netdev.c | 6 ++++-- net/core/dev.c | 2 +- net/irda/irlan/irlan_eth.c | 5 +++-- net/mac80211/iface.c | 2 +- 4 files changed, 9 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index 26fb831ef7e0..b6234b73c4cf 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -64,7 +64,7 @@ static void bnep_net_set_mc_list(struct net_device *dev) struct sk_buff *skb; int size; - BT_DBG("%s mc_count %d", dev->name, dev->mc_count); + BT_DBG("%s mc_count %d", dev->name, netdev_mc_count(dev)); size = sizeof(*r) + (BNEP_MAX_MULTICAST_FILTERS + 1) * ETH_ALEN * 2; skb = alloc_skb(size, GFP_ATOMIC); @@ -97,7 +97,9 @@ static void bnep_net_set_mc_list(struct net_device *dev) /* FIXME: We should group addresses here. 
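The conversion being applied in this patch is the usual accessor move: stop open-coding dev->mc_count and go through helpers so the representation of the multicast list can change later. A toy model with hypothetical names, not the real net_device:

#include <stdio.h>

struct toy_netdev {
	int mc_count;			/* today: a plain counter */
};

#define toy_mc_count(dev)	((dev)->mc_count)
#define toy_mc_empty(dev)	(toy_mc_count(dev) == 0)

int main(void)
{
	struct toy_netdev dev = { .mc_count = 3 };
	int i, max_filters = 2;

	if (toy_mc_empty(&dev))
		return 0;

	/* bounded walk, like the bnep filter programming in this patch */
	for (i = 0; i < toy_mc_count(&dev) && i < max_filters; i++)
		printf("program multicast filter %d\n", i);
	return 0;
}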
*/ - for (i = 0; i < dev->mc_count && i < BNEP_MAX_MULTICAST_FILTERS; i++) { + for (i = 0; + i < netdev_mc_count(dev) && i < BNEP_MAX_MULTICAST_FILTERS; + i++) { memcpy(__skb_put(skb, ETH_ALEN), dmi->dmi_addr, ETH_ALEN); memcpy(__skb_put(skb, ETH_ALEN), dmi->dmi_addr, ETH_ALEN); dmi = dmi->next; diff --git a/net/core/dev.c b/net/core/dev.c index ae75f25ac0a5..d1cf53d0d597 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4263,7 +4263,7 @@ static void dev_addr_discard(struct net_device *dev) netif_addr_lock_bh(dev); __dev_addr_discard(&dev->mc_list); - dev->mc_count = 0; + netdev_mc_count(dev) = 0; netif_addr_unlock_bh(dev); } diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index d340110f5c0c..9616c32d1076 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -321,14 +321,15 @@ static void irlan_eth_set_multicast_list(struct net_device *dev) /* Enable promiscuous mode */ IRDA_WARNING("Promiscuous mode not implemented by IrLAN!\n"); } - else if ((dev->flags & IFF_ALLMULTI) || dev->mc_count > HW_MAX_ADDRS) { + else if ((dev->flags & IFF_ALLMULTI) || + netdev_mc_count(dev) > HW_MAX_ADDRS) { /* Disable promiscuous mode, use normal mode. */ IRDA_DEBUG(4, "%s(), Setting multicast filter\n", __func__ ); /* hardware_set_filter(NULL); */ irlan_set_multicast_filter(self, TRUE); } - else if (dev->mc_count) { + else if (!netdev_mc_empty(dev)) { IRDA_DEBUG(4, "%s(), Setting multicast filter\n", __func__ ); /* Walk the address list, and load the filter */ /* hardware_set_filter(dev->mc_list); */ diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 09fff4662e80..f943f5fa7286 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -413,7 +413,7 @@ static int ieee80211_stop(struct net_device *dev) netif_addr_lock_bh(dev); spin_lock_bh(&local->filter_lock); __dev_addr_unsync(&local->mc_list, &local->mc_count, - &dev->mc_list, &dev->mc_count); + &dev->mc_list, dev->mc_count); spin_unlock_bh(&local->filter_lock); netif_addr_unlock_bh(dev); -- cgit v1.2.2 From c6b471e6454c0e1c6d756672841cbaeae7c949f8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 7 Feb 2010 17:26:30 +0000 Subject: inet: Remove bogus IGMPv3 report handling Currently we treat IGMPv3 reports as if it were an IGMPv2/v1 report. This is broken as IGMPv3 reports are formatted differently. So we end up suppressing a bogus multicast group (which should be harmless as long as the leading reserved field is zero). In fact, IGMPv3 does not allow membership report suppression so we should simply ignore IGMPv3 membership reports as a host. This patch does exactly that. I kept the case statement for it so people won't accidentally add it back thinking that we overlooked this case. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 76c08402c933..a42f658e756a 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -946,7 +946,6 @@ int igmp_rcv(struct sk_buff *skb) break; case IGMP_HOST_MEMBERSHIP_REPORT: case IGMPV2_HOST_MEMBERSHIP_REPORT: - case IGMPV3_HOST_MEMBERSHIP_REPORT: /* Is it our report looped back? 
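The point about the report formats differing is visible from the (simplified) wire layouts: a v1/v2 report carries the group address right after the checksum, while a v3 report has reserved bytes and a record count there, so parsing it as v2 would read a bogus "group". Simplified structures for illustration; the authoritative definitions live in linux/igmp.h:

#include <stdint.h>
#include <stdio.h>

struct igmp_v2_report {		/* IGMP(V2)_HOST_MEMBERSHIP_REPORT */
	uint8_t  type;
	uint8_t  code;
	uint16_t csum;
	uint32_t group;		/* reported multicast group */
};

struct igmp_v3_report {		/* IGMPV3_HOST_MEMBERSHIP_REPORT */
	uint8_t  type;
	uint8_t  resv1;
	uint16_t csum;
	uint16_t resv2;		/* occupies the bytes a v2 parser */
	uint16_t ngrec;		/* would read as "group"          */
	/* followed by ngrec group records of variable size */
};

int main(void)
{
	printf("v2 report is %zu bytes, v3 report header is %zu bytes\n",
	       sizeof(struct igmp_v2_report), sizeof(struct igmp_v3_report));
	return 0;
}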
*/ if (skb_rtable(skb)->fl.iif == 0) break; @@ -960,6 +959,7 @@ int igmp_rcv(struct sk_buff *skb) in_dev_put(in_dev); return pim_rcv_v1(skb); #endif + case IGMPV3_HOST_MEMBERSHIP_REPORT: case IGMP_DVMRP: case IGMP_TRACE: case IGMP_HOST_LEAVE_MESSAGE: -- cgit v1.2.2 From 69a6a0b38a139ccceef32222108caca8a9b0b795 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 7 Feb 2010 20:20:28 +0000 Subject: dccp: allow probing of CCID-array length This fixes a problem in the DCCP getsockopt() API: currently there is no way for a user to a priori know the number of built-in CCIDs, other than trying DCCP_SOCKOPT_AVAILABLE_CCIDS in a loop, incrementing the option length until EINVAL is no longer returned. This patch truncates the array to the user-provided length. No copy is made when the length is <= 0. Due to the length restriction in do_dccp_getsockopt() to sizeof(int), the minimum array length remains 4, which is a reasonable default (only 3 CCIDs, CCID-2..4, are currently defined). Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- net/dccp/ccid.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index ff16e9df1969..49d27c556bec 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -63,14 +63,13 @@ int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, u8 *ccid_array, array_len; int err = 0; - if (len < ARRAY_SIZE(ccids)) - return -EINVAL; - if (ccid_get_builtin_ccids(&ccid_array, &array_len)) return -ENOBUFS; - if (put_user(array_len, optlen) || - copy_to_user(optval, ccid_array, array_len)) + if (put_user(array_len, optlen)) + err = -EFAULT; + else if (len > 0 && copy_to_user(optval, ccid_array, + len > array_len ? array_len : len)) err = -EFAULT; kfree(ccid_array); -- cgit v1.2.2 From 5affcd6ba2036b59a4dee3f0576ae3584e92e4f1 Mon Sep 17 00:00:00 2001 From: Juuso Oikarinen Date: Fri, 12 Feb 2010 10:05:45 +0200 Subject: mac80211: fix handling of null-rate control in rate_control_get_rate For hardware with IEEE80211_HW_HAS_RATE_CONTROL the rate controller is not initialized. However, calling functions such as ieee80211_beacon_get result in the rate_control_get_rate function getting called, which is accessing (in this case uninitialized) rate control structures unconditionally. Fix by exiting the function before setting the rates for HW with IEEE80211_HW_HAS_RATE_CONTROL set. The initialization of the ieee80211_tx_info struct is intentionally still executed. Signed-off-by: Juuso Oikarinen Reviewed-by: Kalle Valo Cc: stable@kernel.org Signed-off-by: John W. Linville --- net/mac80211/rate.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index b9007f80cb92..12a2bff7dcdb 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -245,6 +245,9 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, info->control.rates[i].count = 1; } + if (sdata->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) + return; + if (sta && sdata->force_unicast_rateidx > -1) { info->control.rates[0].idx = sdata->force_unicast_rateidx; } else { -- cgit v1.2.2 From 2bec5a369ee79576a3eea2c23863325089785a2c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 8 Feb 2010 05:19:03 +0000 Subject: ipv6: fib: fix crash when changing large fib while dumping it When the fib size exceeds what can be dumped in a single skb, the dump is suspended and resumed once the last skb has been received by userspace. 
When the fib is changed while the dump is suspended, the walker might contain stale pointers, causing a crash when the dump is resumed. BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 IP: [] fib6_walk_continue+0xbb/0x124 [ipv6] PGD 5347a067 PUD 65c7067 PMD 0 Oops: 0000 [#1] PREEMPT SMP ... RIP: 0010:[] [] fib6_walk_continue+0xbb/0x124 [ipv6] ... Call Trace: [] ? mutex_spin_on_owner+0x59/0x71 [] inet6_dump_fib+0x11b/0x1b9 [ipv6] [] netlink_dump+0x5b/0x19e [] ? consume_skb+0x28/0x2a [] netlink_recvmsg+0x1ab/0x2c6 [] ? netlink_unicast+0xfa/0x151 [] __sock_recvmsg+0x6d/0x79 [] sock_recvmsg+0xca/0xe3 [] ? autoremove_wake_function+0x0/0x38 [] ? radix_tree_lookup_slot+0xe/0x10 [] ? find_get_page+0x90/0xa5 [] ? filemap_fault+0x201/0x34f [] ? fget_light+0x2f/0xac [] ? verify_iovec+0x4f/0x94 [] sys_recvmsg+0x14d/0x223 Store the serial number when beginning to walk the fib and reload pointers when continuing to walk after a change occured. Similar to other dumping functions, this might cause unrelated entries to be missed when entries are deleted. Tested-by: Ben Greear Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index f626ea2b304f..77e122f53ea6 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -319,12 +319,26 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, w->root = &table->tb6_root; if (cb->args[4] == 0) { + w->count = 0; + w->skip = 0; + read_lock_bh(&table->tb6_lock); res = fib6_walk(w); read_unlock_bh(&table->tb6_lock); - if (res > 0) + if (res > 0) { cb->args[4] = 1; + cb->args[5] = w->root->fn_sernum; + } } else { + if (cb->args[5] != w->root->fn_sernum) { + /* Begin at the root if the tree changed */ + cb->args[5] = w->root->fn_sernum; + w->state = FWS_INIT; + w->node = w->root; + w->skip = w->count; + } else + w->skip = 0; + read_lock_bh(&table->tb6_lock); res = fib6_walk_continue(w); read_unlock_bh(&table->tb6_lock); @@ -1250,9 +1264,18 @@ static int fib6_walk_continue(struct fib6_walker_t *w) w->leaf = fn->leaf; case FWS_C: if (w->leaf && fn->fn_flags&RTN_RTINFO) { - int err = w->func(w); + int err; + + if (w->count < w->skip) { + w->count++; + continue; + } + + err = w->func(w); if (err) return err; + + w->count++; continue; } w->state = FWS_U; @@ -1346,6 +1369,8 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root, c.w.root = root; c.w.func = fib6_clean_node; c.w.prune = prune; + c.w.count = 0; + c.w.skip = 0; c.func = func; c.arg = arg; c.net = net; -- cgit v1.2.2 From dc2b99f71ef477a31020511876ab4403fb7c4420 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 8 Feb 2010 19:48:05 +0000 Subject: IPv6: keep permanent addresses on admin down Permanent IPV6 addresses should not be removed when the link is set to admin down, only when device is removed. When link is lost permanent addresses should be marked as tentative so that when link comes back they are subject to duplicate address detection (if DAD was enabled for that address). Other routing systems keep manually configured IPv6 addresses when link is set down. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1593289155ff..b0e1430b64f1 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2646,7 +2646,8 @@ static int addrconf_ifdown(struct net_device *dev, int how) write_lock_bh(&addrconf_hash_lock); while ((ifa = *bifa) != NULL) { - if (ifa->idev == idev) { + if (ifa->idev == idev && + (how || !(ifa->flags&IFA_F_PERMANENT))) { *bifa = ifa->lst_next; ifa->lst_next = NULL; addrconf_del_timer(ifa); @@ -2686,18 +2687,30 @@ static int addrconf_ifdown(struct net_device *dev, int how) write_lock_bh(&idev->lock); } #endif - while ((ifa = idev->addr_list) != NULL) { - idev->addr_list = ifa->if_next; - ifa->if_next = NULL; - ifa->dead = 1; - addrconf_del_timer(ifa); - write_unlock_bh(&idev->lock); + bifa = &idev->addr_list; + while ((ifa = *bifa) != NULL) { + if (how == 0 && (ifa->flags&IFA_F_PERMANENT)) { + /* Retain permanent address on admin down */ + bifa = &ifa->if_next; + + /* Restart DAD if needed when link comes back up */ + if ( !((dev->flags&(IFF_NOARP|IFF_LOOPBACK)) || + idev->cnf.accept_dad <= 0 || + (ifa->flags & IFA_F_NODAD))) + ifa->flags |= IFA_F_TENTATIVE; + } else { + *bifa = ifa->if_next; + ifa->if_next = NULL; - __ipv6_ifa_notify(RTM_DELADDR, ifa); - atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); - in6_ifa_put(ifa); + ifa->dead = 1; + write_unlock_bh(&idev->lock); - write_lock_bh(&idev->lock); + __ipv6_ifa_notify(RTM_DELADDR, ifa); + atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); + in6_ifa_put(ifa); + + write_lock_bh(&idev->lock); + } } write_unlock_bh(&idev->lock); -- cgit v1.2.2 From 21809fafa042fcfff3f788419bed99f3c289745c Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 8 Feb 2010 19:48:52 +0000 Subject: IPv6: remove trivial nested _bh suffix Don't need to disable bottom half it is already down in the previous lock. Move some blank lines to group locking in same context. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b0e1430b64f1..764ad37ca070 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2802,14 +2802,14 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags) read_lock_bh(&idev->lock); if (ifp->dead) goto out; - spin_lock_bh(&ifp->lock); + spin_lock(&ifp->lock); if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || idev->cnf.accept_dad < 1 || !(ifp->flags&IFA_F_TENTATIVE) || ifp->flags & IFA_F_NODAD) { ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); - spin_unlock_bh(&ifp->lock); + spin_unlock(&ifp->lock); read_unlock_bh(&idev->lock); addrconf_dad_completed(ifp); @@ -2817,7 +2817,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags) } if (!(idev->if_flags & IF_READY)) { - spin_unlock_bh(&ifp->lock); + spin_unlock(&ifp->lock); read_unlock_bh(&idev->lock); /* * If the device is not ready: @@ -2837,7 +2837,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags) ip6_ins_rt(ifp->rt); addrconf_dad_kick(ifp); - spin_unlock_bh(&ifp->lock); + spin_unlock(&ifp->lock); out: read_unlock_bh(&idev->lock); } @@ -2853,14 +2853,15 @@ static void addrconf_dad_timer(unsigned long data) read_unlock_bh(&idev->lock); goto out; } - spin_lock_bh(&ifp->lock); + + spin_lock(&ifp->lock); if (ifp->probes == 0) { /* * DAD was successful */ ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); - spin_unlock_bh(&ifp->lock); + spin_unlock(&ifp->lock); read_unlock_bh(&idev->lock); addrconf_dad_completed(ifp); @@ -2870,7 +2871,7 @@ static void addrconf_dad_timer(unsigned long data) ifp->probes--; addrconf_mod_timer(ifp, AC_DAD, ifp->idev->nd_parms->retrans_time); - spin_unlock_bh(&ifp->lock); + spin_unlock(&ifp->lock); read_unlock_bh(&idev->lock); /* send a neighbour solicitation for our addr */ @@ -2918,12 +2919,12 @@ static void addrconf_dad_run(struct inet6_dev *idev) { read_lock_bh(&idev->lock); for (ifp = idev->addr_list; ifp; ifp = ifp->if_next) { - spin_lock_bh(&ifp->lock); + spin_lock(&ifp->lock); if (!(ifp->flags & IFA_F_TENTATIVE)) { - spin_unlock_bh(&ifp->lock); + spin_unlock(&ifp->lock); continue; } - spin_unlock_bh(&ifp->lock); + spin_unlock(&ifp->lock); addrconf_dad_kick(ifp); } read_unlock_bh(&idev->lock); -- cgit v1.2.2 From c28e93040b497e895d5c41b54abef3bc8db17fa9 Mon Sep 17 00:00:00 2001 From: jamal Date: Tue, 9 Feb 2010 03:59:38 +0000 Subject: xfrm: validate attributes Some XFRM attributes were not going through basic validation. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_user.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index d5a712976004..943c8712bd97 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2054,6 +2054,10 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { #undef XMSGSIZE static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { + [XFRMA_SA] = { .len = sizeof(struct xfrm_usersa_info)}, + [XFRMA_POLICY] = { .len = sizeof(struct xfrm_userpolicy_info)}, + [XFRMA_LASTUSED] = { .type = NLA_U64}, + [XFRMA_ALG_AUTH_TRUNC] = { .len = sizeof(struct xfrm_algo_auth)}, [XFRMA_ALG_AEAD] = { .len = sizeof(struct xfrm_algo_aead) }, [XFRMA_ALG_AUTH] = { .len = sizeof(struct xfrm_algo) }, [XFRMA_ALG_CRYPT] = { .len = sizeof(struct xfrm_algo) }, -- cgit v1.2.2 From 55d955902a7f78e3e7c9ddbc71a4a050d9d44810 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Wed, 10 Feb 2010 20:26:18 +0000 Subject: dccp: support for passing MSG_TRUNC DCCP is datagram-oriented but lacks UDP's support for MSG_TRUNC as defined in recvmsg(2)/recv(2). Hence the following 'Hello world\0' receiver len = recv(fd, buf, 10, MSG_PEEK | MSG_TRUNC); wrongly (always) returns 10, while in UDP it returns 12 as expected. This patch adds the missing MSG_TRUNC support to recvmsg(). Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- net/dccp/proto.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 671cd1413d59..85ec1cb7fd41 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -835,6 +835,8 @@ verify_sock_status: len = -EFAULT; break; } + if (flags & MSG_TRUNC) + len = skb->len; found_fin_ok: if (!(flags & MSG_PEEK)) sk_eat_skb(sk, skb, 0); -- cgit v1.2.2 From 81d54ec8479a2c695760da81f05b5a9fb2dbe40a Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Wed, 10 Feb 2010 20:26:19 +0000 Subject: udp: remove redundant variable The variable 'copied' is used in udp_recvmsg() to emphasize that the passed 'len' is adjusted to fit the actual datagram length. But the same can be done by adjusting 'len' directly. This patch thus removes the indirection. Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- net/ipv4/udp.c | 15 +++++++-------- net/ipv6/udp.c | 15 +++++++-------- 2 files changed, 14 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 4f7d2122d818..608a5446d05b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1117,7 +1117,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; struct sk_buff *skb; - unsigned int ulen, copied; + unsigned int ulen; int peeked; int err; int is_udplite = IS_UDPLITE(sk); @@ -1138,10 +1138,9 @@ try_again: goto out; ulen = skb->len - sizeof(struct udphdr); - copied = len; - if (copied > ulen) - copied = ulen; - else if (copied < ulen) + if (len > ulen) + len = ulen; + else if (len < ulen) msg->msg_flags |= MSG_TRUNC; /* @@ -1150,14 +1149,14 @@ try_again: * coverage checksum (UDP-Lite), do it before the copy. 
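The datagram semantics these two patches converge on can be verified from user space: on a Linux datagram socket, recv() with MSG_TRUNC reports the real datagram length even when the buffer is smaller, matching the "Hello world\0" example in the dccp commit message above. Minimal UDP self-test (return values left unchecked for brevity):

#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>

int main(void)
{
	int rx = socket(AF_INET, SOCK_DGRAM, 0);
	int tx = socket(AF_INET, SOCK_DGRAM, 0);
	struct sockaddr_in sa = { .sin_family = AF_INET,
				  .sin_addr.s_addr = htonl(INADDR_LOOPBACK) };
	socklen_t salen = sizeof(sa);
	char buf[10];
	ssize_t n;

	bind(rx, (struct sockaddr *)&sa, sizeof(sa));	/* kernel picks a port */
	getsockname(rx, (struct sockaddr *)&sa, &salen);
	sendto(tx, "Hello world", 12, 0, (struct sockaddr *)&sa, sizeof(sa));

	/* 12, not 10: the true datagram length is reported */
	n = recv(rx, buf, sizeof(buf), MSG_PEEK | MSG_TRUNC);
	printf("MSG_PEEK|MSG_TRUNC -> %zd\n", n);

	/* without MSG_TRUNC the result is capped at the buffer size */
	n = recv(rx, buf, sizeof(buf), 0);
	printf("plain recv          -> %zd\n", n);

	close(rx);
	close(tx);
	return 0;
}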
*/ - if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { + if (len < ulen || UDP_SKB_CB(skb)->partial_cov) { if (udp_lib_checksum_complete(skb)) goto csum_copy_err; } if (skb_csum_unnecessary(skb)) err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), - msg->msg_iov, copied); + msg->msg_iov, len); else { err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), @@ -1186,7 +1185,7 @@ try_again: if (inet->cmsg_flags) ip_cmsg_recv(msg, skb); - err = copied; + err = len; if (flags & MSG_TRUNC) err = ulen; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 34efb3589ffa..a7af9d68cd6c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -322,7 +322,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; - unsigned int ulen, copied; + unsigned int ulen; int peeked; int err; int is_udplite = IS_UDPLITE(sk); @@ -341,10 +341,9 @@ try_again: goto out; ulen = skb->len - sizeof(struct udphdr); - copied = len; - if (copied > ulen) - copied = ulen; - else if (copied < ulen) + if (len > ulen) + len = ulen; + else if (len < ulen) msg->msg_flags |= MSG_TRUNC; is_udp4 = (skb->protocol == htons(ETH_P_IP)); @@ -355,14 +354,14 @@ try_again: * coverage checksum (UDP-Lite), do it before the copy. */ - if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { + if (len < ulen || UDP_SKB_CB(skb)->partial_cov) { if (udp_lib_checksum_complete(skb)) goto csum_copy_err; } if (skb_csum_unnecessary(skb)) err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), - msg->msg_iov, copied ); + msg->msg_iov,len); else { err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); if (err == -EINVAL) @@ -411,7 +410,7 @@ try_again: datagram_recv_ctl(sk, msg, skb); } - err = copied; + err = len; if (flags & MSG_TRUNC) err = ulen; -- cgit v1.2.2 From ebc08a6f47ee76ecad8e9f26c26e6ec9b46ca659 Mon Sep 17 00:00:00 2001 From: "Williams, Mitch A" Date: Wed, 10 Feb 2010 01:44:05 +0000 Subject: rtnetlink: Add VF config code to rtnetlink Add code to allow rtnetlink clients to query and set VF information through the PF driver. Signed-off-by: Mitch Williams Signed-off-by: Jeff Kirsher Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 62f3878a6010..42da96a4eeee 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -580,6 +581,15 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, a->tx_compressed = b->tx_compressed; }; +static inline int rtnl_vfinfo_size(const struct net_device *dev) +{ + if (dev->dev.parent && dev_is_pci(dev->dev.parent)) + return dev_num_vf(dev->dev.parent) * + sizeof(struct ifla_vf_info); + else + return 0; +} + static inline size_t if_nlmsg_size(const struct net_device *dev) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) @@ -597,6 +607,8 @@ static inline size_t if_nlmsg_size(const struct net_device *dev) + nla_total_size(4) /* IFLA_MASTER */ + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ + + nla_total_size(4) /* IFLA_NUM_VF */ + + nla_total_size(rtnl_vfinfo_size(dev)) /* IFLA_VFINFO */ + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ } @@ -665,6 +677,17 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, stats = dev_get_stats(dev); copy_rtnl_link_stats(nla_data(attr), stats); + if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) { + int i; + struct ifla_vf_info ivi; + + NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)); + for (i = 0; i < dev_num_vf(dev->dev.parent); i++) { + if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi)) + break; + NLA_PUT(skb, IFLA_VFINFO, sizeof(ivi), &ivi); + } + } if (dev->rtnl_link_ops) { if (rtnl_link_fill(skb, dev) < 0) goto nla_put_failure; @@ -725,6 +748,12 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_LINKINFO] = { .type = NLA_NESTED }, [IFLA_NET_NS_PID] = { .type = NLA_U32 }, [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, + [IFLA_VF_MAC] = { .type = NLA_BINARY, + .len = sizeof(struct ifla_vf_mac) }, + [IFLA_VF_VLAN] = { .type = NLA_BINARY, + .len = sizeof(struct ifla_vf_vlan) }, + [IFLA_VF_TX_RATE] = { .type = NLA_BINARY, + .len = sizeof(struct ifla_vf_tx_rate) }, }; EXPORT_SYMBOL(ifla_policy); @@ -898,6 +927,44 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, write_unlock_bh(&dev_base_lock); } + if (tb[IFLA_VF_MAC]) { + struct ifla_vf_mac *ivm; + ivm = nla_data(tb[IFLA_VF_MAC]); + write_lock_bh(&dev_base_lock); + if (ops->ndo_set_vf_mac) + err = ops->ndo_set_vf_mac(dev, ivm->vf, ivm->mac); + write_unlock_bh(&dev_base_lock); + if (err < 0) + goto errout; + modified = 1; + } + + if (tb[IFLA_VF_VLAN]) { + struct ifla_vf_vlan *ivv; + ivv = nla_data(tb[IFLA_VF_VLAN]); + write_lock_bh(&dev_base_lock); + if (ops->ndo_set_vf_vlan) + err = ops->ndo_set_vf_vlan(dev, ivv->vf, + (u16)ivv->vlan, + (u8)ivv->qos); + write_unlock_bh(&dev_base_lock); + if (err < 0) + goto errout; + modified = 1; + } + err = 0; + + if (tb[IFLA_VF_TX_RATE]) { + struct ifla_vf_tx_rate *ivt; + ivt = nla_data(tb[IFLA_VF_TX_RATE]); + write_lock_bh(&dev_base_lock); + if (ops->ndo_set_vf_tx_rate) + err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, ivt->rate); + write_unlock_bh(&dev_base_lock); + if (err < 0) + goto errout; + modified = 1; + } err = 0; errout: -- cgit v1.2.2 From 1a5778aa000ebfec7f07eed0ffa2852ffb5d16bb Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 14 Feb 2010 22:35:47 -0800 Subject: net: Fix first line of kernel-doc for a few functions The function name must be followed 
by a space, hypen, space, and a short description. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/sunrpc/rpc_pipe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 49278f830367..9ea45383480e 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -78,7 +78,7 @@ rpc_timeout_upcall_queue(struct work_struct *work) } /** - * rpc_queue_upcall + * rpc_queue_upcall - queue an upcall message to userspace * @inode: inode of upcall pipe on which to queue given message * @msg: message to queue * -- cgit v1.2.2 From 228da6c2e63b3b2064492b24ef83c07bcc48abbd Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 14 Feb 2010 22:49:52 -0800 Subject: mac80211: Fix error introduced in netdev_mc_count() changes. Commit 4cd24eaf0c6ee7f0242e34ee77ec899f255e66b5 ("net: use netdev_mc_count and netdev_mc_empty when appropriate") added this hunk to net/mac80211/iface.c: __dev_addr_unsync(&local->mc_list, &local->mc_count, - &dev->mc_list, &dev->mc_count); + &dev->mc_list, dev->mc_count); which is definitely not correct, introduced a warning (reported by Stephen Rothwell): net/mac80211/iface.c: In function 'ieee80211_stop': net/mac80211/iface.c:416: warning: passing argument 4 of '__dev_addr_unsync' makes pointer from integer without a cast include/linux/netdevice.h:1967: note: expected 'int *' but argument is of type 'int' and is thus reverted here. Signed-off-by: David S. Miller --- net/mac80211/iface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index f943f5fa7286..09fff4662e80 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -413,7 +413,7 @@ static int ieee80211_stop(struct net_device *dev) netif_addr_lock_bh(dev); spin_lock_bh(&local->filter_lock); __dev_addr_unsync(&local->mc_list, &local->mc_count, - &dev->mc_list, dev->mc_count); + &dev->mc_list, &dev->mc_count); spin_unlock_bh(&local->filter_lock); netif_addr_unlock_bh(dev); -- cgit v1.2.2 From 98e6d2d5ee26bf56850a10eb64139c68fb09ba19 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 15 Feb 2010 16:31:35 +0100 Subject: netfilter: xt_recent: inform user when hitcount is too large It is one of these things that iptables cannot catch and which can cause "Invalid argument" to be printed. Without a hint in dmesg, it is not going to be helpful. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/netfilter/xt_recent.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 203333107367..132cfaa84cdc 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -53,7 +53,7 @@ module_param(ip_list_perms, uint, 0400); module_param(ip_list_uid, uint, 0400); module_param(ip_list_gid, uint, 0400); MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list"); -MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP to remember (max. 255)"); +MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP address to remember (max. 
255)"); MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs"); MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/xt_recent/* files"); MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/xt_recent/* files"); @@ -306,8 +306,12 @@ static bool recent_mt_check(const struct xt_mtchk_param *par) if ((info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE)) && (info->seconds || info->hit_count)) return false; - if (info->hit_count > ip_pkt_list_tot) + if (info->hit_count > ip_pkt_list_tot) { + pr_info(KBUILD_MODNAME ": hitcount (%u) is larger than " + "packets to be remembered (%u)\n", + info->hit_count, ip_pkt_list_tot); return false; + } if (info->name[0] == '\0' || strnlen(info->name, XT_RECENT_NAME_LEN) == XT_RECENT_NAME_LEN) return false; -- cgit v1.2.2 From fa96a0e2e67fe34a3d994e3f354a5c229dd14286 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sun, 1 Nov 2009 00:36:40 +0100 Subject: netfilter: iptables: remove unused function arguments Signed-off-by: Jan Engelhardt --- net/ipv4/netfilter/iptable_mangle.c | 10 +++------- net/ipv6/netfilter/ip6table_mangle.c | 10 +++------- 2 files changed, 6 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index c8333305d631..b9b83464cbf4 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -36,11 +36,7 @@ static const struct xt_table packet_mangler = { }; static unsigned int -ipt_local_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +ipt_mangle_out(struct sk_buff *skb, const struct net_device *out) { unsigned int ret; const struct iphdr *iph; @@ -60,7 +56,7 @@ ipt_local_hook(unsigned int hook, daddr = iph->daddr; tos = iph->tos; - ret = ipt_do_table(skb, hook, in, out, + ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out, dev_net(out)->ipv4.iptable_mangle); /* Reroute for ANY change. 
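The context around this hunk shows the snapshot-and-compare idiom used on the mangle output path: record the fields that routing depends on, run the table, and re-route only if one of them changed. In miniature, with a toy packet type rather than kernel code:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct toy_pkt {
	uint32_t saddr, daddr;
	uint8_t  tos;
	uint32_t mark;
};

/* stand-in for ipt_do_table(): the ruleset may rewrite any field */
static void run_mangle_table(struct toy_pkt *p)
{
	p->tos = 0x10;
}

int main(void)
{
	struct toy_pkt pkt = { .saddr = 1, .daddr = 2, .tos = 0, .mark = 0 };
	struct toy_pkt old = pkt;	/* snapshot before the table runs */
	bool reroute;

	run_mangle_table(&pkt);

	reroute = pkt.saddr != old.saddr || pkt.daddr != old.daddr ||
		  pkt.tos != old.tos || pkt.mark != old.mark;
	printf("re-route needed: %s\n", reroute ? "yes" : "no");
	return 0;
}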
*/ if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { @@ -86,7 +82,7 @@ iptable_mangle_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { if (hook == NF_INET_LOCAL_OUT) - return ipt_local_hook(hook, skb, in, out, okfn); + return ipt_mangle_out(skb, out); if (hook == NF_INET_POST_ROUTING) return ipt_do_table(skb, hook, in, out, dev_net(out)->ipv4.iptable_mangle); diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index b6216dede52c..7844e557c0ec 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -30,12 +30,8 @@ static const struct xt_table packet_mangler = { }; static unsigned int -ip6t_local_out_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out) { - unsigned int ret; struct in6_addr saddr, daddr; u_int8_t hop_limit; @@ -60,7 +56,7 @@ ip6t_local_out_hook(unsigned int hook, /* flowlabel and prio (includes version, which shouldn't change either */ flowlabel = *((u_int32_t *)ipv6_hdr(skb)); - ret = ip6t_do_table(skb, hook, NULL, out, + ret = ip6t_do_table(skb, NF_INET_LOCAL_OUT, NULL, out, dev_net(out)->ipv6.ip6table_mangle); if (ret != NF_DROP && ret != NF_STOLEN && @@ -80,7 +76,7 @@ ip6table_mangle_hook(unsigned int hook, struct sk_buff *skb, int (*okfn)(struct sk_buff *)) { if (hook == NF_INET_LOCAL_OUT) - return ip6t_local_out_hook(hook, skb, out, okfn); + return ip6t_mangle_out(skb, out); if (hook == NF_INET_POST_ROUTING) return ip6t_do_table(skb, hook, in, out, dev_net(out)->ipv6.ip6table_mangle); -- cgit v1.2.2 From b402405d71beed8e4df354844353f66b4e18269f Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 25 Jun 2009 18:32:12 +0200 Subject: netfilter: xtables: print details on size mismatch Print which revision has been used and which size are which (kernel/user) for easier debugging. Signed-off-by: Jan Engelhardt --- net/netfilter/x_tables.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 255ab0657ce8..12503199826f 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -366,8 +366,10 @@ int xt_check_match(struct xt_mtchk_param *par, * ebt_among is exempt from centralized matchsize checking * because it uses a dynamic-size data set. 
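The check that this hunk makes more talkative compares the size supplied by userspace against the kernel's XT_ALIGN()-rounded matchsize; the rounding is the standard power-of-two align-up. Sketch with a local macro standing in for the real XT_ALIGN:

#include <stdio.h>

#define TOY_ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned int matchsize = 13;			/* kernel's struct size */
	unsigned int usersize  = 12;			/* what userspace sent  */
	unsigned int want = TOY_ALIGN(matchsize, 8);	/* 8-byte alignment     */

	if (want != usersize)
		printf("match: invalid size %u (kernel) != (user) %u\n",
		       want, usersize);
	else
		printf("sizes agree at %u bytes\n", want);
	return 0;
}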
*/ - pr_err("%s_tables: %s match: invalid size %u != %u\n", + pr_err("%s_tables: %s.%u match: invalid size " + "%u (kernel) != (user) %u\n", xt_prefix[par->family], par->match->name, + par->match->revision, XT_ALIGN(par->match->matchsize), size); return -EINVAL; } @@ -516,8 +518,10 @@ int xt_check_target(struct xt_tgchk_param *par, unsigned int size, u_int8_t proto, bool inv_proto) { if (XT_ALIGN(par->target->targetsize) != size) { - pr_err("%s_tables: %s target: invalid size %u != %u\n", + pr_err("%s_tables: %s.%u target: invalid size " + "%u (kernel) != (user) %u\n", xt_prefix[par->family], par->target->name, + par->target->revision, XT_ALIGN(par->target->targetsize), size); return -EINVAL; } -- cgit v1.2.2 From 739674fb7febf116e7d647031fab16989a08a965 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 26 Jun 2009 08:23:19 +0200 Subject: netfilter: xtables: constify args in compat copying functions Signed-off-by: Jan Engelhardt --- net/ipv4/netfilter/arp_tables.c | 4 ++-- net/ipv4/netfilter/ip_tables.c | 4 ++-- net/ipv4/netfilter/ipt_ULOG.c | 4 ++-- net/ipv6/netfilter/ip6_tables.c | 4 ++-- net/netfilter/x_tables.c | 8 ++++---- net/netfilter/xt_hashlimit.c | 4 ++-- net/netfilter/xt_limit.c | 4 ++-- 7 files changed, 16 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 72723ea1054b..2303dc92a277 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -842,7 +842,7 @@ static int copy_entries_to_user(unsigned int total_size, } #ifdef CONFIG_COMPAT -static void compat_standard_from_user(void *dst, void *src) +static void compat_standard_from_user(void *dst, const void *src) { int v = *(compat_int_t *)src; @@ -851,7 +851,7 @@ static void compat_standard_from_user(void *dst, void *src) memcpy(dst, &v, sizeof(v)); } -static int compat_standard_to_user(void __user *dst, void *src) +static int compat_standard_to_user(void __user *dst, const void *src) { compat_int_t cv = *(int *)src; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 2057b1bb6178..2a4f745ce36e 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1047,7 +1047,7 @@ copy_entries_to_user(unsigned int total_size, } #ifdef CONFIG_COMPAT -static void compat_standard_from_user(void *dst, void *src) +static void compat_standard_from_user(void *dst, const void *src) { int v = *(compat_int_t *)src; @@ -1056,7 +1056,7 @@ static void compat_standard_from_user(void *dst, void *src) memcpy(dst, &v, sizeof(v)); } -static int compat_standard_to_user(void __user *dst, void *src) +static int compat_standard_to_user(void __user *dst, const void *src) { compat_int_t cv = *(int *)src; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 399061c3fd7d..09a5d3f7cc41 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -338,7 +338,7 @@ struct compat_ipt_ulog_info { char prefix[ULOG_PREFIX_LEN]; }; -static void ulog_tg_compat_from_user(void *dst, void *src) +static void ulog_tg_compat_from_user(void *dst, const void *src) { const struct compat_ipt_ulog_info *cl = src; struct ipt_ulog_info l = { @@ -351,7 +351,7 @@ static void ulog_tg_compat_from_user(void *dst, void *src) memcpy(dst, &l, sizeof(l)); } -static int ulog_tg_compat_to_user(void __user *dst, void *src) +static int ulog_tg_compat_to_user(void __user *dst, const void *src) { const struct ipt_ulog_info *l = src; struct compat_ipt_ulog_info cl = { diff --git 
a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index dcd7825fe7b6..3ff4fd50e96e 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1079,7 +1079,7 @@ copy_entries_to_user(unsigned int total_size, } #ifdef CONFIG_COMPAT -static void compat_standard_from_user(void *dst, void *src) +static void compat_standard_from_user(void *dst, const void *src) { int v = *(compat_int_t *)src; @@ -1088,7 +1088,7 @@ static void compat_standard_from_user(void *dst, void *src) memcpy(dst, &v, sizeof(v)); } -static int compat_standard_to_user(void __user *dst, void *src) +static int compat_standard_to_user(void __user *dst, const void *src) { compat_int_t cv = *(int *)src; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 12503199826f..69c56287d518 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -485,8 +485,8 @@ int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, } EXPORT_SYMBOL_GPL(xt_compat_match_from_user); -int xt_compat_match_to_user(struct xt_entry_match *m, void __user **dstptr, - unsigned int *size) +int xt_compat_match_to_user(const struct xt_entry_match *m, + void __user **dstptr, unsigned int *size) { const struct xt_match *match = m->u.kernel.match; struct compat_xt_entry_match __user *cm = *dstptr; @@ -588,8 +588,8 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, } EXPORT_SYMBOL_GPL(xt_compat_target_from_user); -int xt_compat_target_to_user(struct xt_entry_target *t, void __user **dstptr, - unsigned int *size) +int xt_compat_target_to_user(const struct xt_entry_target *t, + void __user **dstptr, unsigned int *size) { const struct xt_target *target = t->u.kernel.target; struct compat_xt_entry_target __user *ct = *dstptr; diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 017c95966aa8..e47fb805ffb4 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -775,7 +775,7 @@ struct compat_xt_hashlimit_info { compat_uptr_t master; }; -static void hashlimit_mt_compat_from_user(void *dst, void *src) +static void hashlimit_mt_compat_from_user(void *dst, const void *src) { int off = offsetof(struct compat_xt_hashlimit_info, hinfo); @@ -783,7 +783,7 @@ static void hashlimit_mt_compat_from_user(void *dst, void *src) memset(dst + off, 0, sizeof(struct compat_xt_hashlimit_info) - off); } -static int hashlimit_mt_compat_to_user(void __user *dst, void *src) +static int hashlimit_mt_compat_to_user(void __user *dst, const void *src) { int off = offsetof(struct compat_xt_hashlimit_info, hinfo); diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 2773be6a71dd..a0ca5339af41 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -148,7 +148,7 @@ struct compat_xt_rateinfo { /* To keep the full "prev" timestamp, the upper 32 bits are stored in the * master pointer, which does not need to be preserved. 
*/ -static void limit_mt_compat_from_user(void *dst, void *src) +static void limit_mt_compat_from_user(void *dst, const void *src) { const struct compat_xt_rateinfo *cm = src; struct xt_rateinfo m = { @@ -162,7 +162,7 @@ static void limit_mt_compat_from_user(void *dst, void *src) memcpy(dst, &m, sizeof(m)); } -static int limit_mt_compat_to_user(void __user *dst, void *src) +static int limit_mt_compat_to_user(void __user *dst, const void *src) { const struct xt_rateinfo *m = src; struct compat_xt_rateinfo cm = { -- cgit v1.2.2 From d5d1baa15f5b05e9110403724d5dc72d6d541e04 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 26 Jun 2009 07:51:59 +0200 Subject: netfilter: xtables: add const qualifiers This should make it easier to remove redundant arguments later. Signed-off-by: Jan Engelhardt --- net/bridge/netfilter/ebtables.c | 59 ++++++++++++++------------- net/ipv4/netfilter/arp_tables.c | 69 ++++++++++++++++++-------------- net/ipv4/netfilter/ip_tables.c | 88 +++++++++++++++++++++++------------------ net/ipv6/netfilter/ip6_tables.c | 88 ++++++++++++++++++++++------------------- 4 files changed, 168 insertions(+), 136 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 208f4e32e732..bcdf02d866b8 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -82,7 +82,8 @@ static inline int ebt_do_match (struct ebt_entry_match *m, return m->u.match->match(skb, par) ? EBT_MATCH : EBT_NOMATCH; } -static inline int ebt_dev_check(char *entry, const struct net_device *device) +static inline int +ebt_dev_check(const char *entry, const struct net_device *device) { int i = 0; const char *devname; @@ -100,8 +101,9 @@ static inline int ebt_dev_check(char *entry, const struct net_device *device) #define FWINV2(bool,invflg) ((bool) ^ !!(e->invflags & invflg)) /* process standard matches */ -static inline int ebt_basic_match(struct ebt_entry *e, struct ethhdr *h, - const struct net_device *in, const struct net_device *out) +static inline int +ebt_basic_match(const struct ebt_entry *e, const struct ethhdr *h, + const struct net_device *in, const struct net_device *out) { int verdict, i; @@ -156,12 +158,12 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, int i, nentries; struct ebt_entry *point; struct ebt_counter *counter_base, *cb_base; - struct ebt_entry_target *t; + const struct ebt_entry_target *t; int verdict, sp = 0; struct ebt_chainstack *cs; struct ebt_entries *chaininfo; - char *base; - struct ebt_table_info *private; + const char *base; + const struct ebt_table_info *private; bool hotdrop = false; struct xt_match_param mtpar; struct xt_target_param tgpar; @@ -395,7 +397,7 @@ ebt_check_watcher(struct ebt_entry_watcher *w, struct xt_tgchk_param *par, return 0; } -static int ebt_verify_pointers(struct ebt_replace *repl, +static int ebt_verify_pointers(const struct ebt_replace *repl, struct ebt_table_info *newinfo) { unsigned int limit = repl->entries_size; @@ -466,8 +468,8 @@ static int ebt_verify_pointers(struct ebt_replace *repl, * to parse the userspace data */ static inline int -ebt_check_entry_size_and_hooks(struct ebt_entry *e, - struct ebt_table_info *newinfo, +ebt_check_entry_size_and_hooks(const struct ebt_entry *e, + const struct ebt_table_info *newinfo, unsigned int *n, unsigned int *cnt, unsigned int *totalcnt, unsigned int *udc_cnt) { @@ -622,9 +624,8 @@ ebt_cleanup_entry(struct ebt_entry *e, struct net *net, unsigned int *cnt) } static inline int -ebt_check_entry(struct 
ebt_entry *e, - struct net *net, - struct ebt_table_info *newinfo, +ebt_check_entry(struct ebt_entry *e, struct net *net, + const struct ebt_table_info *newinfo, const char *name, unsigned int *cnt, struct ebt_cl_stack *cl_s, unsigned int udc_cnt) { @@ -743,12 +744,12 @@ cleanup_matches: * the hook mask for udc tells us from which base chains the udc can be * accessed. This mask is a parameter to the check() functions of the extensions */ -static int check_chainloops(struct ebt_entries *chain, struct ebt_cl_stack *cl_s, +static int check_chainloops(const struct ebt_entries *chain, struct ebt_cl_stack *cl_s, unsigned int udc_cnt, unsigned int hooknr, char *base) { int i, chain_nr = -1, pos = 0, nentries = chain->nentries, verdict; - struct ebt_entry *e = (struct ebt_entry *)chain->data; - struct ebt_entry_target *t; + const struct ebt_entry *e = (struct ebt_entry *)chain->data; + const struct ebt_entry_target *t; while (pos < nentries || chain_nr != -1) { /* end of udc, go back one 'recursion' step */ @@ -814,7 +815,7 @@ letscontinue: } /* do the parsing of the table/chains/entries/matches/watchers/targets, heh */ -static int translate_table(struct net *net, char *name, +static int translate_table(struct net *net, const char *name, struct ebt_table_info *newinfo) { unsigned int i, j, k, udc_cnt; @@ -934,7 +935,7 @@ static int translate_table(struct net *net, char *name, } /* called under write_lock */ -static void get_counters(struct ebt_counter *oldcounters, +static void get_counters(const struct ebt_counter *oldcounters, struct ebt_counter *counters, unsigned int nentries) { int i, cpu; @@ -957,7 +958,8 @@ static void get_counters(struct ebt_counter *oldcounters, } /* replace the table */ -static int do_replace(struct net *net, void __user *user, unsigned int len) +static int do_replace(struct net *net, const void __user *user, + unsigned int len) { int ret, i, countersize; struct ebt_table_info *newinfo; @@ -1237,7 +1239,8 @@ void ebt_unregister_table(struct net *net, struct ebt_table *table) } /* userspace just supplied us with counters */ -static int update_counters(struct net *net, void __user *user, unsigned int len) +static int update_counters(struct net *net, const void __user *user, + unsigned int len) { int i, ret; struct ebt_counter *tmp; @@ -1292,8 +1295,8 @@ free_tmp: return ret; } -static inline int ebt_make_matchname(struct ebt_entry_match *m, - char *base, char __user *ubase) +static inline int ebt_make_matchname(const struct ebt_entry_match *m, + const char *base, char __user *ubase) { char __user *hlp = ubase + ((char *)m - base); if (copy_to_user(hlp, m->u.match->name, EBT_FUNCTION_MAXNAMELEN)) @@ -1301,8 +1304,8 @@ static inline int ebt_make_matchname(struct ebt_entry_match *m, return 0; } -static inline int ebt_make_watchername(struct ebt_entry_watcher *w, - char *base, char __user *ubase) +static inline int ebt_make_watchername(const struct ebt_entry_watcher *w, + const char *base, char __user *ubase) { char __user *hlp = ubase + ((char *)w - base); if (copy_to_user(hlp , w->u.watcher->name, EBT_FUNCTION_MAXNAMELEN)) @@ -1310,11 +1313,12 @@ static inline int ebt_make_watchername(struct ebt_entry_watcher *w, return 0; } -static inline int ebt_make_names(struct ebt_entry *e, char *base, char __user *ubase) +static inline int +ebt_make_names(struct ebt_entry *e, const char *base, char __user *ubase) { int ret; char __user *hlp; - struct ebt_entry_target *t; + const struct ebt_entry_target *t; if (e->bitmask == 0) return 0; @@ -1335,10 +1339,11 @@ static inline int 
ebt_make_names(struct ebt_entry *e, char *base, char __user *u /* called with ebt_mutex locked */ static int copy_everything_to_user(struct ebt_table *t, void __user *user, - int *len, int cmd) + const int *len, int cmd) { struct ebt_replace tmp; - struct ebt_counter *counterstmp, *oldcounters; + struct ebt_counter *counterstmp; + const struct ebt_counter *oldcounters; unsigned int entries_size, nentries; char *entries; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 2303dc92a277..4db5c1ece0f9 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -233,7 +233,14 @@ arpt_error(struct sk_buff *skb, const struct xt_target_param *par) return NF_DROP; } -static inline struct arpt_entry *get_entry(void *base, unsigned int offset) +static inline const struct arpt_entry_target * +arpt_get_target_c(const struct arpt_entry *e) +{ + return arpt_get_target((struct arpt_entry *)e); +} + +static inline struct arpt_entry * +get_entry(const void *base, unsigned int offset) { return (struct arpt_entry *)(base + offset); } @@ -280,7 +287,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, arp = arp_hdr(skb); do { - struct arpt_entry_target *t; + const struct arpt_entry_target *t; int hdr_len; if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { @@ -292,7 +299,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, (2 * skb->dev->addr_len); ADD_COUNTER(e->counters, hdr_len, 1); - t = arpt_get_target(e); + t = arpt_get_target_c(e); /* Standard target? */ if (!t->u.kernel.target->target) { @@ -358,7 +365,7 @@ static inline bool unconditional(const struct arpt_arp *arp) /* Figures out from what hook each rule can be called: returns 0 if * there are loops. Puts hook bitmask in comefrom. */ -static int mark_source_chains(struct xt_table_info *newinfo, +static int mark_source_chains(const struct xt_table_info *newinfo, unsigned int valid_hooks, void *entry0) { unsigned int hook; @@ -379,7 +386,7 @@ static int mark_source_chains(struct xt_table_info *newinfo, for (;;) { const struct arpt_standard_target *t - = (void *)arpt_get_target(e); + = (void *)arpt_get_target_c(e); int visited = e->comefrom & (1 << hook); if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) { @@ -463,7 +470,7 @@ static int mark_source_chains(struct xt_table_info *newinfo, return 1; } -static inline int check_entry(struct arpt_entry *e, const char *name) +static inline int check_entry(const struct arpt_entry *e, const char *name) { const struct arpt_entry_target *t; @@ -475,7 +482,7 @@ static inline int check_entry(struct arpt_entry *e, const char *name) if (e->target_offset + sizeof(struct arpt_entry_target) > e->next_offset) return -EINVAL; - t = arpt_get_target(e); + t = arpt_get_target_c(e); if (e->target_offset + t->u.target_size > e->next_offset) return -EINVAL; @@ -540,14 +547,14 @@ out: return ret; } -static bool check_underflow(struct arpt_entry *e) +static bool check_underflow(const struct arpt_entry *e) { const struct arpt_entry_target *t; unsigned int verdict; if (!unconditional(&e->arp)) return false; - t = arpt_get_target(e); + t = arpt_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) return false; verdict = ((struct arpt_standard_target *)t)->verdict; @@ -557,8 +564,8 @@ static bool check_underflow(struct arpt_entry *e) static inline int check_entry_size_and_hooks(struct arpt_entry *e, struct xt_table_info *newinfo, - unsigned char *base, - unsigned char *limit, + const unsigned char *base, + const unsigned char *limit, const unsigned int 
*hook_entries, const unsigned int *underflows, unsigned int valid_hooks, @@ -768,11 +775,11 @@ static void get_counters(const struct xt_table_info *t, local_bh_enable(); } -static struct xt_counters *alloc_counters(struct xt_table *table) +static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - struct xt_table_info *private = table->private; + const struct xt_table_info *private = table->private; /* We need atomic snapshot of counters: rest doesn't change * (other than comefrom, which userspace doesn't care @@ -790,11 +797,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table) } static int copy_entries_to_user(unsigned int total_size, - struct xt_table *table, + const struct xt_table *table, void __user *userptr) { unsigned int off, num; - struct arpt_entry *e; + const struct arpt_entry *e; struct xt_counters *counters; struct xt_table_info *private = table->private; int ret = 0; @@ -814,7 +821,7 @@ static int copy_entries_to_user(unsigned int total_size, /* FIXME: use iterator macros --RR */ /* ... then go back and fix counters and names */ for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ - struct arpt_entry_target *t; + const struct arpt_entry_target *t; e = (struct arpt_entry *)(loc_cpu_entry + off); if (copy_to_user(userptr + off @@ -825,7 +832,7 @@ static int copy_entries_to_user(unsigned int total_size, goto free_counters; } - t = arpt_get_target(e); + t = arpt_get_target_c(e); if (copy_to_user(userptr + off + e->target_offset + offsetof(struct arpt_entry_target, u.user.name), @@ -860,18 +867,18 @@ static int compat_standard_to_user(void __user *dst, const void *src) return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; } -static int compat_calc_entry(struct arpt_entry *e, +static int compat_calc_entry(const struct arpt_entry *e, const struct xt_table_info *info, - void *base, struct xt_table_info *newinfo) + const void *base, struct xt_table_info *newinfo) { - struct arpt_entry_target *t; + const struct arpt_entry_target *t; unsigned int entry_offset; int off, i, ret; off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); entry_offset = (void *)e - base; - t = arpt_get_target(e); + t = arpt_get_target_c(e); off += xt_compat_target_offset(t->u.kernel.target); newinfo->size -= off; ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off); @@ -907,7 +914,8 @@ static int compat_table_info(const struct xt_table_info *info, } #endif -static int get_info(struct net *net, void __user *user, int *len, int compat) +static int get_info(struct net *net, void __user *user, + const int *len, int compat) { char name[ARPT_TABLE_MAXNAMELEN]; struct xt_table *t; @@ -966,7 +974,7 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) } static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, - int *len) + const int *len) { int ret; struct arpt_get_entries get; @@ -1080,7 +1088,8 @@ static int __do_replace(struct net *net, const char *name, return ret; } -static int do_replace(struct net *net, void __user *user, unsigned int len) +static int do_replace(struct net *net, const void __user *user, + unsigned int len) { int ret; struct arpt_replace tmp; @@ -1140,8 +1149,8 @@ add_counter_to_entry(struct arpt_entry *e, return 0; } -static int do_add_counters(struct net *net, void __user *user, unsigned int len, - int compat) +static int do_add_counters(struct net *net, const void __user *user, + unsigned int len, int compat) { 
unsigned int i, curcpu; struct xt_counters_info tmp; @@ -1245,10 +1254,10 @@ static inline int check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, struct xt_table_info *newinfo, unsigned int *size, - unsigned char *base, - unsigned char *limit, - unsigned int *hook_entries, - unsigned int *underflows, + const unsigned char *base, + const unsigned char *limit, + const unsigned int *hook_entries, + const unsigned int *underflows, unsigned int *i, const char *name) { diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 2a4f745ce36e..e94c18bdfc68 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -176,7 +176,7 @@ ipt_error(struct sk_buff *skb, const struct xt_target_param *par) /* Performance critical - called for every packet */ static inline bool -do_match(struct ipt_entry_match *m, const struct sk_buff *skb, +do_match(const struct ipt_entry_match *m, const struct sk_buff *skb, struct xt_match_param *par) { par->match = m->u.kernel.match; @@ -191,7 +191,7 @@ do_match(struct ipt_entry_match *m, const struct sk_buff *skb, /* Performance critical */ static inline struct ipt_entry * -get_entry(void *base, unsigned int offset) +get_entry(const void *base, unsigned int offset) { return (struct ipt_entry *)(base + offset); } @@ -206,6 +206,13 @@ static inline bool unconditional(const struct ipt_ip *ip) #undef FWINV } +/* for const-correctness */ +static inline const struct ipt_entry_target * +ipt_get_target_c(const struct ipt_entry *e) +{ + return ipt_get_target((struct ipt_entry *)e); +} + #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) static const char *const hooknames[] = { @@ -240,11 +247,11 @@ static struct nf_loginfo trace_loginfo = { /* Mildly perf critical (only if packet tracing is on) */ static inline int -get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e, +get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e, const char *hookname, const char **chainname, const char **comment, unsigned int *rulenum) { - struct ipt_standard_target *t = (void *)ipt_get_target(s); + const struct ipt_standard_target *t = (void *)ipt_get_target_c(s); if (strcmp(t->target.u.kernel.target->name, IPT_ERROR_TARGET) == 0) { /* Head of user chain: ERROR target with chainname */ @@ -270,15 +277,15 @@ get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e, return 0; } -static void trace_packet(struct sk_buff *skb, +static void trace_packet(const struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, const char *tablename, - struct xt_table_info *private, - struct ipt_entry *e) + const struct xt_table_info *private, + const struct ipt_entry *e) { - void *table_base; + const void *table_base; const struct ipt_entry *root; const char *hookname, *chainname, *comment; unsigned int rulenum = 0; @@ -322,9 +329,9 @@ ipt_do_table(struct sk_buff *skb, /* Initializing verdict to NF_DROP keeps gcc happy. 
*/ unsigned int verdict = NF_DROP; const char *indev, *outdev; - void *table_base; + const void *table_base; struct ipt_entry *e, *back; - struct xt_table_info *private; + const struct xt_table_info *private; struct xt_match_param mtpar; struct xt_target_param tgpar; @@ -357,7 +364,7 @@ ipt_do_table(struct sk_buff *skb, back = get_entry(table_base, private->underflow[hook]); do { - struct ipt_entry_target *t; + const struct ipt_entry_target *t; IP_NF_ASSERT(e); IP_NF_ASSERT(back); @@ -450,7 +457,7 @@ ipt_do_table(struct sk_buff *skb, /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. */ static int -mark_source_chains(struct xt_table_info *newinfo, +mark_source_chains(const struct xt_table_info *newinfo, unsigned int valid_hooks, void *entry0) { unsigned int hook; @@ -468,8 +475,8 @@ mark_source_chains(struct xt_table_info *newinfo, e->counters.pcnt = pos; for (;;) { - struct ipt_standard_target *t - = (void *)ipt_get_target(e); + const struct ipt_standard_target *t + = (void *)ipt_get_target_c(e); int visited = e->comefrom & (1 << hook); if (e->comefrom & (1 << NF_INET_NUMHOOKS)) { @@ -578,9 +585,9 @@ cleanup_match(struct ipt_entry_match *m, struct net *net, unsigned int *i) } static int -check_entry(struct ipt_entry *e, const char *name) +check_entry(const struct ipt_entry *e, const char *name) { - struct ipt_entry_target *t; + const struct ipt_entry_target *t; if (!ip_checkentry(&e->ip)) { duprintf("ip_tables: ip check failed %p %s.\n", e, name); @@ -591,7 +598,7 @@ check_entry(struct ipt_entry *e, const char *name) e->next_offset) return -EINVAL; - t = ipt_get_target(e); + t = ipt_get_target_c(e); if (e->target_offset + t->u.target_size > e->next_offset) return -EINVAL; @@ -718,14 +725,14 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, return ret; } -static bool check_underflow(struct ipt_entry *e) +static bool check_underflow(const struct ipt_entry *e) { const struct ipt_entry_target *t; unsigned int verdict; if (!unconditional(&e->ip)) return false; - t = ipt_get_target(e); + t = ipt_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) return false; verdict = ((struct ipt_standard_target *)t)->verdict; @@ -736,8 +743,8 @@ static bool check_underflow(struct ipt_entry *e) static int check_entry_size_and_hooks(struct ipt_entry *e, struct xt_table_info *newinfo, - unsigned char *base, - unsigned char *limit, + const unsigned char *base, + const unsigned char *limit, const unsigned int *hook_entries, const unsigned int *underflows, unsigned int valid_hooks, @@ -952,11 +959,11 @@ get_counters(const struct xt_table_info *t, local_bh_enable(); } -static struct xt_counters * alloc_counters(struct xt_table *table) +static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - struct xt_table_info *private = table->private; + const struct xt_table_info *private = table->private; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -974,11 +981,11 @@ static struct xt_counters * alloc_counters(struct xt_table *table) static int copy_entries_to_user(unsigned int total_size, - struct xt_table *table, + const struct xt_table *table, void __user *userptr) { unsigned int off, num; - struct ipt_entry *e; + const struct ipt_entry *e; struct xt_counters *counters; const struct xt_table_info *private = table->private; int ret = 0; @@ -1030,7 +1037,7 @@ 
copy_entries_to_user(unsigned int total_size, } } - t = ipt_get_target(e); + t = ipt_get_target_c(e); if (copy_to_user(userptr + off + e->target_offset + offsetof(struct ipt_entry_target, u.user.name), @@ -1066,24 +1073,24 @@ static int compat_standard_to_user(void __user *dst, const void *src) } static inline int -compat_calc_match(struct ipt_entry_match *m, int *size) +compat_calc_match(const struct ipt_entry_match *m, int *size) { *size += xt_compat_match_offset(m->u.kernel.match); return 0; } -static int compat_calc_entry(struct ipt_entry *e, +static int compat_calc_entry(const struct ipt_entry *e, const struct xt_table_info *info, - void *base, struct xt_table_info *newinfo) + const void *base, struct xt_table_info *newinfo) { - struct ipt_entry_target *t; + const struct ipt_entry_target *t; unsigned int entry_offset; int off, i, ret; off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); entry_offset = (void *)e - base; IPT_MATCH_ITERATE(e, compat_calc_match, &off); - t = ipt_get_target(e); + t = ipt_get_target_c(e); off += xt_compat_target_offset(t->u.kernel.target); newinfo->size -= off; ret = xt_compat_add_offset(AF_INET, entry_offset, off); @@ -1119,7 +1126,8 @@ static int compat_table_info(const struct xt_table_info *info, } #endif -static int get_info(struct net *net, void __user *user, int *len, int compat) +static int get_info(struct net *net, void __user *user, + const int *len, int compat) { char name[IPT_TABLE_MAXNAMELEN]; struct xt_table *t; @@ -1179,7 +1187,8 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) } static int -get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len) +get_entries(struct net *net, struct ipt_get_entries __user *uptr, + const int *len) { int ret; struct ipt_get_entries get; @@ -1289,7 +1298,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, } static int -do_replace(struct net *net, void __user *user, unsigned int len) +do_replace(struct net *net, const void __user *user, unsigned int len) { int ret; struct ipt_replace tmp; @@ -1350,7 +1359,8 @@ add_counter_to_entry(struct ipt_entry *e, } static int -do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) +do_add_counters(struct net *net, const void __user *user, + unsigned int len, int compat) { unsigned int i, curcpu; struct xt_counters_info tmp; @@ -1546,10 +1556,10 @@ static int check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, struct xt_table_info *newinfo, unsigned int *size, - unsigned char *base, - unsigned char *limit, - unsigned int *hook_entries, - unsigned int *underflows, + const unsigned char *base, + const unsigned char *limit, + const unsigned int *hook_entries, + const unsigned int *underflows, unsigned int *i, const char *name) { diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 3ff4fd50e96e..4185099c2943 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -208,7 +208,7 @@ ip6t_error(struct sk_buff *skb, const struct xt_target_param *par) /* Performance critical - called for every packet */ static inline bool -do_match(struct ip6t_entry_match *m, const struct sk_buff *skb, +do_match(const struct ip6t_entry_match *m, const struct sk_buff *skb, struct xt_match_param *par) { par->match = m->u.kernel.match; @@ -222,7 +222,7 @@ do_match(struct ip6t_entry_match *m, const struct sk_buff *skb, } static inline struct ip6t_entry * -get_entry(void *base, unsigned int offset) +get_entry(const void 
*base, unsigned int offset) { return (struct ip6t_entry *)(base + offset); } @@ -236,6 +236,12 @@ static inline bool unconditional(const struct ip6t_ip6 *ipv6) return memcmp(ipv6, &uncond, sizeof(uncond)) == 0; } +static inline const struct ip6t_entry_target * +ip6t_get_target_c(const struct ip6t_entry *e) +{ + return ip6t_get_target((struct ip6t_entry *)e); +} + #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) /* This cries for unification! */ @@ -271,11 +277,11 @@ static struct nf_loginfo trace_loginfo = { /* Mildly perf critical (only if packet tracing is on) */ static inline int -get_chainname_rulenum(struct ip6t_entry *s, struct ip6t_entry *e, +get_chainname_rulenum(const struct ip6t_entry *s, const struct ip6t_entry *e, const char *hookname, const char **chainname, const char **comment, unsigned int *rulenum) { - struct ip6t_standard_target *t = (void *)ip6t_get_target(s); + const struct ip6t_standard_target *t = (void *)ip6t_get_target_c(s); if (strcmp(t->target.u.kernel.target->name, IP6T_ERROR_TARGET) == 0) { /* Head of user chain: ERROR target with chainname */ @@ -301,15 +307,15 @@ get_chainname_rulenum(struct ip6t_entry *s, struct ip6t_entry *e, return 0; } -static void trace_packet(struct sk_buff *skb, +static void trace_packet(const struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, const char *tablename, - struct xt_table_info *private, - struct ip6t_entry *e) + const struct xt_table_info *private, + const struct ip6t_entry *e) { - void *table_base; + const void *table_base; const struct ip6t_entry *root; const char *hookname, *chainname, *comment; unsigned int rulenum = 0; @@ -352,9 +358,9 @@ ip6t_do_table(struct sk_buff *skb, /* Initializing verdict to NF_DROP keeps gcc happy. */ unsigned int verdict = NF_DROP; const char *indev, *outdev; - void *table_base; + const void *table_base; struct ip6t_entry *e, *back; - struct xt_table_info *private; + const struct xt_table_info *private; struct xt_match_param mtpar; struct xt_target_param tgpar; @@ -385,7 +391,7 @@ ip6t_do_table(struct sk_buff *skb, back = get_entry(table_base, private->underflow[hook]); do { - struct ip6t_entry_target *t; + const struct ip6t_entry_target *t; IP_NF_ASSERT(e); IP_NF_ASSERT(back); @@ -400,7 +406,7 @@ ip6t_do_table(struct sk_buff *skb, ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr), 1); - t = ip6t_get_target(e); + t = ip6t_get_target_c(e); IP_NF_ASSERT(t->u.kernel.target); #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ @@ -482,7 +488,7 @@ ip6t_do_table(struct sk_buff *skb, /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. 
*/ static int -mark_source_chains(struct xt_table_info *newinfo, +mark_source_chains(const struct xt_table_info *newinfo, unsigned int valid_hooks, void *entry0) { unsigned int hook; @@ -500,8 +506,8 @@ mark_source_chains(struct xt_table_info *newinfo, e->counters.pcnt = pos; for (;;) { - struct ip6t_standard_target *t - = (void *)ip6t_get_target(e); + const struct ip6t_standard_target *t + = (void *)ip6t_get_target_c(e); int visited = e->comefrom & (1 << hook); if (e->comefrom & (1 << NF_INET_NUMHOOKS)) { @@ -610,9 +616,9 @@ cleanup_match(struct ip6t_entry_match *m, struct net *net, unsigned int *i) } static int -check_entry(struct ip6t_entry *e, const char *name) +check_entry(const struct ip6t_entry *e, const char *name) { - struct ip6t_entry_target *t; + const struct ip6t_entry_target *t; if (!ip6_checkentry(&e->ipv6)) { duprintf("ip_tables: ip check failed %p %s.\n", e, name); @@ -623,7 +629,7 @@ check_entry(struct ip6t_entry *e, const char *name) e->next_offset) return -EINVAL; - t = ip6t_get_target(e); + t = ip6t_get_target_c(e); if (e->target_offset + t->u.target_size > e->next_offset) return -EINVAL; @@ -750,14 +756,14 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, return ret; } -static bool check_underflow(struct ip6t_entry *e) +static bool check_underflow(const struct ip6t_entry *e) { const struct ip6t_entry_target *t; unsigned int verdict; if (!unconditional(&e->ipv6)) return false; - t = ip6t_get_target(e); + t = ip6t_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) return false; verdict = ((struct ip6t_standard_target *)t)->verdict; @@ -768,8 +774,8 @@ static bool check_underflow(struct ip6t_entry *e) static int check_entry_size_and_hooks(struct ip6t_entry *e, struct xt_table_info *newinfo, - unsigned char *base, - unsigned char *limit, + const unsigned char *base, + const unsigned char *limit, const unsigned int *hook_entries, const unsigned int *underflows, unsigned int valid_hooks, @@ -984,11 +990,11 @@ get_counters(const struct xt_table_info *t, local_bh_enable(); } -static struct xt_counters *alloc_counters(struct xt_table *table) +static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - struct xt_table_info *private = table->private; + const struct xt_table_info *private = table->private; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -1006,11 +1012,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table) static int copy_entries_to_user(unsigned int total_size, - struct xt_table *table, + const struct xt_table *table, void __user *userptr) { unsigned int off, num; - struct ip6t_entry *e; + const struct ip6t_entry *e; struct xt_counters *counters; const struct xt_table_info *private = table->private; int ret = 0; @@ -1062,7 +1068,7 @@ copy_entries_to_user(unsigned int total_size, } } - t = ip6t_get_target(e); + t = ip6t_get_target_c(e); if (copy_to_user(userptr + off + e->target_offset + offsetof(struct ip6t_entry_target, u.user.name), @@ -1098,24 +1104,24 @@ static int compat_standard_to_user(void __user *dst, const void *src) } static inline int -compat_calc_match(struct ip6t_entry_match *m, int *size) +compat_calc_match(const struct ip6t_entry_match *m, int *size) { *size += xt_compat_match_offset(m->u.kernel.match); return 0; } -static int compat_calc_entry(struct ip6t_entry *e, +static int compat_calc_entry(const struct ip6t_entry *e, const struct xt_table_info 
*info, - void *base, struct xt_table_info *newinfo) + const void *base, struct xt_table_info *newinfo) { - struct ip6t_entry_target *t; + const struct ip6t_entry_target *t; unsigned int entry_offset; int off, i, ret; off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); entry_offset = (void *)e - base; IP6T_MATCH_ITERATE(e, compat_calc_match, &off); - t = ip6t_get_target(e); + t = ip6t_get_target_c(e); off += xt_compat_target_offset(t->u.kernel.target); newinfo->size -= off; ret = xt_compat_add_offset(AF_INET6, entry_offset, off); @@ -1151,7 +1157,8 @@ static int compat_table_info(const struct xt_table_info *info, } #endif -static int get_info(struct net *net, void __user *user, int *len, int compat) +static int get_info(struct net *net, void __user *user, + const int *len, int compat) { char name[IP6T_TABLE_MAXNAMELEN]; struct xt_table *t; @@ -1211,7 +1218,8 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) } static int -get_entries(struct net *net, struct ip6t_get_entries __user *uptr, int *len) +get_entries(struct net *net, struct ip6t_get_entries __user *uptr, + const int *len) { int ret; struct ip6t_get_entries get; @@ -1322,7 +1330,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, } static int -do_replace(struct net *net, void __user *user, unsigned int len) +do_replace(struct net *net, const void __user *user, unsigned int len) { int ret; struct ip6t_replace tmp; @@ -1383,7 +1391,7 @@ add_counter_to_entry(struct ip6t_entry *e, } static int -do_add_counters(struct net *net, void __user *user, unsigned int len, +do_add_counters(struct net *net, const void __user *user, unsigned int len, int compat) { unsigned int i, curcpu; @@ -1582,10 +1590,10 @@ static int check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, struct xt_table_info *newinfo, unsigned int *size, - unsigned char *base, - unsigned char *limit, - unsigned int *hook_entries, - unsigned int *underflows, + const unsigned char *base, + const unsigned char *limit, + const unsigned int *hook_entries, + const unsigned int *underflows, unsigned int *i, const char *name) { -- cgit v1.2.2 From 8fea97ec1772bbf553d89187340ef624d548e115 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 15 Feb 2010 17:45:08 +0100 Subject: netfilter: nf_conntrack: pass template to l4proto ->error() handler The error handlers might need the template to get the conntrack zone introduced in the next patches to perform a conntrack lookup. 
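As a rough sketch (not part of this patch itself), this is how the template
flows once the conversion is in place; nf_ct_zone(), NF_CT_DEFAULT_ZONE and
the zone argument to nf_conntrack_find_get() only appear with the follow-up
"conntrack zones" patch:

	/* caller side, nf_conntrack_in(): hand the template to the
	 * per-protocol error handler */
	ret = l4proto->error(net, tmpl, skb, dataoff, &ctinfo,
			     pf, hooknum);

	/* handler side, e.g. icmp_error_message(): derive the zone from
	 * the template, falling back to the default zone when no
	 * template was attached by the CT target */
	u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
	h = nf_conntrack_find_get(net, zone, &innertuple);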
Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 3 ++- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 3 ++- net/netfilter/nf_conntrack_core.c | 3 ++- net/netfilter/nf_conntrack_proto_dccp.c | 5 +++-- net/netfilter/nf_conntrack_proto_tcp.c | 2 +- net/netfilter/nf_conntrack_proto_udp.c | 4 ++-- net/netfilter/nf_conntrack_proto_udplite.c | 2 +- 7 files changed, 13 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 7afd39b5b781..327826a968a8 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -163,7 +163,8 @@ icmp_error_message(struct net *net, struct sk_buff *skb, /* Small and modified version of icmp_rcv */ static int -icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, +icmp_error(struct net *net, struct nf_conn *tmpl, + struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { const struct icmphdr *icmph; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index c7b8bd1d7984..d772dc21857f 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -179,7 +179,8 @@ icmpv6_error_message(struct net *net, } static int -icmpv6_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, +icmpv6_error(struct net *net, struct nf_conn *tmpl, + struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { const struct icmp6hdr *icmp6h; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index bd831410a396..65351ed5d815 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -799,7 +799,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, * inverse of the return code tells to the netfilter * core what to do with the packet. */ if (l4proto->error != NULL) { - ret = l4proto->error(net, skb, dataoff, &ctinfo, pf, hooknum); + ret = l4proto->error(net, tmpl, skb, dataoff, &ctinfo, + pf, hooknum); if (ret <= 0) { NF_CT_STAT_INC_ATOMIC(net, error); NF_CT_STAT_INC_ATOMIC(net, invalid); diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index dd375500dccc..9a2815549375 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -561,8 +561,9 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, return NF_ACCEPT; } -static int dccp_error(struct net *net, struct sk_buff *skb, - unsigned int dataoff, enum ip_conntrack_info *ctinfo, +static int dccp_error(struct net *net, struct nf_conn *tmpl, + struct sk_buff *skb, unsigned int dataoff, + enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { struct dccp_hdr _dh, *dh; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index ad118053971a..9dd8cd4fb6e6 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -760,7 +760,7 @@ static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] = }; /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. 
*/ -static int tcp_error(struct net *net, +static int tcp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 8d38f9a4bed8..8289088b8218 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -91,8 +91,8 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } -static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, +static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, + unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 0b1bc9ba6678..263b5a72588d 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -89,7 +89,7 @@ static bool udplite_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } -static int udplite_error(struct net *net, +static int udplite_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, -- cgit v1.2.2 From 5d0aa2ccd4699a01cfdf14886191c249d7b45a01 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 15 Feb 2010 18:13:33 +0100 Subject: netfilter: nf_conntrack: add support for "conntrack zones" Normally, each connection needs a unique identity. Conntrack zones allow to specify a numerical zone using the CT target, connections in different zones can use the same identity. Example: iptables -t raw -A PREROUTING -i veth0 -j CT --zone 1 iptables -t raw -A OUTPUT -o veth1 -j CT --zone 1 Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 3 +- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 8 +- net/ipv4/netfilter/nf_defrag_ipv4.c | 12 ++- net/ipv4/netfilter/nf_nat_core.c | 24 +++--- net/ipv4/netfilter/nf_nat_pptp.c | 3 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 12 ++- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 8 +- net/netfilter/Kconfig | 13 +++ net/netfilter/nf_conntrack_core.c | 109 +++++++++++++++++++------ net/netfilter/nf_conntrack_expect.c | 21 +++-- net/netfilter/nf_conntrack_h323_main.c | 3 +- net/netfilter/nf_conntrack_netlink.c | 20 ++--- net/netfilter/nf_conntrack_pptp.c | 14 ++-- net/netfilter/nf_conntrack_sip.c | 3 +- net/netfilter/nf_conntrack_standalone.c | 6 ++ net/netfilter/xt_CT.c | 8 +- net/netfilter/xt_connlimit.c | 4 +- 17 files changed, 193 insertions(+), 78 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index d1ea38a7c490..2bb1f87051c4 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -266,7 +267,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) return -EINVAL; } - h = nf_conntrack_find_get(sock_net(sk), &tuple); + h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple); if (h) { struct sockaddr_in sin; struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 327826a968a8..7404bde95994 100644 --- 
a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -18,6 +18,7 @@ #include #include #include +#include #include static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ; @@ -114,13 +115,14 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, /* Returns conntrack if it dealt with ICMP, and filled in skb fields */ static int -icmp_error_message(struct net *net, struct sk_buff *skb, +icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, enum ip_conntrack_info *ctinfo, unsigned int hooknum) { struct nf_conntrack_tuple innertuple, origtuple; const struct nf_conntrack_l4proto *innerproto; const struct nf_conntrack_tuple_hash *h; + u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; NF_CT_ASSERT(skb->nfct == NULL); @@ -146,7 +148,7 @@ icmp_error_message(struct net *net, struct sk_buff *skb, *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(net, &innertuple); + h = nf_conntrack_find_get(net, zone, &innertuple); if (!h) { pr_debug("icmp_error_message: no match\n"); return -NF_ACCEPT; @@ -209,7 +211,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl, icmph->type != ICMP_REDIRECT) return NF_ACCEPT; - return icmp_error_message(net, skb, ctinfo, hooknum); + return icmp_error_message(net, tmpl, skb, ctinfo, hooknum); } #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index f6f46686cbc0..d498a704d456 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -39,15 +40,20 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, struct sk_buff *skb) { + u16 zone = NF_CT_DEFAULT_ZONE; + + if (skb->nfct) + zone = nf_ct_zone((struct nf_conn *)skb->nfct); + #ifdef CONFIG_BRIDGE_NETFILTER if (skb->nf_bridge && skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING) - return IP_DEFRAG_CONNTRACK_BRIDGE_IN; + return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone; #endif if (hooknum == NF_INET_PRE_ROUTING) - return IP_DEFRAG_CONNTRACK_IN; + return IP_DEFRAG_CONNTRACK_IN + zone; else - return IP_DEFRAG_CONNTRACK_OUT; + return IP_DEFRAG_CONNTRACK_OUT + zone; } static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 26066a2327ad..4595281c2863 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -30,6 +30,7 @@ #include #include #include +#include static DEFINE_SPINLOCK(nf_nat_lock); @@ -69,13 +70,14 @@ EXPORT_SYMBOL_GPL(nf_nat_proto_put); /* We keep an extra hash for each conntrack, for fast searching. */ static inline unsigned int -hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple) +hash_by_src(const struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { unsigned int hash; /* Original src, to ensure we map it consistently if poss. 
*/ hash = jhash_3words((__force u32)tuple->src.u3.ip, - (__force u32)tuple->src.u.all, + (__force u32)tuple->src.u.all ^ zone, tuple->dst.protonum, 0); return ((u64)hash * net->ipv4.nat_htable_size) >> 32; } @@ -139,12 +141,12 @@ same_src(const struct nf_conn *ct, /* Only called for SRC manip */ static int -find_appropriate_src(struct net *net, +find_appropriate_src(struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *result, const struct nf_nat_range *range) { - unsigned int h = hash_by_src(net, tuple); + unsigned int h = hash_by_src(net, zone, tuple); const struct nf_conn_nat *nat; const struct nf_conn *ct; const struct hlist_node *n; @@ -152,7 +154,7 @@ find_appropriate_src(struct net *net, rcu_read_lock(); hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) { ct = nat->ct; - if (same_src(ct, tuple)) { + if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) { /* Copy source part from reply tuple. */ nf_ct_invert_tuplepr(result, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); @@ -175,7 +177,7 @@ find_appropriate_src(struct net *net, the ip with the lowest src-ip/dst-ip/proto usage. */ static void -find_best_ips_proto(struct nf_conntrack_tuple *tuple, +find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, const struct nf_conn *ct, enum nf_nat_manip_type maniptype) @@ -209,7 +211,7 @@ find_best_ips_proto(struct nf_conntrack_tuple *tuple, maxip = ntohl(range->max_ip); j = jhash_2words((__force u32)tuple->src.u3.ip, range->flags & IP_NAT_RANGE_PERSISTENT ? - 0 : (__force u32)tuple->dst.u3.ip, 0); + 0 : (__force u32)tuple->dst.u3.ip ^ zone, 0); j = ((u64)j * (maxip - minip + 1)) >> 32; *var_ipp = htonl(minip + j); } @@ -229,6 +231,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, { struct net *net = nf_ct_net(ct); const struct nf_nat_protocol *proto; + u16 zone = nf_ct_zone(ct); /* 1) If this srcip/proto/src-proto-part is currently mapped, and that same mapping gives a unique tuple within the given @@ -239,7 +242,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, manips not an issue. */ if (maniptype == IP_NAT_MANIP_SRC && !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { - if (find_appropriate_src(net, orig_tuple, tuple, range)) { + if (find_appropriate_src(net, zone, orig_tuple, tuple, range)) { pr_debug("get_unique_tuple: Found current src map\n"); if (!nf_nat_used_tuple(tuple, ct)) return; @@ -249,7 +252,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, /* 2) Select the least-used IP/proto combination in the given range. */ *tuple = *orig_tuple; - find_best_ips_proto(tuple, range, ct, maniptype); + find_best_ips_proto(zone, tuple, range, ct, maniptype); /* 3) The per-protocol part of the manip is made to map into the range to make a unique tuple. 
*/ @@ -327,7 +330,8 @@ nf_nat_setup_info(struct nf_conn *ct, if (have_to_hash) { unsigned int srchash; - srchash = hash_by_src(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + srchash = hash_by_src(net, nf_ct_zone(ct), + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); spin_lock_bh(&nf_nat_lock); /* nf_conntrack_alter_reply might re-allocate exntension aera */ nat = nfct_nat(ct); diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index 9eb171056c63..4c060038d29f 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -74,7 +75,7 @@ static void pptp_nat_expected(struct nf_conn *ct, pr_debug("trying to unexpect other dir: "); nf_ct_dump_tuple_ip(&t); - other_exp = nf_ct_expect_find_get(net, &t); + other_exp = nf_ct_expect_find_get(net, nf_ct_zone(ct), &t); if (other_exp) { nf_ct_unexpect_related(other_exp); nf_ct_expect_put(other_exp); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 55ce22e5de49..996c3f41fecd 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -191,15 +192,20 @@ out: static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, struct sk_buff *skb) { + u16 zone = NF_CT_DEFAULT_ZONE; + + if (skb->nfct) + zone = nf_ct_zone((struct nf_conn *)skb->nfct); + #ifdef CONFIG_BRIDGE_NETFILTER if (skb->nf_bridge && skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING) - return IP6_DEFRAG_CONNTRACK_BRIDGE_IN; + return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone; #endif if (hooknum == NF_INET_PRE_ROUTING) - return IP6_DEFRAG_CONNTRACK_IN; + return IP6_DEFRAG_CONNTRACK_IN + zone; else - return IP6_DEFRAG_CONNTRACK_OUT; + return IP6_DEFRAG_CONNTRACK_OUT + zone; } diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index d772dc21857f..9be81776415e 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -128,7 +129,7 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, } static int -icmpv6_error_message(struct net *net, +icmpv6_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int icmp6off, enum ip_conntrack_info *ctinfo, @@ -137,6 +138,7 @@ icmpv6_error_message(struct net *net, struct nf_conntrack_tuple intuple, origtuple; const struct nf_conntrack_tuple_hash *h; const struct nf_conntrack_l4proto *inproto; + u16 zone = tmpl ? 
nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; NF_CT_ASSERT(skb->nfct == NULL); @@ -163,7 +165,7 @@ icmpv6_error_message(struct net *net, *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(net, &intuple); + h = nf_conntrack_find_get(net, zone, &intuple); if (!h) { pr_debug("icmpv6_error: no match\n"); return -NF_ACCEPT; @@ -216,7 +218,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, if (icmp6h->icmp6_type >= 128) return NF_ACCEPT; - return icmpv6_error_message(net, skb, dataoff, ctinfo, hooknum); + return icmpv6_error_message(net, tmpl, skb, dataoff, ctinfo, hooknum); } #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 4469d45261f4..18d77b5c351a 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -83,6 +83,19 @@ config NF_CONNTRACK_SECMARK If unsure, say 'N'. +config NF_CONNTRACK_ZONES + bool 'Connection tracking zones' + depends on NETFILTER_ADVANCED + depends on NETFILTER_XT_TARGET_CT + help + This option enables support for connection tracking zones. + Normally, each connection needs to have a unique system wide + identity. Connection tracking zones allow to have multiple + connections using the same identity, as long as they are + contained in different zones. + + If unsure, say `N'. + config NF_CONNTRACK_EVENTS bool "Connection tracking events" depends on NETFILTER_ADVANCED diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 65351ed5d815..0c9bbe93cc16 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -68,7 +69,7 @@ static int nf_conntrack_hash_rnd_initted; static unsigned int nf_conntrack_hash_rnd; static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, - unsigned int size, unsigned int rnd) + u16 zone, unsigned int size, unsigned int rnd) { unsigned int n; u_int32_t h; @@ -79,16 +80,16 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, */ n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32); h = jhash2((u32 *)tuple, n, - rnd ^ (((__force __u16)tuple->dst.u.all << 16) | - tuple->dst.protonum)); + zone ^ rnd ^ (((__force __u16)tuple->dst.u.all << 16) | + tuple->dst.protonum)); return ((u64)h * size) >> 32; } -static inline u_int32_t hash_conntrack(const struct net *net, +static inline u_int32_t hash_conntrack(const struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple) { - return __hash_conntrack(tuple, net->ct.htable_size, + return __hash_conntrack(tuple, zone, net->ct.htable_size, nf_conntrack_hash_rnd); } @@ -292,11 +293,12 @@ static void death_by_timeout(unsigned long ul_conntrack) * - Caller must lock nf_conntrack_lock before calling this function */ struct nf_conntrack_tuple_hash * -__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple) +__nf_conntrack_find(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - unsigned int hash = hash_conntrack(net, tuple); + unsigned int hash = hash_conntrack(net, zone, tuple); /* Disable BHs the entire time since we normally need to disable them * at least once for the stats anyway. 
@@ -304,7 +306,8 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple) local_bh_disable(); begin: hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) { - if (nf_ct_tuple_equal(tuple, &h->tuple)) { + if (nf_ct_tuple_equal(tuple, &h->tuple) && + nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) { NF_CT_STAT_INC(net, found); local_bh_enable(); return h; @@ -326,21 +329,23 @@ EXPORT_SYMBOL_GPL(__nf_conntrack_find); /* Find a connection corresponding to a tuple. */ struct nf_conntrack_tuple_hash * -nf_conntrack_find_get(struct net *net, const struct nf_conntrack_tuple *tuple) +nf_conntrack_find_get(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; rcu_read_lock(); begin: - h = __nf_conntrack_find(net, tuple); + h = __nf_conntrack_find(net, zone, tuple); if (h) { ct = nf_ct_tuplehash_to_ctrack(h); if (unlikely(nf_ct_is_dying(ct) || !atomic_inc_not_zero(&ct->ct_general.use))) h = NULL; else { - if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple))) { + if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple) || + nf_ct_zone(ct) != zone)) { nf_ct_put(ct); goto begin; } @@ -368,9 +373,11 @@ void nf_conntrack_hash_insert(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); unsigned int hash, repl_hash; + u16 zone; - hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + zone = nf_ct_zone(ct); + hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); __nf_conntrack_hash_insert(ct, hash, repl_hash); } @@ -387,6 +394,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) struct hlist_nulls_node *n; enum ip_conntrack_info ctinfo; struct net *net; + u16 zone; ct = nf_ct_get(skb, &ctinfo); net = nf_ct_net(ct); @@ -398,8 +406,9 @@ __nf_conntrack_confirm(struct sk_buff *skb) if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) return NF_ACCEPT; - hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + zone = nf_ct_zone(ct); + hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); /* We're not in hash table, and we refuse to set up related connections for unconfirmed conns. But packet copies and @@ -418,11 +427,13 @@ __nf_conntrack_confirm(struct sk_buff *skb) not in the hash. If there is, we lost race. */ hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, - &h->tuple)) + &h->tuple) && + zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) goto out; hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, - &h->tuple)) + &h->tuple) && + zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) goto out; /* Remove from unconfirmed list */ @@ -469,15 +480,19 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, struct net *net = nf_ct_net(ignored_conntrack); struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - unsigned int hash = hash_conntrack(net, tuple); + struct nf_conn *ct; + u16 zone = nf_ct_zone(ignored_conntrack); + unsigned int hash = hash_conntrack(net, zone, tuple); /* Disable BHs the entire time since we need to disable them at * least once for the stats anyway. 
*/ rcu_read_lock_bh(); hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) { - if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack && - nf_ct_tuple_equal(tuple, &h->tuple)) { + ct = nf_ct_tuplehash_to_ctrack(h); + if (ct != ignored_conntrack && + nf_ct_tuple_equal(tuple, &h->tuple) && + nf_ct_zone(ct) == zone) { NF_CT_STAT_INC(net, found); rcu_read_unlock_bh(); return 1; @@ -540,7 +555,7 @@ static noinline int early_drop(struct net *net, unsigned int hash) return dropped; } -struct nf_conn *nf_conntrack_alloc(struct net *net, +struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone, const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, gfp_t gfp) @@ -558,7 +573,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, if (nf_conntrack_max && unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { - unsigned int hash = hash_conntrack(net, orig); + unsigned int hash = hash_conntrack(net, zone, orig); if (!early_drop(net, hash)) { atomic_dec(&net->ct.count); if (net_ratelimit()) @@ -595,13 +610,28 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, #ifdef CONFIG_NET_NS ct->ct_net = net; #endif - +#ifdef CONFIG_NF_CONNTRACK_ZONES + if (zone) { + struct nf_conntrack_zone *nf_ct_zone; + + nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, GFP_ATOMIC); + if (!nf_ct_zone) + goto out_free; + nf_ct_zone->id = zone; + } +#endif /* * changes to lookup keys must be done before setting refcnt to 1 */ smp_wmb(); atomic_set(&ct->ct_general.use, 1); return ct; + +#ifdef CONFIG_NF_CONNTRACK_ZONES +out_free: + kmem_cache_free(net->ct.nf_conntrack_cachep, ct); + return ERR_PTR(-ENOMEM); +#endif } EXPORT_SYMBOL_GPL(nf_conntrack_alloc); @@ -631,13 +661,14 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, struct nf_conntrack_tuple repl_tuple; struct nf_conntrack_ecache *ecache; struct nf_conntrack_expect *exp; + u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) { pr_debug("Can't invert tuple.\n"); return NULL; } - ct = nf_conntrack_alloc(net, tuple, &repl_tuple, GFP_ATOMIC); + ct = nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC); if (IS_ERR(ct)) { pr_debug("Can't allocate conntrack.\n"); return (struct nf_conntrack_tuple_hash *)ct; @@ -657,7 +688,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, GFP_ATOMIC); spin_lock_bh(&nf_conntrack_lock); - exp = nf_ct_find_expectation(net, tuple); + exp = nf_ct_find_expectation(net, zone, tuple); if (exp) { pr_debug("conntrack: expectation arrives ct=%p exp=%p\n", ct, exp); @@ -713,6 +744,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; + u16 zone = tmpl ? 
nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, protonum, &tuple, l3proto, @@ -722,7 +754,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, } /* look for tuple match */ - h = nf_conntrack_find_get(net, &tuple); + h = nf_conntrack_find_get(net, zone, &tuple); if (!h) { h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, skb, dataoff); @@ -958,6 +990,14 @@ bool __nf_ct_kill_acct(struct nf_conn *ct, } EXPORT_SYMBOL_GPL(__nf_ct_kill_acct); +#ifdef CONFIG_NF_CONNTRACK_ZONES +static struct nf_ct_ext_type nf_ct_zone_extend __read_mostly = { + .len = sizeof(struct nf_conntrack_zone), + .align = __alignof__(struct nf_conntrack_zone), + .id = NF_CT_EXT_ZONE, +}; +#endif + #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) #include @@ -1139,6 +1179,9 @@ static void nf_conntrack_cleanup_init_net(void) nf_conntrack_helper_fini(); nf_conntrack_proto_fini(); +#ifdef CONFIG_NF_CONNTRACK_ZONES + nf_ct_extend_unregister(&nf_ct_zone_extend); +#endif } static void nf_conntrack_cleanup_net(struct net *net) @@ -1214,6 +1257,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) unsigned int hashsize, old_size; struct hlist_nulls_head *hash, *old_hash; struct nf_conntrack_tuple_hash *h; + struct nf_conn *ct; if (current->nsproxy->net_ns != &init_net) return -EOPNOTSUPP; @@ -1240,8 +1284,10 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) while (!hlist_nulls_empty(&init_net.ct.hash[i])) { h = hlist_nulls_entry(init_net.ct.hash[i].first, struct nf_conntrack_tuple_hash, hnnode); + ct = nf_ct_tuplehash_to_ctrack(h); hlist_nulls_del_rcu(&h->hnnode); - bucket = __hash_conntrack(&h->tuple, hashsize, + bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct), + hashsize, nf_conntrack_hash_rnd); hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]); } @@ -1299,6 +1345,11 @@ static int nf_conntrack_init_init_net(void) if (ret < 0) goto err_helper; +#ifdef CONFIG_NF_CONNTRACK_ZONES + ret = nf_ct_extend_register(&nf_ct_zone_extend); + if (ret < 0) + goto err_extend; +#endif /* Set up fake conntrack: to never be deleted, not in any hashes */ #ifdef CONFIG_NET_NS nf_conntrack_untracked.ct_net = &init_net; @@ -1309,6 +1360,10 @@ static int nf_conntrack_init_init_net(void) return 0; +#ifdef CONFIG_NF_CONNTRACK_ZONES +err_extend: + nf_conntrack_helper_fini(); +#endif err_helper: nf_conntrack_proto_fini(); err_proto: diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 6182fb1b55de..acb29ccaa41f 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -27,6 +27,7 @@ #include #include #include +#include unsigned int nf_ct_expect_hsize __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_expect_hsize); @@ -84,7 +85,8 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple } struct nf_conntrack_expect * -__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple) +__nf_ct_expect_find(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; struct hlist_node *n; @@ -95,7 +97,8 @@ __nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple) h = nf_ct_expect_dst_hash(tuple); hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) { - if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) + if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && + nf_ct_zone(i->master) == zone) return i; } return NULL; @@ -104,12 +107,13 @@ 
EXPORT_SYMBOL_GPL(__nf_ct_expect_find); /* Just find a expectation corresponding to a tuple. */ struct nf_conntrack_expect * -nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple) +nf_ct_expect_find_get(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; rcu_read_lock(); - i = __nf_ct_expect_find(net, tuple); + i = __nf_ct_expect_find(net, zone, tuple); if (i && !atomic_inc_not_zero(&i->use)) i = NULL; rcu_read_unlock(); @@ -121,7 +125,8 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get); /* If an expectation for this connection is found, it gets delete from * global list then returned. */ struct nf_conntrack_expect * -nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple) +nf_ct_find_expectation(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i, *exp = NULL; struct hlist_node *n; @@ -133,7 +138,8 @@ nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple) h = nf_ct_expect_dst_hash(tuple); hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) { if (!(i->flags & NF_CT_EXPECT_INACTIVE) && - nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) { + nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && + nf_ct_zone(i->master) == zone) { exp = i; break; } @@ -204,7 +210,8 @@ static inline int expect_matches(const struct nf_conntrack_expect *a, { return a->master == b->master && a->class == b->class && nf_ct_tuple_equal(&a->tuple, &b->tuple) && - nf_ct_tuple_mask_equal(&a->mask, &b->mask); + nf_ct_tuple_mask_equal(&a->mask, &b->mask) && + nf_ct_zone(a->master) == nf_ct_zone(b->master); } /* Generally a bad idea to call this: could have matched already. */ diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 66369490230e..a1c8dd917e12 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -29,6 +29,7 @@ #include #include #include +#include #include /* Parameters */ @@ -1216,7 +1217,7 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct, tuple.dst.u.tcp.port = port; tuple.dst.protonum = IPPROTO_TCP; - exp = __nf_ct_expect_find(net, &tuple); + exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple); if (exp && exp->master == ct) return exp; return NULL; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index db35edac307b..51089cfe1167 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -811,7 +811,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - h = nf_conntrack_find_get(net, &tuple); + h = nf_conntrack_find_get(net, 0, &tuple); if (!h) return -ENOENT; @@ -872,7 +872,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - h = nf_conntrack_find_get(net, &tuple); + h = nf_conntrack_find_get(net, 0, &tuple); if (!h) return -ENOENT; @@ -1221,7 +1221,7 @@ ctnetlink_create_conntrack(struct net *net, int err = -EINVAL; struct nf_conntrack_helper *helper; - ct = nf_conntrack_alloc(net, otuple, rtuple, GFP_ATOMIC); + ct = nf_conntrack_alloc(net, 0, otuple, rtuple, GFP_ATOMIC); if (IS_ERR(ct)) return ERR_PTR(-ENOMEM); @@ -1325,7 +1325,7 @@ ctnetlink_create_conntrack(struct net *net, if (err < 0) goto err2; - master_h = nf_conntrack_find_get(net, &master); + master_h = nf_conntrack_find_get(net, 0, &master); if (master_h == NULL) { err = -ENOENT; goto err2; @@ -1374,9 +1374,9 
@@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, spin_lock_bh(&nf_conntrack_lock); if (cda[CTA_TUPLE_ORIG]) - h = __nf_conntrack_find(net, &otuple); + h = __nf_conntrack_find(net, 0, &otuple); else if (cda[CTA_TUPLE_REPLY]) - h = __nf_conntrack_find(net, &rtuple); + h = __nf_conntrack_find(net, 0, &rtuple); if (h == NULL) { err = -ENOENT; @@ -1714,7 +1714,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - exp = nf_ct_expect_find_get(net, &tuple); + exp = nf_ct_expect_find_get(net, 0, &tuple); if (!exp) return -ENOENT; @@ -1770,7 +1770,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, return err; /* bump usage count to 2 */ - exp = nf_ct_expect_find_get(net, &tuple); + exp = nf_ct_expect_find_get(net, 0, &tuple); if (!exp) return -ENOENT; @@ -1855,7 +1855,7 @@ ctnetlink_create_expect(struct net *net, const struct nlattr * const cda[], return err; /* Look for master conntrack of this expectation */ - h = nf_conntrack_find_get(net, &master_tuple); + h = nf_conntrack_find_get(net, 0, &master_tuple); if (!h) return -ENOENT; ct = nf_ct_tuplehash_to_ctrack(h); @@ -1912,7 +1912,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, return err; spin_lock_bh(&nf_conntrack_lock); - exp = __nf_ct_expect_find(net, &tuple); + exp = __nf_ct_expect_find(net, 0, &tuple); if (!exp) { spin_unlock_bh(&nf_conntrack_lock); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 3807ac7faf4c..088944824e13 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -123,7 +124,7 @@ static void pptp_expectfn(struct nf_conn *ct, pr_debug("trying to unexpect other dir: "); nf_ct_dump_tuple(&inv_t); - exp_other = nf_ct_expect_find_get(net, &inv_t); + exp_other = nf_ct_expect_find_get(net, nf_ct_zone(ct), &inv_t); if (exp_other) { /* delete other expectation. 
*/ pr_debug("found\n"); @@ -136,17 +137,18 @@ static void pptp_expectfn(struct nf_conn *ct, rcu_read_unlock(); } -static int destroy_sibling_or_exp(struct net *net, +static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct, const struct nf_conntrack_tuple *t) { const struct nf_conntrack_tuple_hash *h; struct nf_conntrack_expect *exp; struct nf_conn *sibling; + u16 zone = nf_ct_zone(ct); pr_debug("trying to timeout ct or exp for tuple "); nf_ct_dump_tuple(t); - h = nf_conntrack_find_get(net, t); + h = nf_conntrack_find_get(net, zone, t); if (h) { sibling = nf_ct_tuplehash_to_ctrack(h); pr_debug("setting timeout of conntrack %p to 0\n", sibling); @@ -157,7 +159,7 @@ static int destroy_sibling_or_exp(struct net *net, nf_ct_put(sibling); return 1; } else { - exp = nf_ct_expect_find_get(net, t); + exp = nf_ct_expect_find_get(net, zone, t); if (exp) { pr_debug("unexpect_related of expect %p\n", exp); nf_ct_unexpect_related(exp); @@ -182,7 +184,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct) t.dst.protonum = IPPROTO_GRE; t.src.u.gre.key = help->help.ct_pptp_info.pns_call_id; t.dst.u.gre.key = help->help.ct_pptp_info.pac_call_id; - if (!destroy_sibling_or_exp(net, &t)) + if (!destroy_sibling_or_exp(net, ct, &t)) pr_debug("failed to timeout original pns->pac ct/exp\n"); /* try reply (pac->pns) tuple */ @@ -190,7 +192,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct) t.dst.protonum = IPPROTO_GRE; t.src.u.gre.key = help->help.ct_pptp_info.pac_call_id; t.dst.u.gre.key = help->help.ct_pptp_info.pns_call_id; - if (!destroy_sibling_or_exp(net, &t)) + if (!destroy_sibling_or_exp(net, ct, &t)) pr_debug("failed to timeout reply pac->pns ct/exp\n"); } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index fbe8ff5a420a..8dd75d90efc0 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -23,6 +23,7 @@ #include #include #include +#include #include MODULE_LICENSE("GPL"); @@ -836,7 +837,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int dataoff, rcu_read_lock(); do { - exp = __nf_ct_expect_find(net, &tuple); + exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple); if (!exp || exp->master == ct || nfct_help(exp->master)->helper != nfct_help(ct)->helper || diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index e310f1561bb2..24a42efe62ef 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -26,6 +26,7 @@ #include #include #include +#include MODULE_LICENSE("GPL"); @@ -171,6 +172,11 @@ static int ct_seq_show(struct seq_file *s, void *v) goto release; #endif +#ifdef CONFIG_NF_CONNTRACK_ZONES + if (seq_printf(s, "zone=%u ", nf_ct_zone(ct))) + goto release; +#endif + if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) goto release; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 8183a054256f..61c50fa84703 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -16,6 +16,7 @@ #include #include #include +#include static unsigned int xt_ct_target(struct sk_buff *skb, const struct xt_target_param *par) @@ -69,11 +70,16 @@ static bool xt_ct_tg_check(const struct xt_tgchk_param *par) goto out; } +#ifndef CONFIG_NF_CONNTRACK_ZONES + if (info->zone) + goto err1; +#endif + if (nf_ct_l3proto_try_module_get(par->family) < 0) goto err1; memset(&t, 0, sizeof(t)); - ct = nf_conntrack_alloc(par->net, &t, &t, GFP_KERNEL); + ct = nf_conntrack_alloc(par->net, info->zone, &t, &t, 
GFP_KERNEL); if (IS_ERR(ct)) goto err2; diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 0d9d18ea2b09..26997ce90e48 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -28,6 +28,7 @@ #include #include #include +#include /* we will save the tuples of all connections we care about */ struct xt_connlimit_conn { @@ -114,7 +115,8 @@ static int count_them(struct net *net, /* check the saved connections */ list_for_each_entry_safe(conn, tmp, hash, list) { - found = nf_conntrack_find_get(net, &conn->tuple); + found = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE, + &conn->tuple); found_ct = NULL; if (found != NULL) -- cgit v1.2.2 From ef00f89f1eb7e056aab9dfe068521e6f2320c94a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 15 Feb 2010 18:14:57 +0100 Subject: netfilter: ctnetlink: add zone support Parse and dump the conntrack zone in ctnetlink. Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_netlink.c | 92 ++++++++++++++++++++++++++++-------- 1 file changed, 73 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 51089cfe1167..8b05f364b2f2 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -39,6 +39,7 @@ #include #include #include +#include #ifdef CONFIG_NF_NAT_NEEDED #include #include @@ -379,6 +380,9 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, goto nla_put_failure; nla_nest_end(skb, nest_parms); + if (nf_ct_zone(ct)) + NLA_PUT_BE16(skb, CTA_ZONE, htons(nf_ct_zone(ct))); + if (ctnetlink_dump_status(skb, ct) < 0 || ctnetlink_dump_timeout(skb, ct) < 0 || ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || @@ -517,6 +521,9 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) goto nla_put_failure; nla_nest_end(skb, nest_parms); + if (nf_ct_zone(ct)) + NLA_PUT_BE16(skb, CTA_ZONE, htons(nf_ct_zone(ct))); + if (ctnetlink_dump_id(skb, ct) < 0) goto nla_put_failure; @@ -750,6 +757,21 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[], return 0; } +static int +ctnetlink_parse_zone(const struct nlattr *attr, u16 *zone) +{ + if (attr) +#ifdef CONFIG_NF_CONNTRACK_ZONES + *zone = ntohs(nla_get_be16(attr)); +#else + return -EOPNOTSUPP; +#endif + else + *zone = 0; + + return 0; +} + static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = { [CTA_HELP_NAME] = { .type = NLA_NUL_STRING }, }; @@ -781,6 +803,7 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { [CTA_ID] = { .type = NLA_U32 }, [CTA_NAT_DST] = { .type = NLA_NESTED }, [CTA_TUPLE_MASTER] = { .type = NLA_NESTED }, + [CTA_ZONE] = { .type = NLA_U16 }, }; static int @@ -794,7 +817,12 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, struct nf_conn *ct; struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; - int err = 0; + u16 zone; + int err; + + err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone); + if (err < 0) + return err; if (cda[CTA_TUPLE_ORIG]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3); @@ -811,7 +839,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - h = nf_conntrack_find_get(net, 0, &tuple); + h = nf_conntrack_find_get(net, zone, &tuple); if (!h) return -ENOENT; @@ -856,12 +884,17 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, struct sk_buff *skb2 = NULL; struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; - int err = 
0; + u16 zone; + int err; if (nlh->nlmsg_flags & NLM_F_DUMP) return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table, ctnetlink_done); + err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone); + if (err < 0) + return err; + if (cda[CTA_TUPLE_ORIG]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3); else if (cda[CTA_TUPLE_REPLY]) @@ -872,7 +905,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - h = nf_conntrack_find_get(net, 0, &tuple); + h = nf_conntrack_find_get(net, zone, &tuple); if (!h) return -ENOENT; @@ -1211,7 +1244,7 @@ ctnetlink_change_conntrack(struct nf_conn *ct, } static struct nf_conn * -ctnetlink_create_conntrack(struct net *net, +ctnetlink_create_conntrack(struct net *net, u16 zone, const struct nlattr * const cda[], struct nf_conntrack_tuple *otuple, struct nf_conntrack_tuple *rtuple, @@ -1221,7 +1254,7 @@ ctnetlink_create_conntrack(struct net *net, int err = -EINVAL; struct nf_conntrack_helper *helper; - ct = nf_conntrack_alloc(net, 0, otuple, rtuple, GFP_ATOMIC); + ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC); if (IS_ERR(ct)) return ERR_PTR(-ENOMEM); @@ -1325,7 +1358,7 @@ ctnetlink_create_conntrack(struct net *net, if (err < 0) goto err2; - master_h = nf_conntrack_find_get(net, 0, &master); + master_h = nf_conntrack_find_get(net, zone, &master); if (master_h == NULL) { err = -ENOENT; goto err2; @@ -1358,7 +1391,12 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, struct nf_conntrack_tuple_hash *h = NULL; struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; - int err = 0; + u16 zone; + int err; + + err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone); + if (err < 0) + return err; if (cda[CTA_TUPLE_ORIG]) { err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG, u3); @@ -1374,9 +1412,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, spin_lock_bh(&nf_conntrack_lock); if (cda[CTA_TUPLE_ORIG]) - h = __nf_conntrack_find(net, 0, &otuple); + h = __nf_conntrack_find(net, zone, &otuple); else if (cda[CTA_TUPLE_REPLY]) - h = __nf_conntrack_find(net, 0, &rtuple); + h = __nf_conntrack_find(net, zone, &rtuple); if (h == NULL) { err = -ENOENT; @@ -1384,7 +1422,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, struct nf_conn *ct; enum ip_conntrack_events events; - ct = ctnetlink_create_conntrack(net, cda, &otuple, + ct = ctnetlink_create_conntrack(net, zone, cda, &otuple, &rtuple, u3); if (IS_ERR(ct)) { err = PTR_ERR(ct); @@ -1698,7 +1736,8 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, struct sk_buff *skb2; struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; - int err = 0; + u16 zone; + int err; if (nlh->nlmsg_flags & NLM_F_DUMP) { return netlink_dump_start(ctnl, skb, nlh, @@ -1706,6 +1745,10 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, ctnetlink_exp_done); } + err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); + if (err < 0) + return err; + if (cda[CTA_EXPECT_MASTER]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3); else @@ -1714,7 +1757,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - exp = nf_ct_expect_find_get(net, 0, &tuple); + exp = nf_ct_expect_find_get(net, zone, &tuple); if (!exp) return -ENOENT; @@ -1761,16 +1804,21 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, struct hlist_node *n, *next; u_int8_t u3 = nfmsg->nfgen_family; unsigned int i; + u16 zone; int err; if 
(cda[CTA_EXPECT_TUPLE]) { /* delete a single expect by tuple */ + err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); + if (err < 0) + return err; + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); if (err < 0) return err; /* bump usage count to 2 */ - exp = nf_ct_expect_find_get(net, 0, &tuple); + exp = nf_ct_expect_find_get(net, zone, &tuple); if (!exp) return -ENOENT; @@ -1832,7 +1880,8 @@ ctnetlink_change_expect(struct nf_conntrack_expect *x, } static int -ctnetlink_create_expect(struct net *net, const struct nlattr * const cda[], +ctnetlink_create_expect(struct net *net, u16 zone, + const struct nlattr * const cda[], u_int8_t u3, u32 pid, int report) { @@ -1855,7 +1904,7 @@ ctnetlink_create_expect(struct net *net, const struct nlattr * const cda[], return err; /* Look for master conntrack of this expectation */ - h = nf_conntrack_find_get(net, 0, &master_tuple); + h = nf_conntrack_find_get(net, zone, &master_tuple); if (!h) return -ENOENT; ct = nf_ct_tuplehash_to_ctrack(h); @@ -1900,25 +1949,30 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, struct nf_conntrack_expect *exp; struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; - int err = 0; + u16 zone; + int err; if (!cda[CTA_EXPECT_TUPLE] || !cda[CTA_EXPECT_MASK] || !cda[CTA_EXPECT_MASTER]) return -EINVAL; + err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); + if (err < 0) + return err; + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); if (err < 0) return err; spin_lock_bh(&nf_conntrack_lock); - exp = __nf_ct_expect_find(net, 0, &tuple); + exp = __nf_ct_expect_find(net, zone, &tuple); if (!exp) { spin_unlock_bh(&nf_conntrack_lock); err = -ENOENT; if (nlh->nlmsg_flags & NLM_F_CREATE) { - err = ctnetlink_create_expect(net, cda, + err = ctnetlink_create_expect(net, zone, cda, u3, NETLINK_CB(skb).pid, nlmsg_report(nlh)); -- cgit v1.2.2 From 1756de262e41112a8a8927808eb2f03d21fd4786 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 15 Feb 2010 18:15:55 +0100 Subject: netfilter: ebtables: abort if next_offset is too small next_offset must be > 0, otherwise this loops forever. The offset also contains the size of the ebt_entry structure itself, so anything smaller is invalid. Signed-off-by: Florian Westphal Signed-off-by: Patrick McHardy --- net/bridge/netfilter/ebtables.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index bcdf02d866b8..4370e9680487 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -444,6 +444,8 @@ static int ebt_verify_pointers(const struct ebt_replace *repl, break; if (left < e->next_offset) break; + if (e->next_offset < sizeof(struct ebt_entry)) + return -EINVAL; offset += e->next_offset; } } -- cgit v1.2.2 From fc0e3df4f00a5f62c2f2fce84bf496136b58c474 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 15 Feb 2010 18:16:26 +0100 Subject: netfilter: ebtables: avoid explicit XT_ALIGN() in match/targets This will cause trouble once CONFIG_COMPAT support is added to ebtables. xt_compat_*_offset() calculate the kernel/userland structure size delta using: XT_ALIGN(size) - COMPAT_XT_ALIGN(size) If the match/target sizes are aligned at registration time, delta is always zero. Should have zero effect for existing systems: xtables uses XT_ALIGN() whenever it deals with match/target sizes. 
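As a rough standalone illustration of that delta computation, the sketch below uses simplified stand-ins for XT_ALIGN()/COMPAT_XT_ALIGN() (8-byte native alignment for a 64-bit kernel, 4-byte compat alignment for 32-bit userland, and a hypothetical 10-byte match info size are assumptions for the example; the kernel's actual macros derive the alignment from reference structures):

#include <stdio.h>

#define ALIGN_TO(x, a)      (((x) + (a) - 1) & ~((a) - 1))
#define XT_ALIGN(x)         ALIGN_TO(x, 8)	/* assumed native (64-bit) alignment */
#define COMPAT_XT_ALIGN(x)  ALIGN_TO(x, 4)	/* assumed 32-bit compat alignment */

int main(void)
{
	unsigned int size = 10;			/* hypothetical match info size */
	unsigned int prealigned = XT_ALIGN(size);

	/* raw size registered: the delta reflects the real padding difference */
	printf("raw size:    delta = %u\n",
	       XT_ALIGN(size) - COMPAT_XT_ALIGN(size));		/* 16 - 12 = 4 */

	/* size already aligned at registration: an 8-byte-aligned value is
	 * unchanged by 4-byte alignment, so the computed delta is always 0 */
	printf("pre-aligned: delta = %u\n",
	       XT_ALIGN(prealigned) - COMPAT_XT_ALIGN(prealigned));	/* 16 - 16 = 0 */

	return 0;
}

Registering the unaligned structure sizes keeps the first case, which is what the compat conversion code will need once ebtables gains CONFIG_COMPAT support.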
Signed-off-by: Florian Westphal Signed-off-by: Patrick McHardy --- net/bridge/netfilter/ebt_802_3.c | 2 +- net/bridge/netfilter/ebt_arp.c | 2 +- net/bridge/netfilter/ebt_arpreply.c | 2 +- net/bridge/netfilter/ebt_dnat.c | 2 +- net/bridge/netfilter/ebt_ip.c | 2 +- net/bridge/netfilter/ebt_ip6.c | 2 +- net/bridge/netfilter/ebt_limit.c | 2 +- net/bridge/netfilter/ebt_log.c | 2 +- net/bridge/netfilter/ebt_mark.c | 2 +- net/bridge/netfilter/ebt_mark_m.c | 2 +- net/bridge/netfilter/ebt_nflog.c | 2 +- net/bridge/netfilter/ebt_pkttype.c | 2 +- net/bridge/netfilter/ebt_redirect.c | 2 +- net/bridge/netfilter/ebt_snat.c | 2 +- net/bridge/netfilter/ebt_stp.c | 2 +- net/bridge/netfilter/ebt_ulog.c | 2 +- net/bridge/netfilter/ebt_vlan.c | 2 +- 17 files changed, 17 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c index bd91dc58d49b..5d1176758ca5 100644 --- a/net/bridge/netfilter/ebt_802_3.c +++ b/net/bridge/netfilter/ebt_802_3.c @@ -52,7 +52,7 @@ static struct xt_match ebt_802_3_mt_reg __read_mostly = { .family = NFPROTO_BRIDGE, .match = ebt_802_3_mt, .checkentry = ebt_802_3_mt_check, - .matchsize = XT_ALIGN(sizeof(struct ebt_802_3_info)), + .matchsize = sizeof(struct ebt_802_3_info), .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c index b7ad60419f9a..e727697c5847 100644 --- a/net/bridge/netfilter/ebt_arp.c +++ b/net/bridge/netfilter/ebt_arp.c @@ -120,7 +120,7 @@ static struct xt_match ebt_arp_mt_reg __read_mostly = { .family = NFPROTO_BRIDGE, .match = ebt_arp_mt, .checkentry = ebt_arp_mt_check, - .matchsize = XT_ALIGN(sizeof(struct ebt_arp_info)), + .matchsize = sizeof(struct ebt_arp_info), .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c index 76584cd72e57..f392e9d93f53 100644 --- a/net/bridge/netfilter/ebt_arpreply.c +++ b/net/bridge/netfilter/ebt_arpreply.c @@ -78,7 +78,7 @@ static struct xt_target ebt_arpreply_tg_reg __read_mostly = { .hooks = (1 << NF_BR_NUMHOOKS) | (1 << NF_BR_PRE_ROUTING), .target = ebt_arpreply_tg, .checkentry = ebt_arpreply_tg_check, - .targetsize = XT_ALIGN(sizeof(struct ebt_arpreply_info)), + .targetsize = sizeof(struct ebt_arpreply_info), .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c index 6b49ea9e31fb..2bb40d728a35 100644 --- a/net/bridge/netfilter/ebt_dnat.c +++ b/net/bridge/netfilter/ebt_dnat.c @@ -54,7 +54,7 @@ static struct xt_target ebt_dnat_tg_reg __read_mostly = { (1 << NF_BR_LOCAL_OUT) | (1 << NF_BR_BROUTING), .target = ebt_dnat_tg, .checkentry = ebt_dnat_tg_check, - .targetsize = XT_ALIGN(sizeof(struct ebt_nat_info)), + .targetsize = sizeof(struct ebt_nat_info), .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c index d771bbfbcbe6..5de6df6f86b8 100644 --- a/net/bridge/netfilter/ebt_ip.c +++ b/net/bridge/netfilter/ebt_ip.c @@ -110,7 +110,7 @@ static struct xt_match ebt_ip_mt_reg __read_mostly = { .family = NFPROTO_BRIDGE, .match = ebt_ip_mt, .checkentry = ebt_ip_mt_check, - .matchsize = XT_ALIGN(sizeof(struct ebt_ip_info)), + .matchsize = sizeof(struct ebt_ip_info), .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c index 784a6573876c..bbf2534ef026 100644 --- a/net/bridge/netfilter/ebt_ip6.c +++ b/net/bridge/netfilter/ebt_ip6.c @@ -122,7 +122,7 @@ static struct xt_match ebt_ip6_mt_reg __read_mostly = { .family = NFPROTO_BRIDGE, 
.match = ebt_ip6_mt, .checkentry = ebt_ip6_mt_check, - .matchsize = XT_ALIGN(sizeof(struct ebt_ip6_info)), + .matchsize = sizeof(struct ebt_ip6_info), .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c index f7bd9192ff0c..9dd16e6b10e7 100644 --- a/net/bridge/netfilter/ebt_limit.c +++ b/net/bridge/netfilter/ebt_limit.c @@ -90,7 +90,7 @@ static struct xt_match ebt_limit_mt_reg __read_mostly = { .family = NFPROTO_BRIDGE, .match = ebt_limit_mt, .checkentry = ebt_limit_mt_check, - .matchsize = XT_ALIGN(sizeof(struct ebt_limit_info)), + .matchsize = sizeof(struct ebt_limit_info), .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index e4ea3fdd1d41..e873924ddb5d 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -195,7 +195,7 @@ static struct xt_target ebt_log_tg_reg __read_mostly = { .family = NFPROTO_BRIDGE, .target = ebt_log_tg, .checkentry = ebt_log_tg_check, - .targetsize = XT_ALIGN(sizeof(struct ebt_log_info)), + .targetsize = sizeof(struct ebt_log_info), .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c index 2fee7e8e2e93..153e167374a2 100644 --- a/net/bridge/netfilter/ebt_mark.c +++ b/net/bridge/netfilter/ebt_mark.c @@ -59,7 +59,7 @@ static struct xt_target ebt_mark_tg_reg __read_mostly = { .family = NFPROTO_BRIDGE, .target = ebt_mark_tg, .checkentry = ebt_mark_tg_check, - .targetsize = XT_ALIGN(sizeof(struct ebt_mark_t_info)), + .targetsize = sizeof(struct ebt_mark_t_info), .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c index ea570f214b1d..89abf4030399 100644 --- a/net/bridge/netfilter/ebt_mark_m.c +++ b/net/bridge/netfilter/ebt_mark_m.c @@ -41,7 +41,7 @@ static struct xt_match ebt_mark_mt_reg __read_mostly = { .family = NFPROTO_BRIDGE, .match = ebt_mark_mt, .checkentry = ebt_mark_mt_check, - .matchsize = XT_ALIGN(sizeof(struct ebt_mark_m_info)), + .matchsize = sizeof(struct ebt_mark_m_info), .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c index 2a63d996dd4e..40dbd248b9ae 100644 --- a/net/bridge/netfilter/ebt_nflog.c +++ b/net/bridge/netfilter/ebt_nflog.c @@ -51,7 +51,7 @@ static struct xt_target ebt_nflog_tg_reg __read_mostly = { .family = NFPROTO_BRIDGE, .target = ebt_nflog_tg, .checkentry = ebt_nflog_tg_check, - .targetsize = XT_ALIGN(sizeof(struct ebt_nflog_info)), + .targetsize = sizeof(struct ebt_nflog_info), .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c index 883e96e2a542..e2a07e6cbef3 100644 --- a/net/bridge/netfilter/ebt_pkttype.c +++ b/net/bridge/netfilter/ebt_pkttype.c @@ -36,7 +36,7 @@ static struct xt_match ebt_pkttype_mt_reg __read_mostly = { .family = NFPROTO_BRIDGE, .match = ebt_pkttype_mt, .checkentry = ebt_pkttype_mt_check, - .matchsize = XT_ALIGN(sizeof(struct ebt_pkttype_info)), + .matchsize = sizeof(struct ebt_pkttype_info), .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c index c8a49f7a57ba..9be8fbcd370b 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -59,7 +59,7 @@ static struct xt_target ebt_redirect_tg_reg __read_mostly = { (1 << NF_BR_BROUTING), .target = ebt_redirect_tg, .checkentry = ebt_redirect_tg_check, - .targetsize = XT_ALIGN(sizeof(struct ebt_redirect_info)), + .targetsize = sizeof(struct 
ebt_redirect_info), .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c index 8d04d4c302bd..9c7b520765a2 100644 --- a/net/bridge/netfilter/ebt_snat.c +++ b/net/bridge/netfilter/ebt_snat.c @@ -67,7 +67,7 @@ static struct xt_target ebt_snat_tg_reg __read_mostly = { .hooks = (1 << NF_BR_NUMHOOKS) | (1 << NF_BR_POST_ROUTING), .target = ebt_snat_tg, .checkentry = ebt_snat_tg_check, - .targetsize = XT_ALIGN(sizeof(struct ebt_nat_info)), + .targetsize = sizeof(struct ebt_nat_info), .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c index 75e29a9cebda..92a93d363765 100644 --- a/net/bridge/netfilter/ebt_stp.c +++ b/net/bridge/netfilter/ebt_stp.c @@ -177,7 +177,7 @@ static struct xt_match ebt_stp_mt_reg __read_mostly = { .family = NFPROTO_BRIDGE, .match = ebt_stp_mt, .checkentry = ebt_stp_mt_check, - .matchsize = XT_ALIGN(sizeof(struct ebt_stp_info)), + .matchsize = sizeof(struct ebt_stp_info), .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index ce50688a6431..c6ac657074a6 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -275,7 +275,7 @@ static struct xt_target ebt_ulog_tg_reg __read_mostly = { .family = NFPROTO_BRIDGE, .target = ebt_ulog_tg, .checkentry = ebt_ulog_tg_check, - .targetsize = XT_ALIGN(sizeof(struct ebt_ulog_info)), + .targetsize = sizeof(struct ebt_ulog_info), .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c index 3dddd489328e..be1dd2e1f615 100644 --- a/net/bridge/netfilter/ebt_vlan.c +++ b/net/bridge/netfilter/ebt_vlan.c @@ -163,7 +163,7 @@ static struct xt_match ebt_vlan_mt_reg __read_mostly = { .family = NFPROTO_BRIDGE, .match = ebt_vlan_mt, .checkentry = ebt_vlan_mt_check, - .matchsize = XT_ALIGN(sizeof(struct ebt_vlan_info)), + .matchsize = sizeof(struct ebt_vlan_info), .me = THIS_MODULE, }; -- cgit v1.2.2 From 3e5e524ffb5fcf2447eb5dd9f8e54ad22dd9baa7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 15 Feb 2010 18:17:10 +0100 Subject: netfilter: CONFIG_COMPAT: allow delta to exceed 32767 with 32 bit userland and 64 bit kernels, it is unlikely but possible that insertion of new rules fails even though there are only about 2000 iptables rules. This happens because the compat delta is using a short int. Easily reproducible via "iptables -m limit"; after about 2050 rules inserting new ones fails with -ELOOP. Note that compat_delta included 2 bytes of padding on x86_64, so structure size remains the same. 
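For a sense of scale, a small sketch with assumed numbers (the 16-byte per-rule kernel/compat size difference is purely illustrative; the real delta depends on the matches and targets each rule uses):

#include <limits.h>
#include <stdio.h>

int main(void)
{
	int delta = 0;
	const int per_rule_delta = 16;	/* assumed kernel/compat size gap per rule */

	for (int rule = 1; rule <= 2100; rule++) {
		delta += per_rule_delta;
		if (delta > SHRT_MAX) {
			/* a signed 16-bit field can no longer hold the accumulated sum */
			printf("16-bit delta overflows at rule %d (delta = %d)\n",
			       rule, delta);
			break;
		}
	}
	return 0;
}

With these assumed numbers the 16-bit limit is crossed around rule 2048, in line with the reported -ELOOP failures after roughly 2000 rules; an int field has ample headroom.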
Signed-off-by: Florian Westphal Signed-off-by: Patrick McHardy --- net/netfilter/x_tables.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 69c56287d518..0a12cedfe9e3 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -39,7 +39,7 @@ MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module"); struct compat_delta { struct compat_delta *next; unsigned int offset; - short delta; + int delta; }; struct xt_af { @@ -439,10 +439,10 @@ void xt_compat_flush_offsets(u_int8_t af) } EXPORT_SYMBOL_GPL(xt_compat_flush_offsets); -short xt_compat_calc_jump(u_int8_t af, unsigned int offset) +int xt_compat_calc_jump(u_int8_t af, unsigned int offset) { struct compat_delta *tmp; - short delta; + int delta; for (tmp = xt[af].compat_offsets, delta = 0; tmp; tmp = tmp->next) if (tmp->offset < offset) -- cgit v1.2.2 From 2f5265e6e785b2a666dd985ea157bc8c260be8fa Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 12 Feb 2010 10:45:05 +0100 Subject: mac80211: fix netdev rename Fix a copy bug introduced by commit 47846c9b0c10808d9337d2e7d09361f3e0a0a71a Author: Johannes Berg Date: Wed Nov 25 17:46:19 2009 +0100 mac80211: reduce reliance on netdev This manifested itself only in debug messages and in the debugfs rename failure that would always happen due to trying to rename the dir over itself. Signed-off-by: Johannes Berg Tested-by: Pavel Roskin Signed-off-by: John W. Linville --- net/mac80211/iface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 09fff4662e80..0793d7a8d743 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1031,7 +1031,7 @@ static int netdev_notify(struct notifier_block *nb, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - memcpy(sdata->name, sdata->name, IFNAMSIZ); + memcpy(sdata->name, dev->name, IFNAMSIZ); ieee80211_debugfs_rename_netdev(sdata); return 0; -- cgit v1.2.2 From 0e956c132f822d414a4ce84726ac1d1294364581 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 12 Feb 2010 12:34:50 -0200 Subject: nl80211: does not allow NEW_STATION and DEL_STATION for mesh As discussed on the linux-wireless mailing list, adding and removing stations for mesh topologies is not necessary. Since doing it triggers bugs, the suggestion was to simply disable it. Tested using a custom iw command "station new". Works only after using hostapd. "station del" command also works. Signed-off-by: Thadeu Lima de Souza Cascardo Cc: Johannes Berg Cc: Simon Raffeiner Cc: Andrey Yurovsky Cc: Javier Cardona Signed-off-by: John W. 
Linville --- net/wireless/nl80211.c | 49 +++++++++++++------------------------------------ 1 file changed, 13 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5b79ecf17bea..a95ab9e4c19e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2010,6 +2010,9 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]) return -EINVAL; + if (!info->attrs[NL80211_ATTR_STA_AID]) + return -EINVAL; + mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); params.supported_rates = nla_data(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]); @@ -2018,11 +2021,9 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.listen_interval = nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); - if (info->attrs[NL80211_ATTR_STA_AID]) { - params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); - if (!params.aid || params.aid > IEEE80211_MAX_AID) - return -EINVAL; - } + params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); + if (!params.aid || params.aid > IEEE80211_MAX_AID) + return -EINVAL; if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) params.ht_capa = @@ -2037,6 +2038,12 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (err) goto out_rtnl; + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN) { + err = -EINVAL; + goto out; + } + err = get_vlan(info, rdev, ¶ms.vlan); if (err) goto out; @@ -2044,35 +2051,6 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) /* validate settings */ err = 0; - switch (dev->ieee80211_ptr->iftype) { - case NL80211_IFTYPE_AP: - case NL80211_IFTYPE_AP_VLAN: - /* all ok but must have AID */ - if (!params.aid) - err = -EINVAL; - break; - case NL80211_IFTYPE_MESH_POINT: - /* disallow things mesh doesn't support */ - if (params.vlan) - err = -EINVAL; - if (params.aid) - err = -EINVAL; - if (params.ht_capa) - err = -EINVAL; - if (params.listen_interval >= 0) - err = -EINVAL; - if (params.supported_rates) - err = -EINVAL; - if (params.sta_flags_mask) - err = -EINVAL; - break; - default: - err = -EINVAL; - } - - if (err) - goto out; - if (!rdev->ops->add_station) { err = -EOPNOTSUPP; goto out; @@ -2113,8 +2091,7 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) goto out_rtnl; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) { + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN) { err = -EINVAL; goto out; } -- cgit v1.2.2 From 8404080568613d93ad7cf0a16dfb68459b42a264 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 15 Feb 2010 12:46:39 +0200 Subject: mac80211: reject unhandled action frames 802.11-2007 7.3.1.11 mandates that we need to reject action frames we don't handle by setting the 0x80 bit in the category and returning them to the sender, so do that. In AP mode, hostapd is responsible for this. Additionally, drop completely malformed action frames or ones that should've been encrypted as unusable, userspace shouldn't see those. Signed-off-by: Johannes Berg Signed-off-by: Jouni Malinen Signed-off-by: John W. 
Linville --- net/mac80211/rx.c | 90 +++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 57 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index c9755f3d986c..a177472adc13 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2,7 +2,7 @@ * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005-2006, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc - * Copyright 2007 Johannes Berg + * Copyright 2007-2010 Johannes Berg * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -1855,23 +1855,28 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) struct ieee80211_local *local = rx->local; struct ieee80211_sub_if_data *sdata = rx->sdata; struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; + struct sk_buff *nskb; int len = rx->skb->len; if (!ieee80211_is_action(mgmt->frame_control)) return RX_CONTINUE; if (!rx->sta) - return RX_DROP_MONITOR; + return RX_DROP_UNUSABLE; if (!(rx->flags & IEEE80211_RX_RA_MATCH)) - return RX_DROP_MONITOR; + return RX_DROP_UNUSABLE; if (ieee80211_drop_unencrypted(rx, mgmt->frame_control)) - return RX_DROP_MONITOR; + return RX_DROP_UNUSABLE; - /* all categories we currently handle have action_code */ + /* drop too small frames */ + if (len < IEEE80211_MIN_ACTION_SIZE) + return RX_DROP_UNUSABLE; + + /* return action frames that have *only* category */ if (len < IEEE80211_MIN_ACTION_SIZE + 1) - return RX_DROP_MONITOR; + goto return_frame; switch (mgmt->u.action.category) { case WLAN_CATEGORY_BACK: @@ -1884,7 +1889,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) if (sdata->vif.type != NL80211_IFTYPE_STATION && sdata->vif.type != NL80211_IFTYPE_AP_VLAN && sdata->vif.type != NL80211_IFTYPE_AP) - return RX_DROP_MONITOR; + break; switch (mgmt->u.action.u.addba_req.action_code) { case WLAN_ACTION_ADDBA_REQ: @@ -1892,45 +1897,45 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) sizeof(mgmt->u.action.u.addba_req))) return RX_DROP_MONITOR; ieee80211_process_addba_request(local, rx->sta, mgmt, len); - break; + goto handled; case WLAN_ACTION_ADDBA_RESP: if (len < (IEEE80211_MIN_ACTION_SIZE + sizeof(mgmt->u.action.u.addba_resp))) - return RX_DROP_MONITOR; + break; ieee80211_process_addba_resp(local, rx->sta, mgmt, len); - break; + goto handled; case WLAN_ACTION_DELBA: if (len < (IEEE80211_MIN_ACTION_SIZE + sizeof(mgmt->u.action.u.delba))) - return RX_DROP_MONITOR; + break; ieee80211_process_delba(sdata, rx->sta, mgmt, len); - break; + goto handled; } break; case WLAN_CATEGORY_SPECTRUM_MGMT: if (local->hw.conf.channel->band != IEEE80211_BAND_5GHZ) - return RX_DROP_MONITOR; + break; if (sdata->vif.type != NL80211_IFTYPE_STATION) - return RX_DROP_MONITOR; + break; switch (mgmt->u.action.u.measurement.action_code) { case WLAN_ACTION_SPCT_MSR_REQ: if (len < (IEEE80211_MIN_ACTION_SIZE + sizeof(mgmt->u.action.u.measurement))) - return RX_DROP_MONITOR; + break; ieee80211_process_measurement_req(sdata, mgmt, len); - break; + goto handled; case WLAN_ACTION_SPCT_CHL_SWITCH: if (len < (IEEE80211_MIN_ACTION_SIZE + sizeof(mgmt->u.action.u.chan_switch))) - return RX_DROP_MONITOR; + break; if (sdata->vif.type != NL80211_IFTYPE_STATION) - return RX_DROP_MONITOR; + break; if (memcmp(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN)) - return RX_DROP_MONITOR; + break; return ieee80211_sta_rx_mgmt(sdata, rx->skb); } @@ -1938,29 +1943,48 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) 
case WLAN_CATEGORY_SA_QUERY: if (len < (IEEE80211_MIN_ACTION_SIZE + sizeof(mgmt->u.action.u.sa_query))) - return RX_DROP_MONITOR; + break; + switch (mgmt->u.action.u.sa_query.action) { case WLAN_ACTION_SA_QUERY_REQUEST: if (sdata->vif.type != NL80211_IFTYPE_STATION) - return RX_DROP_MONITOR; + break; ieee80211_process_sa_query_req(sdata, mgmt, len); - break; - case WLAN_ACTION_SA_QUERY_RESPONSE: - /* - * SA Query response is currently only used in AP mode - * and it is processed in user space. - */ - return RX_CONTINUE; + goto handled; } break; - default: - /* do not process rejected action frames */ - if (mgmt->u.action.category & 0x80) - return RX_DROP_MONITOR; + } + return_frame: + /* + * For AP mode, hostapd is responsible for handling any action + * frames that we didn't handle, including returning unknown + * ones. For all other modes we will return them to the sender, + * setting the 0x80 bit in the action category, as required by + * 802.11-2007 7.3.1.11. + */ + if (sdata->vif.type == NL80211_IFTYPE_AP || + sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + return RX_DROP_MONITOR; - return RX_CONTINUE; + /* do not return rejected action frames */ + if (mgmt->u.action.category & 0x80) + return RX_DROP_UNUSABLE; + + nskb = skb_copy_expand(rx->skb, local->hw.extra_tx_headroom, 0, + GFP_ATOMIC); + if (nskb) { + struct ieee80211_mgmt *mgmt = (void *)nskb->data; + + mgmt->u.action.category |= 0x80; + memcpy(mgmt->da, mgmt->sa, ETH_ALEN); + memcpy(mgmt->sa, rx->sdata->vif.addr, ETH_ALEN); + + memset(nskb->cb, 0, sizeof(nskb->cb)); + + ieee80211_tx_skb(rx->sdata, nskb); } + handled: rx->sta->rx_packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; -- cgit v1.2.2 From 026331c4d9b526561ea96f95fac4bfc52b69e316 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 15 Feb 2010 12:53:10 +0200 Subject: cfg80211/mac80211: allow registering for and sending action frames This implements a new command to register for action frames that userspace wants to handle instead of the in-kernel rejection. It is then responsible for rejecting ones that it decided not to handle. There is no unregistration, but the socket can be closed for that. Frames that are not registered for will not be forwarded to userspace and will be rejected by the kernel, the cfg80211 API helps implementing that. Additionally, this patch adds a new command that allows doing action frame transmission from userspace. It can be used either to exchange action frames on the current operational channel (e.g., with the AP with which we are currently associated) or to exchange off-channel Public Action frames with the remain-on-channel command. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 12 ++- net/mac80211/ieee80211_i.h | 6 +- net/mac80211/mlme.c | 35 ++++++ net/mac80211/rx.c | 42 ++++++-- net/mac80211/status.c | 7 +- net/wireless/core.c | 4 + net/wireless/core.h | 9 ++ net/wireless/mlme.c | 166 +++++++++++++++++++++++++++++ net/wireless/nl80211.c | 260 ++++++++++++++++++++++++++++++++++++++++++++- net/wireless/nl80211.h | 8 ++ 10 files changed, 534 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index e1731b7c2523..b7116ef84a3b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1,7 +1,7 @@ /* * mac80211 configuration hooks for cfg80211 * - * Copyright 2006, 2007 Johannes Berg + * Copyright 2006-2010 Johannes Berg * * This file is GPLv2 as found in COPYING. 
*/ @@ -1448,6 +1448,15 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, return ieee80211_wk_cancel_remain_on_channel(sdata, cookie); } +static int ieee80211_action(struct wiphy *wiphy, struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + const u8 *buf, size_t len, u64 *cookie) +{ + return ieee80211_mgd_action(IEEE80211_DEV_TO_SUB_IF(dev), chan, + channel_type, buf, len, cookie); +} + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -1496,4 +1505,5 @@ struct cfg80211_ops mac80211_config_ops = { .set_bitrate_mask = ieee80211_set_bitrate_mask, .remain_on_channel = ieee80211_remain_on_channel, .cancel_remain_on_channel = ieee80211_cancel_remain_on_channel, + .action = ieee80211_action, }; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 9dd98b674cbc..241533e1bc03 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2,7 +2,7 @@ * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc - * Copyright 2007-2008 Johannes Berg + * Copyright 2007-2010 Johannes Berg * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -966,6 +966,10 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, struct cfg80211_disassoc_request *req, void *cookie); +int ieee80211_mgd_action(struct ieee80211_sub_if_data *sdata, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + const u8 *buf, size_t len, u64 *cookie); ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); void ieee80211_send_pspoll(struct ieee80211_local *local, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index bfc4a5070013..41812a15eea0 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2084,3 +2084,38 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, return 0; } + +int ieee80211_mgd_action(struct ieee80211_sub_if_data *sdata, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + const u8 *buf, size_t len, u64 *cookie) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + struct sk_buff *skb; + + /* Check that we are on the requested channel for transmission */ + if ((chan != local->tmp_channel || + channel_type != local->tmp_channel_type) && + (chan != local->oper_channel || + channel_type != local->oper_channel_type)) + return -EBUSY; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + len); + if (!skb) + return -ENOMEM; + skb_reserve(skb, local->hw.extra_tx_headroom); + + memcpy(skb_put(skb, len), buf, len); + + if (!(ifmgd->flags & IEEE80211_STA_MFP_ENABLED)) + IEEE80211_SKB_CB(skb)->flags |= + IEEE80211_TX_INTFL_DONT_ENCRYPT; + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_NL80211_FRAME_TX | + IEEE80211_TX_CTL_REQ_TX_STATUS; + skb->dev = sdata->dev; + ieee80211_tx_skb(sdata, skb); + + *cookie = (unsigned long) skb; + return 0; +} diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index a177472adc13..a6080d8d72bb 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1856,28 +1856,25 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) struct ieee80211_sub_if_data *sdata = rx->sdata; struct ieee80211_mgmt *mgmt = (struct 
ieee80211_mgmt *) rx->skb->data; struct sk_buff *nskb; + struct ieee80211_rx_status *status; int len = rx->skb->len; if (!ieee80211_is_action(mgmt->frame_control)) return RX_CONTINUE; - if (!rx->sta) + /* drop too small frames */ + if (len < IEEE80211_MIN_ACTION_SIZE) return RX_DROP_UNUSABLE; - if (!(rx->flags & IEEE80211_RX_RA_MATCH)) + if (!rx->sta && mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) return RX_DROP_UNUSABLE; - if (ieee80211_drop_unencrypted(rx, mgmt->frame_control)) + if (!(rx->flags & IEEE80211_RX_RA_MATCH)) return RX_DROP_UNUSABLE; - /* drop too small frames */ - if (len < IEEE80211_MIN_ACTION_SIZE) + if (ieee80211_drop_unencrypted(rx, mgmt->frame_control)) return RX_DROP_UNUSABLE; - /* return action frames that have *only* category */ - if (len < IEEE80211_MIN_ACTION_SIZE + 1) - goto return_frame; - switch (mgmt->u.action.category) { case WLAN_CATEGORY_BACK: /* @@ -1891,6 +1888,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) sdata->vif.type != NL80211_IFTYPE_AP) break; + /* verify action_code is present */ + if (len < IEEE80211_MIN_ACTION_SIZE + 1) + break; + switch (mgmt->u.action.u.addba_req.action_code) { case WLAN_ACTION_ADDBA_REQ: if (len < (IEEE80211_MIN_ACTION_SIZE + @@ -1919,6 +1920,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) if (sdata->vif.type != NL80211_IFTYPE_STATION) break; + /* verify action_code is present */ + if (len < IEEE80211_MIN_ACTION_SIZE + 1) + break; + switch (mgmt->u.action.u.measurement.action_code) { case WLAN_ACTION_SPCT_MSR_REQ: if (len < (IEEE80211_MIN_ACTION_SIZE + @@ -1954,7 +1959,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) } break; } - return_frame: + /* * For AP mode, hostapd is responsible for handling any action * frames that we didn't handle, including returning unknown @@ -1966,6 +1971,20 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) sdata->vif.type == NL80211_IFTYPE_AP_VLAN) return RX_DROP_MONITOR; + /* + * Getting here means the kernel doesn't know how to handle + * it, but maybe userspace does ... include returned frames + * so userspace can register for those to know whether ones + * it transmitted were processed or returned. + */ + status = IEEE80211_SKB_RXCB(rx->skb); + + if (sdata->vif.type == NL80211_IFTYPE_STATION && + cfg80211_rx_action(rx->sdata->dev, status->freq, + rx->skb->data, rx->skb->len, + GFP_ATOMIC)) + goto handled; + /* do not return rejected action frames */ if (mgmt->u.action.category & 0x80) return RX_DROP_UNUSABLE; @@ -1985,7 +2004,8 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) } handled: - rx->sta->rx_packets++; + if (rx->sta) + rx->sta->rx_packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; } diff --git a/net/mac80211/status.c b/net/mac80211/status.c index ded98730c111..56d5b9a6ec5b 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -2,7 +2,7 @@ * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005-2006, Devicescape Software, Inc. 
* Copyright 2006-2007 Jiri Benc - * Copyright 2008-2009 Johannes Berg + * Copyright 2008-2010 Johannes Berg * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -288,6 +288,11 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) msecs_to_jiffies(10)); } + if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) + cfg80211_action_tx_status( + skb->dev, (unsigned long) skb, skb->data, skb->len, + !!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC); + /* this was a transmitted frame, but now we want to reuse it */ skb_orphan(skb); diff --git a/net/wireless/core.c b/net/wireless/core.c index 71b6b3a9cf1f..51908dc2ea00 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -677,6 +677,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, INIT_WORK(&wdev->cleanup_work, wdev_cleanup_work); INIT_LIST_HEAD(&wdev->event_list); spin_lock_init(&wdev->event_lock); + INIT_LIST_HEAD(&wdev->action_registrations); + spin_lock_init(&wdev->action_registrations_lock); + mutex_lock(&rdev->devlist_mtx); list_add_rcu(&wdev->list, &rdev->netdev_list); rdev->devlist_generation++; @@ -792,6 +795,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, sysfs_remove_link(&dev->dev.kobj, "phy80211"); list_del_rcu(&wdev->list); rdev->devlist_generation++; + cfg80211_mlme_purge_actions(wdev); #ifdef CONFIG_CFG80211_WEXT kfree(wdev->wext.keys); #endif diff --git a/net/wireless/core.h b/net/wireless/core.h index c326a667022a..d52da913145a 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -329,6 +329,15 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, const u8 *resp_ie, size_t resp_ie_len, u16 status, bool wextev, struct cfg80211_bss *bss); +int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid, + const u8 *match_data, int match_len); +void cfg80211_mlme_unregister_actions(struct wireless_dev *wdev, u32 nlpid); +void cfg80211_mlme_purge_actions(struct wireless_dev *wdev); +int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + const u8 *buf, size_t len, u64 *cookie); /* SME */ int __cfg80211_connect(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 94d151f6f73e..62bc8855e123 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -728,3 +728,169 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, nl80211_send_sta_event(rdev, dev, mac_addr, sinfo, gfp); } EXPORT_SYMBOL(cfg80211_new_sta); + +struct cfg80211_action_registration { + struct list_head list; + + u32 nlpid; + + int match_len; + + u8 match[]; +}; + +int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid, + const u8 *match_data, int match_len) +{ + struct cfg80211_action_registration *reg, *nreg; + int err = 0; + + nreg = kzalloc(sizeof(*reg) + match_len, GFP_KERNEL); + if (!nreg) + return -ENOMEM; + + spin_lock_bh(&wdev->action_registrations_lock); + + list_for_each_entry(reg, &wdev->action_registrations, list) { + int mlen = min(match_len, reg->match_len); + + if (memcmp(reg->match, match_data, mlen) == 0) { + err = -EALREADY; + break; + } + } + + if (err) { + kfree(nreg); + goto out; + } + + memcpy(nreg->match, match_data, match_len); + nreg->match_len = match_len; + nreg->nlpid = snd_pid; + list_add(&nreg->list, &wdev->action_registrations); + + out: 
+ spin_unlock_bh(&wdev->action_registrations_lock); + return err; +} + +void cfg80211_mlme_unregister_actions(struct wireless_dev *wdev, u32 nlpid) +{ + struct cfg80211_action_registration *reg, *tmp; + + spin_lock_bh(&wdev->action_registrations_lock); + + list_for_each_entry_safe(reg, tmp, &wdev->action_registrations, list) { + if (reg->nlpid == nlpid) { + list_del(®->list); + kfree(reg); + } + } + + spin_unlock_bh(&wdev->action_registrations_lock); +} + +void cfg80211_mlme_purge_actions(struct wireless_dev *wdev) +{ + struct cfg80211_action_registration *reg, *tmp; + + spin_lock_bh(&wdev->action_registrations_lock); + + list_for_each_entry_safe(reg, tmp, &wdev->action_registrations, list) { + list_del(®->list); + kfree(reg); + } + + spin_unlock_bh(&wdev->action_registrations_lock); +} + +int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + const u8 *buf, size_t len, u64 *cookie) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + const struct ieee80211_mgmt *mgmt; + + if (rdev->ops->action == NULL) + return -EOPNOTSUPP; + if (len < 24 + 1) + return -EINVAL; + + mgmt = (const struct ieee80211_mgmt *) buf; + if (!ieee80211_is_action(mgmt->frame_control)) + return -EINVAL; + if (mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) { + /* Verify that we are associated with the destination AP */ + if (!wdev->current_bss || + memcmp(wdev->current_bss->pub.bssid, mgmt->bssid, + ETH_ALEN) != 0 || + memcmp(wdev->current_bss->pub.bssid, mgmt->da, + ETH_ALEN) != 0) + return -ENOTCONN; + } + + if (memcmp(mgmt->sa, dev->dev_addr, ETH_ALEN) != 0) + return -EINVAL; + + /* Transmit the Action frame as requested by user space */ + return rdev->ops->action(&rdev->wiphy, dev, chan, channel_type, + buf, len, cookie); +} + +bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf, + size_t len, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_action_registration *reg; + const u8 *action_data; + int action_data_len; + bool result = false; + + /* frame length - min size excluding category */ + action_data_len = len - (IEEE80211_MIN_ACTION_SIZE - 1); + + /* action data starts with category */ + action_data = buf + IEEE80211_MIN_ACTION_SIZE - 1; + + spin_lock_bh(&wdev->action_registrations_lock); + + list_for_each_entry(reg, &wdev->action_registrations, list) { + if (reg->match_len > action_data_len) + continue; + + if (memcmp(reg->match, action_data, reg->match_len)) + continue; + + /* found match! 
*/ + + /* Indicate the received Action frame to user space */ + if (nl80211_send_action(rdev, dev, reg->nlpid, freq, + buf, len, gfp)) + continue; + + result = true; + break; + } + + spin_unlock_bh(&wdev->action_registrations_lock); + + return result; +} +EXPORT_SYMBOL(cfg80211_rx_action); + +void cfg80211_action_tx_status(struct net_device *dev, u64 cookie, + const u8 *buf, size_t len, bool ack, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + /* Indicate TX status of the Action frame to user space */ + nl80211_send_action_tx_status(rdev, dev, cookie, buf, len, ack, gfp); +} +EXPORT_SYMBOL(cfg80211_action_tx_status); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a95ab9e4c19e..328112081358 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1,7 +1,7 @@ /* * This is the new netlink-based wireless configuration interface. * - * Copyright 2006-2009 Johannes Berg + * Copyright 2006-2010 Johannes Berg */ #include @@ -145,6 +145,9 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_DURATION] = { .type = NLA_U32 }, [NL80211_ATTR_COOKIE] = { .type = NLA_U64 }, [NL80211_ATTR_TX_RATES] = { .type = NLA_NESTED }, + [NL80211_ATTR_FRAME] = { .type = NLA_BINARY, + .len = IEEE80211_MAX_DATA_LEN }, + [NL80211_ATTR_FRAME_MATCH] = { .type = NLA_BINARY, }, }; /* policy for the attributes */ @@ -577,6 +580,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(flush_pmksa, FLUSH_PMKSA); CMD(remain_on_channel, REMAIN_ON_CHANNEL); CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); + CMD(action, ACTION); if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { i++; NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS); @@ -4526,6 +4530,139 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, return err; } +static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev; + struct net_device *dev; + int err; + + if (!info->attrs[NL80211_ATTR_FRAME_MATCH]) + return -EINVAL; + + if (nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH]) < 1) + return -EINVAL; + + rtnl_lock(); + + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); + if (err) + goto unlock_rtnl; + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { + err = -EOPNOTSUPP; + goto out; + } + + /* not much point in registering if we can't reply */ + if (!rdev->ops->action) { + err = -EOPNOTSUPP; + goto out; + } + + err = cfg80211_mlme_register_action(dev->ieee80211_ptr, info->snd_pid, + nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]), + nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH])); + out: + cfg80211_unlock_rdev(rdev); + dev_put(dev); + unlock_rtnl: + rtnl_unlock(); + return err; +} + +static int nl80211_action(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev; + struct net_device *dev; + struct ieee80211_channel *chan; + enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; + u32 freq; + int err; + void *hdr; + u64 cookie; + struct sk_buff *msg; + + if (!info->attrs[NL80211_ATTR_FRAME] || + !info->attrs[NL80211_ATTR_WIPHY_FREQ]) + return -EINVAL; + + rtnl_lock(); + + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); + if (err) + goto unlock_rtnl; + + if (!rdev->ops->action) { + err = -EOPNOTSUPP; + goto out; + } + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { + err = -EOPNOTSUPP; + goto out; + } + + if 
(!netif_running(dev)) { + err = -ENETDOWN; + goto out; + } + + if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) { + channel_type = nla_get_u32( + info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]); + if (channel_type != NL80211_CHAN_NO_HT && + channel_type != NL80211_CHAN_HT20 && + channel_type != NL80211_CHAN_HT40PLUS && + channel_type != NL80211_CHAN_HT40MINUS) + err = -EINVAL; + goto out; + } + + freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); + chan = rdev_freq_to_chan(rdev, freq, channel_type); + if (chan == NULL) { + err = -EINVAL; + goto out; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + err = -ENOMEM; + goto out; + } + + hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + NL80211_CMD_ACTION); + + if (IS_ERR(hdr)) { + err = PTR_ERR(hdr); + goto free_msg; + } + err = cfg80211_mlme_action(rdev, dev, chan, channel_type, + nla_data(info->attrs[NL80211_ATTR_FRAME]), + nla_len(info->attrs[NL80211_ATTR_FRAME]), + &cookie); + if (err) + goto free_msg; + + NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie); + + genlmsg_end(msg, hdr); + err = genlmsg_reply(msg, info); + goto out; + + nla_put_failure: + err = -ENOBUFS; + free_msg: + nlmsg_free(msg); + out: + cfg80211_unlock_rdev(rdev); + dev_put(dev); +unlock_rtnl: + rtnl_unlock(); + return err; +} + static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -4806,6 +4943,18 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, + { + .cmd = NL80211_CMD_REGISTER_ACTION, + .doit = nl80211_register_action, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_ACTION, + .doit = nl80211_action, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { @@ -5478,6 +5627,110 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, nl80211_mlme_mcgrp.id, gfp); } +int nl80211_send_action(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u32 nlpid, + int freq, const u8 *buf, size_t len, gfp_t gfp) +{ + struct sk_buff *msg; + void *hdr; + int err; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return -ENOMEM; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ACTION); + if (!hdr) { + nlmsg_free(msg); + return -ENOMEM; + } + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_FREQ, freq); + NLA_PUT(msg, NL80211_ATTR_FRAME, len, buf); + + err = genlmsg_end(msg, hdr); + if (err < 0) { + nlmsg_free(msg); + return err; + } + + err = genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid); + if (err < 0) + return err; + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); + return -ENOBUFS; +} + +void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u64 cookie, + const u8 *buf, size_t len, bool ack, + gfp_t gfp) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ACTION_TX_STATUS); + if (!hdr) { + nlmsg_free(msg); + return; + } + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + NLA_PUT(msg, NL80211_ATTR_FRAME, len, buf); + NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie); + if (ack) + NLA_PUT_FLAG(msg, NL80211_ATTR_ACK); + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + 
return; + } + + genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + +static int nl80211_netlink_notify(struct notifier_block * nb, + unsigned long state, + void *_notify) +{ + struct netlink_notify *notify = _notify; + struct cfg80211_registered_device *rdev; + struct wireless_dev *wdev; + + if (state != NETLINK_URELEASE) + return NOTIFY_DONE; + + rcu_read_lock(); + + list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) + list_for_each_entry_rcu(wdev, &rdev->netdev_list, list) + cfg80211_mlme_unregister_actions(wdev, notify->pid); + + rcu_read_unlock(); + + return NOTIFY_DONE; +} + +static struct notifier_block nl80211_netlink_notifier = { + .notifier_call = nl80211_netlink_notify, +}; + /* initialisation/exit functions */ int nl80211_init(void) @@ -5511,6 +5764,10 @@ int nl80211_init(void) goto err_out; #endif + err = netlink_register_notifier(&nl80211_netlink_notifier); + if (err) + goto err_out; + return 0; err_out: genl_unregister_family(&nl80211_fam); @@ -5519,5 +5776,6 @@ int nl80211_init(void) void nl80211_exit(void) { + netlink_unregister_notifier(&nl80211_netlink_notifier); genl_unregister_family(&nl80211_fam); } diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 14855b8fb430..4ca511102c6c 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -74,4 +74,12 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp); +int nl80211_send_action(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u32 nlpid, int freq, + const u8 *buf, size_t len, gfp_t gfp); +void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u64 cookie, + const u8 *buf, size_t len, bool ack, + gfp_t gfp); + #endif /* __NET_WIRELESS_NL80211_H */ -- cgit v1.2.2 From e858911804f5ecadb41afd61582a11f68d416328 Mon Sep 17 00:00:00 2001 From: Peter Waskiewicz Date: Fri, 12 Feb 2010 13:48:05 +0000 Subject: ethtool: Fix filter addition when caching n-tuple filters We can allow a filter to be added successfully to the underlying hardware, but still return an error if the cached list memory allocation fails. This patch fixes that condition. Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: Jeff Kirsher Signed-off-by: David S. 
Miller --- net/core/ethtool.c | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index a1280f643bf4..fbbe4b49116b 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -283,18 +283,17 @@ err_out: return ret; } -static int __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, - struct ethtool_rx_ntuple_flow_spec *spec) +static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, + struct ethtool_rx_ntuple_flow_spec *spec, + struct ethtool_rx_ntuple_flow_spec_container *fsc) { - struct ethtool_rx_ntuple_flow_spec_container *fsc; /* don't add filters forever */ - if (list->count >= ETHTOOL_MAX_NTUPLE_LIST_ENTRY) - return 0; - - fsc = kmalloc(sizeof(*fsc), GFP_ATOMIC); - if (!fsc) - return -ENOMEM; + if (list->count >= ETHTOOL_MAX_NTUPLE_LIST_ENTRY) { + /* free the container */ + kfree(fsc); + return; + } /* Copy the whole filter over */ fsc->fs.flow_type = spec->flow_type; @@ -310,14 +309,13 @@ static int __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, /* add to the list */ list_add_tail_rcu(&fsc->list, &list->list); list->count++; - - return 0; } static int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr) { struct ethtool_rx_ntuple cmd; const struct ethtool_ops *ops = dev->ethtool_ops; + struct ethtool_rx_ntuple_flow_spec_container *fsc = NULL; int ret; if (!ops->set_rx_ntuple) @@ -329,16 +327,26 @@ static int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr) if (copy_from_user(&cmd, useraddr, sizeof(cmd))) return -EFAULT; - ret = ops->set_rx_ntuple(dev, &cmd); - /* * Cache filter in dev struct for GET operation only if * the underlying driver doesn't have its own GET operation, and - * only if the filter was added successfully. + * only if the filter was added successfully. First make sure we + * can allocate the filter, then continue if successful. */ - if (!ops->get_rx_ntuple && !ret) - if (__rx_ntuple_filter_add(&dev->ethtool_ntuple_list, &cmd.fs)) + if (!ops->get_rx_ntuple) { + fsc = kmalloc(sizeof(*fsc), GFP_ATOMIC); + if (!fsc) return -ENOMEM; + } + + ret = ops->set_rx_ntuple(dev, &cmd); + if (ret) { + kfree(fsc); + return ret; + } + + if (!ops->get_rx_ntuple) + __rx_ntuple_filter_add(&dev->ethtool_ntuple_list, &cmd.fs, fsc); return ret; } -- cgit v1.2.2 From 0d643e1fb4207711d9c148b5c6a2820550a4a154 Mon Sep 17 00:00:00 2001 From: Peter Waskiewicz Date: Fri, 12 Feb 2010 13:48:25 +0000 Subject: ethtool: Move n-tuple capability check into set_flags set_flags should check if the underlying device supports n-tuple filter programming before setting the device flags on the netdevice. Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: Jeff Kirsher Signed-off-by: David S. 
Miller --- net/core/ethtool.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index fbbe4b49116b..794cf57078cd 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -134,15 +134,21 @@ u32 ethtool_op_get_flags(struct net_device *dev) int ethtool_op_set_flags(struct net_device *dev, u32 data) { + const struct ethtool_ops *ops = dev->ethtool_ops; + if (data & ETH_FLAG_LRO) dev->features |= NETIF_F_LRO; else dev->features &= ~NETIF_F_LRO; - if (data & ETH_FLAG_NTUPLE) + if (data & ETH_FLAG_NTUPLE) { + if (!ops->set_rx_ntuple) + return -EOPNOTSUPP; dev->features |= NETIF_F_NTUPLE; - else + } else { + /* safe to clear regardless */ dev->features &= ~NETIF_F_NTUPLE; + } return 0; } @@ -318,9 +324,6 @@ static int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr) struct ethtool_rx_ntuple_flow_spec_container *fsc = NULL; int ret; - if (!ops->set_rx_ntuple) - return -EOPNOTSUPP; - if (!(dev->features & NETIF_F_NTUPLE)) return -EINVAL; -- cgit v1.2.2 From 9546377c42e12513b33925ab829d893dcf521c5f Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Thu, 11 Feb 2010 00:12:45 +0000 Subject: IPv6: Delete redundant counter of IPSTATS_MIB_REASMFAILS When no more memory can be allocated, fq_find() will return NULL and increase the value of IPSTATS_MIB_REASMFAILS. In this case, ipv6_frag_rcv() also increase the value of IPSTATS_MIB_REASMFAILS. So, the patch deletes redundant counter of IPSTATS_MIB_REASMFAILS in fq_find(). and deletes the unused parameter of idev. Signed-off-by: Shan Wei Signed-off-by: David S. Miller --- net/ipv6/reassembly.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index fa38fc7cc6e9..fe27eb4264d6 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -237,8 +237,7 @@ out: } static __inline__ struct frag_queue * -fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst, - struct inet6_dev *idev) +fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst) { struct inet_frag_queue *q; struct ip6_create_arg arg; @@ -254,13 +253,9 @@ fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst, q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); if (q == NULL) - goto oom; + return NULL; return container_of(q, struct frag_queue, q); - -oom: - IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_REASMFAILS); - return NULL; } static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, @@ -606,8 +601,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb) if (atomic_read(&net->ipv6.frags.mem) > net->ipv6.frags.high_thresh) ip6_evictor(net, ip6_dst_idev(skb_dst(skb))); - if ((fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, - ip6_dst_idev(skb_dst(skb)))) != NULL) { + fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr); + if (fq != NULL) { int ret; spin_lock(&fq->q.lock); -- cgit v1.2.2 From 19f4c7133fc1b94001b997c4843d0a9192ee63e5 Mon Sep 17 00:00:00 2001 From: jamal Date: Wed, 10 Feb 2010 23:51:27 +0000 Subject: xfrm: Flushing empty SAD generates false events To see the effect make sure you have an empty SAD. -On window1 "ip xfrm mon" -on window2 issue "ip xfrm state flush" You get prompt back in window1 and you see the flush event on window2. With this fix, you still get prompt on window1 but no event on window2. 
I was tempted to return -ESRCH on window1 (which would show "RTNETLINK answers: No such process") but didnt want to change current behavior. cheers, jamal commit 5f3dd4a772326166e1bcf54acc2391df00dc7ab5 Author: Jamal Hadi Salim Date: Thu Feb 11 04:41:36 2010 -0500 xfrm: Flushing empty SAD generates false events To see the effect make sure you have an empty SAD. On window1 "ip xfrm mon" and on window2 issue "ip xfrm state flush" You get prompt back in window1 and you see the flush event on window2. With this fix, you still get prompt on window1 but no event on window2. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/key/af_key.c | 2 +- net/xfrm/xfrm_state.c | 8 ++++++-- net/xfrm/xfrm_user.c | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 79d2c0f3c334..9d47a6aa53dc 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1751,7 +1751,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hd audit_info.secid = 0; err = xfrm_state_flush(net, proto, &audit_info); if (err) - return err; + return 0; c.data.proto = proto; c.seq = hdr->sadb_msg_seq; c.pid = hdr->sadb_msg_pid; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index b36cc344474b..f50ee9badf47 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -603,13 +603,14 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audi int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info) { - int i, err = 0; + int i, err = 0, cnt = 0; spin_lock_bh(&xfrm_state_lock); err = xfrm_state_flush_secctx_check(net, proto, audit_info); if (err) goto out; + err = -ESRCH; for (i = 0; i <= net->xfrm.state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; @@ -626,13 +627,16 @@ restart: audit_info->sessionid, audit_info->secid); xfrm_state_put(x); + if (!err) + cnt++; spin_lock_bh(&xfrm_state_lock); goto restart; } } } - err = 0; + if (cnt) + err = 0; out: spin_unlock_bh(&xfrm_state_lock); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 943c8712bd97..7c8700447040 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1525,7 +1525,7 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, audit_info.secid = NETLINK_CB(skb).sid; err = xfrm_state_flush(net, p->proto, &audit_info); if (err) - return err; + return 0; c.data.proto = p->proto; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; -- cgit v1.2.2 From 0dca3a843632c2fbb6e358734fb08fc23e800f50 Mon Sep 17 00:00:00 2001 From: jamal Date: Thu, 11 Feb 2010 00:53:13 +0000 Subject: xfrm: Flushing empty SPD generates false events Observed similar behavior on SPD as previouly seen on SAD flushing.. This fixes it. cheers, jamal commit 428b20432dc31bc2e01a94cd451cf5a2c00d2bf4 Author: Jamal Hadi Salim Date: Thu Feb 11 05:49:38 2010 -0500 xfrm: Flushing empty SPD generates false events To see the effect make sure you have an empty SPD. On window1 "ip xfrm mon" and on window2 issue "ip xfrm policy flush" You get prompt back in window1 and you see the flush event on window2. With this fix, you still get prompt on window1 but no event on window2. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- net/key/af_key.c | 2 +- net/xfrm/xfrm_policy.c | 13 ++++++++++--- net/xfrm/xfrm_user.c | 2 +- 3 files changed, 12 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 9d47a6aa53dc..8b8e26a9e401 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2713,7 +2713,7 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg audit_info.secid = 0; err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); if (err) - return err; + return 0; c.data.type = XFRM_POLICY_TYPE_MAIN; c.event = XFRM_MSG_FLUSHPOLICY; c.pid = hdr->sadb_msg_pid; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 0ecb16a9a883..eb870fcc29cc 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -771,7 +771,8 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) { - int dir, err = 0; + int dir, err = 0, cnt = 0; + struct xfrm_policy *dp; write_lock_bh(&xfrm_policy_lock); @@ -789,8 +790,10 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) &net->xfrm.policy_inexact[dir], bydst) { if (pol->type != type) continue; - __xfrm_policy_unlink(pol, dir); + dp = __xfrm_policy_unlink(pol, dir); write_unlock_bh(&xfrm_policy_lock); + if (dp) + cnt++; xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, audit_info->sessionid, @@ -809,8 +812,10 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) bydst) { if (pol->type != type) continue; - __xfrm_policy_unlink(pol, dir); + dp = __xfrm_policy_unlink(pol, dir); write_unlock_bh(&xfrm_policy_lock); + if (dp) + cnt++; xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, @@ -824,6 +829,8 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) } } + if (!cnt) + err = -ESRCH; atomic_inc(&flow_cache_genid); out: write_unlock_bh(&xfrm_policy_lock); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 7c8700447040..b0fb7d3bc15e 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1677,7 +1677,7 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, audit_info.secid = NETLINK_CB(skb).sid; err = xfrm_policy_flush(net, type, &audit_info); if (err) - return err; + return 0; c.data.type = type; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; -- cgit v1.2.2 From b18e7a06857833d2c7c8c8457e5a3a7c3327f643 Mon Sep 17 00:00:00 2001 From: andrew hendry Date: Sun, 14 Feb 2010 02:00:11 +0000 Subject: X25: Fix x25_create errors for bad protocol and ENOBUFS alloc_socket failures should return -ENOBUFS a bad protocol should return -EINVAL Signed-off-by: Andrew Hendry Signed-off-by: David S. 
Miller --- net/x25/af_x25.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index e3219e4cd044..6c7104edec60 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -512,15 +512,20 @@ static int x25_create(struct net *net, struct socket *sock, int protocol, { struct sock *sk; struct x25_sock *x25; - int rc = -ESOCKTNOSUPPORT; + int rc = -EAFNOSUPPORT; if (!net_eq(net, &init_net)) - return -EAFNOSUPPORT; + goto out; + + rc = -ESOCKTNOSUPPORT; + if (sock->type != SOCK_SEQPACKET) + goto out; - if (sock->type != SOCK_SEQPACKET || protocol) + rc = -EINVAL; + if (protocol) goto out; - rc = -ENOMEM; + rc = -ENOBUFS; if ((sk = x25_alloc_socket(net)) == NULL) goto out; -- cgit v1.2.2 From a9288525d2aed806c1b8a785c226d4a9e6371650 Mon Sep 17 00:00:00 2001 From: andrew hendry Date: Sun, 14 Feb 2010 02:00:45 +0000 Subject: X25: Dont let x25_bind use addresses containing characters Addresses should be all digits. Stops x25_bind using addresses containing characters. Signed-off-by: Andrew Hendry Signed-off-by: David S. Miller --- net/x25/af_x25.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 6c7104edec60..9796f3ed1edb 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include @@ -648,7 +649,7 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; struct sockaddr_x25 *addr = (struct sockaddr_x25 *)uaddr; - int rc = 0; + int len, i, rc = 0; lock_kernel(); if (!sock_flag(sk, SOCK_ZAPPED) || @@ -658,6 +659,14 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; } + len = strlen(addr->sx25_addr.x25_addr); + for (i = 0; i < len; i++) { + if (!isdigit(addr->sx25_addr.x25_addr[i])) { + rc = -EINVAL; + goto out; + } + } + x25_sk(sk)->source_addr = addr->sx25_addr; x25_insert_socket(sk); sock_reset_flag(sk, SOCK_ZAPPED); -- cgit v1.2.2 From 339c6e99853d2ef1f02ad8a313e079050a300427 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Feb 2010 21:51:33 -0800 Subject: ethtool: reduce stack usage dev_ethtool() is currently using 604 bytes of stack, even with gcc-4.4.2 objdump -d vmlinux | scripts/checkstack.pl ... 0xc04bbc33 dev_ethtool [vmlinux]: 604 ... Adding noinline attributes to selected functions can reduce stack usage. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/ethtool.c | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 794cf57078cd..82cae3bca78d 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -197,7 +197,10 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr) return dev->ethtool_ops->set_settings(dev, &cmd); } -static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) +/* + * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() + */ +static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) { struct ethtool_drvinfo info; const struct ethtool_ops *ops = dev->ethtool_ops; @@ -232,7 +235,10 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) return 0; } -static int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr) +/* + * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() + */ +static noinline int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr) { struct ethtool_rxnfc cmd; @@ -245,7 +251,10 @@ static int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr) return dev->ethtool_ops->set_rxnfc(dev, &cmd); } -static int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr) +/* + * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() + */ +static noinline int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr) { struct ethtool_rxnfc info; const struct ethtool_ops *ops = dev->ethtool_ops; @@ -317,7 +326,10 @@ static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, list->count++; } -static int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr) +/* + * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() + */ +static noinline int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr) { struct ethtool_rx_ntuple cmd; const struct ethtool_ops *ops = dev->ethtool_ops; @@ -788,7 +800,10 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr) return ret; } -static int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr) +/* + * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() + */ +static noinline int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr) { struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE }; @@ -802,7 +817,10 @@ static int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr) return 0; } -static int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr) +/* + * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() + */ +static noinline int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr) { struct ethtool_coalesce coalesce; @@ -1212,7 +1230,10 @@ static int ethtool_set_value(struct net_device *dev, char __user *useraddr, return actor(dev, edata.data); } -static int ethtool_flash_device(struct net_device *dev, char __user *useraddr) +/* + * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() + */ +static noinline int ethtool_flash_device(struct net_device *dev, char __user *useraddr) { struct ethtool_flash efl; -- cgit v1.2.2 From e788759f44b29e5b1bc27a265dece7dcfa4234af Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 4 Feb 2010 18:38:53 +0100 Subject: netfilter: ebtables: split do_replace into two functions once 
CONFIG_COMPAT support is merged this allows to call do_replace_finish() after doing the CONFIG_COMPAT conversion instead of copy & pasting this. Signed-off-by: Florian Westphal --- net/bridge/netfilter/ebtables.c | 136 +++++++++++++++++++++------------------- 1 file changed, 71 insertions(+), 65 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 4370e9680487..a707dbdc0327 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -959,91 +959,45 @@ static void get_counters(const struct ebt_counter *oldcounters, } } -/* replace the table */ -static int do_replace(struct net *net, const void __user *user, - unsigned int len) +static int do_replace_finish(struct net *net, struct ebt_replace *repl, + struct ebt_table_info *newinfo) { - int ret, i, countersize; - struct ebt_table_info *newinfo; - struct ebt_replace tmp; - struct ebt_table *t; + int ret, i; struct ebt_counter *counterstmp = NULL; /* used to be able to unlock earlier */ struct ebt_table_info *table; - - if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) - return -EFAULT; - - if (len != sizeof(tmp) + tmp.entries_size) { - BUGPRINT("Wrong len argument\n"); - return -EINVAL; - } - - if (tmp.entries_size == 0) { - BUGPRINT("Entries_size never zero\n"); - return -EINVAL; - } - /* overflow check */ - if (tmp.nentries >= ((INT_MAX - sizeof(struct ebt_table_info)) / NR_CPUS - - SMP_CACHE_BYTES) / sizeof(struct ebt_counter)) - return -ENOMEM; - if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter)) - return -ENOMEM; - - countersize = COUNTER_OFFSET(tmp.nentries) * nr_cpu_ids; - newinfo = vmalloc(sizeof(*newinfo) + countersize); - if (!newinfo) - return -ENOMEM; - - if (countersize) - memset(newinfo->counters, 0, countersize); - - newinfo->entries = vmalloc(tmp.entries_size); - if (!newinfo->entries) { - ret = -ENOMEM; - goto free_newinfo; - } - if (copy_from_user( - newinfo->entries, tmp.entries, tmp.entries_size) != 0) { - BUGPRINT("Couldn't copy entries from userspace\n"); - ret = -EFAULT; - goto free_entries; - } + struct ebt_table *t; /* the user wants counters back the check on the size is done later, when we have the lock */ - if (tmp.num_counters) { - counterstmp = vmalloc(tmp.num_counters * sizeof(*counterstmp)); - if (!counterstmp) { - ret = -ENOMEM; - goto free_entries; - } + if (repl->num_counters) { + unsigned long size = repl->num_counters * sizeof(*counterstmp); + counterstmp = vmalloc(size); + if (!counterstmp) + return -ENOMEM; } - else - counterstmp = NULL; - /* this can get initialized by translate_table() */ newinfo->chainstack = NULL; - ret = ebt_verify_pointers(&tmp, newinfo); + ret = ebt_verify_pointers(repl, newinfo); if (ret != 0) goto free_counterstmp; - ret = translate_table(net, tmp.name, newinfo); + ret = translate_table(net, repl->name, newinfo); if (ret != 0) goto free_counterstmp; - t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); + t = find_table_lock(net, repl->name, &ret, &ebt_mutex); if (!t) { ret = -ENOENT; goto free_iterate; } /* the table doesn't like it */ - if (t->check && (ret = t->check(newinfo, tmp.valid_hooks))) + if (t->check && (ret = t->check(newinfo, repl->valid_hooks))) goto free_unlock; - if (tmp.num_counters && tmp.num_counters != t->private->nentries) { + if (repl->num_counters && repl->num_counters != t->private->nentries) { BUGPRINT("Wrong nr. 
of counters requested\n"); ret = -EINVAL; goto free_unlock; @@ -1059,7 +1013,7 @@ static int do_replace(struct net *net, const void __user *user, module_put(t->me); /* we need an atomic snapshot of the counters */ write_lock_bh(&t->lock); - if (tmp.num_counters) + if (repl->num_counters) get_counters(t->private->counters, counterstmp, t->private->nentries); @@ -1070,10 +1024,9 @@ static int do_replace(struct net *net, const void __user *user, allocation. Only reason why this is done is because this way the lock is held only once, while this doesn't bring the kernel into a dangerous state. */ - if (tmp.num_counters && - copy_to_user(tmp.counters, counterstmp, - tmp.num_counters * sizeof(struct ebt_counter))) { - BUGPRINT("Couldn't copy counters to userspace\n"); + if (repl->num_counters && + copy_to_user(repl->counters, counterstmp, + repl->num_counters * sizeof(struct ebt_counter))) { ret = -EFAULT; } else @@ -1107,6 +1060,59 @@ free_counterstmp: vfree(newinfo->chainstack[i]); vfree(newinfo->chainstack); } + return ret; +} + +/* replace the table */ +static int do_replace(struct net *net, const void __user *user, + unsigned int len) +{ + int ret, countersize; + struct ebt_table_info *newinfo; + struct ebt_replace tmp; + + if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + return -EFAULT; + + if (len != sizeof(tmp) + tmp.entries_size) { + BUGPRINT("Wrong len argument\n"); + return -EINVAL; + } + + if (tmp.entries_size == 0) { + BUGPRINT("Entries_size never zero\n"); + return -EINVAL; + } + /* overflow check */ + if (tmp.nentries >= ((INT_MAX - sizeof(struct ebt_table_info)) / + NR_CPUS - SMP_CACHE_BYTES) / sizeof(struct ebt_counter)) + return -ENOMEM; + if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter)) + return -ENOMEM; + + countersize = COUNTER_OFFSET(tmp.nentries) * nr_cpu_ids; + newinfo = vmalloc(sizeof(*newinfo) + countersize); + if (!newinfo) + return -ENOMEM; + + if (countersize) + memset(newinfo->counters, 0, countersize); + + newinfo->entries = vmalloc(tmp.entries_size); + if (!newinfo->entries) { + ret = -ENOMEM; + goto free_newinfo; + } + if (copy_from_user( + newinfo->entries, tmp.entries, tmp.entries_size) != 0) { + BUGPRINT("Couldn't copy entries from userspace\n"); + ret = -EFAULT; + goto free_entries; + } + + ret = do_replace_finish(net, &tmp, newinfo); + if (ret == 0) + return ret; free_entries: vfree(newinfo->entries); free_newinfo: -- cgit v1.2.2 From 837395aa863142be7c38be0ca780aef21b12b49f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 7 Feb 2010 02:11:34 +0100 Subject: netfilter: ebtables: split copy_everything_to_user into two functions once CONFIG_COMPAT support is added to ebtables, the new copy_counters_to_user function can be called instead of duplicating code. Also remove last use of MEMPRINT, as requested by Bart De Schuymer. Signed-off-by: Florian Westphal --- net/bridge/netfilter/ebtables.c | 70 ++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index a707dbdc0327..46030dc90845 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -33,11 +33,6 @@ #define BUGPRINT(format, args...) printk("kernel msg: ebtables bug: please "\ "report to author: "format, ## args) /* #define BUGPRINT(format, args...) */ -#define MEMPRINT(format, args...) printk("kernel msg: ebtables "\ - ": out of memory: "format, ## args) -/* #define MEMPRINT(format, args...) 
*/ - - /* * Each cpu has its own set of counters, so there is no need for write_lock in @@ -1263,10 +1258,8 @@ static int update_counters(struct net *net, const void __user *user, if (hlp.num_counters == 0) return -EINVAL; - if (!(tmp = vmalloc(hlp.num_counters * sizeof(*tmp)))) { - MEMPRINT("Update_counters && nomemory\n"); + if (!(tmp = vmalloc(hlp.num_counters * sizeof(*tmp)))) return -ENOMEM; - } t = find_table_lock(net, hlp.name, &ret, &ebt_mutex); if (!t) @@ -1345,14 +1338,46 @@ ebt_make_names(struct ebt_entry *e, const char *base, char __user *ubase) return 0; } +static int copy_counters_to_user(struct ebt_table *t, + const struct ebt_counter *oldcounters, + void __user *user, unsigned int num_counters, + unsigned int nentries) +{ + struct ebt_counter *counterstmp; + int ret = 0; + + /* userspace might not need the counters */ + if (num_counters == 0) + return 0; + + if (num_counters != nentries) { + BUGPRINT("Num_counters wrong\n"); + return -EINVAL; + } + + counterstmp = vmalloc(nentries * sizeof(*counterstmp)); + if (!counterstmp) + return -ENOMEM; + + write_lock_bh(&t->lock); + get_counters(oldcounters, counterstmp, nentries); + write_unlock_bh(&t->lock); + + if (copy_to_user(user, counterstmp, + nentries * sizeof(struct ebt_counter))) + ret = -EFAULT; + vfree(counterstmp); + return ret; +} + /* called with ebt_mutex locked */ static int copy_everything_to_user(struct ebt_table *t, void __user *user, const int *len, int cmd) { struct ebt_replace tmp; - struct ebt_counter *counterstmp; const struct ebt_counter *oldcounters; unsigned int entries_size, nentries; + int ret; char *entries; if (cmd == EBT_SO_GET_ENTRIES) { @@ -1388,29 +1413,10 @@ static int copy_everything_to_user(struct ebt_table *t, void __user *user, return -EINVAL; } - /* userspace might not need the counters */ - if (tmp.num_counters) { - if (tmp.num_counters != nentries) { - BUGPRINT("Num_counters wrong\n"); - return -EINVAL; - } - counterstmp = vmalloc(nentries * sizeof(*counterstmp)); - if (!counterstmp) { - MEMPRINT("Couldn't copy counters, out of memory\n"); - return -ENOMEM; - } - write_lock_bh(&t->lock); - get_counters(oldcounters, counterstmp, nentries); - write_unlock_bh(&t->lock); - - if (copy_to_user(tmp.counters, counterstmp, - nentries * sizeof(struct ebt_counter))) { - BUGPRINT("Couldn't copy counters to userspace\n"); - vfree(counterstmp); - return -EFAULT; - } - vfree(counterstmp); - } + ret = copy_counters_to_user(t, oldcounters, tmp.counters, + tmp.num_counters, nentries); + if (ret) + return ret; if (copy_to_user(tmp.entries, entries, entries_size)) { BUGPRINT("Couldn't copy entries to userspace\n"); -- cgit v1.2.2 From 49facff9f92508413f3da598f86aaf6c1121ff27 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 7 Feb 2010 02:48:47 +0100 Subject: netfilter: ebtables: split update_counters into two functions allows to call do_update_counters() from upcoming CONFIG_COMPAT code instead of copy&pasting the same code. 
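To make the intent concrete, here is a rough sketch of the kind of caller this split enables: a 32-bit compat path that converts its own header and then hands off to do_update_counters(). This is only an illustration, not the real compat code added later in this series; the function name and the exact length check are assumptions, and the struct compat_ebt_replace layout is the one introduced by the follow-up CONFIG_COMPAT patch.

static int compat_update_counters_sketch(struct net *net, void __user *user,
					 unsigned int len)
{
	struct compat_ebt_replace hlp;

	if (copy_from_user(&hlp, user, sizeof(hlp)))
		return -EFAULT;

	/* assumed check: 32-bit userspace lays the counter array out
	 * directly after the compat header */
	if (len != sizeof(hlp) + hlp.num_counters * sizeof(struct ebt_counter))
		return -EINVAL;

	/* header converted; the shared helper does the table lookup,
	 * locking and counter accumulation */
	return do_update_counters(net, hlp.name, compat_ptr(hlp.counters),
				  hlp.num_counters, user, len);
}

The point of the split is that everything after the header conversion lives only once, in do_update_counters().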
Signed-off-by: Florian Westphal --- net/bridge/netfilter/ebtables.c | 42 +++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 46030dc90845..76b99d3c1eea 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1242,38 +1242,33 @@ void ebt_unregister_table(struct net *net, struct ebt_table *table) } /* userspace just supplied us with counters */ -static int update_counters(struct net *net, const void __user *user, - unsigned int len) +static int do_update_counters(struct net *net, const char *name, + struct ebt_counter __user *counters, + unsigned int num_counters, + const void __user *user, unsigned int len) { int i, ret; struct ebt_counter *tmp; - struct ebt_replace hlp; struct ebt_table *t; - if (copy_from_user(&hlp, user, sizeof(hlp))) - return -EFAULT; - - if (len != sizeof(hlp) + hlp.num_counters * sizeof(struct ebt_counter)) - return -EINVAL; - if (hlp.num_counters == 0) + if (num_counters == 0) return -EINVAL; - if (!(tmp = vmalloc(hlp.num_counters * sizeof(*tmp)))) + tmp = vmalloc(num_counters * sizeof(*tmp)); + if (!tmp) return -ENOMEM; - t = find_table_lock(net, hlp.name, &ret, &ebt_mutex); + t = find_table_lock(net, name, &ret, &ebt_mutex); if (!t) goto free_tmp; - if (hlp.num_counters != t->private->nentries) { + if (num_counters != t->private->nentries) { BUGPRINT("Wrong nr of counters\n"); ret = -EINVAL; goto unlock_mutex; } - if ( copy_from_user(tmp, hlp.counters, - hlp.num_counters * sizeof(struct ebt_counter)) ) { - BUGPRINT("Updata_counters && !cfu\n"); + if (copy_from_user(tmp, counters, num_counters * sizeof(*counters))) { ret = -EFAULT; goto unlock_mutex; } @@ -1282,7 +1277,7 @@ static int update_counters(struct net *net, const void __user *user, write_lock_bh(&t->lock); /* we add to the counters of the first cpu */ - for (i = 0; i < hlp.num_counters; i++) { + for (i = 0; i < num_counters; i++) { t->private->counters[i].pcnt += tmp[i].pcnt; t->private->counters[i].bcnt += tmp[i].bcnt; } @@ -1296,6 +1291,21 @@ free_tmp: return ret; } +static int update_counters(struct net *net, const void __user *user, + unsigned int len) +{ + struct ebt_replace hlp; + + if (copy_from_user(&hlp, user, sizeof(hlp))) + return -EFAULT; + + if (len != sizeof(hlp) + hlp.num_counters * sizeof(struct ebt_counter)) + return -EINVAL; + + return do_update_counters(net, hlp.name, hlp.counters, + hlp.num_counters, user, len); +} + static inline int ebt_make_matchname(const struct ebt_entry_match *m, const char *base, char __user *ubase) { -- cgit v1.2.2 From 81e675c227ec60a0bdcbb547dc530ebee23ff931 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 5 Jan 2010 16:09:46 +0100 Subject: netfilter: ebtables: add CONFIG_COMPAT support Main code for 32 bit userland ebtables binary with 64 bit kernels support. Tested on x86_64 kernel only, using 64bit ebtables binary for output comparision. At least ebt_mark, m_mark and ebt_limit need CONFIG_COMPAT hooks, too. remaining problem: The ebtables userland makefile has: ifeq ($(shell uname -m),sparc64) CFLAGS+=-DEBT_MIN_ALIGN=8 -DKERNEL_64_USERSPACE_32 endif struct ebt_replace, ebt_entry_match etc. then contain userland-side padding, i.e. even if we are called from a 32 bit userland, the structures may already be in the right format. This problem is addressed in a follow-up patch. 
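What makes the conversion non-trivial is the layout difference between the kernel's native match/watcher/target header and the 32-bit one: both unions are EBT_FUNCTION_MAXNAMELEN bytes wide, but on a 64-bit kernel the union carries a pointer and ->data is aligned for a pointer-carrying structure, so padding can appear between match_size and ->data that the 32-bit layout does not have; this is what ebt_compat_entry_padsize() in this patch accounts for. Below is a small self-contained illustration using stand-in types (not the kernel structures; the printed numbers assume x86-64 with GCC).

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* stand-in for the 64-bit kernel's ebt_entry_match: a pointer lives in the
 * union and ->data is aligned like a structure that contains pointers */
struct match64 {
	union { char name[32]; uint64_t ptr; } u;
	uint32_t match_size;
	uint32_t data[] __attribute__((aligned(8)));
};

/* stand-in for compat_ebt_entry_mwt as 32-bit userspace lays it out */
struct match32 {
	union { char name[32]; uint32_t ptr; } u;
	uint32_t match_size;
	uint32_t data[];
};

int main(void)
{
	/* expected output on x86-64/GCC: size 40, data at 40 for the 64-bit
	 * view versus size 36, data at 36 for the 32-bit view -- the four
	 * bytes of padding before ->data exist only on the kernel side */
	printf("64-bit view: size %zu, data at %zu\n",
	       sizeof(struct match64), offsetof(struct match64, data));
	printf("32-bit view: size %zu, data at %zu\n",
	       sizeof(struct match32), offsetof(struct match32, data));
	return 0;
}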
Signed-off-by: Florian Westphal --- net/bridge/netfilter/ebtables.c | 887 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 886 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 76b99d3c1eea..fcaefdd6200b 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -51,11 +51,37 @@ static DEFINE_MUTEX(ebt_mutex); +#ifdef CONFIG_COMPAT +static void ebt_standard_compat_from_user(void *dst, const void *src) +{ + int v = *(compat_int_t *)src; + + if (v >= 0) + v += xt_compat_calc_jump(NFPROTO_BRIDGE, v); + memcpy(dst, &v, sizeof(v)); +} + +static int ebt_standard_compat_to_user(void __user *dst, const void *src) +{ + compat_int_t cv = *(int *)src; + + if (cv >= 0) + cv -= xt_compat_calc_jump(NFPROTO_BRIDGE, cv); + return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; +} +#endif + + static struct xt_target ebt_standard_target = { .name = "standard", .revision = 0, .family = NFPROTO_BRIDGE, .targetsize = sizeof(int), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(compat_int_t), + .compat_from_user = ebt_standard_compat_from_user, + .compat_to_user = ebt_standard_compat_to_user, +#endif }; static inline int @@ -1454,7 +1480,7 @@ static int do_ebt_set_ctl(struct sock *sk, break; default: ret = -EINVAL; - } + } return ret; } @@ -1514,15 +1540,874 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } +#ifdef CONFIG_COMPAT +/* 32 bit-userspace compatibility definitions. */ +struct compat_ebt_replace { + char name[EBT_TABLE_MAXNAMELEN]; + compat_uint_t valid_hooks; + compat_uint_t nentries; + compat_uint_t entries_size; + /* start of the chains */ + compat_uptr_t hook_entry[NF_BR_NUMHOOKS]; + /* nr of counters userspace expects back */ + compat_uint_t num_counters; + /* where the kernel will put the old counters. */ + compat_uptr_t counters; + compat_uptr_t entries; +}; + +/* struct ebt_entry_match, _target and _watcher have same layout */ +struct compat_ebt_entry_mwt { + union { + char name[EBT_FUNCTION_MAXNAMELEN]; + compat_uptr_t ptr; + } u; + compat_uint_t match_size; + compat_uint_t data[0]; +}; + +/* account for possible padding between match_size and ->data */ +static int ebt_compat_entry_padsize(void) +{ + BUILD_BUG_ON(XT_ALIGN(sizeof(struct ebt_entry_match)) < + COMPAT_XT_ALIGN(sizeof(struct compat_ebt_entry_mwt))); + return (int) XT_ALIGN(sizeof(struct ebt_entry_match)) - + COMPAT_XT_ALIGN(sizeof(struct compat_ebt_entry_mwt)); +} + +static int ebt_compat_match_offset(const struct xt_match *match, + unsigned int userlen) +{ + /* + * ebt_among needs special handling. The kernel .matchsize is + * set to -1 at registration time; at runtime an EBT_ALIGN()ed + * value is expected. + * Example: userspace sends 4500, ebt_among.c wants 4504. 
+ */ + if (unlikely(match->matchsize == -1)) + return XT_ALIGN(userlen) - COMPAT_XT_ALIGN(userlen); + return xt_compat_match_offset(match); +} + +static int compat_match_to_user(struct ebt_entry_match *m, void __user **dstptr, + unsigned int *size) +{ + const struct xt_match *match = m->u.match; + struct compat_ebt_entry_mwt __user *cm = *dstptr; + int off = ebt_compat_match_offset(match, m->match_size); + compat_uint_t msize = m->match_size - off; + + BUG_ON(off >= m->match_size); + + if (copy_to_user(cm->u.name, match->name, + strlen(match->name) + 1) || put_user(msize, &cm->match_size)) + return -EFAULT; + + if (match->compat_to_user) { + if (match->compat_to_user(cm->data, m->data)) + return -EFAULT; + } else if (copy_to_user(cm->data, m->data, msize)) + return -EFAULT; + + *size -= ebt_compat_entry_padsize() + off; + *dstptr = cm->data; + *dstptr += msize; + return 0; +} + +static int compat_target_to_user(struct ebt_entry_target *t, + void __user **dstptr, + unsigned int *size) +{ + const struct xt_target *target = t->u.target; + struct compat_ebt_entry_mwt __user *cm = *dstptr; + int off = xt_compat_target_offset(target); + compat_uint_t tsize = t->target_size - off; + + BUG_ON(off >= t->target_size); + + if (copy_to_user(cm->u.name, target->name, + strlen(target->name) + 1) || put_user(tsize, &cm->match_size)) + return -EFAULT; + + if (target->compat_to_user) { + if (target->compat_to_user(cm->data, t->data)) + return -EFAULT; + } else if (copy_to_user(cm->data, t->data, tsize)) + return -EFAULT; + + *size -= ebt_compat_entry_padsize() + off; + *dstptr = cm->data; + *dstptr += tsize; + return 0; +} + +static int compat_watcher_to_user(struct ebt_entry_watcher *w, + void __user **dstptr, + unsigned int *size) +{ + return compat_target_to_user((struct ebt_entry_target *)w, + dstptr, size); +} + +static int compat_copy_entry_to_user(struct ebt_entry *e, void __user **dstptr, + unsigned int *size) +{ + struct ebt_entry_target *t; + struct ebt_entry __user *ce; + u32 watchers_offset, target_offset, next_offset; + compat_uint_t origsize; + int ret; + + if (e->bitmask == 0) { + if (*size < sizeof(struct ebt_entries)) + return -EINVAL; + if (copy_to_user(*dstptr, e, sizeof(struct ebt_entries))) + return -EFAULT; + + *dstptr += sizeof(struct ebt_entries); + *size -= sizeof(struct ebt_entries); + return 0; + } + + if (*size < sizeof(*ce)) + return -EINVAL; + + ce = (struct ebt_entry __user *)*dstptr; + if (copy_to_user(ce, e, sizeof(*ce))) + return -EFAULT; + + origsize = *size; + *dstptr += sizeof(*ce); + + ret = EBT_MATCH_ITERATE(e, compat_match_to_user, dstptr, size); + if (ret) + return ret; + watchers_offset = e->watchers_offset - (origsize - *size); + + ret = EBT_WATCHER_ITERATE(e, compat_watcher_to_user, dstptr, size); + if (ret) + return ret; + target_offset = e->target_offset - (origsize - *size); + + t = (struct ebt_entry_target *) ((char *) e + e->target_offset); + + ret = compat_target_to_user(t, dstptr, size); + if (ret) + return ret; + next_offset = e->next_offset - (origsize - *size); + + if (put_user(watchers_offset, &ce->watchers_offset) || + put_user(target_offset, &ce->target_offset) || + put_user(next_offset, &ce->next_offset)) + return -EFAULT; + + *size -= sizeof(*ce); + return 0; +} + +static int compat_calc_match(struct ebt_entry_match *m, int *off) +{ + *off += ebt_compat_match_offset(m->u.match, m->match_size); + *off += ebt_compat_entry_padsize(); + return 0; +} + +static int compat_calc_watcher(struct ebt_entry_watcher *w, int *off) +{ + *off += 
xt_compat_target_offset(w->u.watcher); + *off += ebt_compat_entry_padsize(); + return 0; +} + +static int compat_calc_entry(const struct ebt_entry *e, + const struct ebt_table_info *info, + const void *base, + struct compat_ebt_replace *newinfo) +{ + const struct ebt_entry_target *t; + unsigned int entry_offset; + int off, ret, i; + + if (e->bitmask == 0) + return 0; + + off = 0; + entry_offset = (void *)e - base; + + EBT_MATCH_ITERATE(e, compat_calc_match, &off); + EBT_WATCHER_ITERATE(e, compat_calc_watcher, &off); + + t = (const struct ebt_entry_target *) ((char *) e + e->target_offset); + + off += xt_compat_target_offset(t->u.target); + off += ebt_compat_entry_padsize(); + + newinfo->entries_size -= off; + + ret = xt_compat_add_offset(NFPROTO_BRIDGE, entry_offset, off); + if (ret) + return ret; + + for (i = 0; i < NF_BR_NUMHOOKS; i++) { + const void *hookptr = info->hook_entry[i]; + if (info->hook_entry[i] && + (e < (struct ebt_entry *)(base - hookptr))) { + newinfo->hook_entry[i] -= off; + pr_debug("0x%08X -> 0x%08X\n", + newinfo->hook_entry[i] + off, + newinfo->hook_entry[i]); + } + } + + return 0; +} + + +static int compat_table_info(const struct ebt_table_info *info, + struct compat_ebt_replace *newinfo) +{ + unsigned int size = info->entries_size; + const void *entries = info->entries; + + newinfo->entries_size = size; + + return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info, + entries, newinfo); +} + +static int compat_copy_everything_to_user(struct ebt_table *t, + void __user *user, int *len, int cmd) +{ + struct compat_ebt_replace repl, tmp; + struct ebt_counter *oldcounters; + struct ebt_table_info tinfo; + int ret; + void __user *pos; + + memset(&tinfo, 0, sizeof(tinfo)); + + if (cmd == EBT_SO_GET_ENTRIES) { + tinfo.entries_size = t->private->entries_size; + tinfo.nentries = t->private->nentries; + tinfo.entries = t->private->entries; + oldcounters = t->private->counters; + } else { + tinfo.entries_size = t->table->entries_size; + tinfo.nentries = t->table->nentries; + tinfo.entries = t->table->entries; + oldcounters = t->table->counters; + } + + if (copy_from_user(&tmp, user, sizeof(tmp))) + return -EFAULT; + + if (tmp.nentries != tinfo.nentries || + (tmp.num_counters && tmp.num_counters != tinfo.nentries)) + return -EINVAL; + + memcpy(&repl, &tmp, sizeof(repl)); + if (cmd == EBT_SO_GET_ENTRIES) + ret = compat_table_info(t->private, &repl); + else + ret = compat_table_info(&tinfo, &repl); + if (ret) + return ret; + + if (*len != sizeof(tmp) + repl.entries_size + + (tmp.num_counters? tinfo.nentries * sizeof(struct ebt_counter): 0)) { + pr_err("wrong size: *len %d, entries_size %u, replsz %d\n", + *len, tinfo.entries_size, repl.entries_size); + return -EINVAL; + } + + /* userspace might not need the counters */ + ret = copy_counters_to_user(t, oldcounters, compat_ptr(tmp.counters), + tmp.num_counters, tinfo.nentries); + if (ret) + return ret; + + pos = compat_ptr(tmp.entries); + return EBT_ENTRY_ITERATE(tinfo.entries, tinfo.entries_size, + compat_copy_entry_to_user, &pos, &tmp.entries_size); +} + +struct ebt_entries_buf_state { + char *buf_kern_start; /* kernel buffer to copy (translated) data to */ + u32 buf_kern_len; /* total size of kernel buffer */ + u32 buf_kern_offset; /* amount of data copied so far */ + u32 buf_user_offset; /* read position in userspace buffer */ +}; + +static int ebt_buf_count(struct ebt_entries_buf_state *state, unsigned int sz) +{ + state->buf_kern_offset += sz; + return state->buf_kern_offset >= sz ? 
0 : -EINVAL; +} + +static int ebt_buf_add(struct ebt_entries_buf_state *state, + void *data, unsigned int sz) +{ + if (state->buf_kern_start == NULL) + goto count_only; + + BUG_ON(state->buf_kern_offset + sz > state->buf_kern_len); + + memcpy(state->buf_kern_start + state->buf_kern_offset, data, sz); + + count_only: + state->buf_user_offset += sz; + return ebt_buf_count(state, sz); +} + +static int ebt_buf_add_pad(struct ebt_entries_buf_state *state, unsigned int sz) +{ + char *b = state->buf_kern_start; + + BUG_ON(b && state->buf_kern_offset > state->buf_kern_len); + + if (b != NULL && sz > 0) + memset(b + state->buf_kern_offset, 0, sz); + /* do not adjust ->buf_user_offset here, we added kernel-side padding */ + return ebt_buf_count(state, sz); +} + +enum compat_mwt { + EBT_COMPAT_MATCH, + EBT_COMPAT_WATCHER, + EBT_COMPAT_TARGET, +}; + +static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt, + enum compat_mwt compat_mwt, + struct ebt_entries_buf_state *state, + const unsigned char *base) +{ + char name[EBT_FUNCTION_MAXNAMELEN]; + struct xt_match *match; + struct xt_target *wt; + void *dst = NULL; + int off, pad = 0, ret = 0; + unsigned int size_kern, entry_offset, match_size = mwt->match_size; + + strlcpy(name, mwt->u.name, sizeof(name)); + + if (state->buf_kern_start) + dst = state->buf_kern_start + state->buf_kern_offset; + + entry_offset = (unsigned char *) mwt - base; + switch (compat_mwt) { + case EBT_COMPAT_MATCH: + match = try_then_request_module(xt_find_match(NFPROTO_BRIDGE, + name, 0), "ebt_%s", name); + if (match == NULL) + return -ENOENT; + if (IS_ERR(match)) + return PTR_ERR(match); + + off = ebt_compat_match_offset(match, match_size); + if (dst) { + if (match->compat_from_user) + match->compat_from_user(dst, mwt->data); + else + memcpy(dst, mwt->data, match_size); + } + + size_kern = match->matchsize; + if (unlikely(size_kern == -1)) + size_kern = match_size; + module_put(match->me); + break; + case EBT_COMPAT_WATCHER: /* fallthrough */ + case EBT_COMPAT_TARGET: + wt = try_then_request_module(xt_find_target(NFPROTO_BRIDGE, + name, 0), "ebt_%s", name); + if (wt == NULL) + return -ENOENT; + if (IS_ERR(wt)) + return PTR_ERR(wt); + off = xt_compat_target_offset(wt); + + if (dst) { + if (wt->compat_from_user) + wt->compat_from_user(dst, mwt->data); + else + memcpy(dst, mwt->data, match_size); + } + + size_kern = wt->targetsize; + module_put(wt->me); + break; + } + + if (!dst) { + ret = xt_compat_add_offset(NFPROTO_BRIDGE, entry_offset, + off + ebt_compat_entry_padsize()); + if (ret < 0) + return ret; + } + + state->buf_kern_offset += match_size + off; + state->buf_user_offset += match_size; + pad = XT_ALIGN(size_kern) - size_kern; + + if (pad > 0 && dst) { + BUG_ON(state->buf_kern_len <= pad); + BUG_ON(state->buf_kern_offset - (match_size + off) + size_kern > state->buf_kern_len - pad); + memset(dst + size_kern, 0, pad); + } + return off + match_size; +} + +/* + * return size of all matches, watchers or target, including necessary + * alignment and padding. 
+ */ +static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32, + unsigned int size_left, enum compat_mwt type, + struct ebt_entries_buf_state *state, const void *base) +{ + int growth = 0; + char *buf; + + if (size_left == 0) + return 0; + + buf = (char *) match32; + + while (size_left >= sizeof(*match32)) { + struct ebt_entry_match *match_kern; + int ret; + + match_kern = (struct ebt_entry_match *) state->buf_kern_start; + if (match_kern) { + char *tmp; + tmp = state->buf_kern_start + state->buf_kern_offset; + match_kern = (struct ebt_entry_match *) tmp; + } + ret = ebt_buf_add(state, buf, sizeof(*match32)); + if (ret < 0) + return ret; + size_left -= sizeof(*match32); + + /* add padding before match->data (if any) */ + ret = ebt_buf_add_pad(state, ebt_compat_entry_padsize()); + if (ret < 0) + return ret; + + if (match32->match_size > size_left) + return -EINVAL; + + size_left -= match32->match_size; + + ret = compat_mtw_from_user(match32, type, state, base); + if (ret < 0) + return ret; + + BUG_ON(ret < match32->match_size); + growth += ret - match32->match_size; + growth += ebt_compat_entry_padsize(); + + buf += sizeof(*match32); + buf += match32->match_size; + + if (match_kern) + match_kern->match_size = ret; + + WARN_ON(type == EBT_COMPAT_TARGET && size_left); + match32 = (struct compat_ebt_entry_mwt *) buf; + } + + return growth; +} + +#define EBT_COMPAT_WATCHER_ITERATE(e, fn, args...) \ +({ \ + unsigned int __i; \ + int __ret = 0; \ + struct compat_ebt_entry_mwt *__watcher; \ + \ + for (__i = e->watchers_offset; \ + __i < (e)->target_offset; \ + __i += __watcher->watcher_size + \ + sizeof(struct compat_ebt_entry_mwt)) { \ + __watcher = (void *)(e) + __i; \ + __ret = fn(__watcher , ## args); \ + if (__ret != 0) \ + break; \ + } \ + if (__ret == 0) { \ + if (__i != (e)->target_offset) \ + __ret = -EINVAL; \ + } \ + __ret; \ +}) + +#define EBT_COMPAT_MATCH_ITERATE(e, fn, args...) \ +({ \ + unsigned int __i; \ + int __ret = 0; \ + struct compat_ebt_entry_mwt *__match; \ + \ + for (__i = sizeof(struct ebt_entry); \ + __i < (e)->watchers_offset; \ + __i += __match->match_size + \ + sizeof(struct compat_ebt_entry_mwt)) { \ + __match = (void *)(e) + __i; \ + __ret = fn(__match , ## args); \ + if (__ret != 0) \ + break; \ + } \ + if (__ret == 0) { \ + if (__i != (e)->watchers_offset) \ + __ret = -EINVAL; \ + } \ + __ret; \ +}) + +/* called for all ebt_entry structures. */ +static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, + unsigned int *total, + struct ebt_entries_buf_state *state) +{ + unsigned int i, j, startoff, new_offset = 0; + /* stores match/watchers/targets & offset of next struct ebt_entry: */ + unsigned int offsets[4]; + unsigned int *offsets_update = NULL; + int ret; + char *buf_start; + + if (*total < sizeof(struct ebt_entries)) + return -EINVAL; + + if (!entry->bitmask) { + *total -= sizeof(struct ebt_entries); + return ebt_buf_add(state, entry, sizeof(struct ebt_entries)); + } + if (*total < sizeof(*entry) || entry->next_offset < sizeof(*entry)) + return -EINVAL; + + startoff = state->buf_user_offset; + /* pull in most part of ebt_entry, it does not need to be changed. 
*/ + ret = ebt_buf_add(state, entry, + offsetof(struct ebt_entry, watchers_offset)); + if (ret < 0) + return ret; + + offsets[0] = sizeof(struct ebt_entry); /* matches come first */ + memcpy(&offsets[1], &entry->watchers_offset, + sizeof(offsets) - sizeof(offsets[0])); + + if (state->buf_kern_start) { + buf_start = state->buf_kern_start + state->buf_kern_offset; + offsets_update = (unsigned int *) buf_start; + } + ret = ebt_buf_add(state, &offsets[1], + sizeof(offsets) - sizeof(offsets[0])); + if (ret < 0) + return ret; + buf_start = (char *) entry; + /* + * 0: matches offset, always follows ebt_entry. + * 1: watchers offset, from ebt_entry structure + * 2: target offset, from ebt_entry structure + * 3: next ebt_entry offset, from ebt_entry structure + * + * offsets are relative to beginning of struct ebt_entry (i.e., 0). + */ + for (i = 0, j = 1 ; j < 4 ; j++, i++) { + struct compat_ebt_entry_mwt *match32; + unsigned int size; + char *buf = buf_start; + + buf = buf_start + offsets[i]; + if (offsets[i] > offsets[j]) + return -EINVAL; + + match32 = (struct compat_ebt_entry_mwt *) buf; + size = offsets[j] - offsets[i]; + ret = ebt_size_mwt(match32, size, i, state, base); + if (ret < 0) + return ret; + new_offset += ret; + if (offsets_update && new_offset) { + pr_debug("ebtables: change offset %d to %d\n", + offsets_update[i], offsets[j] + new_offset); + offsets_update[i] = offsets[j] + new_offset; + } + } + + startoff = state->buf_user_offset - startoff; + + BUG_ON(*total < startoff); + *total -= startoff; + return 0; +} + +/* + * repl->entries_size is the size of the ebt_entry blob in userspace. + * It might need more memory when copied to a 64 bit kernel in case + * userspace is 32-bit. So, first task: find out how much memory is needed. + * + * Called before validation is performed. + */ +static int compat_copy_entries(unsigned char *data, unsigned int size_user, + struct ebt_entries_buf_state *state) +{ + unsigned int size_remaining = size_user; + int ret; + + ret = EBT_ENTRY_ITERATE(data, size_user, size_entry_mwt, data, + &size_remaining, state); + if (ret < 0) + return ret; + + WARN_ON(size_remaining); + return state->buf_kern_offset; +} + + +static int compat_copy_ebt_replace_from_user(struct ebt_replace *repl, + void __user *user, unsigned int len) +{ + struct compat_ebt_replace tmp; + int i; + + if (len < sizeof(tmp)) + return -EINVAL; + + if (copy_from_user(&tmp, user, sizeof(tmp))) + return -EFAULT; + + if (len != sizeof(tmp) + tmp.entries_size) + return -EINVAL; + + if (tmp.entries_size == 0) + return -EINVAL; + + if (tmp.nentries >= ((INT_MAX - sizeof(struct ebt_table_info)) / + NR_CPUS - SMP_CACHE_BYTES) / sizeof(struct ebt_counter)) + return -ENOMEM; + if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter)) + return -ENOMEM; + + memcpy(repl, &tmp, offsetof(struct ebt_replace, hook_entry)); + + /* starting with hook_entry, 32 vs. 
64 bit structures are different */ + for (i = 0; i < NF_BR_NUMHOOKS; i++) + repl->hook_entry[i] = compat_ptr(tmp.hook_entry[i]); + + repl->num_counters = tmp.num_counters; + repl->counters = compat_ptr(tmp.counters); + repl->entries = compat_ptr(tmp.entries); + return 0; +} + +static int compat_do_replace(struct net *net, void __user *user, + unsigned int len) +{ + int ret, i, countersize, size64; + struct ebt_table_info *newinfo; + struct ebt_replace tmp; + struct ebt_entries_buf_state state; + void *entries_tmp; + + ret = compat_copy_ebt_replace_from_user(&tmp, user, len); + if (ret) + return ret; + + countersize = COUNTER_OFFSET(tmp.nentries) * nr_cpu_ids; + newinfo = vmalloc(sizeof(*newinfo) + countersize); + if (!newinfo) + return -ENOMEM; + + if (countersize) + memset(newinfo->counters, 0, countersize); + + memset(&state, 0, sizeof(state)); + + newinfo->entries = vmalloc(tmp.entries_size); + if (!newinfo->entries) { + ret = -ENOMEM; + goto free_newinfo; + } + if (copy_from_user( + newinfo->entries, tmp.entries, tmp.entries_size) != 0) { + ret = -EFAULT; + goto free_entries; + } + + entries_tmp = newinfo->entries; + + xt_compat_lock(NFPROTO_BRIDGE); + + ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state); + if (ret < 0) + goto out_unlock; + + pr_debug("tmp.entries_size %d, kern off %d, user off %d delta %d\n", + tmp.entries_size, state.buf_kern_offset, state.buf_user_offset, + xt_compat_calc_jump(NFPROTO_BRIDGE, tmp.entries_size)); + + size64 = ret; + newinfo->entries = vmalloc(size64); + if (!newinfo->entries) { + vfree(entries_tmp); + ret = -ENOMEM; + goto out_unlock; + } + + memset(&state, 0, sizeof(state)); + state.buf_kern_start = newinfo->entries; + state.buf_kern_len = size64; + + ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state); + BUG_ON(ret < 0); /* parses same data again */ + + vfree(entries_tmp); + tmp.entries_size = size64; + + for (i = 0; i < NF_BR_NUMHOOKS; i++) { + char __user *usrptr; + if (tmp.hook_entry[i]) { + unsigned int delta; + usrptr = (char __user *) tmp.hook_entry[i]; + delta = usrptr - tmp.entries; + usrptr += xt_compat_calc_jump(NFPROTO_BRIDGE, delta); + tmp.hook_entry[i] = (struct ebt_entries __user *)usrptr; + } + } + + xt_compat_flush_offsets(NFPROTO_BRIDGE); + xt_compat_unlock(NFPROTO_BRIDGE); + + ret = do_replace_finish(net, &tmp, newinfo); + if (ret == 0) + return ret; +free_entries: + vfree(newinfo->entries); +free_newinfo: + vfree(newinfo); + return ret; +out_unlock: + xt_compat_flush_offsets(NFPROTO_BRIDGE); + xt_compat_unlock(NFPROTO_BRIDGE); + goto free_entries; +} + +static int compat_update_counters(struct net *net, void __user *user, + unsigned int len) +{ + struct compat_ebt_replace hlp; + + if (copy_from_user(&hlp, user, sizeof(hlp))) + return -EFAULT; + + if (len != sizeof(hlp) + hlp.num_counters * sizeof(struct ebt_counter)) + return -EINVAL; + + return do_update_counters(net, hlp.name, compat_ptr(hlp.counters), + hlp.num_counters, user, len); +} + +static int compat_do_ebt_set_ctl(struct sock *sk, + int cmd, void __user *user, unsigned int len) +{ + int ret; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case EBT_SO_SET_ENTRIES: + ret = compat_do_replace(sock_net(sk), user, len); + break; + case EBT_SO_SET_COUNTERS: + ret = compat_update_counters(sock_net(sk), user, len); + break; + default: + ret = -EINVAL; + } + return ret; +} + +static int compat_do_ebt_get_ctl(struct sock *sk, int cmd, + void __user *user, int *len) +{ + int ret; + struct compat_ebt_replace tmp; + struct ebt_table 
*t; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if ((cmd == EBT_SO_GET_INFO || + cmd == EBT_SO_GET_INIT_INFO) && *len != sizeof(tmp)) + return -EINVAL; + + if (copy_from_user(&tmp, user, sizeof(tmp))) + return -EFAULT; + + t = find_table_lock(sock_net(sk), tmp.name, &ret, &ebt_mutex); + if (!t) + return ret; + + xt_compat_lock(NFPROTO_BRIDGE); + switch (cmd) { + case EBT_SO_GET_INFO: + tmp.nentries = t->private->nentries; + ret = compat_table_info(t->private, &tmp); + if (ret) + goto out; + tmp.valid_hooks = t->valid_hooks; + + if (copy_to_user(user, &tmp, *len) != 0) { + ret = -EFAULT; + break; + } + ret = 0; + break; + case EBT_SO_GET_INIT_INFO: + tmp.nentries = t->table->nentries; + tmp.entries_size = t->table->entries_size; + tmp.valid_hooks = t->table->valid_hooks; + + if (copy_to_user(user, &tmp, *len) != 0) { + ret = -EFAULT; + break; + } + ret = 0; + break; + case EBT_SO_GET_ENTRIES: + case EBT_SO_GET_INIT_ENTRIES: + ret = compat_copy_everything_to_user(t, user, len, cmd); + break; + default: + ret = -EINVAL; + } + out: + xt_compat_flush_offsets(NFPROTO_BRIDGE); + xt_compat_unlock(NFPROTO_BRIDGE); + mutex_unlock(&ebt_mutex); + return ret; +} +#endif + static struct nf_sockopt_ops ebt_sockopts = { .pf = PF_INET, .set_optmin = EBT_BASE_CTL, .set_optmax = EBT_SO_SET_MAX + 1, .set = do_ebt_set_ctl, +#ifdef CONFIG_COMPAT + .compat_set = compat_do_ebt_set_ctl, +#endif .get_optmin = EBT_BASE_CTL, .get_optmax = EBT_SO_GET_MAX + 1, .get = do_ebt_get_ctl, +#ifdef CONFIG_COMPAT + .compat_get = compat_do_ebt_get_ctl, +#endif .owner = THIS_MODULE, }; -- cgit v1.2.2 From 90b89af7e15143c8ea22f5c8818f5a2eec9e75c1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 7 Feb 2010 03:19:12 +0100 Subject: netfilter: ebtables: try native set/getsockopt handlers, too ebtables can be compiled to perform userspace-side padding of structures. In that case, all the structures are already in the 'native' format expected by the kernel. This tries to determine what format the userspace program is using. For most set/getsockopts, this can be done by checking the len argument for sizeof(compat_ebt_replace) and re-trying the native handler on error. In case of EBT_SO_GET_ENTRIES, the native handler is tried first, it will error out early when checking the *len argument (the compat version has to defer this check until after iterating over the kernel data set once, to adjust for all the structure size differences). As this would cause error printks, remove those as well, as recommended by Bart de Schuymer. Signed-off-by: Florian Westphal --- net/bridge/netfilter/ebtables.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index fcaefdd6200b..dfb58056a89a 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1428,16 +1428,12 @@ static int copy_everything_to_user(struct ebt_table *t, void __user *user, oldcounters = t->table->counters; } - if (copy_from_user(&tmp, user, sizeof(tmp))) { - BUGPRINT("Cfu didn't work\n"); + if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; - } if (*len != sizeof(struct ebt_replace) + entries_size + - (tmp.num_counters? nentries * sizeof(struct ebt_counter): 0)) { - BUGPRINT("Wrong size\n"); + (tmp.num_counters? 
nentries * sizeof(struct ebt_counter): 0)) return -EINVAL; - } if (tmp.nentries != nentries) { BUGPRINT("Nentries wrong\n"); @@ -2213,8 +2209,12 @@ static int compat_do_replace(struct net *net, void __user *user, void *entries_tmp; ret = compat_copy_ebt_replace_from_user(&tmp, user, len); - if (ret) + if (ret) { + /* try real handler in case userland supplied needed padding */ + if (ret == -EINVAL && do_replace(net, user, len) == 0) + ret = 0; return ret; + } countersize = COUNTER_OFFSET(tmp.nentries) * nr_cpu_ids; newinfo = vmalloc(sizeof(*newinfo) + countersize); @@ -2303,8 +2303,9 @@ static int compat_update_counters(struct net *net, void __user *user, if (copy_from_user(&hlp, user, sizeof(hlp))) return -EFAULT; + /* try real handler in case userland supplied needed padding */ if (len != sizeof(hlp) + hlp.num_counters * sizeof(struct ebt_counter)) - return -EINVAL; + return update_counters(net, user, len); return do_update_counters(net, hlp.name, compat_ptr(hlp.counters), hlp.num_counters, user, len); @@ -2341,9 +2342,10 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd, if (!capable(CAP_NET_ADMIN)) return -EPERM; + /* try real handler in case userland supplied needed padding */ if ((cmd == EBT_SO_GET_INFO || cmd == EBT_SO_GET_INIT_INFO) && *len != sizeof(tmp)) - return -EINVAL; + return do_ebt_get_ctl(sk, cmd, user, len); if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; @@ -2380,7 +2382,19 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd, break; case EBT_SO_GET_ENTRIES: case EBT_SO_GET_INIT_ENTRIES: - ret = compat_copy_everything_to_user(t, user, len, cmd); + /* + * try real handler first in case of userland-side padding. + * in case we are dealing with an 'ordinary' 32 bit binary + * without 64bit compatibility padding, this will fail right + * after copy_from_user when the *len argument is validated. + * + * the compat_ variant needs to do one pass over the kernel + * data set to adjust for size differences before it the check. + */ + if (copy_everything_to_user(t, user, len, cmd) == 0) + ret = 0; + else + ret = compat_copy_everything_to_user(t, user, len, cmd); break; default: ret = -EINVAL; -- cgit v1.2.2 From 314ddca3b172fdb9c23a25c545505dbde557602e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 27 Jan 2010 14:38:32 +0100 Subject: netfilter: ebt_limit: add CONFIG_COMPAT support ebt_limit structure is larger on 64 bit systems due to "long" type used in the (kernel-only) data section. Setting .compatsize is enough in this case, these values have no meaning in userspace. Signed-off-by: Florian Westphal --- net/bridge/netfilter/ebt_limit.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c index 9dd16e6b10e7..7a8182710eb3 100644 --- a/net/bridge/netfilter/ebt_limit.c +++ b/net/bridge/netfilter/ebt_limit.c @@ -84,6 +84,19 @@ static bool ebt_limit_mt_check(const struct xt_mtchk_param *par) return true; } + +#ifdef CONFIG_COMPAT +/* + * no conversion function needed -- + * only avg/burst have meaningful values in userspace. 
+ */ +struct ebt_compat_limit_info { + compat_uint_t avg, burst; + compat_ulong_t prev; + compat_uint_t credit, credit_cap, cost; +}; +#endif + static struct xt_match ebt_limit_mt_reg __read_mostly = { .name = "limit", .revision = 0, @@ -91,6 +104,9 @@ static struct xt_match ebt_limit_mt_reg __read_mostly = { .match = ebt_limit_mt, .checkentry = ebt_limit_mt_check, .matchsize = sizeof(struct ebt_limit_info), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct ebt_compat_limit_info), +#endif .me = THIS_MODULE, }; -- cgit v1.2.2 From 6e705f56a181118f6fbd35e6b443eab33df07290 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 27 Jan 2010 14:39:05 +0100 Subject: netfilter: ebtables: mark: add CONFIG_COMPAT support Add the required handlers to convert 32 bit ebtables mark match and match target structs to 64bit layout. Signed-off-by: Florian Westphal --- net/bridge/netfilter/ebt_mark.c | 31 +++++++++++++++++++++++++++++++ net/bridge/netfilter/ebt_mark_m.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c index 153e167374a2..2b5ce533d6b9 100644 --- a/net/bridge/netfilter/ebt_mark.c +++ b/net/bridge/netfilter/ebt_mark.c @@ -52,6 +52,32 @@ static bool ebt_mark_tg_check(const struct xt_tgchk_param *par) return false; return true; } +#ifdef CONFIG_COMPAT +struct compat_ebt_mark_t_info { + compat_ulong_t mark; + compat_uint_t target; +}; + +static void mark_tg_compat_from_user(void *dst, const void *src) +{ + const struct compat_ebt_mark_t_info *user = src; + struct ebt_mark_t_info *kern = dst; + + kern->mark = user->mark; + kern->target = user->target; +} + +static int mark_tg_compat_to_user(void __user *dst, const void *src) +{ + struct compat_ebt_mark_t_info __user *user = dst; + const struct ebt_mark_t_info *kern = src; + + if (put_user(kern->mark, &user->mark) || + put_user(kern->target, &user->target)) + return -EFAULT; + return 0; +} +#endif static struct xt_target ebt_mark_tg_reg __read_mostly = { .name = "mark", @@ -60,6 +86,11 @@ static struct xt_target ebt_mark_tg_reg __read_mostly = { .target = ebt_mark_tg, .checkentry = ebt_mark_tg_check, .targetsize = sizeof(struct ebt_mark_t_info), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct compat_ebt_mark_t_info), + .compat_from_user = mark_tg_compat_from_user, + .compat_to_user = mark_tg_compat_to_user, +#endif .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c index 89abf4030399..8de8c396d913 100644 --- a/net/bridge/netfilter/ebt_mark_m.c +++ b/net/bridge/netfilter/ebt_mark_m.c @@ -35,6 +35,38 @@ static bool ebt_mark_mt_check(const struct xt_mtchk_param *par) return true; } + +#ifdef CONFIG_COMPAT +struct compat_ebt_mark_m_info { + compat_ulong_t mark, mask; + uint8_t invert, bitmask; +}; + +static void mark_mt_compat_from_user(void *dst, const void *src) +{ + const struct compat_ebt_mark_m_info *user = src; + struct ebt_mark_m_info *kern = dst; + + kern->mark = user->mark; + kern->mask = user->mask; + kern->invert = user->invert; + kern->bitmask = user->bitmask; +} + +static int mark_mt_compat_to_user(void __user *dst, const void *src) +{ + struct compat_ebt_mark_m_info __user *user = dst; + const struct ebt_mark_m_info *kern = src; + + if (put_user(kern->mark, &user->mark) || + put_user(kern->mask, &user->mask) || + put_user(kern->invert, &user->invert) || + put_user(kern->bitmask, &user->bitmask)) + return -EFAULT; + return 0; +} +#endif + static struct 
xt_match ebt_mark_mt_reg __read_mostly = { .name = "mark_m", .revision = 0, @@ -42,6 +74,11 @@ static struct xt_match ebt_mark_mt_reg __read_mostly = { .match = ebt_mark_mt, .checkentry = ebt_mark_mt_check, .matchsize = sizeof(struct ebt_mark_m_info), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct compat_ebt_mark_m_info), + .compat_from_user = mark_mt_compat_from_user, + .compat_to_user = mark_mt_compat_to_user, +#endif .me = THIS_MODULE, }; -- cgit v1.2.2 From bef5d1c70d132145c0fc75b3586a19841a9a82e4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 16 Feb 2010 11:05:00 +0100 Subject: mac80211: split ieee80211_drop_unencrypted Currently, ieee80211_drop_unencrypted is called from management and data frame context, and the different contexts pass different frames. This could lead to it processing an 802.3 frame as an 802.11 frame when MFP is enabled. Move the MFP part of ieee80211_drop_unencrypted into a new function that is only called for mgmt frames. Cc: Jouni Malinen Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/rx.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index a6080d8d72bb..b5c48de81d8b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1397,6 +1397,21 @@ ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc) ieee80211_is_data(fc) && (rx->key || rx->sdata->drop_unencrypted))) return -EACCES; + + return 0; +} + +static int +ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; + __le16 fc = hdr->frame_control; + int res; + + res = ieee80211_drop_unencrypted(rx, fc); + if (unlikely(res)) + return res; + if (rx->sta && test_sta_flags(rx->sta, WLAN_STA_MFP)) { if (unlikely(ieee80211_is_unicast_robust_mgmt_frame(rx->skb) && rx->key)) @@ -1872,7 +1887,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) if (!(rx->flags & IEEE80211_RX_RA_MATCH)) return RX_DROP_UNUSABLE; - if (ieee80211_drop_unencrypted(rx, mgmt->frame_control)) + if (ieee80211_drop_unencrypted_mgmt(rx)) return RX_DROP_UNUSABLE; switch (mgmt->u.action.category) { @@ -2014,14 +2029,13 @@ static ieee80211_rx_result debug_noinline ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) { struct ieee80211_sub_if_data *sdata = rx->sdata; - struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; ieee80211_rx_result rxs; if (!(rx->flags & IEEE80211_RX_RA_MATCH)) return RX_DROP_MONITOR; - if (ieee80211_drop_unencrypted(rx, mgmt->frame_control)) - return RX_DROP_MONITOR; + if (ieee80211_drop_unencrypted_mgmt(rx)) + return RX_DROP_UNUSABLE; rxs = ieee80211_work_rx_mgmt(rx->sdata, rx->skb); if (rxs != RX_CONTINUE) -- cgit v1.2.2 From 1cab819b5e244e1b853c7b440981e6a960da3bfb Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 11 Feb 2010 13:48:29 +0000 Subject: ethtool: allow non-admin user to read GRO settings. Looks like an oversight in GRO design. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/core/ethtool.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index d8aee584e8d1..236a9988ea91 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -927,6 +927,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GPERMADDR: case ETHTOOL_GUFO: case ETHTOOL_GGSO: + case ETHTOOL_GGRO: case ETHTOOL_GFLAGS: case ETHTOOL_GPFLAGS: case ETHTOOL_GRXFH: -- cgit v1.2.2 From 10e7454ed7a2da39f1f6255f63d7df27ab4bb67f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Feb 2010 19:24:30 +0000 Subject: ipcomp: Avoid duplicate calls to ipcomp_destroy When ipcomp_tunnel_attach fails we will call ipcomp_destroy twice. This may lead to double-frees on certain structures. As there is no reason to explicitly call ipcomp_destroy, this patch removes it from ipcomp*.c and lets the standard xfrm_state destruction take place. This is based on the discovery and patch by Alexey Dobriyan. Tested-by: Alexey Dobriyan Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/ipcomp.c | 6 +----- net/ipv6/ipcomp6.c | 6 +----- 2 files changed, 2 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 38fbf04150ae..544ce0876f12 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -124,16 +124,12 @@ static int ipcomp4_init_state(struct xfrm_state *x) if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp_tunnel_attach(x); if (err) - goto error_tunnel; + goto out; } err = 0; out: return err; - -error_tunnel: - ipcomp_destroy(x); - goto out; } static const struct xfrm_type ipcomp_type = { diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 2f2a5ca2c878..002e6eef9120 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -154,16 +154,12 @@ static int ipcomp6_init_state(struct xfrm_state *x) if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp6_tunnel_attach(x); if (err) - goto error_tunnel; + goto out; } err = 0; out: return err; -error_tunnel: - ipcomp_destroy(x); - - goto out; } static const struct xfrm_type ipcomp6_type = -- cgit v1.2.2 From 553f9118abc4fc53674fff87f6fe5fa3f56a41ed Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Feb 2010 20:00:51 +0000 Subject: xfrm: Fix xfrm_state_clone leak xfrm_state_clone calls kfree instead of xfrm_state_put to free a failed state. Depending on the state of the failed state, it can cause leaks to things like module references. All states should be freed by xfrm_state_put past the point of xfrm_init_state. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_state.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index b36cc344474b..f445ea1c5f52 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1102,7 +1102,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) int err = -ENOMEM; struct xfrm_state *x = xfrm_state_alloc(net); if (!x) - goto error; + goto out; memcpy(&x->id, &orig->id, sizeof(x->id)); memcpy(&x->sel, &orig->sel, sizeof(x->sel)); @@ -1160,16 +1160,10 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) return x; error: + xfrm_state_put(x); +out: if (errp) *errp = err; - if (x) { - kfree(x->aalg); - kfree(x->ealg); - kfree(x->calg); - kfree(x->encap); - kfree(x->coaddr); - } - kfree(x); return NULL; } -- cgit v1.2.2 From c2892f02712e9516d72841d5c019ed6916329794 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 16 Feb 2010 07:57:44 +0000 Subject: gre: fix netns vs proto registration ordering GRE protocol receive hook can be called right after protocol addition is done. If netns stuff is not yet initialized, we're going to oops in net_generic(). This is remotely oopsable if ip_gre is compiled as module and packet comes at unfortunate moment of module loading. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 7631b20490f5..a2a5983dbf03 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1665,14 +1665,15 @@ static int __init ipgre_init(void) printk(KERN_INFO "GRE over IPv4 tunneling driver\n"); - if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) { - printk(KERN_INFO "ipgre init: can't add protocol\n"); - return -EAGAIN; - } - err = register_pernet_device(&ipgre_net_ops); if (err < 0) - goto gen_device_failed; + return err; + + err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE); + if (err < 0) { + printk(KERN_INFO "ipgre init: can't add protocol\n"); + goto add_proto_failed; + } err = rtnl_link_register(&ipgre_link_ops); if (err < 0) @@ -1688,9 +1689,9 @@ out: tap_ops_failed: rtnl_link_unregister(&ipgre_link_ops); rtnl_link_failed: - unregister_pernet_device(&ipgre_net_ops); -gen_device_failed: inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); +add_proto_failed: + unregister_pernet_device(&ipgre_net_ops); goto out; } @@ -1698,9 +1699,9 @@ static void __exit ipgre_fini(void) { rtnl_link_unregister(&ipgre_tap_ops); rtnl_link_unregister(&ipgre_link_ops); - unregister_pernet_device(&ipgre_net_ops); if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) printk(KERN_INFO "ipgre close: can't remove protocol\n"); + unregister_pernet_device(&ipgre_net_ops); } module_init(ipgre_init); -- cgit v1.2.2 From d5aa407f59f5b83d2c50ec88f5bf56d40f1f8978 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 16 Feb 2010 09:05:04 +0000 Subject: tunnels: fix netns vs proto registration ordering Same stuff as in ip_gre patch: receive hook can be called before netns setup is done, oopsing in net_generic(). Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- net/ipv4/ipip.c | 13 ++++++------ net/ipv6/ip6_tunnel.c | 28 +++++++++++++------------ net/ipv6/sit.c | 13 ++++++------ net/ipv6/xfrm6_tunnel.c | 55 ++++++++++++++++++------------------------------- 4 files changed, 47 insertions(+), 62 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 95db732e542b..2f302d3ac9a3 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -830,15 +830,14 @@ static int __init ipip_init(void) printk(banner); - if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) { + err = register_pernet_device(&ipip_net_ops); + if (err < 0) + return err; + err = xfrm4_tunnel_register(&ipip_handler, AF_INET); + if (err < 0) { + unregister_pernet_device(&ipip_net_ops); printk(KERN_INFO "ipip init: can't register tunnel\n"); - return -EAGAIN; } - - err = register_pernet_device(&ipip_net_ops); - if (err) - xfrm4_tunnel_deregister(&ipip_handler, AF_INET); - return err; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index fbd786981aa9..9b02492d8706 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1461,27 +1461,29 @@ static int __init ip6_tunnel_init(void) { int err; - if (xfrm6_tunnel_register(&ip4ip6_handler, AF_INET)) { + err = register_pernet_device(&ip6_tnl_net_ops); + if (err < 0) + goto out_pernet; + + err = xfrm6_tunnel_register(&ip4ip6_handler, AF_INET); + if (err < 0) { printk(KERN_ERR "ip6_tunnel init: can't register ip4ip6\n"); - err = -EAGAIN; - goto out; + goto out_ip4ip6; } - if (xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6)) { + err = xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6); + if (err < 0) { printk(KERN_ERR "ip6_tunnel init: can't register ip6ip6\n"); - err = -EAGAIN; - goto unreg_ip4ip6; + goto out_ip6ip6; } - err = register_pernet_device(&ip6_tnl_net_ops); - if (err < 0) - goto err_pernet; return 0; -err_pernet: - xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6); -unreg_ip4ip6: + +out_ip6ip6: xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET); -out: +out_ip4ip6: + unregister_pernet_device(&ip6_tnl_net_ops); +out_pernet: return err; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 10207cc8cc0e..52ffd29cb93f 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1227,15 +1227,14 @@ static int __init sit_init(void) printk(KERN_INFO "IPv6 over IPv4 tunneling driver\n"); - if (xfrm4_tunnel_register(&sit_handler, AF_INET6) < 0) { - printk(KERN_INFO "sit init: Can't add protocol\n"); - return -EAGAIN; - } - err = register_pernet_device(&sit_net_ops); if (err < 0) - xfrm4_tunnel_deregister(&sit_handler, AF_INET6); - + return err; + err = xfrm4_tunnel_register(&sit_handler, AF_INET6); + if (err < 0) { + unregister_pernet_device(&sit_net_ops); + printk(KERN_INFO "sit init: Can't add protocol\n"); + } return err; } diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index d6f9aeec69f7..ddce21e3459b 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -84,23 +84,6 @@ static inline unsigned xfrm6_tunnel_spi_hash_byspi(u32 spi) return spi % XFRM6_TUNNEL_SPI_BYSPI_HSIZE; } - -static int __init xfrm6_tunnel_spi_init(void) -{ - xfrm6_tunnel_spi_kmem = kmem_cache_create("xfrm6_tunnel_spi", - sizeof(struct xfrm6_tunnel_spi), - 0, SLAB_HWCACHE_ALIGN, - NULL); - if (!xfrm6_tunnel_spi_kmem) - return -ENOMEM; - return 0; -} - -static void xfrm6_tunnel_spi_fini(void) -{ - kmem_cache_destroy(xfrm6_tunnel_spi_kmem); -} - static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(struct net *net, xfrm_address_t *saddr) { struct xfrm6_tunnel_net *xfrm6_tn = 
xfrm6_tunnel_pernet(net); @@ -375,42 +358,44 @@ static int __init xfrm6_tunnel_init(void) { int rv; + xfrm6_tunnel_spi_kmem = kmem_cache_create("xfrm6_tunnel_spi", + sizeof(struct xfrm6_tunnel_spi), + 0, SLAB_HWCACHE_ALIGN, + NULL); + if (!xfrm6_tunnel_spi_kmem) + return -ENOMEM; + rv = register_pernet_subsys(&xfrm6_tunnel_net_ops); + if (rv < 0) + goto out_pernet; rv = xfrm_register_type(&xfrm6_tunnel_type, AF_INET6); if (rv < 0) - goto err; + goto out_type; rv = xfrm6_tunnel_register(&xfrm6_tunnel_handler, AF_INET6); if (rv < 0) - goto unreg; + goto out_xfrm6; rv = xfrm6_tunnel_register(&xfrm46_tunnel_handler, AF_INET); if (rv < 0) - goto dereg6; - rv = xfrm6_tunnel_spi_init(); - if (rv < 0) - goto dereg46; - rv = register_pernet_subsys(&xfrm6_tunnel_net_ops); - if (rv < 0) - goto deregspi; + goto out_xfrm46; return 0; -deregspi: - xfrm6_tunnel_spi_fini(); -dereg46: - xfrm6_tunnel_deregister(&xfrm46_tunnel_handler, AF_INET); -dereg6: +out_xfrm46: xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6); -unreg: +out_xfrm6: xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6); -err: +out_type: + unregister_pernet_subsys(&xfrm6_tunnel_net_ops); +out_pernet: + kmem_cache_destroy(xfrm6_tunnel_spi_kmem); return rv; } static void __exit xfrm6_tunnel_fini(void) { - unregister_pernet_subsys(&xfrm6_tunnel_net_ops); - xfrm6_tunnel_spi_fini(); xfrm6_tunnel_deregister(&xfrm46_tunnel_handler, AF_INET); xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6); xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6); + unregister_pernet_subsys(&xfrm6_tunnel_net_ops); + kmem_cache_destroy(xfrm6_tunnel_spi_kmem); } module_init(xfrm6_tunnel_init); -- cgit v1.2.2 From 02291680ffba92e5b5865bc0c5e7d1f3056b80ec Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 14 Feb 2010 03:25:51 +0000 Subject: net ipv4: Decouple ipv4 interface parameters from binary sysctl numbers Stop using the binary sysctl enumeartion in sysctl.h as an index into a per interface array. This leads to unnecessary binary sysctl number allocation, and a fragility in data structure and implementation because of unnecessary coupling. Signed-off-by: Eric W. Biederman Signed-off-by: David S. 
Miller --- net/ipv4/devinet.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index cd71a3908391..b1eddee9bf94 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -64,20 +64,20 @@ static struct ipv4_devconf ipv4_devconf = { .data = { - [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1, - [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1, - [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1, - [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1, + [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1, + [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1, + [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1, + [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1, }, }; static struct ipv4_devconf ipv4_devconf_dflt = { .data = { - [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1, - [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1, - [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1, - [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1, - [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1, + [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1, + [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1, + [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1, + [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1, + [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1, }, }; @@ -1360,7 +1360,7 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write, { \ .procname = name, \ .data = ipv4_devconf.data + \ - NET_IPV4_CONF_ ## attr - 1, \ + IPV4_DEVCONF_ ## attr - 1, \ .maxlen = sizeof(int), \ .mode = mval, \ .proc_handler = proc, \ @@ -1381,7 +1381,7 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write, static struct devinet_sysctl_table { struct ctl_table_header *sysctl_header; - struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX]; + struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX]; char *dev_name; } devinet_sysctl = { .devinet_vars = { @@ -1503,7 +1503,7 @@ static struct ctl_table ctl_forward_entry[] = { { .procname = "ip_forward", .data = &ipv4_devconf.data[ - NET_IPV4_CONF_FORWARDING - 1], + IPV4_DEVCONF_FORWARDING - 1], .maxlen = sizeof(int), .mode = 0644, .proc_handler = devinet_sysctl_forward, @@ -1547,7 +1547,7 @@ static __net_init int devinet_init_net(struct net *net) if (tbl == NULL) goto err_alloc_ctl; - tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1]; + tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1]; tbl[0].extra1 = all; tbl[0].extra2 = net; #endif -- cgit v1.2.2 From 54716e3beb0ab20c49471348dfe399a71bfc8fd3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 14 Feb 2010 03:27:03 +0000 Subject: net neigh: Decouple per interface neighbour table controls from binary sysctls Stop computing the number of neighbour table settings we have by counting the number of binary sysctls. This behaviour was silly and meant that we could not add another neighbour table setting without also adding another binary sysctl. Don't pass the binary sysctl path for neighour table entries into neigh_sysctl_register. These parameters are no longer used and so are just dead code. Signed-off-by: Eric W. Biederman Signed-off-by: David S. 
Miller --- net/core/neighbour.c | 7 ++++--- net/ipv4/arp.c | 3 +-- net/ipv4/devinet.c | 3 +-- net/ipv6/addrconf.c | 3 +-- net/ipv6/ndisc.c | 3 +-- 5 files changed, 8 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index f2efd72da799..d102f6d9abdc 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2556,9 +2556,11 @@ EXPORT_SYMBOL(neigh_app_ns); #ifdef CONFIG_SYSCTL +#define NEIGH_VARS_MAX 19 + static struct neigh_sysctl_table { struct ctl_table_header *sysctl_header; - struct ctl_table neigh_vars[__NET_NEIGH_MAX]; + struct ctl_table neigh_vars[NEIGH_VARS_MAX]; char *dev_name; } neigh_sysctl_template __read_mostly = { .neigh_vars = { @@ -2675,8 +2677,7 @@ static struct neigh_sysctl_table { }; int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, - int p_id, int pdev_id, char *p_name, - proc_handler *handler) + char *p_name, proc_handler *handler) { struct neigh_sysctl_table *t; const char *dev_name_source = NULL; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 1940b4df7699..c4dd13542802 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1284,8 +1284,7 @@ void __init arp_init(void) dev_add_pack(&arp_packet_type); arp_proc_init(); #ifdef CONFIG_SYSCTL - neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4, - NET_IPV4_NEIGH, "ipv4", NULL); + neigh_sysctl_register(NULL, &arp_tbl.parms, "ipv4", NULL); #endif register_netdevice_notifier(&arp_netdev_notifier); } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index b1eddee9bf94..014982b61564 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1487,8 +1487,7 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf) static void devinet_sysctl_register(struct in_device *idev) { - neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4, - NET_IPV4_NEIGH, "ipv4", NULL); + neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL); __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name, &idev->cnf); } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 764ad37ca070..c79cbff54370 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4416,8 +4416,7 @@ static void __addrconf_sysctl_unregister(struct ipv6_devconf *p) static void addrconf_sysctl_register(struct inet6_dev *idev) { - neigh_sysctl_register(idev->dev, idev->nd_parms, NET_IPV6, - NET_IPV6_NEIGH, "ipv6", + neigh_sysctl_register(idev->dev, idev->nd_parms, "ipv6", &ndisc_ifinfo_sysctl_change); __addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name, idev, &idev->cnf); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 2dfec6bb8ada..8bcc4b7db3bf 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1820,8 +1820,7 @@ int __init ndisc_init(void) neigh_table_init(&nd_tbl); #ifdef CONFIG_SYSCTL - err = neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6, - NET_IPV6_NEIGH, "ipv6", + err = neigh_sysctl_register(NULL, &nd_tbl.parms, "ipv6", &ndisc_ifinfo_sysctl_change); if (err) goto out_unregister_pernet; -- cgit v1.2.2 From 6836b9bdd98e3b500cd49512484df68f46e14659 Mon Sep 17 00:00:00 2001 From: jamal Date: Tue, 16 Feb 2010 02:01:22 +0000 Subject: xfrm: avoid spinlock in get_acqseq() used by xfrm user Eric's version fixed it for pfkey. This one is for xfrm user. I thought about amortizing those two get_acqseq()s but it seems reasonable to have two of these sequence spaces for the two different interfaces. 
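As a minimal illustration of the lock-free scheme this patch adopts — a hypothetical userspace get_seq() sketched with C11 atomics instead of the kernel's atomic_inc_return(), names and setup assumed for the example only — the "increment atomically and skip zero on wrap" pattern looks like this:

    /* Userspace analogue of the pattern used in xfrm_get_acqseq():
     * generate a non-zero sequence number without taking a spinlock. */
    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_uint seq;                 /* shared counter, starts at 0 */

    static unsigned int get_seq(void)
    {
            unsigned int res;

            do {
                    /* atomic_fetch_add() returns the old value; add 1 to
                     * get the post-increment value, as atomic_inc_return()
                     * does in the kernel. */
                    res = atomic_fetch_add(&seq, 1) + 1;
            } while (!res);                 /* skip 0 when the counter wraps */

            return res;
    }

    int main(void)
    {
            printf("%u %u %u\n", get_seq(), get_seq(), get_seq());
            return 0;
    }

The do/while retry is what preserves the old "never return 0" guarantee of the spinlocked version while letting concurrent callers proceed without serialization.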
cheers, jamal commit d5168d5addbc999c94aacda8f28a4a173756a72b Author: Jamal Hadi Salim Date: Tue Feb 16 06:51:22 2010 -0500 xfrm: avoid spinlock in get_acqseq() used by xfrm user This is in the same spirit as commit 28aecb9d7728dc26bf03ce7925fe622023a83a2a by Eric Dumazet. Use atomic_inc_return() in get_acqseq() to avoid taking a spinlock Signed-off-by: Jamal Hadi Salim Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index f50ee9badf47..96f2088e7448 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1462,12 +1462,12 @@ EXPORT_SYMBOL(xfrm_find_acq_byseq); u32 xfrm_get_acqseq(void) { u32 res; - static u32 acqseq; - static DEFINE_SPINLOCK(acqseq_lock); + static atomic_t acqseq; + + do { + res = atomic_inc_return(&acqseq); + } while (!res); - spin_lock_bh(&acqseq_lock); - res = (++acqseq ? : ++acqseq); - spin_unlock_bh(&acqseq_lock); return res; } EXPORT_SYMBOL(xfrm_get_acqseq); -- cgit v1.2.2 From 7d720c3e4f0c4fc152a6bf17e24244a3c85412d2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 16 Feb 2010 15:20:26 +0000 Subject: percpu: add __percpu sparse annotations to net Add __percpu sparse annotations to net. These annotations are to make sparse consider percpu variables to be in a different address space and warn if accessed without going through percpu accessors. This patch doesn't affect normal builds. The macro and type tricks around snmp stats make things a bit interesting. DEFINE/DECLARE_SNMP_STAT() macros mark the target field as __percpu and SNMP_UPD_PO_STATS() macro is updated accordingly. All snmp_mib_*() users which used to cast the argument to (void **) are updated to cast it to (void __percpu **). Signed-off-by: Tejun Heo Acked-by: David S. Miller Cc: Patrick McHardy Cc: Arnaldo Carvalho de Melo Cc: Vlad Yasevich Cc: netdev@vger.kernel.org Signed-off-by: David S. 
Miller --- net/8021q/vlan.h | 2 +- net/dccp/proto.c | 5 +++-- net/ipv4/af_inet.c | 46 +++++++++++++++++++++++----------------------- net/ipv4/proc.c | 28 ++++++++++++++-------------- net/ipv4/route.c | 2 +- net/ipv4/tcp.c | 21 ++++++++++++--------- net/ipv6/addrconf.c | 24 ++++++++++++------------ net/ipv6/af_inet6.c | 28 ++++++++++++++-------------- net/ipv6/proc.c | 23 +++++++++++++---------- net/sctp/proc.c | 2 +- net/sctp/protocol.c | 5 +++-- net/xfrm/xfrm_ipcomp.c | 16 ++++++++-------- net/xfrm/xfrm_policy.c | 6 +++--- net/xfrm/xfrm_proc.c | 3 ++- 14 files changed, 110 insertions(+), 101 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 5685296017e9..6abdcac1b2e8 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -61,7 +61,7 @@ struct vlan_dev_info { struct proc_dir_entry *dent; unsigned long cnt_inc_headroom_on_tx; unsigned long cnt_encap_on_xmit; - struct vlan_rx_stats *vlan_rx_stats; + struct vlan_rx_stats __percpu *vlan_rx_stats; }; static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 85ec1cb7fd41..0ef7061920c0 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1005,12 +1005,13 @@ EXPORT_SYMBOL_GPL(dccp_shutdown); static inline int dccp_mib_init(void) { - return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib)); + return snmp_mib_init((void __percpu **)dccp_statistics, + sizeof(struct dccp_mib)); } static inline void dccp_mib_exit(void) { - snmp_mib_free((void**)dccp_statistics); + snmp_mib_free((void __percpu **)dccp_statistics); } static int thash_entries; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 7d12c6a9b19b..33b7dffa7732 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1385,7 +1385,7 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family, } EXPORT_SYMBOL_GPL(inet_ctl_sock_create); -unsigned long snmp_fold_field(void *mib[], int offt) +unsigned long snmp_fold_field(void __percpu *mib[], int offt) { unsigned long res = 0; int i; @@ -1398,7 +1398,7 @@ unsigned long snmp_fold_field(void *mib[], int offt) } EXPORT_SYMBOL_GPL(snmp_fold_field); -int snmp_mib_init(void *ptr[2], size_t mibsize) +int snmp_mib_init(void __percpu *ptr[2], size_t mibsize) { BUG_ON(ptr == NULL); ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long long)); @@ -1416,7 +1416,7 @@ err0: } EXPORT_SYMBOL_GPL(snmp_mib_init); -void snmp_mib_free(void *ptr[2]) +void snmp_mib_free(void __percpu *ptr[2]) { BUG_ON(ptr == NULL); free_percpu(ptr[0]); @@ -1460,25 +1460,25 @@ static const struct net_protocol icmp_protocol = { static __net_init int ipv4_mib_init_net(struct net *net) { - if (snmp_mib_init((void **)net->mib.tcp_statistics, + if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics, sizeof(struct tcp_mib)) < 0) goto err_tcp_mib; - if (snmp_mib_init((void **)net->mib.ip_statistics, + if (snmp_mib_init((void __percpu **)net->mib.ip_statistics, sizeof(struct ipstats_mib)) < 0) goto err_ip_mib; - if (snmp_mib_init((void **)net->mib.net_statistics, + if (snmp_mib_init((void __percpu **)net->mib.net_statistics, sizeof(struct linux_mib)) < 0) goto err_net_mib; - if (snmp_mib_init((void **)net->mib.udp_statistics, + if (snmp_mib_init((void __percpu **)net->mib.udp_statistics, sizeof(struct udp_mib)) < 0) goto err_udp_mib; - if (snmp_mib_init((void **)net->mib.udplite_statistics, + if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics, sizeof(struct udp_mib)) < 0) goto err_udplite_mib; - if (snmp_mib_init((void 
**)net->mib.icmp_statistics, + if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics, sizeof(struct icmp_mib)) < 0) goto err_icmp_mib; - if (snmp_mib_init((void **)net->mib.icmpmsg_statistics, + if (snmp_mib_init((void __percpu **)net->mib.icmpmsg_statistics, sizeof(struct icmpmsg_mib)) < 0) goto err_icmpmsg_mib; @@ -1486,30 +1486,30 @@ static __net_init int ipv4_mib_init_net(struct net *net) return 0; err_icmpmsg_mib: - snmp_mib_free((void **)net->mib.icmp_statistics); + snmp_mib_free((void __percpu **)net->mib.icmp_statistics); err_icmp_mib: - snmp_mib_free((void **)net->mib.udplite_statistics); + snmp_mib_free((void __percpu **)net->mib.udplite_statistics); err_udplite_mib: - snmp_mib_free((void **)net->mib.udp_statistics); + snmp_mib_free((void __percpu **)net->mib.udp_statistics); err_udp_mib: - snmp_mib_free((void **)net->mib.net_statistics); + snmp_mib_free((void __percpu **)net->mib.net_statistics); err_net_mib: - snmp_mib_free((void **)net->mib.ip_statistics); + snmp_mib_free((void __percpu **)net->mib.ip_statistics); err_ip_mib: - snmp_mib_free((void **)net->mib.tcp_statistics); + snmp_mib_free((void __percpu **)net->mib.tcp_statistics); err_tcp_mib: return -ENOMEM; } static __net_exit void ipv4_mib_exit_net(struct net *net) { - snmp_mib_free((void **)net->mib.icmpmsg_statistics); - snmp_mib_free((void **)net->mib.icmp_statistics); - snmp_mib_free((void **)net->mib.udplite_statistics); - snmp_mib_free((void **)net->mib.udp_statistics); - snmp_mib_free((void **)net->mib.net_statistics); - snmp_mib_free((void **)net->mib.ip_statistics); - snmp_mib_free((void **)net->mib.tcp_statistics); + snmp_mib_free((void __percpu **)net->mib.icmpmsg_statistics); + snmp_mib_free((void __percpu **)net->mib.icmp_statistics); + snmp_mib_free((void __percpu **)net->mib.udplite_statistics); + snmp_mib_free((void __percpu **)net->mib.udp_statistics); + snmp_mib_free((void __percpu **)net->mib.net_statistics); + snmp_mib_free((void __percpu **)net->mib.ip_statistics); + snmp_mib_free((void __percpu **)net->mib.tcp_statistics); } static __net_initdata struct pernet_operations ipv4_mib_ops = { diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 1b09a6dde7c0..242ed2307370 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -280,7 +280,7 @@ static void icmpmsg_put(struct seq_file *seq) count = 0; for (i = 0; i < ICMPMSG_MIB_MAX; i++) { - val = snmp_fold_field((void **) net->mib.icmpmsg_statistics, i); + val = snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, i); if (val) { type[count] = i; vals[count++] = val; @@ -307,18 +307,18 @@ static void icmp_put(struct seq_file *seq) for (i=0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " Out%s", icmpmibmap[i].name); seq_printf(seq, "\nIcmp: %lu %lu", - snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_INMSGS), - snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_INERRORS)); + snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INMSGS), + snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS)); for (i=0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **) net->mib.icmpmsg_statistics, + snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, icmpmibmap[i].index)); seq_printf(seq, " %lu %lu", - snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), - snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); + snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), + 
snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); for (i=0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **) net->mib.icmpmsg_statistics, + snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, icmpmibmap[i].index | 0x100)); } @@ -341,7 +341,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **)net->mib.ip_statistics, + snmp_fold_field((void __percpu **)net->mib.ip_statistics, snmp4_ipstats_list[i].entry)); icmp_put(seq); /* RFC 2011 compatibility */ @@ -356,11 +356,11 @@ static int snmp_seq_show(struct seq_file *seq, void *v) /* MaxConn field is signed, RFC 2012 */ if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) seq_printf(seq, " %ld", - snmp_fold_field((void **)net->mib.tcp_statistics, + snmp_fold_field((void __percpu **)net->mib.tcp_statistics, snmp4_tcp_list[i].entry)); else seq_printf(seq, " %lu", - snmp_fold_field((void **)net->mib.tcp_statistics, + snmp_fold_field((void __percpu **)net->mib.tcp_statistics, snmp4_tcp_list[i].entry)); } @@ -371,7 +371,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nUdp:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **)net->mib.udp_statistics, + snmp_fold_field((void __percpu **)net->mib.udp_statistics, snmp4_udp_list[i].entry)); /* the UDP and UDP-Lite MIBs are the same */ @@ -382,7 +382,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nUdpLite:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **)net->mib.udplite_statistics, + snmp_fold_field((void __percpu **)net->mib.udplite_statistics, snmp4_udp_list[i].entry)); seq_putc(seq, '\n'); @@ -419,7 +419,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nTcpExt:"); for (i = 0; snmp4_net_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **)net->mib.net_statistics, + snmp_fold_field((void __percpu **)net->mib.net_statistics, snmp4_net_list[i].entry)); seq_puts(seq, "\nIpExt:"); @@ -429,7 +429,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nIpExt:"); for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **)net->mib.ip_statistics, + snmp_fold_field((void __percpu **)net->mib.ip_statistics, snmp4_ipextstats_list[i].entry)); seq_putc(seq, '\n'); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b16dfadbe6d6..04762d3bef71 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3334,7 +3334,7 @@ static __net_initdata struct pernet_operations rt_secret_timer_ops = { #ifdef CONFIG_NET_CLS_ROUTE -struct ip_rt_acct *ip_rt_acct __read_mostly; +struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; #endif /* CONFIG_NET_CLS_ROUTE */ static __initdata unsigned long rhash_entries; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d5d69ea8f249..e471d037fcc9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2788,10 +2788,10 @@ EXPORT_SYMBOL(tcp_gro_complete); #ifdef CONFIG_TCP_MD5SIG static unsigned long tcp_md5sig_users; -static struct tcp_md5sig_pool **tcp_md5sig_pool; +static struct tcp_md5sig_pool * __percpu *tcp_md5sig_pool; static DEFINE_SPINLOCK(tcp_md5sig_pool_lock); -static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool **pool) +static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool * __percpu *pool) { int cpu; 
for_each_possible_cpu(cpu) { @@ -2808,7 +2808,7 @@ static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool **pool) void tcp_free_md5sig_pool(void) { - struct tcp_md5sig_pool **pool = NULL; + struct tcp_md5sig_pool * __percpu *pool = NULL; spin_lock_bh(&tcp_md5sig_pool_lock); if (--tcp_md5sig_users == 0) { @@ -2822,10 +2822,11 @@ void tcp_free_md5sig_pool(void) EXPORT_SYMBOL(tcp_free_md5sig_pool); -static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(struct sock *sk) +static struct tcp_md5sig_pool * __percpu * +__tcp_alloc_md5sig_pool(struct sock *sk) { int cpu; - struct tcp_md5sig_pool **pool; + struct tcp_md5sig_pool * __percpu *pool; pool = alloc_percpu(struct tcp_md5sig_pool *); if (!pool) @@ -2852,9 +2853,9 @@ out_free: return NULL; } -struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(struct sock *sk) +struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *sk) { - struct tcp_md5sig_pool **pool; + struct tcp_md5sig_pool * __percpu *pool; int alloc = 0; retry: @@ -2873,7 +2874,9 @@ retry: if (alloc) { /* we cannot hold spinlock here because this may sleep. */ - struct tcp_md5sig_pool **p = __tcp_alloc_md5sig_pool(sk); + struct tcp_md5sig_pool * __percpu *p; + + p = __tcp_alloc_md5sig_pool(sk); spin_lock_bh(&tcp_md5sig_pool_lock); if (!p) { tcp_md5sig_users--; @@ -2897,7 +2900,7 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool); struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu) { - struct tcp_md5sig_pool **p; + struct tcp_md5sig_pool * __percpu *p; spin_lock_bh(&tcp_md5sig_pool_lock); p = tcp_md5sig_pool; if (p) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c79cbff54370..b0d4a4b23db5 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -278,31 +278,31 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp, static int snmp6_alloc_dev(struct inet6_dev *idev) { - if (snmp_mib_init((void **)idev->stats.ipv6, + if (snmp_mib_init((void __percpu **)idev->stats.ipv6, sizeof(struct ipstats_mib)) < 0) goto err_ip; - if (snmp_mib_init((void **)idev->stats.icmpv6, + if (snmp_mib_init((void __percpu **)idev->stats.icmpv6, sizeof(struct icmpv6_mib)) < 0) goto err_icmp; - if (snmp_mib_init((void **)idev->stats.icmpv6msg, + if (snmp_mib_init((void __percpu **)idev->stats.icmpv6msg, sizeof(struct icmpv6msg_mib)) < 0) goto err_icmpmsg; return 0; err_icmpmsg: - snmp_mib_free((void **)idev->stats.icmpv6); + snmp_mib_free((void __percpu **)idev->stats.icmpv6); err_icmp: - snmp_mib_free((void **)idev->stats.ipv6); + snmp_mib_free((void __percpu **)idev->stats.ipv6); err_ip: return -ENOMEM; } static void snmp6_free_dev(struct inet6_dev *idev) { - snmp_mib_free((void **)idev->stats.icmpv6msg); - snmp_mib_free((void **)idev->stats.icmpv6); - snmp_mib_free((void **)idev->stats.ipv6); + snmp_mib_free((void __percpu **)idev->stats.icmpv6msg); + snmp_mib_free((void __percpu **)idev->stats.icmpv6); + snmp_mib_free((void __percpu **)idev->stats.ipv6); } /* Nobody refers to this device, we may destroy it. 
*/ @@ -3766,8 +3766,8 @@ static inline size_t inet6_if_nlmsg_size(void) ); } -static inline void __snmp6_fill_stats(u64 *stats, void **mib, int items, - int bytes) +static inline void __snmp6_fill_stats(u64 *stats, void __percpu **mib, + int items, int bytes) { int i; int pad = bytes - sizeof(u64) * items; @@ -3786,10 +3786,10 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, { switch(attrtype) { case IFLA_INET6_STATS: - __snmp6_fill_stats(stats, (void **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes); + __snmp6_fill_stats(stats, (void __percpu **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes); break; case IFLA_INET6_ICMP6STATS: - __snmp6_fill_stats(stats, (void **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes); + __snmp6_fill_stats(stats, (void __percpu **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes); break; } } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index e29160ff4a0f..37d14e735c27 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -971,41 +971,41 @@ static void ipv6_packet_cleanup(void) static int __net_init ipv6_init_mibs(struct net *net) { - if (snmp_mib_init((void **)net->mib.udp_stats_in6, + if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6, sizeof (struct udp_mib)) < 0) return -ENOMEM; - if (snmp_mib_init((void **)net->mib.udplite_stats_in6, + if (snmp_mib_init((void __percpu **)net->mib.udplite_stats_in6, sizeof (struct udp_mib)) < 0) goto err_udplite_mib; - if (snmp_mib_init((void **)net->mib.ipv6_statistics, + if (snmp_mib_init((void __percpu **)net->mib.ipv6_statistics, sizeof(struct ipstats_mib)) < 0) goto err_ip_mib; - if (snmp_mib_init((void **)net->mib.icmpv6_statistics, + if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics, sizeof(struct icmpv6_mib)) < 0) goto err_icmp_mib; - if (snmp_mib_init((void **)net->mib.icmpv6msg_statistics, + if (snmp_mib_init((void __percpu **)net->mib.icmpv6msg_statistics, sizeof(struct icmpv6msg_mib)) < 0) goto err_icmpmsg_mib; return 0; err_icmpmsg_mib: - snmp_mib_free((void **)net->mib.icmpv6_statistics); + snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics); err_icmp_mib: - snmp_mib_free((void **)net->mib.ipv6_statistics); + snmp_mib_free((void __percpu **)net->mib.ipv6_statistics); err_ip_mib: - snmp_mib_free((void **)net->mib.udplite_stats_in6); + snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6); err_udplite_mib: - snmp_mib_free((void **)net->mib.udp_stats_in6); + snmp_mib_free((void __percpu **)net->mib.udp_stats_in6); return -ENOMEM; } static void ipv6_cleanup_mibs(struct net *net) { - snmp_mib_free((void **)net->mib.udp_stats_in6); - snmp_mib_free((void **)net->mib.udplite_stats_in6); - snmp_mib_free((void **)net->mib.ipv6_statistics); - snmp_mib_free((void **)net->mib.icmpv6_statistics); - snmp_mib_free((void **)net->mib.icmpv6msg_statistics); + snmp_mib_free((void __percpu **)net->mib.udp_stats_in6); + snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6); + snmp_mib_free((void __percpu **)net->mib.ipv6_statistics); + snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics); + snmp_mib_free((void __percpu **)net->mib.icmpv6msg_statistics); } static int __net_init inet6_net_init(struct net *net) diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index bfe2598dd563..58344c0fbd13 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -136,7 +136,7 @@ static const struct snmp_mib snmp6_udplite6_list[] = { SNMP_MIB_SENTINEL }; -static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void **mib) +static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, 
void __percpu **mib) { char name[32]; int i; @@ -170,7 +170,7 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void **mib) return; } -static void snmp6_seq_show_item(struct seq_file *seq, void **mib, +static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **mib, const struct snmp_mib *itemlist) { int i; @@ -183,14 +183,15 @@ static int snmp6_seq_show(struct seq_file *seq, void *v) { struct net *net = (struct net *)seq->private; - snmp6_seq_show_item(seq, (void **)net->mib.ipv6_statistics, + snmp6_seq_show_item(seq, (void __percpu **)net->mib.ipv6_statistics, snmp6_ipstats_list); - snmp6_seq_show_item(seq, (void **)net->mib.icmpv6_statistics, + snmp6_seq_show_item(seq, (void __percpu **)net->mib.icmpv6_statistics, snmp6_icmp6_list); - snmp6_seq_show_icmpv6msg(seq, (void **)net->mib.icmpv6msg_statistics); - snmp6_seq_show_item(seq, (void **)net->mib.udp_stats_in6, + snmp6_seq_show_icmpv6msg(seq, + (void __percpu **)net->mib.icmpv6msg_statistics); + snmp6_seq_show_item(seq, (void __percpu **)net->mib.udp_stats_in6, snmp6_udp6_list); - snmp6_seq_show_item(seq, (void **)net->mib.udplite_stats_in6, + snmp6_seq_show_item(seq, (void __percpu **)net->mib.udplite_stats_in6, snmp6_udplite6_list); return 0; } @@ -213,9 +214,11 @@ static int snmp6_dev_seq_show(struct seq_file *seq, void *v) struct inet6_dev *idev = (struct inet6_dev *)seq->private; seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex); - snmp6_seq_show_item(seq, (void **)idev->stats.ipv6, snmp6_ipstats_list); - snmp6_seq_show_item(seq, (void **)idev->stats.icmpv6, snmp6_icmp6_list); - snmp6_seq_show_icmpv6msg(seq, (void **)idev->stats.icmpv6msg); + snmp6_seq_show_item(seq, (void __percpu **)idev->stats.ipv6, + snmp6_ipstats_list); + snmp6_seq_show_item(seq, (void __percpu **)idev->stats.icmpv6, + snmp6_icmp6_list); + snmp6_seq_show_icmpv6msg(seq, (void __percpu **)idev->stats.icmpv6msg); return 0; } diff --git a/net/sctp/proc.c b/net/sctp/proc.c index a5ac6e0a8d9c..784bcc9a979d 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -83,7 +83,7 @@ static int sctp_snmp_seq_show(struct seq_file *seq, void *v) for (i = 0; sctp_snmp_list[i].name != NULL; i++) seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name, - snmp_fold_field((void **)sctp_statistics, + snmp_fold_field((void __percpu **)sctp_statistics, sctp_snmp_list[i].entry)); return 0; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index a3c8988758b1..9687177b026b 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -996,12 +996,13 @@ int sctp_register_pf(struct sctp_pf *pf, sa_family_t family) static inline int init_sctp_mibs(void) { - return snmp_mib_init((void**)sctp_statistics, sizeof(struct sctp_mib)); + return snmp_mib_init((void __percpu **)sctp_statistics, + sizeof(struct sctp_mib)); } static inline void cleanup_sctp_mibs(void) { - snmp_mib_free((void**)sctp_statistics); + snmp_mib_free((void __percpu **)sctp_statistics); } static void sctp_v4_pf_init(void) diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index 42cd18391f46..0fc5ff66d1fa 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -30,12 +30,12 @@ struct ipcomp_tfms { struct list_head list; - struct crypto_comp **tfms; + struct crypto_comp * __percpu *tfms; int users; }; static DEFINE_MUTEX(ipcomp_resource_mutex); -static void **ipcomp_scratches; +static void * __percpu *ipcomp_scratches; static int ipcomp_scratch_users; static LIST_HEAD(ipcomp_tfms_list); @@ -200,7 +200,7 @@ EXPORT_SYMBOL_GPL(ipcomp_output); static void 
ipcomp_free_scratches(void) { int i; - void **scratches; + void * __percpu *scratches; if (--ipcomp_scratch_users) return; @@ -215,10 +215,10 @@ static void ipcomp_free_scratches(void) free_percpu(scratches); } -static void **ipcomp_alloc_scratches(void) +static void * __percpu *ipcomp_alloc_scratches(void) { int i; - void **scratches; + void * __percpu *scratches; if (ipcomp_scratch_users++) return ipcomp_scratches; @@ -239,7 +239,7 @@ static void **ipcomp_alloc_scratches(void) return scratches; } -static void ipcomp_free_tfms(struct crypto_comp **tfms) +static void ipcomp_free_tfms(struct crypto_comp * __percpu *tfms) { struct ipcomp_tfms *pos; int cpu; @@ -267,10 +267,10 @@ static void ipcomp_free_tfms(struct crypto_comp **tfms) free_percpu(tfms); } -static struct crypto_comp **ipcomp_alloc_tfms(const char *alg_name) +static struct crypto_comp * __percpu *ipcomp_alloc_tfms(const char *alg_name) { struct ipcomp_tfms *pos; - struct crypto_comp **tfms; + struct crypto_comp * __percpu *tfms; int cpu; /* This can be any valid CPU ID so we don't need locking. */ diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index eb870fcc29cc..cfceb6616ec1 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2428,19 +2428,19 @@ static int __net_init xfrm_statistics_init(struct net *net) { int rv; - if (snmp_mib_init((void **)net->mib.xfrm_statistics, + if (snmp_mib_init((void __percpu **)net->mib.xfrm_statistics, sizeof(struct linux_xfrm_mib)) < 0) return -ENOMEM; rv = xfrm_proc_init(net); if (rv < 0) - snmp_mib_free((void **)net->mib.xfrm_statistics); + snmp_mib_free((void __percpu **)net->mib.xfrm_statistics); return rv; } static void xfrm_statistics_fini(struct net *net) { xfrm_proc_fini(net); - snmp_mib_free((void **)net->mib.xfrm_statistics); + snmp_mib_free((void __percpu **)net->mib.xfrm_statistics); } #else static int __net_init xfrm_statistics_init(struct net *net) diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index c083a4e4e796..003f2c437ac3 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -50,7 +50,8 @@ static int xfrm_statistics_seq_show(struct seq_file *seq, void *v) int i; for (i=0; xfrm_mib_list[i].name; i++) seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name, - snmp_fold_field((void **)net->mib.xfrm_statistics, + snmp_fold_field((void __percpu **) + net->mib.xfrm_statistics, xfrm_mib_list[i].entry)); return 0; } -- cgit v1.2.2 From dc4c2c31053ba5bf685d273cd62ecca406dddb2d Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 12 Feb 2010 11:41:39 +0000 Subject: net: remove INIT_RCU_HEAD() usage call_rcu() will unconditionally reinitialize RCU head anyway. Signed-off-by: Alexey Dobriyan Acked-by: Paul E. McKenney Signed-off-by: David S. 
Miller --- net/core/drop_monitor.c | 1 - net/ipv6/sit.c | 1 - net/ipv6/xfrm6_tunnel.c | 1 - net/netlabel/netlabel_domainhash.c | 1 - net/netlabel/netlabel_unlabeled.c | 3 --- net/sctp/bind_addr.c | 1 - net/sctp/ipv6.c | 1 - net/sctp/protocol.c | 1 - 8 files changed, 10 deletions(-) (limited to 'net') diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index b8e9d3a86887..f8c874975350 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -296,7 +296,6 @@ static int dropmon_net_event(struct notifier_block *ev_block, new_stat->dev = dev; new_stat->last_rx = jiffies; - INIT_RCU_HEAD(&new_stat->rcu); spin_lock(&trace_state_lock); list_add_rcu(&new_stat->list, &hw_stats_list); spin_unlock(&trace_state_lock); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 52ffd29cb93f..96eb2d4641c4 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -363,7 +363,6 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg) goto out; } - INIT_RCU_HEAD(&p->rcu_head); p->next = t->prl; p->addr = a->addr; p->flags = a->flags; diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index ddce21e3459b..fa85a7d22dc4 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -161,7 +161,6 @@ alloc_spi: if (!x6spi) goto out; - INIT_RCU_HEAD(&x6spi->rcu_head); memcpy(&x6spi->addr, saddr, sizeof(x6spi->addr)); x6spi->spi = spi; atomic_set(&x6spi->refcnt, 1); diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index c5d9f97ef217..0bfeaab88ef5 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -315,7 +315,6 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, entry_old = netlbl_domhsh_search_def(entry->domain); if (entry_old == NULL) { entry->valid = 1; - INIT_RCU_HEAD(&entry->rcu); if (entry->domain != NULL) { u32 bkt = netlbl_domhsh_hash(entry->domain); diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 98ed22ee2ff4..852d9d7976b9 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -327,7 +327,6 @@ static int netlbl_unlhsh_add_addr4(struct netlbl_unlhsh_iface *iface, entry->list.addr = addr->s_addr & mask->s_addr; entry->list.mask = mask->s_addr; entry->list.valid = 1; - INIT_RCU_HEAD(&entry->rcu); entry->secid = secid; spin_lock(&netlbl_unlhsh_lock); @@ -373,7 +372,6 @@ static int netlbl_unlhsh_add_addr6(struct netlbl_unlhsh_iface *iface, entry->list.addr.s6_addr32[3] &= mask->s6_addr32[3]; ipv6_addr_copy(&entry->list.mask, mask); entry->list.valid = 1; - INIT_RCU_HEAD(&entry->rcu); entry->secid = secid; spin_lock(&netlbl_unlhsh_lock); @@ -410,7 +408,6 @@ static struct netlbl_unlhsh_iface *netlbl_unlhsh_add_iface(int ifindex) INIT_LIST_HEAD(&iface->addr4_list); INIT_LIST_HEAD(&iface->addr6_list); iface->valid = 1; - INIT_RCU_HEAD(&iface->rcu); spin_lock(&netlbl_unlhsh_lock); if (ifindex > 0) { diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 13a6fba41077..bef133731683 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -186,7 +186,6 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, addr->valid = 1; INIT_LIST_HEAD(&addr->list); - INIT_RCU_HEAD(&addr->rcu); /* We always hold a socket lock when calling this function, * and that acts as a writer synchronizing lock. 
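
The change is the same in every file touched here: drop the explicit rcu_head initialization done before the object is published, because call_rcu() (re)initializes the head when the object is eventually queued for freeing. A minimal sketch of the pattern, assuming a hypothetical struct foo and foo_rcu_free() callback (not code from this patch):

#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	struct list_head list;
	struct rcu_head rcu;
};

static void foo_rcu_free(struct rcu_head *head)
{
	kfree(container_of(head, struct foo, rcu));
}

static void foo_publish(struct foo *f, struct list_head *head)
{
	/* INIT_RCU_HEAD(&f->rcu) used to sit here; it is redundant. */
	list_add_rcu(&f->list, head);
}

static void foo_unpublish(struct foo *f)
{
	list_del_rcu(&f->list);
	/* call_rcu() sets up f->rcu itself before queueing the callback. */
	call_rcu(&f->rcu, foo_rcu_free);
}
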
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index cc50fbe99291..1d7ac70ba39f 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -381,7 +381,6 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, addr->a.v6.sin6_scope_id = dev->ifindex; addr->valid = 1; INIT_LIST_HEAD(&addr->list); - INIT_RCU_HEAD(&addr->rcu); list_add_tail(&addr->list, addrlist); } } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 9687177b026b..e771690f6d5d 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -188,7 +188,6 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist, addr->a.v4.sin_addr.s_addr = ifa->ifa_local; addr->valid = 1; INIT_LIST_HEAD(&addr->list); - INIT_RCU_HEAD(&addr->rcu); list_add_tail(&addr->list, addrlist); } } -- cgit v1.2.2 From 9f0beba9f90847db7ba6ed894f7c87b6038a5bce Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Feb 2010 01:19:32 +0000 Subject: ipmr: remove useless checks from ipmr_device_event The net being checked there is dev_net(dev) and thus this if is always false. Fits both net and net-next trees. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 54596f73eff5..8582e12e4a62 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1163,9 +1163,6 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v int ct; LIST_HEAD(list); - if (!net_eq(dev_net(dev), net)) - return NOTIFY_DONE; - if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; v = &net->ipv4.vif_table[0]; -- cgit v1.2.2 From 8a5ce54562f296b1996813a413b0f2307ec9351a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 17 Feb 2010 06:43:47 +0000 Subject: xt_hashlimit: fix locking Commit 2eff25c18c3d332d3c4dd98f2ac9b7114e9771b0 (netfilter: xt_hashlimit: fix race condition and simplify locking) added a mutex deadlock : htable_create() is called with hashlimit_mutex already locked Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/netfilter/xt_hashlimit.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index e47fb805ffb4..d952806b6469 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -262,9 +262,7 @@ static int htable_create_v0(struct net *net, struct xt_hashlimit_info *minfo, u_ hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval); add_timer(&hinfo->timer); - mutex_lock(&hashlimit_mutex); hlist_add_head(&hinfo->node, &hashlimit_net->htables); - mutex_unlock(&hashlimit_mutex); return 0; } @@ -327,9 +325,7 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval); add_timer(&hinfo->timer); - mutex_lock(&hashlimit_mutex); hlist_add_head(&hinfo->node, &hashlimit_net->htables); - mutex_unlock(&hashlimit_mutex); return 0; } -- cgit v1.2.2 From faf234220fb79a05891477a75180e1d9f7ab4105 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 17 Feb 2010 09:34:12 +0000 Subject: net: use kasprintf() for socket cache names kasprintf() makes code smaller. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- net/core/sock.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index ceef50bd131b..472a59f205b0 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2228,13 +2228,10 @@ int proto_register(struct proto *prot, int alloc_slab) } if (prot->rsk_prot != NULL) { - static const char mask[] = "request_sock_%s"; - - prot->rsk_prot->slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); + prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name); if (prot->rsk_prot->slab_name == NULL) goto out_free_sock_slab; - sprintf(prot->rsk_prot->slab_name, mask, prot->name); prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name, prot->rsk_prot->obj_size, 0, SLAB_HWCACHE_ALIGN, NULL); @@ -2247,14 +2244,11 @@ int proto_register(struct proto *prot, int alloc_slab) } if (prot->twsk_prot != NULL) { - static const char mask[] = "tw_sock_%s"; - - prot->twsk_prot->twsk_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); + prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name); if (prot->twsk_prot->twsk_slab_name == NULL) goto out_free_request_sock_slab; - sprintf(prot->twsk_prot->twsk_slab_name, mask, prot->name); prot->twsk_prot->twsk_slab = kmem_cache_create(prot->twsk_prot->twsk_slab_name, prot->twsk_prot->twsk_obj_size, -- cgit v1.2.2 From 7af3351f71f4b3b5dbccb66cdc9b097052760a7f Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 17 Feb 2010 09:26:54 +0000 Subject: ethtool: Don't flush n-tuple list from ethtool_reset() The n-tuple list should be flushed if and only if the ETH_RESET_FILTER flag is set and the driver is able to reset filtering/flow direction hardware without also resetting a component whose flag is not set. This test is best left to the driver. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/ethtool.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index d08a0c7675bf..31b1eddc1b84 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -654,9 +654,6 @@ static int ethtool_reset(struct net_device *dev, char __user *useraddr) if (copy_from_user(&reset, useraddr, sizeof(reset))) return -EFAULT; - /* Clear ethtool n-tuple list */ - ethtool_ntuple_flush(dev); - ret = dev->ethtool_ops->reset(dev, &reset.data); if (ret) return ret; -- cgit v1.2.2 From 069c474e88bb7753183f1eadbd7786c27888c8e3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 17 Feb 2010 13:41:40 -0800 Subject: xfrm: Revert false event eliding commits. As reported by Alexey Dobriyan: -------------------- setkey now takes several seconds to run this simple script and it spits "recv: Resource temporarily unavailable" messages. #!/usr/sbin/setkey -f flush; spdflush; add A B ipcomp 44 -m tunnel -C deflate; add B A ipcomp 45 -m tunnel -C deflate; spdadd A B any -P in ipsec ipcomp/tunnel/192.168.1.2-192.168.1.3/use; spdadd B A any -P out ipsec ipcomp/tunnel/192.168.1.3-192.168.1.2/use; -------------------- Obviously applications want the events even when the table is empty. So we cannot make this behavioral change. Signed-off-by: David S. 
Miller --- net/key/af_key.c | 4 ++-- net/xfrm/xfrm_policy.c | 13 +++---------- net/xfrm/xfrm_state.c | 8 ++------ net/xfrm/xfrm_user.c | 4 ++-- 4 files changed, 9 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 8b8e26a9e401..79d2c0f3c334 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1751,7 +1751,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hd audit_info.secid = 0; err = xfrm_state_flush(net, proto, &audit_info); if (err) - return 0; + return err; c.data.proto = proto; c.seq = hdr->sadb_msg_seq; c.pid = hdr->sadb_msg_pid; @@ -2713,7 +2713,7 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg audit_info.secid = 0; err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); if (err) - return 0; + return err; c.data.type = XFRM_POLICY_TYPE_MAIN; c.event = XFRM_MSG_FLUSHPOLICY; c.pid = hdr->sadb_msg_pid; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index cfceb6616ec1..2c5d93181f13 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -771,8 +771,7 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) { - int dir, err = 0, cnt = 0; - struct xfrm_policy *dp; + int dir, err = 0; write_lock_bh(&xfrm_policy_lock); @@ -790,10 +789,8 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) &net->xfrm.policy_inexact[dir], bydst) { if (pol->type != type) continue; - dp = __xfrm_policy_unlink(pol, dir); + __xfrm_policy_unlink(pol, dir); write_unlock_bh(&xfrm_policy_lock); - if (dp) - cnt++; xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, audit_info->sessionid, @@ -812,10 +809,8 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) bydst) { if (pol->type != type) continue; - dp = __xfrm_policy_unlink(pol, dir); + __xfrm_policy_unlink(pol, dir); write_unlock_bh(&xfrm_policy_lock); - if (dp) - cnt++; xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, @@ -829,8 +824,6 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) } } - if (!cnt) - err = -ESRCH; atomic_inc(&flow_cache_genid); out: write_unlock_bh(&xfrm_policy_lock); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 9fa3322b2a7d..c9d6a5f1348d 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -603,14 +603,13 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audi int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info) { - int i, err = 0, cnt = 0; + int i, err = 0; spin_lock_bh(&xfrm_state_lock); err = xfrm_state_flush_secctx_check(net, proto, audit_info); if (err) goto out; - err = -ESRCH; for (i = 0; i <= net->xfrm.state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; @@ -627,16 +626,13 @@ restart: audit_info->sessionid, audit_info->secid); xfrm_state_put(x); - if (!err) - cnt++; spin_lock_bh(&xfrm_state_lock); goto restart; } } } - if (cnt) - err = 0; + err = 0; out: spin_unlock_bh(&xfrm_state_lock); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b0fb7d3bc15e..943c8712bd97 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1525,7 +1525,7 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, audit_info.secid = NETLINK_CB(skb).sid; err = xfrm_state_flush(net, p->proto, &audit_info); if (err) - return 0; + return err; c.data.proto = p->proto; 
c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; @@ -1677,7 +1677,7 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, audit_info.secid = NETLINK_CB(skb).sid; err = xfrm_policy_flush(net, type, &audit_info); if (err) - return 0; + return err; c.data.type = type; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; -- cgit v1.2.2 From e76b69cc0133952c98aa1ad6330cacacd269fd64 Mon Sep 17 00:00:00 2001 From: Ajit Khaparde Date: Tue, 16 Feb 2010 20:25:43 +0000 Subject: net: bug fix for vlan + gro issue Traffic (tcp) doesnot start on a vlan interface when gro is enabled. Even the tcp handshake was not taking place. This is because, the eth_type_trans call before the netif_receive_skb in napi_gro_finish() resets the skb->dev to napi->dev from the previously set vlan netdev interface. This causes the ip_route_input to drop the incoming packet considering it as a packet coming from a martian source. I could repro this on 2.6.32.7 (stable) and 2.6.33-rc7. With this fix, the traffic starts and the test runs fine on both vlan and non-vlan interfaces. CC: Herbert Xu CC: Patrick McHardy Signed-off-by: Ajit Khaparde Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index d1cf53d0d597..1968980f513a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2813,7 +2813,7 @@ gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, switch (ret) { case GRO_NORMAL: case GRO_HELD: - skb->protocol = eth_type_trans(skb, napi->dev); + skb->protocol = eth_type_trans(skb, skb->dev); if (ret == GRO_HELD) skb_gro_pull(skb, -ETH_HLEN); -- cgit v1.2.2 From 5ff3f073670b544a9c0547cc6fef1f7eed5762ed Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 14 Feb 2010 01:01:00 +0000 Subject: net: export attach/detach filter routines Export sk_attach_filter/sk_detach_filter routines, so that tun module can use them. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- net/core/filter.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 08db7b9143a3..7517110ff4ae 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -529,6 +529,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) sk_filter_delayed_uncharge(sk, old_fp); return 0; } +EXPORT_SYMBOL_GPL(sk_attach_filter); int sk_detach_filter(struct sock *sk) { @@ -545,3 +546,4 @@ int sk_detach_filter(struct sock *sk) rcu_read_unlock_bh(); return ret; } +EXPORT_SYMBOL_GPL(sk_detach_filter); -- cgit v1.2.2 From 6457d26bd40077238799e31df2b800bcf4ef9177 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 17 Feb 2010 18:48:44 -0800 Subject: IPv6: convert mc_lock to spinlock Only used for writing, so convert to spinlock Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/ipv6/mcast.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 25f6cca79e6b..bcd971915969 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -793,10 +793,10 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) } spin_unlock_bh(&im->mca_lock); - write_lock_bh(&idev->mc_lock); + spin_lock_bh(&idev->mc_lock); pmc->next = idev->mc_tomb; idev->mc_tomb = pmc; - write_unlock_bh(&idev->mc_lock); + spin_unlock_bh(&idev->mc_lock); } static void mld_del_delrec(struct inet6_dev *idev, struct in6_addr *pmca) @@ -804,7 +804,7 @@ static void mld_del_delrec(struct inet6_dev *idev, struct in6_addr *pmca) struct ifmcaddr6 *pmc, *pmc_prev; struct ip6_sf_list *psf, *psf_next; - write_lock_bh(&idev->mc_lock); + spin_lock_bh(&idev->mc_lock); pmc_prev = NULL; for (pmc=idev->mc_tomb; pmc; pmc=pmc->next) { if (ipv6_addr_equal(&pmc->mca_addr, pmca)) @@ -817,7 +817,8 @@ static void mld_del_delrec(struct inet6_dev *idev, struct in6_addr *pmca) else idev->mc_tomb = pmc->next; } - write_unlock_bh(&idev->mc_lock); + spin_unlock_bh(&idev->mc_lock); + if (pmc) { for (psf=pmc->mca_tomb; psf; psf=psf_next) { psf_next = psf->sf_next; @@ -832,10 +833,10 @@ static void mld_clear_delrec(struct inet6_dev *idev) { struct ifmcaddr6 *pmc, *nextpmc; - write_lock_bh(&idev->mc_lock); + spin_lock_bh(&idev->mc_lock); pmc = idev->mc_tomb; idev->mc_tomb = NULL; - write_unlock_bh(&idev->mc_lock); + spin_unlock_bh(&idev->mc_lock); for (; pmc; pmc = nextpmc) { nextpmc = pmc->next; @@ -1696,7 +1697,7 @@ static void mld_send_cr(struct inet6_dev *idev) int type, dtype; read_lock_bh(&idev->lock); - write_lock_bh(&idev->mc_lock); + spin_lock(&idev->mc_lock); /* deleted MCA's */ pmc_prev = NULL; @@ -1730,7 +1731,7 @@ static void mld_send_cr(struct inet6_dev *idev) } else pmc_prev = pmc; } - write_unlock_bh(&idev->mc_lock); + spin_unlock(&idev->mc_lock); /* change recs */ for (pmc=idev->mc_list; pmc; pmc=pmc->next) { @@ -2311,7 +2312,7 @@ void ipv6_mc_up(struct inet6_dev *idev) void ipv6_mc_init_dev(struct inet6_dev *idev) { write_lock_bh(&idev->lock); - rwlock_init(&idev->mc_lock); + spin_lock_init(&idev->mc_lock); idev->mc_gq_running = 0; setup_timer(&idev->mc_gq_timer, mld_gq_timer_expire, (unsigned long)idev); -- cgit v1.2.2 From 2906f66a5682e5670a5eefe991843689b8d8563f Mon Sep 17 00:00:00 2001 From: Venkata Mohan Reddy Date: Thu, 18 Feb 2010 12:31:05 +0100 Subject: ipvs: SCTP Trasport Loadbalancing Support Enhance IPVS to load balance SCTP transport protocol packets. This is done based on the SCTP rfc 4960. All possible control chunks have been taken care. The state machine used in this code looks some what lengthy. I tried to make the state machine easy to understand. 
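
The tracking added below reduces to two lookups: the SCTP chunk type selects an event, and the current connection state plus that event select the next state from a per-direction table. A simplified sketch of how the sctp_events[] and sctp_states_table[][] arrays defined in this patch are consulted; the wrapper sctp_next_state() is illustrative only, not a helper from the patch:

static int sctp_next_state(int cur_state, unsigned char chunk_type,
			   int from_client)
{
	/* Chunk type -> event; the event enum pairs client/server
	 * variants, with the server-originated event stored one slot
	 * after the client one. */
	enum ipvs_sctp_event_t event = sctp_events[chunk_type];

	if (!from_client)
		event++;	/* pick the *_SER variant of the event */

	return sctp_states_table[cur_state][event].next_state;
}

Keeping the transitions in a flat table keeps the per-packet work to an array lookup, at the cost of the long initializer that follows.
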
Signed-off-by: Venkata Mohan Reddy Koppula Signed-off-by: Simon Horman Signed-off-by: Patrick McHardy --- net/netfilter/ipvs/Kconfig | 7 + net/netfilter/ipvs/Makefile | 1 + net/netfilter/ipvs/ip_vs_core.c | 62 +- net/netfilter/ipvs/ip_vs_ctl.c | 5 +- net/netfilter/ipvs/ip_vs_proto.c | 3 + net/netfilter/ipvs/ip_vs_proto_sctp.c | 1183 +++++++++++++++++++++++++++++++++ net/netfilter/ipvs/ip_vs_sync.c | 14 + 7 files changed, 1264 insertions(+), 11 deletions(-) create mode 100644 net/netfilter/ipvs/ip_vs_proto_sctp.c (limited to 'net') diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 817a8898203b..712ccad13344 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -104,6 +104,13 @@ config IP_VS_PROTO_AH This option enables support for load balancing AH (Authentication Header) transport protocol. Say Y if unsure. +config IP_VS_PROTO_SCTP + bool "SCTP load balancing support" + select LIBCRC32C + ---help--- + This option enables support for load balancing SCTP transport + protocol. Say Y if unsure. + comment "IPVS scheduler" config IP_VS_RR diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile index 73a46fe1fe4c..e3baefd7066e 100644 --- a/net/netfilter/ipvs/Makefile +++ b/net/netfilter/ipvs/Makefile @@ -7,6 +7,7 @@ ip_vs_proto-objs-y := ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_TCP) += ip_vs_proto_tcp.o ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o +ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_SCTP) += ip_vs_proto_sctp.o ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \ ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \ diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 847ffca40184..72e96d823ebf 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -81,6 +82,8 @@ const char *ip_vs_proto_name(unsigned proto) return "UDP"; case IPPROTO_TCP: return "TCP"; + case IPPROTO_SCTP: + return "SCTP"; case IPPROTO_ICMP: return "ICMP"; #ifdef CONFIG_IP_VS_IPV6 @@ -589,8 +592,9 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, ip_send_check(ciph); } - /* the TCP/UDP port */ - if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol) { + /* the TCP/UDP/SCTP port */ + if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol || + IPPROTO_SCTP == ciph->protocol) { __be16 *ports = (void *)ciph + ciph->ihl*4; if (inout) @@ -630,8 +634,9 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp, ciph->saddr = cp->daddr.in6; } - /* the TCP/UDP port */ - if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr) { + /* the TCP/UDP/SCTP port */ + if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr || + IPPROTO_SCTP == ciph->nexthdr) { __be16 *ports = (void *)ciph + sizeof(struct ipv6hdr); if (inout) @@ -679,7 +684,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, goto out; } - if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol) + if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol || + IPPROTO_SCTP == protocol) offset += 2 * sizeof(__u16); if (!skb_make_writable(skb, offset)) goto out; @@ -857,6 +863,21 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related) } #endif +/* + * Check if sctp chunc is ABORT chunk + */ +static inline int is_sctp_abort(const struct sk_buff *skb, int nh_len) +{ + sctp_chunkhdr_t *sch, schunk; + sch = 
skb_header_pointer(skb, nh_len + sizeof(sctp_sctphdr_t), + sizeof(schunk), &schunk); + if (sch == NULL) + return 0; + if (sch->type == SCTP_CID_ABORT) + return 1; + return 0; +} + static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len) { struct tcphdr _tcph, *th; @@ -999,7 +1020,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, if (unlikely(!cp)) { if (sysctl_ip_vs_nat_icmp_send && (pp->protocol == IPPROTO_TCP || - pp->protocol == IPPROTO_UDP)) { + pp->protocol == IPPROTO_UDP || + pp->protocol == IPPROTO_SCTP)) { __be16 _ports[2], *pptr; pptr = skb_header_pointer(skb, iph.len, @@ -1014,8 +1036,13 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, * existing entry if it is not RST * packet or not TCP packet. */ - if (iph.protocol != IPPROTO_TCP - || !is_tcp_reset(skb, iph.len)) { + if ((iph.protocol != IPPROTO_TCP && + iph.protocol != IPPROTO_SCTP) + || ((iph.protocol == IPPROTO_TCP + && !is_tcp_reset(skb, iph.len)) + || (iph.protocol == IPPROTO_SCTP + && !is_sctp_abort(skb, + iph.len)))) { #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) icmpv6_send(skb, @@ -1235,7 +1262,8 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) /* do the statistics and put it back */ ip_vs_in_stats(cp, skb); - if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr) + if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr || + IPPROTO_SCTP == cih->nexthdr) offset += 2 * sizeof(__u16); verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset); /* do not touch skb anymore */ @@ -1358,6 +1386,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, * encorage the standby servers to update the connections timeout */ pkts = atomic_add_return(1, &cp->in_pkts); + if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) && + cp->protocol == IPPROTO_SCTP) { + if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && + (atomic_read(&cp->in_pkts) % + sysctl_ip_vs_sync_threshold[1] + == sysctl_ip_vs_sync_threshold[0])) || + (cp->old_state != cp->state && + ((cp->state == IP_VS_SCTP_S_CLOSED) || + (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) || + (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) { + ip_vs_sync_conn(cp); + goto out; + } + } + if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) && (((cp->protocol != IPPROTO_TCP || @@ -1370,6 +1413,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, (cp->state == IP_VS_TCP_S_CLOSE_WAIT) || (cp->state == IP_VS_TCP_S_TIME_WAIT))))) ip_vs_sync_conn(cp); +out: cp->old_state = cp->state; ip_vs_conn_put(cp); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 00d0b152db39..7ee9c3426f44 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2132,8 +2132,9 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) } } - /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */ - if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) { + /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */ + if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP && + usvc.protocol != IPPROTO_SCTP) { pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n", usvc.protocol, &usvc.addr.ip, ntohs(usvc.port), usvc.sched_name); diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 3e7671674549..0e584553819d 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -257,6 +257,9 @@ int __init ip_vs_protocol_init(void) #ifdef CONFIG_IP_VS_PROTO_UDP 
REGISTER_PROTOCOL(&ip_vs_protocol_udp); #endif +#ifdef CONFIG_IP_VS_PROTO_SCTP + REGISTER_PROTOCOL(&ip_vs_protocol_sctp); +#endif #ifdef CONFIG_IP_VS_PROTO_AH REGISTER_PROTOCOL(&ip_vs_protocol_ah); #endif diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c new file mode 100644 index 000000000000..c9a3f7a21d53 --- /dev/null +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -0,0 +1,1183 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +static struct ip_vs_conn * +sctp_conn_in_get(int af, + const struct sk_buff *skb, + struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, + unsigned int proto_off, + int inverse) +{ + __be16 _ports[2], *pptr; + + pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); + if (pptr == NULL) + return NULL; + + if (likely(!inverse)) + return ip_vs_conn_in_get(af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1]); + else + return ip_vs_conn_in_get(af, iph->protocol, + &iph->daddr, pptr[1], + &iph->saddr, pptr[0]); +} + +static struct ip_vs_conn * +sctp_conn_out_get(int af, + const struct sk_buff *skb, + struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, + unsigned int proto_off, + int inverse) +{ + __be16 _ports[2], *pptr; + + pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); + if (pptr == NULL) + return NULL; + + if (likely(!inverse)) + return ip_vs_conn_out_get(af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1]); + else + return ip_vs_conn_out_get(af, iph->protocol, + &iph->daddr, pptr[1], + &iph->saddr, pptr[0]); +} + +static int +sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, + int *verdict, struct ip_vs_conn **cpp) +{ + struct ip_vs_service *svc; + sctp_chunkhdr_t _schunkh, *sch; + sctp_sctphdr_t *sh, _sctph; + struct ip_vs_iphdr iph; + + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); + + sh = skb_header_pointer(skb, iph.len, sizeof(_sctph), &_sctph); + if (sh == NULL) + return 0; + + sch = skb_header_pointer(skb, iph.len + sizeof(sctp_sctphdr_t), + sizeof(_schunkh), &_schunkh); + if (sch == NULL) + return 0; + + if ((sch->type == SCTP_CID_INIT) && + (svc = ip_vs_service_get(af, skb->mark, iph.protocol, + &iph.daddr, sh->dest))) { + if (ip_vs_todrop()) { + /* + * It seems that we are very loaded. + * We have to drop this packet :( + */ + ip_vs_service_put(svc); + *verdict = NF_DROP; + return 0; + } + /* + * Let the virtual server select a real server for the + * incoming connection, and create a connection entry. 
+ */ + *cpp = ip_vs_schedule(svc, skb); + if (!*cpp) { + *verdict = ip_vs_leave(svc, skb, pp); + return 0; + } + ip_vs_service_put(svc); + } + + return 1; +} + +static int +sctp_snat_handler(struct sk_buff *skb, + struct ip_vs_protocol *pp, struct ip_vs_conn *cp) +{ + sctp_sctphdr_t *sctph; + unsigned int sctphoff; + __be32 crc32; + +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + sctphoff = sizeof(struct ipv6hdr); + else +#endif + sctphoff = ip_hdrlen(skb); + + /* csum_check requires unshared skb */ + if (!skb_make_writable(skb, sctphoff + sizeof(*sctph))) + return 0; + + if (unlikely(cp->app != NULL)) { + /* Some checks before mangling */ + if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) + return 0; + + /* Call application helper if needed */ + if (!ip_vs_app_pkt_out(cp, skb)) + return 0; + } + + sctph = (void *) skb_network_header(skb) + sctphoff; + sctph->source = cp->vport; + + /* Calculate the checksum */ + crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff); + for (skb = skb_shinfo(skb)->frag_list; skb; skb = skb->next) + crc32 = sctp_update_cksum((u8 *) skb->data, skb_headlen(skb), + crc32); + crc32 = sctp_end_cksum(crc32); + sctph->checksum = crc32; + + return 1; +} + +static int +sctp_dnat_handler(struct sk_buff *skb, + struct ip_vs_protocol *pp, struct ip_vs_conn *cp) +{ + + sctp_sctphdr_t *sctph; + unsigned int sctphoff; + __be32 crc32; + +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + sctphoff = sizeof(struct ipv6hdr); + else +#endif + sctphoff = ip_hdrlen(skb); + + /* csum_check requires unshared skb */ + if (!skb_make_writable(skb, sctphoff + sizeof(*sctph))) + return 0; + + if (unlikely(cp->app != NULL)) { + /* Some checks before mangling */ + if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) + return 0; + + /* Call application helper if needed */ + if (!ip_vs_app_pkt_out(cp, skb)) + return 0; + } + + sctph = (void *) skb_network_header(skb) + sctphoff; + sctph->dest = cp->dport; + + /* Calculate the checksum */ + crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff); + for (skb = skb_shinfo(skb)->frag_list; skb; skb = skb->next) + crc32 = sctp_update_cksum((u8 *) skb->data, skb_headlen(skb), + crc32); + crc32 = sctp_end_cksum(crc32); + sctph->checksum = crc32; + + return 1; +} + +static int +sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) +{ + struct sk_buff *list = skb_shinfo(skb)->frag_list; + unsigned int sctphoff; + struct sctphdr *sh, _sctph; + __le32 cmp; + __le32 val; + __u32 tmp; + +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + sctphoff = sizeof(struct ipv6hdr); + else +#endif + sctphoff = ip_hdrlen(skb); + + sh = skb_header_pointer(skb, sctphoff, sizeof(_sctph), &_sctph); + if (sh == NULL) + return 0; + + cmp = sh->checksum; + + tmp = sctp_start_cksum((__u8 *) sh, skb_headlen(skb)); + for (; list; list = list->next) + tmp = sctp_update_cksum((__u8 *) list->data, + skb_headlen(list), tmp); + + val = sctp_end_cksum(tmp); + + if (val != cmp) { + /* CRC failure, dump it. 
*/ + IP_VS_DBG_RL_PKT(0, pp, skb, 0, + "Failed checksum for"); + return 0; + } + return 1; +} + +struct ipvs_sctp_nextstate { + int next_state; +}; +enum ipvs_sctp_event_t { + IP_VS_SCTP_EVE_DATA_CLI, + IP_VS_SCTP_EVE_DATA_SER, + IP_VS_SCTP_EVE_INIT_CLI, + IP_VS_SCTP_EVE_INIT_SER, + IP_VS_SCTP_EVE_INIT_ACK_CLI, + IP_VS_SCTP_EVE_INIT_ACK_SER, + IP_VS_SCTP_EVE_COOKIE_ECHO_CLI, + IP_VS_SCTP_EVE_COOKIE_ECHO_SER, + IP_VS_SCTP_EVE_COOKIE_ACK_CLI, + IP_VS_SCTP_EVE_COOKIE_ACK_SER, + IP_VS_SCTP_EVE_ABORT_CLI, + IP_VS_SCTP_EVE__ABORT_SER, + IP_VS_SCTP_EVE_SHUT_CLI, + IP_VS_SCTP_EVE_SHUT_SER, + IP_VS_SCTP_EVE_SHUT_ACK_CLI, + IP_VS_SCTP_EVE_SHUT_ACK_SER, + IP_VS_SCTP_EVE_SHUT_COM_CLI, + IP_VS_SCTP_EVE_SHUT_COM_SER, + IP_VS_SCTP_EVE_LAST +}; + +static enum ipvs_sctp_event_t sctp_events[255] = { + IP_VS_SCTP_EVE_DATA_CLI, + IP_VS_SCTP_EVE_INIT_CLI, + IP_VS_SCTP_EVE_INIT_ACK_CLI, + IP_VS_SCTP_EVE_DATA_CLI, + IP_VS_SCTP_EVE_DATA_CLI, + IP_VS_SCTP_EVE_DATA_CLI, + IP_VS_SCTP_EVE_ABORT_CLI, + IP_VS_SCTP_EVE_SHUT_CLI, + IP_VS_SCTP_EVE_SHUT_ACK_CLI, + IP_VS_SCTP_EVE_DATA_CLI, + IP_VS_SCTP_EVE_COOKIE_ECHO_CLI, + IP_VS_SCTP_EVE_COOKIE_ACK_CLI, + IP_VS_SCTP_EVE_DATA_CLI, + IP_VS_SCTP_EVE_DATA_CLI, + IP_VS_SCTP_EVE_SHUT_COM_CLI, +}; + +static struct ipvs_sctp_nextstate + sctp_states_table[IP_VS_SCTP_S_LAST][IP_VS_SCTP_EVE_LAST] = { + /* + * STATE : IP_VS_SCTP_S_NONE + */ + /*next state *//*event */ + {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }, + }, + /* + * STATE : IP_VS_SCTP_S_INIT_CLI + * Cient sent INIT and is waiting for reply from server(In ECHO_WAIT) + */ + {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ECHO_CLI */ }, + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_ECHO_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* 
IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + /* + * State : IP_VS_SCTP_S_INIT_SER + * Server sent INIT and waiting for INIT ACK from the client + */ + {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + /* + * State : IP_VS_SCTP_S_INIT_ACK_CLI + * Client sent INIT ACK and waiting for ECHO from the server + */ + {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, + /* + * We have got an INIT from client. From the spec.“Upon receipt of + * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with + * an INIT ACK using the same parameters it sent in its original + * INIT chunk (including its Initiate Tag, unchanged”). + */ + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + /* + * INIT_ACK has been resent by the client, let us stay is in + * the same state + */ + {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + /* + * INIT_ACK sent by the server, close the connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + /* + * ECHO by client, it should not happen, close the connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + /* + * ECHO by server, this is what we are expecting, move to ECHO_SER + */ + {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + /* + * COOKIE ACK from client, it should not happen, close the connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + /* + * Unexpected COOKIE ACK from server, staty in the same state + */ + {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + /* + * State : IP_VS_SCTP_S_INIT_ACK_SER + * Server sent INIT ACK and waiting for ECHO from the client + */ + {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, + /* + * We have got an INIT from client. 
From the spec.“Upon receipt of + * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with + * an INIT ACK using the same parameters it sent in its original + * INIT chunk (including its Initiate Tag, unchanged”). + */ + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + /* + * Unexpected INIT_ACK by the client, let us close the connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + /* + * INIT_ACK resent by the server, let us move to same state + */ + {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + /* + * Client send the ECHO, this is what we are expecting, + * move to ECHO_CLI + */ + {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + /* + * ECHO received from the server, Not sure what to do, + * let us close it + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + /* + * COOKIE ACK from client, let us stay in the same state + */ + {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + /* + * COOKIE ACK from server, hmm... this should not happen, lets close + * the connection. + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + /* + * State : IP_VS_SCTP_S_ECHO_CLI + * Cient sent ECHO and waiting COOKEI ACK from the Server + */ + {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, + /* + * We have got an INIT from client. From the spec.“Upon receipt of + * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with + * an INIT ACK using the same parameters it sent in its original + * INIT chunk (including its Initiate Tag, unchanged”). + */ + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + /* + * INIT_ACK has been by the client, let us close the connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + /* + * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, + * “If an INIT ACK is received by an endpoint in any state other + * than the COOKIE-WAIT state, the endpoint should discard the + * INIT ACK chunk”. Stay in the same state + */ + {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + /* + * Client resent the ECHO, let us stay in the same state + */ + {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + /* + * ECHO received from the server, Not sure what to do, + * let us close it + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + /* + * COOKIE ACK from client, this shoud not happen, let's close the + * connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + /* + * COOKIE ACK from server, this is what we are awaiting,lets move to + * ESTABLISHED. 
+ */ + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + /* + * State : IP_VS_SCTP_S_ECHO_SER + * Server sent ECHO and waiting COOKEI ACK from the client + */ + {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, + /* + * We have got an INIT from client. From the spec.“Upon receipt of + * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with + * an INIT ACK using the same parameters it sent in its original + * INIT chunk (including its Initiate Tag, unchanged”). + */ + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + /* + * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, + * “If an INIT ACK is received by an endpoint in any state other + * than the COOKIE-WAIT state, the endpoint should discard the + * INIT ACK chunk”. Stay in the same state + */ + {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + /* + * INIT_ACK has been by the server, let us close the connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + /* + * Client sent the ECHO, not sure what to do, let's close the + * connection. + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + /* + * ECHO resent by the server, stay in the same state + */ + {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + /* + * COOKIE ACK from client, this is what we are expecting, let's move + * to ESTABLISHED. + */ + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + /* + * COOKIE ACK from server, this should not happen, lets close the + * connection. + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + /* + * State : IP_VS_SCTP_S_ESTABLISHED + * Association established + */ + {{IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_SER */ }, + /* + * We have got an INIT from client. From the spec.“Upon receipt of + * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with + * an INIT ACK using the same parameters it sent in its original + * INIT chunk (including its Initiate Tag, unchanged”). + */ + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + /* + * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, + * “If an INIT ACK is received by an endpoint in any state other + * than the COOKIE-WAIT state, the endpoint should discard the + * INIT ACK chunk”. 
Stay in the same state + */ + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + /* + * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the + * peer and peer shall move to the ESTABISHED. if it doesn't handle + * it will send ERROR chunk. So, stay in the same state + */ + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + /* + * COOKIE ACK from client, not sure what to do stay in the same state + */ + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + /* + * SHUTDOWN from the client, move to SHUDDOWN_CLI + */ + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + /* + * SHUTDOWN from the server, move to SHUTDOWN_SER + */ + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, + /* + * client sent SHUDTDOWN_ACK, this should not happen, let's close + * the connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + /* + * State : IP_VS_SCTP_S_SHUT_CLI + * SHUTDOWN sent from the client, waitinf for SHUT ACK from the server + */ + /* + * We recieved the data chuck, keep the state unchanged. I assume + * that still data chuncks can be received by both the peers in + * SHUDOWN state + */ + + {{IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_DATA_SER */ }, + /* + * We have got an INIT from client. From the spec.“Upon receipt of + * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with + * an INIT ACK using the same parameters it sent in its original + * INIT chunk (including its Initiate Tag, unchanged”). + */ + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + /* + * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, + * “If an INIT ACK is received by an endpoint in any state other + * than the COOKIE-WAIT state, the endpoint should discard the + * INIT ACK chunk”. Stay in the same state + */ + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + /* + * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the + * peer and peer shall move to the ESTABISHED. if it doesn't handle + * it will send ERROR chunk. 
So, stay in the same state + */ + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + /* + * COOKIE ACK from client, not sure what to do stay in the same state + */ + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + /* + * SHUTDOWN resent from the client, move to SHUDDOWN_CLI + */ + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + /* + * SHUTDOWN from the server, move to SHUTDOWN_SER + */ + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, + /* + * client sent SHUDTDOWN_ACK, this should not happen, let's close + * the connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + /* + * Server sent SHUTDOWN ACK, this is what we are expecting, let's move + * to SHUDOWN_ACK_SER + */ + {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + /* + * SHUTDOWN COM from client, this should not happen, let's close the + * connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + /* + * State : IP_VS_SCTP_S_SHUT_SER + * SHUTDOWN sent from the server, waitinf for SHUTDOWN ACK from client + */ + /* + * We recieved the data chuck, keep the state unchanged. I assume + * that still data chuncks can be received by both the peers in + * SHUDOWN state + */ + + {{IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_DATA_SER */ }, + /* + * We have got an INIT from client. From the spec.“Upon receipt of + * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with + * an INIT ACK using the same parameters it sent in its original + * INIT chunk (including its Initiate Tag, unchanged”). + */ + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + /* + * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, + * “If an INIT ACK is received by an endpoint in any state other + * than the COOKIE-WAIT state, the endpoint should discard the + * INIT ACK chunk”. Stay in the same state + */ + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + /* + * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the + * peer and peer shall move to the ESTABISHED. if it doesn't handle + * it will send ERROR chunk. 
So, stay in the same state + */ + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + /* + * COOKIE ACK from client, not sure what to do; stay in the same state + */ + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + /* + * SHUTDOWN resent from the client, move to SHUTDOWN_CLI + */ + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + /* + * SHUTDOWN resent from the server, move to SHUTDOWN_SER + */ + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, + /* + * client sent SHUTDOWN_ACK, this is what we are expecting, let's + * move to SHUT_ACK_CLI + */ + {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + /* + * Server sent SHUTDOWN ACK, this should not happen, let's close the + * connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + /* + * SHUTDOWN COM from client, this should not happen, let's close the + * connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + + /* + * State : IP_VS_SCTP_S_SHUT_ACK_CLI + * SHUTDOWN ACK from the client, awaiting SHUTDOWN COM from the server + */ + /* + * We received a DATA chunk; keep the state unchanged. I assume + * that DATA chunks can still be received by both peers in the + * SHUTDOWN state + */ + + {{IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_DATA_SER */ }, + /* + * We have got an INIT from the client. From the spec: "Upon receipt of + * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with + * an INIT ACK using the same parameters it sent in its original + * INIT chunk (including its Initiate Tag, unchanged)." + */ + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + /* + * INIT_ACK sent by the server: unexpected INIT ACK. The spec says, + * "If an INIT ACK is received by an endpoint in any state other + * than the COOKIE-WAIT state, the endpoint should discard the + * INIT ACK chunk." Stay in the same state + */ + {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + /* + * Client sent ECHO. The spec (sec 5.2.4) says it may be handled by the + * peer and the peer shall move to ESTABLISHED. If it doesn't handle + * it, it will send an ERROR chunk.
So, stay in the same state + */ + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + /* + * COOKIE ACK from client, not sure what to do; stay in the same state + */ + {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + /* + * SHUTDOWN sent from the client, move to SHUTDOWN_CLI + */ + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + /* + * SHUTDOWN sent from the server, move to SHUTDOWN_SER + */ + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, + /* + * client resent SHUTDOWN_ACK, let's stay in the same state + */ + {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + /* + * Server sent SHUTDOWN ACK, this should not happen, let's close the + * connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + /* + * SHUTDOWN COM from client, this should not happen, let's close the + * connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + /* + * SHUTDOWN COMPLETE from server, this is what we are expecting. + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + + /* + * State : IP_VS_SCTP_S_SHUT_ACK_SER + * SHUTDOWN ACK from the server, awaiting SHUTDOWN COM from the client + */ + /* + * We received a DATA chunk; keep the state unchanged. I assume + * that DATA chunks can still be received by both peers in the + * SHUTDOWN state + */ + + {{IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_DATA_SER */ }, + /* + * We have got an INIT from the client. From the spec: "Upon receipt of + * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with + * an INIT ACK using the same parameters it sent in its original + * INIT chunk (including its Initiate Tag, unchanged)." + */ + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + /* + * INIT_ACK sent by the server: unexpected INIT ACK. The spec says, + * "If an INIT ACK is received by an endpoint in any state other + * than the COOKIE-WAIT state, the endpoint should discard the + * INIT ACK chunk." Stay in the same state + */ + {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + /* + * Client sent ECHO. The spec (sec 5.2.4) says it may be handled by the + * peer and the peer shall move to ESTABLISHED. If it doesn't handle + * it, it will send an ERROR chunk. So, stay in the same state + */ + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + /* + * COOKIE ACK from client, not sure what to do; stay in the same state + */ + {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + /* + * SHUTDOWN sent from the client, move to SHUTDOWN_CLI + */ + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + /* + * SHUTDOWN sent from the server, move to SHUTDOWN_SER + */ + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, + /* + * client sent SHUTDOWN_ACK, this should not happen, let's close + * the connection.
+ */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + /* + * Server resent SHUTDOWN ACK, stay in the same state + */ + {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + /* + * SHUTDOWN COM from client, this is what we are expecting, let's close + * the connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + /* + * SHUTDOWN COMPLETE from server, this should not happen. + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + /* + * State : IP_VS_SCTP_S_CLOSED + */ + {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + } +}; + +/* + * Timeout table[state] + */ +static int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = { + [IP_VS_SCTP_S_NONE] = 2 * HZ, + [IP_VS_SCTP_S_INIT_CLI] = 1 * 60 * HZ, + [IP_VS_SCTP_S_INIT_SER] = 1 * 60 * HZ, + [IP_VS_SCTP_S_INIT_ACK_CLI] = 1 * 60 * HZ, + [IP_VS_SCTP_S_INIT_ACK_SER] = 1 * 60 * HZ, + [IP_VS_SCTP_S_ECHO_CLI] = 1 * 60 * HZ, + [IP_VS_SCTP_S_ECHO_SER] = 1 * 60 * HZ, + [IP_VS_SCTP_S_ESTABLISHED] = 15 * 60 * HZ, + [IP_VS_SCTP_S_SHUT_CLI] = 1 * 60 * HZ, + [IP_VS_SCTP_S_SHUT_SER] = 1 * 60 * HZ, + [IP_VS_SCTP_S_SHUT_ACK_CLI] = 1 * 60 * HZ, + [IP_VS_SCTP_S_SHUT_ACK_SER] = 1 * 60 * HZ, + [IP_VS_SCTP_S_CLOSED] = 10 * HZ, + [IP_VS_SCTP_S_LAST] = 2 * HZ, +}; + +static const char *sctp_state_name_table[IP_VS_SCTP_S_LAST + 1] = { + [IP_VS_SCTP_S_NONE] = "NONE", + [IP_VS_SCTP_S_INIT_CLI] = "INIT_CLI", + [IP_VS_SCTP_S_INIT_SER] = "INIT_SER", + [IP_VS_SCTP_S_INIT_ACK_CLI] = "INIT_ACK_CLI", + [IP_VS_SCTP_S_INIT_ACK_SER] = "INIT_ACK_SER", + [IP_VS_SCTP_S_ECHO_CLI] = "COOKIE_ECHO_CLI", + [IP_VS_SCTP_S_ECHO_SER] = "COOKIE_ECHO_SER", + [IP_VS_SCTP_S_ESTABLISHED] = "ESTABISHED", + [IP_VS_SCTP_S_SHUT_CLI] = "SHUTDOWN_CLI", + [IP_VS_SCTP_S_SHUT_SER] = "SHUTDOWN_SER", + [IP_VS_SCTP_S_SHUT_ACK_CLI] = "SHUTDOWN_ACK_CLI", + [IP_VS_SCTP_S_SHUT_ACK_SER] = "SHUTDOWN_ACK_SER", + [IP_VS_SCTP_S_CLOSED] = "CLOSED", + [IP_VS_SCTP_S_LAST] = "BUG!"
+}; + + +static const char *sctp_state_name(int state) +{ + if (state >= IP_VS_SCTP_S_LAST) + return "ERR!"; + if (sctp_state_name_table[state]) + return sctp_state_name_table[state]; + return "?"; +} + +static void sctp_timeout_change(struct ip_vs_protocol *pp, int flags) +{ +} + +static int +sctp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to) +{ + +return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_SCTP_S_LAST, + sctp_state_name_table, sname, to); +} + +static inline int +set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, + int direction, const struct sk_buff *skb) +{ + sctp_chunkhdr_t _sctpch, *sch; + unsigned char chunk_type; + int event, next_state; + int ihl; + +#ifdef CONFIG_IP_VS_IPV6 + ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr); +#else + ihl = ip_hdrlen(skb); +#endif + + sch = skb_header_pointer(skb, ihl + sizeof(sctp_sctphdr_t), + sizeof(_sctpch), &_sctpch); + if (sch == NULL) + return 0; + + chunk_type = sch->type; + /* + * Section 3: Multiple chunks can be bundled into one SCTP packet + * up to the MTU size, except for the INIT, INIT ACK, and + * SHUTDOWN COMPLETE chunks. These chunks MUST NOT be bundled with + * any other chunk in a packet. + * + * Section 3.3.7: DATA chunks MUST NOT be bundled with ABORT. Control + * chunks (except for INIT, INIT ACK, and SHUTDOWN COMPLETE) MAY be + * bundled with an ABORT, but they MUST be placed before the ABORT + * in the SCTP packet or they will be ignored by the receiver. + */ + if ((sch->type == SCTP_CID_COOKIE_ECHO) || + (sch->type == SCTP_CID_COOKIE_ACK)) { + sch = skb_header_pointer(skb, (ihl + sizeof(sctp_sctphdr_t) + + sch->length), sizeof(_sctpch), &_sctpch); + if (sch) { + if (sch->type == SCTP_CID_ABORT) + chunk_type = sch->type; + } + } + + event = sctp_events[chunk_type]; + + /* + * If the direction is IP_VS_DIR_OUTPUT, this event is from server + */ + if (direction == IP_VS_DIR_OUTPUT) + event++; + /* + * get next state + */ + next_state = sctp_states_table[cp->state][event].next_state; + + if (next_state != cp->state) { + struct ip_vs_dest *dest = cp->dest; + + IP_VS_DBG_BUF(8, "%s %s %s:%d->" + "%s:%d state: %s->%s conn->refcnt:%d\n", + pp->name, + ((direction == IP_VS_DIR_OUTPUT) ? 
+ "output " : "input "), + IP_VS_DBG_ADDR(cp->af, &cp->daddr), + ntohs(cp->dport), + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + sctp_state_name(cp->state), + sctp_state_name(next_state), + atomic_read(&cp->refcnt)); + if (dest) { + if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && + (next_state != IP_VS_SCTP_S_ESTABLISHED)) { + atomic_dec(&dest->activeconns); + atomic_inc(&dest->inactconns); + cp->flags |= IP_VS_CONN_F_INACTIVE; + } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) && + (next_state == IP_VS_SCTP_S_ESTABLISHED)) { + atomic_inc(&dest->activeconns); + atomic_dec(&dest->inactconns); + cp->flags &= ~IP_VS_CONN_F_INACTIVE; + } + } + } + + cp->timeout = pp->timeout_table[cp->state = next_state]; + + return 1; +} + +static int +sctp_state_transition(struct ip_vs_conn *cp, int direction, + const struct sk_buff *skb, struct ip_vs_protocol *pp) +{ + int ret = 0; + + spin_lock(&cp->lock); + ret = set_sctp_state(pp, cp, direction, skb); + spin_unlock(&cp->lock); + + return ret; +} + +/* + * Hash table for SCTP application incarnations + */ +#define SCTP_APP_TAB_BITS 4 +#define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS) +#define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1) + +static struct list_head sctp_apps[SCTP_APP_TAB_SIZE]; +static DEFINE_SPINLOCK(sctp_app_lock); + +static inline __u16 sctp_app_hashkey(__be16 port) +{ + return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port) + & SCTP_APP_TAB_MASK; +} + +static int sctp_register_app(struct ip_vs_app *inc) +{ + struct ip_vs_app *i; + __u16 hash; + __be16 port = inc->port; + int ret = 0; + + hash = sctp_app_hashkey(port); + + spin_lock_bh(&sctp_app_lock); + list_for_each_entry(i, &sctp_apps[hash], p_list) { + if (i->port == port) { + ret = -EEXIST; + goto out; + } + } + list_add(&inc->p_list, &sctp_apps[hash]); + atomic_inc(&ip_vs_protocol_sctp.appcnt); +out: + spin_unlock_bh(&sctp_app_lock); + + return ret; +} + +static void sctp_unregister_app(struct ip_vs_app *inc) +{ + spin_lock_bh(&sctp_app_lock); + atomic_dec(&ip_vs_protocol_sctp.appcnt); + list_del(&inc->p_list); + spin_unlock_bh(&sctp_app_lock); +} + +static int sctp_app_conn_bind(struct ip_vs_conn *cp) +{ + int hash; + struct ip_vs_app *inc; + int result = 0; + + /* Default binding: bind app only for NAT */ + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) + return 0; + /* Lookup application incarnations and bind the right one */ + hash = sctp_app_hashkey(cp->vport); + + spin_lock(&sctp_app_lock); + list_for_each_entry(inc, &sctp_apps[hash], p_list) { + if (inc->port == cp->vport) { + if (unlikely(!ip_vs_app_inc_get(inc))) + break; + spin_unlock(&sctp_app_lock); + + IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" + "%s:%u to app %s on port %u\n", + __func__, + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), + ntohs(cp->vport), + inc->name, ntohs(inc->port)); + cp->app = inc; + if (inc->init_conn) + result = inc->init_conn(inc, cp); + goto out; + } + } + spin_unlock(&sctp_app_lock); +out: + return result; +} + +static void ip_vs_sctp_init(struct ip_vs_protocol *pp) +{ + IP_VS_INIT_HASH_TABLE(sctp_apps); + pp->timeout_table = sctp_timeouts; +} + + +static void ip_vs_sctp_exit(struct ip_vs_protocol *pp) +{ + +} + +struct ip_vs_protocol ip_vs_protocol_sctp = { + .name = "SCTP", + .protocol = IPPROTO_SCTP, + .num_states = IP_VS_SCTP_S_LAST, + .dont_defrag = 0, + .appcnt = ATOMIC_INIT(0), + .init = ip_vs_sctp_init, + .exit = ip_vs_sctp_exit, + .register_app = sctp_register_app, + .unregister_app = sctp_unregister_app, + 
.conn_schedule = sctp_conn_schedule, + .conn_in_get = sctp_conn_in_get, + .conn_out_get = sctp_conn_out_get, + .snat_handler = sctp_snat_handler, + .dnat_handler = sctp_dnat_handler, + .csum_check = sctp_csum_check, + .state_name = sctp_state_name, + .state_transition = sctp_state_transition, + .app_conn_bind = sctp_app_conn_bind, + .debug_packet = ip_vs_tcpudp_debug_packet, + .timeout_change = sctp_timeout_change, + .set_state_timeout = sctp_set_state_timeout, +}; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index e177f0dc2084..8fb0ae616761 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -400,6 +400,11 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) flags |= IP_VS_CONN_F_INACTIVE; else flags &= ~IP_VS_CONN_F_INACTIVE; + } else if (s->protocol == IPPROTO_SCTP) { + if (state != IP_VS_SCTP_S_ESTABLISHED) + flags |= IP_VS_CONN_F_INACTIVE; + else + flags &= ~IP_VS_CONN_F_INACTIVE; } cp = ip_vs_conn_new(AF_INET, s->protocol, (union nf_inet_addr *)&s->caddr, @@ -434,6 +439,15 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) atomic_dec(&dest->inactconns); cp->flags &= ~IP_VS_CONN_F_INACTIVE; } + } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) && + (cp->state != state)) { + dest = cp->dest; + if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && + (state != IP_VS_SCTP_S_ESTABLISHED)) { + atomic_dec(&dest->activeconns); + atomic_inc(&dest->inactconns); + cp->flags &= ~IP_VS_CONN_F_INACTIVE; + } } if (opt) -- cgit v1.2.2 From 37ee3d5b3e979a168536e7e2f15bd1e769cb4122 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 18 Feb 2010 19:04:44 +0100 Subject: netfilter: nf_defrag_ipv4: fix compilation error with NF_CONNTRACK=n As reported by Randy Dunlap , compilation of nf_defrag_ipv4 fails with: include/net/netfilter/nf_conntrack.h:94: error: field 'ct_general' has incomplete type include/net/netfilter/nf_conntrack.h:178: error: 'const struct sk_buff' has no member named 'nfct' include/net/netfilter/nf_conntrack.h:185: error: implicit declaration of function 'nf_conntrack_put' include/net/netfilter/nf_conntrack.h:294: error: 'const struct sk_buff' has no member named 'nfct' net/ipv4/netfilter/nf_defrag_ipv4.c:45: error: 'struct sk_buff' has no member named 'nfct' net/ipv4/netfilter/nf_defrag_ipv4.c:46: error: 'struct sk_buff' has no member named 'nfct' net/nf_conntrack.h must not be included with NF_CONNTRACK=n, add a few #ifdefs. Long term the header file should be fixed to be usable even with NF_CONNTRACK=n. 
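As a rough, standalone illustration only (not part of the patch itself): the fix is plain conditional compilation on the conntrack Kconfig symbols, so code that touches the optional subsystem simply drops out of the build when NF_CONNTRACK=n. The sketch below is userspace C; the CONFIG_NF_CONNTRACK define and the zone stand-in are assumptions made here for demonstration, whereas in the kernel they come from Kconfig and nf_conntrack.h.

#include <stdio.h>

/* Dummy switch; in a kernel build Kconfig provides CONFIG_NF_CONNTRACK or
 * CONFIG_NF_CONNTRACK_MODULE. Comment it out to mimic NF_CONNTRACK=n. */
#define CONFIG_NF_CONNTRACK 1

static unsigned int defrag_user(int skb_has_conntrack)
{
	unsigned int zone = 0;	/* stands in for NF_CT_DEFAULT_ZONE */

#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	/* Compiled only when conntrack support is configured, so the optional
	 * subsystem's types and helpers are never referenced otherwise. */
	if (skb_has_conntrack)
		zone = 1;	/* the kernel code calls nf_ct_zone() here */
#endif
	return zone;
}

int main(void)
{
	printf("zone=%u\n", defrag_user(1));
	return 0;
}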
Tested-by: Randy Dunlap Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_defrag_ipv4.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index d498a704d456..cb763ae9ed90 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -16,9 +16,11 @@ #include #include -#include #include +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #include +#endif +#include /* Returns new sk_buff, or NULL */ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) @@ -42,8 +44,10 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, { u16 zone = NF_CT_DEFAULT_ZONE; +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) if (skb->nfct) zone = nf_ct_zone((struct nf_conn *)skb->nfct); +#endif #ifdef CONFIG_BRIDGE_NETFILTER if (skb->nf_bridge && -- cgit v1.2.2 From 663717f65c075eb4c6da7a123041295bd5295cc0 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 18 Feb 2010 14:12:06 -0800 Subject: AF_UNIX: update locking comment The lock used in unix_state_lock() is a spin_lock not reader-writer. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/unix/af_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 9bc9b92bc099..3d9122e78f41 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -144,7 +144,7 @@ static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) /* * SMP locking strategy: * hash table is protected with spinlock unix_table_lock - * each socket state is protected by separate rwlock. + * each socket state is protected by separate spin lock. */ static inline unsigned unix_hash_fold(__wsum n) -- cgit v1.2.2 From bc417d99bf279f034474bc2d7dedd390838a5480 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 18 Feb 2010 08:12:20 +0000 Subject: ipv6: remove stale MIB definitions ICMP6 MIB statistics was per-netns for quite a time. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 217dbc2e28d4..9a37379d741a 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -67,11 +67,6 @@ #include #include -DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly; -EXPORT_SYMBOL(icmpv6_statistics); -DEFINE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics) __read_mostly; -EXPORT_SYMBOL(icmpv6msg_statistics); - /* * The ICMP socket(s). This is the most convenient way to flow control * our ICMP output as well as maintain a clean interface throughout -- cgit v1.2.2 From bbef49daca35d4fd21bf606a10b6980f17d9df5d Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 18 Feb 2010 08:13:30 +0000 Subject: ipv6: use standard lists for FIB walks Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- net/ipv6/ip6_fib.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 77e122f53ea6..2f9847924fa5 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -93,29 +93,20 @@ static __u32 rt_sernum; static void fib6_gc_timer_cb(unsigned long arg); -static struct fib6_walker_t fib6_walker_list = { - .prev = &fib6_walker_list, - .next = &fib6_walker_list, -}; - -#define FOR_WALKERS(w) for ((w)=fib6_walker_list.next; (w) != &fib6_walker_list; (w)=(w)->next) +static LIST_HEAD(fib6_walkers); +#define FOR_WALKERS(w) list_for_each_entry(w, &fib6_walkers, lh) static inline void fib6_walker_link(struct fib6_walker_t *w) { write_lock_bh(&fib6_walker_lock); - w->next = fib6_walker_list.next; - w->prev = &fib6_walker_list; - w->next->prev = w; - w->prev->next = w; + list_add(&w->lh, &fib6_walkers); write_unlock_bh(&fib6_walker_lock); } static inline void fib6_walker_unlink(struct fib6_walker_t *w) { write_lock_bh(&fib6_walker_lock); - w->next->prev = w->prev; - w->prev->next = w->next; - w->prev = w->next = w; + list_del(&w->lh); write_unlock_bh(&fib6_walker_lock); } static __inline__ u32 fib6_new_sernum(void) -- cgit v1.2.2 From 3ffe533c87281b68d469b279ff3a5056f9c75862 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 18 Feb 2010 08:25:24 +0000 Subject: ipv6: drop unused "dev" arg of icmpv6_send() Dunno, what was the idea, it wasn't used for a long time. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 2 +- net/ipv6/exthdrs.c | 2 +- net/ipv6/icmp.c | 5 ++--- net/ipv6/ip6_input.c | 3 +-- net/ipv6/ip6_output.c | 11 +++++------ net/ipv6/ip6_tunnel.c | 6 +++--- net/ipv6/mip6.c | 2 +- net/ipv6/netfilter/ip6t_REJECT.c | 2 +- net/ipv6/reassembly.c | 2 +- net/ipv6/route.c | 4 ++-- net/ipv6/sit.c | 2 +- net/ipv6/tunnel6.c | 4 ++-- net/ipv6/udp.c | 5 ++--- net/ipv6/xfrm6_output.c | 2 +- net/netfilter/ipvs/ip_vs_core.c | 5 ++--- net/netfilter/ipvs/ip_vs_xmit.c | 10 +++++----- 16 files changed, 31 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index a2a5983dbf03..c0c5274d0271 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -793,7 +793,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev } if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ip_rt_put(rt); goto tx_error; } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 4bac362b1335..074f2c084f9f 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -481,7 +481,7 @@ looped_back: IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS); icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, - 0, skb->dev); + 0); kfree_skb(skb); return -1; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 9a37379d741a..eb9abe24bdf0 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -114,7 +114,7 @@ static __inline__ void icmpv6_xmit_unlock(struct sock *sk) */ void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) { - icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev); + icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos); kfree_skb(skb); } @@ -300,8 +300,7 @@ static inline void mip6_addr_swap(struct sk_buff *skb) {} /* * Send an ICMP message in response to a packet in error */ -void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, - struct net_device *dev) 
+void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) { struct net *net = dev_net(skb->dev); struct inet6_dev *idev = NULL; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 237e2dba6e94..e28f9203deca 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -216,8 +216,7 @@ resubmit: IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INUNKNOWNPROTOS); icmpv6_send(skb, ICMPV6_PARAMPROB, - ICMPV6_UNK_NEXTHDR, nhoff, - skb->dev); + ICMPV6_UNK_NEXTHDR, nhoff); } } else IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index eb6d09728633..1a5fe9ad1947 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -267,7 +267,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, if (net_ratelimit()) printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n"); skb->dev = dst->dev; - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; @@ -441,8 +441,7 @@ int ip6_forward(struct sk_buff *skb) if (hdr->hop_limit <= 1) { /* Force OUTPUT device used as source address */ skb->dev = dst->dev; - icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, - 0, skb->dev); + icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); @@ -504,7 +503,7 @@ int ip6_forward(struct sk_buff *skb) goto error; if (addrtype & IPV6_ADDR_LINKLOCAL) { icmpv6_send(skb, ICMPV6_DEST_UNREACH, - ICMPV6_NOT_NEIGHBOUR, 0, skb->dev); + ICMPV6_NOT_NEIGHBOUR, 0); goto error; } } @@ -512,7 +511,7 @@ int ip6_forward(struct sk_buff *skb) if (skb->len > dst_mtu(dst)) { /* Again, force OUTPUT device used as source address */ skb->dev = dst->dev; - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst)); IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); IP6_INC_STATS_BH(net, @@ -627,7 +626,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) */ if (!skb->local_df) { skb->dev = skb_dst(skb)->dev; - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 9b02492d8706..138980eec214 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -622,7 +622,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (rt && rt->rt6i_dev) skb2->dev = rt->rt6i_dev; - icmpv6_send(skb2, rel_type, rel_code, rel_info, skb2->dev); + icmpv6_send(skb2, rel_type, rel_code, rel_info); if (rt) dst_release(&rt->u.dst); @@ -1014,7 +1014,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; if (tel->encap_limit == 0) { icmpv6_send(skb, ICMPV6_PARAMPROB, - ICMPV6_HDR_FIELD, offset + 2, skb->dev); + ICMPV6_HDR_FIELD, offset + 2); return -1; } encap_limit = tel->encap_limit - 1; @@ -1033,7 +1033,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu); if (err != 0) { if (err == -EMSGSIZE) - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); return -1; } diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 
f797e8c6f3b3..2794b6002836 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -56,7 +56,7 @@ static inline void *mip6_padn(__u8 *data, __u8 padlen) static inline void mip6_param_prob(struct sk_buff *skb, u8 code, int pos) { - icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev); + icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos); } static int mip6_mh_len(int type) diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 8311ca31816a..dd8afbaf00a8 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -169,7 +169,7 @@ send_unreach(struct net *net, struct sk_buff *skb_in, unsigned char code, if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL) skb_in->dev = net->loopback_dev; - icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL); + icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0); } static unsigned int diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index b2847ed6a7d9..a555156e9779 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -228,7 +228,7 @@ static void ip6_frag_expire(unsigned long data) pointer directly, device might already disappeared. */ fq->q.fragments->dev = dev; - icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev); + icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); out_rcu_unlock: rcu_read_unlock(); out: diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8500156f2637..88c0a5c49ae8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -909,7 +909,7 @@ static void ip6_link_failure(struct sk_buff *skb) { struct rt6_info *rt; - icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev); + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); rt = (struct rt6_info *) skb_dst(skb); if (rt) { @@ -1884,7 +1884,7 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) ipstats_mib_noroutes); break; } - icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev); + icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0); kfree_skb(skb); return 0; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 96eb2d4641c4..b1eea811be48 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -743,7 +743,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); if (skb->len > mtu) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ip_rt_put(rt); goto tx_error; } diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index 51e2832d13a6..e17bc1dfc1a4 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -98,7 +98,7 @@ static int tunnel6_rcv(struct sk_buff *skb) if (!handler->handler(skb)) return 0; - icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, skb->dev); + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); drop: kfree_skb(skb); @@ -116,7 +116,7 @@ static int tunnel46_rcv(struct sk_buff *skb) if (!handler->handler(skb)) return 0; - icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, skb->dev); + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); drop: kfree_skb(skb); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index a7af9d68cd6c..52b8347ae3b2 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -680,12 +680,11 @@ static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { + struct net *net = dev_net(skb->dev); struct sock *sk; struct udphdr *uh; - struct 
net_device *dev = skb->dev; struct in6_addr *saddr, *daddr; u32 ulen = 0; - struct net *net = dev_net(skb->dev); if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto short_packet; @@ -744,7 +743,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); - icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev); + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); kfree_skb(skb); return 0; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index c4f4eef032a3..0c92112dcba3 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -38,7 +38,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) if (!skb->local_df && skb->len > mtu) { skb->dev = dst->dev; - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ret = -EMSGSIZE; } diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 72e96d823ebf..44590887a92c 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -515,8 +515,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, */ #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) - icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, - skb->dev); + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); else #endif icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); @@ -1048,7 +1047,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, - 0, skb->dev); + 0); else #endif icmp_send(skb, diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 30b3189bd29c..223b5018c7dc 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -311,7 +311,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, mtu = dst_mtu(&rt->u.dst); if (skb->len > mtu) { dst_release(&rt->u.dst); - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; } @@ -454,7 +454,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, mtu = dst_mtu(&rt->u.dst); if (skb->len > mtu) { dst_release(&rt->u.dst); - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit_v6(): frag needed for"); goto tx_error; @@ -672,7 +672,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); dst_release(&rt->u.dst); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; @@ -814,7 +814,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* MTU checking */ mtu = dst_mtu(&rt->u.dst); if (skb->len > mtu) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); dst_release(&rt->u.dst); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; @@ -965,7 +965,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, mtu = dst_mtu(&rt->u.dst); if (skb->len > mtu) { dst_release(&rt->u.dst); - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL("%s(): frag needed\n", __func__); 
goto tx_error; } -- cgit v1.2.2 From b54452b07a7b1b8cc1385edba3ef2ef6d4679d5a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 18 Feb 2010 08:14:31 +0000 Subject: const: struct nla_policy Make remaining netlink policies as const. Fixup coding style where needed. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/dcb/dcbnl.c | 16 ++++++++-------- net/irda/irnetlink.c | 2 +- net/wimax/op-msg.c | 3 +-- net/wimax/op-reset.c | 3 +-- net/wimax/op-rfkill.c | 3 +-- net/wimax/op-state-get.c | 3 +-- net/wimax/stack.c | 3 +-- net/wireless/nl80211.c | 14 +++++--------- 8 files changed, 19 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index db9f5b39388f..813e399220a7 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -54,7 +54,7 @@ MODULE_LICENSE("GPL"); /**************** DCB attribute policies *************************************/ /* DCB netlink attributes policy */ -static struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = { +static const struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = { [DCB_ATTR_IFNAME] = {.type = NLA_NUL_STRING, .len = IFNAMSIZ - 1}, [DCB_ATTR_STATE] = {.type = NLA_U8}, [DCB_ATTR_PFC_CFG] = {.type = NLA_NESTED}, @@ -68,7 +68,7 @@ static struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = { }; /* DCB priority flow control to User Priority nested attributes */ -static struct nla_policy dcbnl_pfc_up_nest[DCB_PFC_UP_ATTR_MAX + 1] = { +static const struct nla_policy dcbnl_pfc_up_nest[DCB_PFC_UP_ATTR_MAX + 1] = { [DCB_PFC_UP_ATTR_0] = {.type = NLA_U8}, [DCB_PFC_UP_ATTR_1] = {.type = NLA_U8}, [DCB_PFC_UP_ATTR_2] = {.type = NLA_U8}, @@ -81,7 +81,7 @@ static struct nla_policy dcbnl_pfc_up_nest[DCB_PFC_UP_ATTR_MAX + 1] = { }; /* DCB priority grouping nested attributes */ -static struct nla_policy dcbnl_pg_nest[DCB_PG_ATTR_MAX + 1] = { +static const struct nla_policy dcbnl_pg_nest[DCB_PG_ATTR_MAX + 1] = { [DCB_PG_ATTR_TC_0] = {.type = NLA_NESTED}, [DCB_PG_ATTR_TC_1] = {.type = NLA_NESTED}, [DCB_PG_ATTR_TC_2] = {.type = NLA_NESTED}, @@ -103,7 +103,7 @@ static struct nla_policy dcbnl_pg_nest[DCB_PG_ATTR_MAX + 1] = { }; /* DCB traffic class nested attributes. */ -static struct nla_policy dcbnl_tc_param_nest[DCB_TC_ATTR_PARAM_MAX + 1] = { +static const struct nla_policy dcbnl_tc_param_nest[DCB_TC_ATTR_PARAM_MAX + 1] = { [DCB_TC_ATTR_PARAM_PGID] = {.type = NLA_U8}, [DCB_TC_ATTR_PARAM_UP_MAPPING] = {.type = NLA_U8}, [DCB_TC_ATTR_PARAM_STRICT_PRIO] = {.type = NLA_U8}, @@ -112,7 +112,7 @@ static struct nla_policy dcbnl_tc_param_nest[DCB_TC_ATTR_PARAM_MAX + 1] = { }; /* DCB capabilities nested attributes. */ -static struct nla_policy dcbnl_cap_nest[DCB_CAP_ATTR_MAX + 1] = { +static const struct nla_policy dcbnl_cap_nest[DCB_CAP_ATTR_MAX + 1] = { [DCB_CAP_ATTR_ALL] = {.type = NLA_FLAG}, [DCB_CAP_ATTR_PG] = {.type = NLA_U8}, [DCB_CAP_ATTR_PFC] = {.type = NLA_U8}, @@ -124,14 +124,14 @@ static struct nla_policy dcbnl_cap_nest[DCB_CAP_ATTR_MAX + 1] = { }; /* DCB capabilities nested attributes. */ -static struct nla_policy dcbnl_numtcs_nest[DCB_NUMTCS_ATTR_MAX + 1] = { +static const struct nla_policy dcbnl_numtcs_nest[DCB_NUMTCS_ATTR_MAX + 1] = { [DCB_NUMTCS_ATTR_ALL] = {.type = NLA_FLAG}, [DCB_NUMTCS_ATTR_PG] = {.type = NLA_U8}, [DCB_NUMTCS_ATTR_PFC] = {.type = NLA_U8}, }; /* DCB BCN nested attributes. 
*/ -static struct nla_policy dcbnl_bcn_nest[DCB_BCN_ATTR_MAX + 1] = { +static const struct nla_policy dcbnl_bcn_nest[DCB_BCN_ATTR_MAX + 1] = { [DCB_BCN_ATTR_RP_0] = {.type = NLA_U8}, [DCB_BCN_ATTR_RP_1] = {.type = NLA_U8}, [DCB_BCN_ATTR_RP_2] = {.type = NLA_U8}, @@ -160,7 +160,7 @@ static struct nla_policy dcbnl_bcn_nest[DCB_BCN_ATTR_MAX + 1] = { }; /* DCB APP nested attributes. */ -static struct nla_policy dcbnl_app_nest[DCB_APP_ATTR_MAX + 1] = { +static const struct nla_policy dcbnl_app_nest[DCB_APP_ATTR_MAX + 1] = { [DCB_APP_ATTR_IDTYPE] = {.type = NLA_U8}, [DCB_APP_ATTR_ID] = {.type = NLA_U16}, [DCB_APP_ATTR_PRIORITY] = {.type = NLA_U8}, diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c index 476b307bd801..69b5b75f5431 100644 --- a/net/irda/irnetlink.c +++ b/net/irda/irnetlink.c @@ -124,7 +124,7 @@ static int irda_nl_get_mode(struct sk_buff *skb, struct genl_info *info) return ret; } -static struct nla_policy irda_nl_policy[IRDA_NL_ATTR_MAX + 1] = { +static const struct nla_policy irda_nl_policy[IRDA_NL_ATTR_MAX + 1] = { [IRDA_NL_ATTR_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ-1 }, [IRDA_NL_ATTR_MODE] = { .type = NLA_U32 }, diff --git a/net/wimax/op-msg.c b/net/wimax/op-msg.c index d3bfb6ef13ae..7718657e93dc 100644 --- a/net/wimax/op-msg.c +++ b/net/wimax/op-msg.c @@ -320,8 +320,7 @@ int wimax_msg(struct wimax_dev *wimax_dev, const char *pipe_name, EXPORT_SYMBOL_GPL(wimax_msg); -static const -struct nla_policy wimax_gnl_msg_policy[WIMAX_GNL_ATTR_MAX + 1] = { +static const struct nla_policy wimax_gnl_msg_policy[WIMAX_GNL_ATTR_MAX + 1] = { [WIMAX_GNL_MSG_IFIDX] = { .type = NLA_U32, }, diff --git a/net/wimax/op-reset.c b/net/wimax/op-reset.c index 35f370091f4f..4dc82a54ba30 100644 --- a/net/wimax/op-reset.c +++ b/net/wimax/op-reset.c @@ -91,8 +91,7 @@ int wimax_reset(struct wimax_dev *wimax_dev) EXPORT_SYMBOL(wimax_reset); -static const -struct nla_policy wimax_gnl_reset_policy[WIMAX_GNL_ATTR_MAX + 1] = { +static const struct nla_policy wimax_gnl_reset_policy[WIMAX_GNL_ATTR_MAX + 1] = { [WIMAX_GNL_RESET_IFIDX] = { .type = NLA_U32, }, diff --git a/net/wimax/op-rfkill.c b/net/wimax/op-rfkill.c index ae752a64d920..e978c7136c97 100644 --- a/net/wimax/op-rfkill.c +++ b/net/wimax/op-rfkill.c @@ -410,8 +410,7 @@ void wimax_rfkill_rm(struct wimax_dev *wimax_dev) * just query). */ -static const -struct nla_policy wimax_gnl_rfkill_policy[WIMAX_GNL_ATTR_MAX + 1] = { +static const struct nla_policy wimax_gnl_rfkill_policy[WIMAX_GNL_ATTR_MAX + 1] = { [WIMAX_GNL_RFKILL_IFIDX] = { .type = NLA_U32, }, diff --git a/net/wimax/op-state-get.c b/net/wimax/op-state-get.c index a76b8fcb056d..11ad3356eb56 100644 --- a/net/wimax/op-state-get.c +++ b/net/wimax/op-state-get.c @@ -33,8 +33,7 @@ #include "debug-levels.h" -static const -struct nla_policy wimax_gnl_state_get_policy[WIMAX_GNL_ATTR_MAX + 1] = { +static const struct nla_policy wimax_gnl_state_get_policy[WIMAX_GNL_ATTR_MAX + 1] = { [WIMAX_GNL_STGET_IFIDX] = { .type = NLA_U32, }, diff --git a/net/wimax/stack.c b/net/wimax/stack.c index c8866412f830..813e1eaea29b 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -75,8 +75,7 @@ MODULE_PARM_DESC(debug, * close to where the data is generated. 
*/ /* -static const -struct nla_policy wimax_gnl_re_status_change[WIMAX_GNL_ATTR_MAX + 1] = { +static const struct nla_policy wimax_gnl_re_status_change[WIMAX_GNL_ATTR_MAX + 1] = { [WIMAX_GNL_STCH_STATE_OLD] = { .type = NLA_U8 }, [WIMAX_GNL_STCH_STATE_NEW] = { .type = NLA_U8 }, }; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5b79ecf17bea..a001ea32cb7d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -58,7 +58,7 @@ static int get_rdev_dev_by_info_ifindex(struct genl_info *info, } /* policy for the attributes */ -static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { +static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_WIPHY] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_NAME] = { .type = NLA_NUL_STRING, .len = 20-1 }, @@ -148,8 +148,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { }; /* policy for the attributes */ -static struct nla_policy -nl80211_key_policy[NL80211_KEY_MAX + 1] __read_mostly = { +static const struct nla_policy nl80211_key_policy[NL80211_KEY_MAX + 1] = { [NL80211_KEY_DATA] = { .type = NLA_BINARY, .len = WLAN_MAX_KEY_LEN }, [NL80211_KEY_IDX] = { .type = NLA_U8 }, [NL80211_KEY_CIPHER] = { .type = NLA_U32 }, @@ -2501,8 +2500,7 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) return err; } -static const struct nla_policy - reg_rule_policy[NL80211_REG_RULE_ATTR_MAX + 1] = { +static const struct nla_policy reg_rule_policy[NL80211_REG_RULE_ATTR_MAX + 1] = { [NL80211_ATTR_REG_RULE_FLAGS] = { .type = NLA_U32 }, [NL80211_ATTR_FREQ_RANGE_START] = { .type = NLA_U32 }, [NL80211_ATTR_FREQ_RANGE_END] = { .type = NLA_U32 }, @@ -2671,8 +2669,7 @@ do {\ } \ } while (0);\ -static struct nla_policy -nl80211_meshconf_params_policy[NL80211_MESHCONF_ATTR_MAX+1] __read_mostly = { +static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_ATTR_MAX+1] = { [NL80211_MESHCONF_RETRY_TIMEOUT] = { .type = NLA_U16 }, [NL80211_MESHCONF_CONFIRM_TIMEOUT] = { .type = NLA_U16 }, [NL80211_MESHCONF_HOLDING_TIMEOUT] = { .type = NLA_U16 }, @@ -4470,8 +4467,7 @@ static u32 rateset_to_mask(struct ieee80211_supported_band *sband, return mask; } -static struct nla_policy -nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] __read_mostly = { +static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = { [NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY, .len = NL80211_MAX_SUPP_RATES }, }; -- cgit v1.2.2 From 36e31b0af58728071e8023cf8e20c5166b700717 Mon Sep 17 00:00:00 2001 From: Andreas Petlund Date: Thu, 18 Feb 2010 02:47:01 +0000 Subject: net: TCP thin linear timeouts This patch will make TCP use only linear timeouts if the stream is thin. This will help to avoid the very high latencies that thin stream suffer because of exponential backoff. This mechanism is only active if enabled by iocontrol or syscontrol and the stream is identified as thin. A maximum of 6 linear timeouts is tried before exponential backoff is resumed. Signed-off-by: Andreas Petlund Signed-off-by: David S. 
Miller --- net/ipv4/sysctl_net_ipv4.c | 7 +++++++ net/ipv4/tcp.c | 7 +++++++ net/ipv4/tcp_timer.c | 21 ++++++++++++++++++++- 3 files changed, 34 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 7e3712ce3994..e6a2460587d4 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -575,6 +575,13 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_thin_linear_timeouts", + .data = &sysctl_tcp_thin_linear_timeouts, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { .procname = "udp_mem", .data = &sysctl_udp_mem, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e471d037fcc9..21bae9afefea 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2229,6 +2229,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level, } break; + case TCP_THIN_LINEAR_TIMEOUTS: + if (val < 0 || val > 1) + err = -EINVAL; + else + tp->thin_lto = val; + break; + case TCP_CORK: /* When set indicates to always queue non-full frames. * Later the user clears this option and we transmit diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index de7d1bf9114f..a17629b8912e 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -29,6 +29,7 @@ int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL; int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; int sysctl_tcp_orphan_retries __read_mostly; +int sysctl_tcp_thin_linear_timeouts __read_mostly; static void tcp_write_timer(unsigned long); static void tcp_delack_timer(unsigned long); @@ -415,7 +416,25 @@ void tcp_retransmit_timer(struct sock *sk) icsk->icsk_retransmits++; out_reset_timer: - icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); + /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is + * used to reset timer, set to 0. Recalculate 'icsk_rto' as this + * might be increased if the stream oscillates between thin and thick, + * thus the old value might already be too high compared to the value + * set by 'tcp_set_rto' in tcp_input.c which resets the rto without + * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating + * exponential backoff behaviour to avoid continue hammering + * linear-timeout retransmissions into a black hole + */ + if (sk->sk_state == TCP_ESTABLISHED && + (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) && + tcp_stream_is_thin(tp) && + icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { + icsk->icsk_backoff = 0; + icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX); + } else { + /* Use normal (exponential) backoff */ + icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); + } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) __sk_dst_reset(sk); -- cgit v1.2.2 From 7e38017557bc0b87434d184f8804cadb102bb903 Mon Sep 17 00:00:00 2001 From: Andreas Petlund Date: Thu, 18 Feb 2010 04:48:19 +0000 Subject: net: TCP thin dupack This patch enables fast retransmissions after one dupACK for TCP if the stream is identified as thin. This will reduce latencies for thin streams that are not able to trigger fast retransmissions due to high packet interarrival time. This mechanism is only active if enabled by iocontrol or syscontrol and the stream is identified as thin. Signed-off-by: Andreas Petlund Signed-off-by: David S. 
Miller --- net/ipv4/sysctl_net_ipv4.c | 7 +++++++ net/ipv4/tcp.c | 7 +++++++ net/ipv4/tcp_input.c | 12 ++++++++++++ 3 files changed, 26 insertions(+) (limited to 'net') diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e6a2460587d4..c1bc074f61b7 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -582,6 +582,13 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_thin_dupack", + .data = &sysctl_tcp_thin_dupack, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { .procname = "udp_mem", .data = &sysctl_udp_mem, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 21bae9afefea..5901010fad55 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2236,6 +2236,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level, tp->thin_lto = val; break; + case TCP_THIN_DUPACK: + if (val < 0 || val > 1) + err = -EINVAL; + else + tp->thin_dupack = val; + break; + case TCP_CORK: /* When set indicates to always queue non-full frames. * Later the user clears this option and we transmit diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3fddc69ccccc..788851ca8c5d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -89,6 +89,8 @@ int sysctl_tcp_frto __read_mostly = 2; int sysctl_tcp_frto_response __read_mostly; int sysctl_tcp_nometrics_save __read_mostly; +int sysctl_tcp_thin_dupack __read_mostly; + int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; int sysctl_tcp_abc __read_mostly; @@ -2447,6 +2449,16 @@ static int tcp_time_to_recover(struct sock *sk) return 1; } + /* If a thin stream is detected, retransmit after first + * received dupack. Employ only if SACK is supported in order + * to avoid possible corner-case series of spurious retransmissions + * Use only if there are no unsent data. + */ + if ((tp->thin_dupack || sysctl_tcp_thin_dupack) && + tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 && + tcp_is_sack(tp) && !tcp_send_head(sk)) + return 1; + return 0; } -- cgit v1.2.2 From 72032fdbcde8b333e65b3430e1bcb4358e2d6716 Mon Sep 17 00:00:00 2001 From: jamal Date: Thu, 18 Feb 2010 03:35:07 +0000 Subject: xfrm: Introduce LINUX_MIB_XFRMFWDHDRERROR XFRMINHDRERROR counter is ambigous when validating forwarding path. It makes it tricky to debug when you have both in and fwd validation. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 3 +-- net/xfrm/xfrm_proc.c | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 2c5d93181f13..4368e7b88469 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2045,8 +2045,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) int res; if (xfrm_decode_session(skb, &fl, family) < 0) { - /* XXX: we should have something like FWDHDRERROR here. 
*/ - XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); return 0; } diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index 003f2c437ac3..58d9ae005597 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -41,6 +41,7 @@ static const struct snmp_mib xfrm_mib_list[] = { SNMP_MIB_ITEM("XfrmOutPolBlock", LINUX_MIB_XFRMOUTPOLBLOCK), SNMP_MIB_ITEM("XfrmOutPolDead", LINUX_MIB_XFRMOUTPOLDEAD), SNMP_MIB_ITEM("XfrmOutPolError", LINUX_MIB_XFRMOUTPOLERROR), + SNMP_MIB_ITEM("XfrmFwdHdrError", LINUX_MIB_XFRMFWDHDRERROR), SNMP_MIB_SENTINEL }; -- cgit v1.2.2 From a88e22adf5aad79b6e2ddd1bf0109c2ba8b46b0e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 19 Feb 2010 14:24:39 +0100 Subject: netfilter: ctnetlink: fix creation of conntrack with helpers This patch fixes a bug that triggers an assertion if you create a conntrack entry with a helper and netfilter debugging is enabled. Basically, we hit the assertion because the confirmation flag is set before the conntrack extensions are added. To fix this, we move the extension addition before the aforementioned flag is set. This patch also removes the possibility of setting a helper for existing conntracks. This operation would also trigger the assertion since we are not allowed to add new extensions for existing conntracks. We know noone that could benefit from this operation sanely. Thanks to Eric Dumazet for initial posting a preliminary patch to address this issue. Reported-by: David Ramblewski Signed-off-by: Pablo Neira Ayuso Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_netlink.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 8b05f364b2f2..2b2af631d2b8 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1077,9 +1077,8 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) /* need to zero data of old helper */ memset(&help->help, 0, sizeof(help->help)); } else { - help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); - if (help == NULL) - return -ENOMEM; + /* we cannot set a helper for an existing conntrack */ + return -EOPNOTSUPP; } rcu_assign_pointer(help->helper, helper); @@ -1263,7 +1262,6 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, ct->timeout.expires = ntohl(nla_get_be32(cda[CTA_TIMEOUT])); ct->timeout.expires = jiffies + ct->timeout.expires * HZ; - ct->status |= IPS_CONFIRMED; rcu_read_lock(); if (cda[CTA_HELP]) { @@ -1314,14 +1312,19 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, goto err2; } - if (cda[CTA_STATUS]) { - err = ctnetlink_change_status(ct, cda); + if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) { + err = ctnetlink_change_nat(ct, cda); if (err < 0) goto err2; } - if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) { - err = ctnetlink_change_nat(ct, cda); + nf_ct_acct_ext_add(ct, GFP_ATOMIC); + nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); + /* we must add conntrack extensions before confirmation. 
*/ + ct->status |= IPS_CONFIRMED; + + if (cda[CTA_STATUS]) { + err = ctnetlink_change_status(ct, cda); if (err < 0) goto err2; } @@ -1340,9 +1343,6 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, goto err2; } - nf_ct_acct_ext_add(ct, GFP_ATOMIC); - nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); - #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) ct->mark = ntohl(nla_get_be32(cda[CTA_MARK])); -- cgit v1.2.2 From 64507fdbc29c3a622180378210ecea8659b14e40 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Feb 2010 15:28:38 +0100 Subject: netfilter: nf_queue: fix NF_STOLEN skb leak commit 3bc38712e3a6e059 (handle NF_STOP and unknown verdicts in nf_reinject) was a partial fix to packet leaks. If user asks NF_STOLEN status, we must free the skb as well. Reported-by: Afi Gjermund Signed-off-by: Eric DUmazet Signed-off-by: Patrick McHardy --- net/netfilter/nf_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 3a6fd77f7761..ba095fd014e5 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -265,7 +265,6 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) local_bh_disable(); entry->okfn(skb); local_bh_enable(); - case NF_STOLEN: break; case NF_QUEUE: if (!__nf_queue(skb, elem, entry->pf, entry->hook, @@ -273,6 +272,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) verdict >> NF_VERDICT_BITS)) goto next_hook; break; + case NF_STOLEN: default: kfree_skb(skb); } -- cgit v1.2.2 From 9e2dcf72023d1447f09c47d77c99b0c49659e5ce Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 19 Feb 2010 18:18:37 +0100 Subject: netfilter: nf_conntrack_reasm: properly handle packets fragmented into a single fragment When an ICMPV6_PKT_TOOBIG message is received with a MTU below 1280, all further packets include a fragment header. Unlike regular defragmentation, conntrack also needs to "reassemble" those fragments in order to obtain a packet without the fragment header for connection tracking. Currently nf_conntrack_reasm checks whether a fragment has either IP6_MF set or an offset != 0, which makes it ignore those fragments. Remove the invalid check and make reassembly handle fragment queues containing only a single fragment. Reported-and-tested-by: Ulrich Weber Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/nf_conntrack_reasm.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index ad1fcda6898b..f1171b744650 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -469,7 +469,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */ fp = skb_shinfo(head)->frag_list; - if (NFCT_FRAG6_CB(fp)->orig == NULL) + if (fp && NFCT_FRAG6_CB(fp)->orig == NULL) /* at above code, head skb is divided into two skbs. 
*/ fp = fp->next; @@ -595,12 +595,6 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) hdr = ipv6_hdr(clone); fhdr = (struct frag_hdr *)skb_transport_header(clone); - if (!(fhdr->frag_off & htons(0xFFF9))) { - pr_debug("Invalid fragment offset\n"); - /* It is not a fragmented frame */ - goto ret_orig; - } - if (atomic_read(&nf_init_frags.mem) > nf_init_frags.high_thresh) nf_ct_frag6_evictor(); -- cgit v1.2.2 From ffb9eb3d8b450c22bbbc688c6b630141ac476fd9 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Wed, 17 Feb 2010 17:58:10 +0200 Subject: nl80211: add power save commands The most needed command from nl80211, which Wireless Extensions had, is support for power save mode. Add a simple command to make it possible to enable and disable power save via nl80211. I was also planning about extending the interface, for example adding the timeout value, but after thinking more about this I decided not to do it. Basically there were three reasons: Firstly, the parameters for power save are very much hardware dependent. Trying to find a unified interface which would work with all hardware, and still make sense to users, will be very difficult. Secondly, IEEE 802.11 power save implementation in Linux is still in state of flux. We have a long way to still to go and there is no way to predict what kind of implementation we will have after few years. And because we need to support nl80211 interface a long time, practically forever, adding now parameters to nl80211 might create maintenance problems later on. Third issue are the users. Power save parameters are mostly used for debugging, so debugfs is better, more flexible, interface for this. For example, wpa_supplicant currently doesn't configure anything related to power save mode. It's better to strive that kernel can automatically optimise the power save parameters, like with help of pm qos network and other traffic parameters. Later on, when we have better understanding of power save, we can extend this command with more features, if there's a need for that. Signed-off-by: Kalle Valo Signed-off-by: John W. 
Linville --- net/wireless/core.c | 16 +++--- net/wireless/nl80211.c | 131 +++++++++++++++++++++++++++++++++++++++++++++ net/wireless/wext-compat.c | 10 ++-- 3 files changed, 145 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 51908dc2ea00..7fdb9409ad2a 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -698,19 +698,21 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, wdev->wext.default_key = -1; wdev->wext.default_mgmt_key = -1; wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; +#endif + if (wdev->wiphy->flags & WIPHY_FLAG_PS_ON_BY_DEFAULT) - wdev->wext.ps = true; + wdev->ps = true; else - wdev->wext.ps = false; - wdev->wext.ps_timeout = 100; + wdev->ps = false; + wdev->ps_timeout = 100; if (rdev->ops->set_power_mgmt) if (rdev->ops->set_power_mgmt(wdev->wiphy, dev, - wdev->wext.ps, - wdev->wext.ps_timeout)) { + wdev->ps, + wdev->ps_timeout)) { /* assume this means it's off */ - wdev->wext.ps = false; + wdev->ps = false; } -#endif + if (!dev->ethtool_ops) dev->ethtool_ops = &cfg80211_ethtool_ops; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 328112081358..b0495a1da22e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -148,6 +148,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_FRAME] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, [NL80211_ATTR_FRAME_MATCH] = { .type = NLA_BINARY, }, + [NL80211_ATTR_PS_STATE] = { .type = NLA_U32 }, }; /* policy for the attributes */ @@ -4663,6 +4664,124 @@ unlock_rtnl: return err; } +static int nl80211_set_power_save(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev; + struct wireless_dev *wdev; + struct net_device *dev; + u8 ps_state; + bool state; + int err; + + if (!info->attrs[NL80211_ATTR_PS_STATE]) { + err = -EINVAL; + goto out; + } + + ps_state = nla_get_u32(info->attrs[NL80211_ATTR_PS_STATE]); + + if (ps_state != NL80211_PS_DISABLED && ps_state != NL80211_PS_ENABLED) { + err = -EINVAL; + goto out; + } + + rtnl_lock(); + + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); + if (err) + goto unlock_rdev; + + wdev = dev->ieee80211_ptr; + + if (!rdev->ops->set_power_mgmt) { + err = -EOPNOTSUPP; + goto unlock_rdev; + } + + state = (ps_state == NL80211_PS_ENABLED) ? 
true : false; + + if (state == wdev->ps) + goto unlock_rdev; + + wdev->ps = state; + + if (rdev->ops->set_power_mgmt(wdev->wiphy, dev, wdev->ps, + wdev->ps_timeout)) + /* assume this means it's off */ + wdev->ps = false; + +unlock_rdev: + cfg80211_unlock_rdev(rdev); + dev_put(dev); + rtnl_unlock(); + +out: + return err; +} + +static int nl80211_get_power_save(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev; + enum nl80211_ps_state ps_state; + struct wireless_dev *wdev; + struct net_device *dev; + struct sk_buff *msg; + void *hdr; + int err; + + rtnl_lock(); + + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); + if (err) + goto unlock_rtnl; + + wdev = dev->ieee80211_ptr; + + if (!rdev->ops->set_power_mgmt) { + err = -EOPNOTSUPP; + goto out; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + err = -ENOMEM; + goto out; + } + + hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + NL80211_CMD_GET_POWER_SAVE); + if (!hdr) { + err = -ENOMEM; + goto free_msg; + } + + if (wdev->ps) + ps_state = NL80211_PS_ENABLED; + else + ps_state = NL80211_PS_DISABLED; + + NLA_PUT_U32(msg, NL80211_ATTR_PS_STATE, ps_state); + + genlmsg_end(msg, hdr); + err = genlmsg_reply(msg, info); + goto out; + +nla_put_failure: + err = -ENOBUFS; + +free_msg: + nlmsg_free(msg); + +out: + cfg80211_unlock_rdev(rdev); + dev_put(dev); + +unlock_rtnl: + rtnl_unlock(); + + return err; +} + static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -4955,6 +5074,18 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, + { + .cmd = NL80211_CMD_SET_POWER_SAVE, + .doit = nl80211_set_power_save, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_GET_POWER_SAVE, + .doit = nl80211_get_power_save, + .policy = nl80211_policy, + /* can be retrieved by unprivileged users */ + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index b17eeae448d5..9ab51838849e 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1099,8 +1099,8 @@ int cfg80211_wext_siwpower(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); - bool ps = wdev->wext.ps; - int timeout = wdev->wext.ps_timeout; + bool ps = wdev->ps; + int timeout = wdev->ps_timeout; int err; if (wdev->iftype != NL80211_IFTYPE_STATION) @@ -1133,8 +1133,8 @@ int cfg80211_wext_siwpower(struct net_device *dev, if (err) return err; - wdev->wext.ps = ps; - wdev->wext.ps_timeout = timeout; + wdev->ps = ps; + wdev->ps_timeout = timeout; return 0; @@ -1147,7 +1147,7 @@ int cfg80211_wext_giwpower(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; - wrq->disabled = !wdev->wext.ps; + wrq->disabled = !wdev->ps; return 0; } -- cgit v1.2.2 From 8be987d73481831265d7e8c648bec838271bfd9b Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Fri, 19 Feb 2010 02:00:40 +0000 Subject: pfkey: fix SA and SP flush sequence RFC 2367 says flushing behavior should be: 1) user space -> kernel: flush 2) kernel: flush 3) kernel -> user space: flush event to ALL listeners This is not realistic today in the presence of selinux policies which may reject the flush etc. 
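As a rough illustration of step 1 above (a minimal sketch, not part of the patch itself; it assumes SADB_SATYPE_UNSPEC, i.e. "flush everything", and omits most error handling), the originating flush request travels over an ordinary PF_KEY socket:

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <linux/pfkeyv2.h>

    int main(void)
    {
    	struct sadb_msg req, rsp;
    	int fd;

    	/* PF_KEY v2 socket; creating it needs CAP_NET_ADMIN */
    	fd = socket(PF_KEY, SOCK_RAW, PF_KEY_V2);
    	if (fd < 0)
    		return 1;

    	memset(&req, 0, sizeof(req));
    	req.sadb_msg_version = PF_KEY_V2;
    	req.sadb_msg_type    = SADB_FLUSH;         /* 1) user space -> kernel: flush */
    	req.sadb_msg_satype  = SADB_SATYPE_UNSPEC; /* all SA types */
    	req.sadb_msg_len     = sizeof(req) / 8;    /* length in 64-bit words */
    	req.sadb_msg_seq     = 1;
    	req.sadb_msg_pid     = getpid();

    	if (send(fd, &req, sizeof(req), 0) < 0)
    		return 1;

    	/*
    	 * What arrives here is the subject of this patch: the originator
    	 * gets a unicast SADB_FLUSH reply whose sadb_msg_errno reflects
    	 * the outcome, and only a successful flush is additionally
    	 * broadcast to every listening PF_KEY socket.
    	 */
    	if (recv(fd, &rsp, sizeof(rsp), 0) >= (ssize_t)sizeof(rsp))
    		printf("flush: errno=%u seq=%u\n",
    		       rsp.sadb_msg_errno, rsp.sadb_msg_seq);

    	close(fd);
    	return 0;
    }

The behavioural question settled below is what comes back on that same socket, and what other (purely listening) PF_KEY sockets see, when the flush is rejected, e.g. by an LSM.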
So we make the sequence become: 1) user space -> kernel: flush 2) kernel: flush 3) kernel -> user space: flush response to originater from #1 4) if there were no errors then: kernel -> user space: flush event to ALL listeners Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/key/af_key.c | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 79d2c0f3c334..b3faede9a4f6 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1712,6 +1712,23 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, struct sadb_msg return 0; } +static int unicast_flush_resp(struct sock *sk, struct sadb_msg *ihdr) +{ + struct sk_buff *skb; + struct sadb_msg *hdr; + + skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_ATOMIC); + if (!skb) + return -ENOBUFS; + + hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg)); + memcpy(hdr, ihdr, sizeof(struct sadb_msg)); + hdr->sadb_msg_errno = (uint8_t) 0; + hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); + + return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk)); +} + static int key_notify_sa_flush(struct km_event *c) { struct sk_buff *skb; @@ -1740,7 +1757,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hd unsigned proto; struct km_event c; struct xfrm_audit audit_info; - int err; + int err, err2; proto = pfkey_satype2proto(hdr->sadb_msg_satype); if (proto == 0) @@ -1750,8 +1767,10 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hd audit_info.sessionid = audit_get_sessionid(current); audit_info.secid = 0; err = xfrm_state_flush(net, proto, &audit_info); - if (err) - return err; + err2 = unicast_flush_resp(sk, hdr); + if (err || err2) + return err ? err : err2; + c.data.proto = proto; c.seq = hdr->sadb_msg_seq; c.pid = hdr->sadb_msg_pid; @@ -2706,14 +2725,16 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg struct net *net = sock_net(sk); struct km_event c; struct xfrm_audit audit_info; - int err; + int err, err2; audit_info.loginuid = audit_get_loginuid(current); audit_info.sessionid = audit_get_sessionid(current); audit_info.secid = 0; err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); - if (err) - return err; + err2 = unicast_flush_resp(sk, hdr); + if (err || err2) + return err ? err : err2; + c.data.type = XFRM_POLICY_TYPE_MAIN; c.event = XFRM_MSG_FLUSHPOLICY; c.pid = hdr->sadb_msg_pid; -- cgit v1.2.2 From 9e64cc9572b43afcbcd2d004538db435f2cd0587 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Fri, 19 Feb 2010 02:00:41 +0000 Subject: xfrm: Flushing empty SAD generates false events To see the effect make sure you have an empty SAD. On window1 "ip xfrm mon" and on window2 issue "ip xfrm state flush" You get prompt back in window2 and you see the flush event on window1. With this fix, you still get prompt on window1 but no event on window2. Thanks to Alexey Dobriyan for finding a bug in earlier version when using pfkey to do the flushing. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- net/key/af_key.c | 5 ++++- net/xfrm/xfrm_state.c | 8 ++++++-- net/xfrm/xfrm_user.c | 5 ++++- 3 files changed, 14 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index b3faede9a4f6..c269ce6094d6 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1768,8 +1768,11 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hd audit_info.secid = 0; err = xfrm_state_flush(net, proto, &audit_info); err2 = unicast_flush_resp(sk, hdr); - if (err || err2) + if (err || err2) { + if (err == -ESRCH) /* empty table - go quietly */ + err = 0; return err ? err : err2; + } c.data.proto = proto; c.seq = hdr->sadb_msg_seq; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index c9d6a5f1348d..9fa3322b2a7d 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -603,13 +603,14 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audi int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info) { - int i, err = 0; + int i, err = 0, cnt = 0; spin_lock_bh(&xfrm_state_lock); err = xfrm_state_flush_secctx_check(net, proto, audit_info); if (err) goto out; + err = -ESRCH; for (i = 0; i <= net->xfrm.state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; @@ -626,13 +627,16 @@ restart: audit_info->sessionid, audit_info->secid); xfrm_state_put(x); + if (!err) + cnt++; spin_lock_bh(&xfrm_state_lock); goto restart; } } } - err = 0; + if (cnt) + err = 0; out: spin_unlock_bh(&xfrm_state_lock); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 943c8712bd97..cd94a9dd1bad 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1524,8 +1524,11 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, audit_info.sessionid = NETLINK_CB(skb).sessionid; audit_info.secid = NETLINK_CB(skb).sid; err = xfrm_state_flush(net, p->proto, &audit_info); - if (err) + if (err) { + if (err == -ESRCH) /* empty table */ + return 0; return err; + } c.data.proto = p->proto; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; -- cgit v1.2.2 From 2f1eb65f366b81aa3c22c31e6e8db26168777ec5 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Fri, 19 Feb 2010 02:00:42 +0000 Subject: xfrm: Flushing empty SPD generates false events To see the effect make sure you have an empty SPD. On window1 "ip xfrm mon" and on window2 issue "ip xfrm policy flush" You get prompt back in window2 and you see the flush event on window1. With this fix, you still get prompt on window1 but no event on window2. Thanks to Alexey Dobriyan for finding a bug in earlier version when using pfkey to do the flushing. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/key/af_key.c | 7 +++++-- net/xfrm/xfrm_policy.c | 13 ++++++++++--- net/xfrm/xfrm_user.c | 6 +++++- 3 files changed, 20 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index c269ce6094d6..a20d2fa88db9 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2735,8 +2735,11 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg audit_info.secid = 0; err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); err2 = unicast_flush_resp(sk, hdr); - if (err || err2) - return err ? 
err : err2; + if (err || err2) { + if (err == -ESRCH) /* empty table - old silent behavior */ + return 0; + return err; + } c.data.type = XFRM_POLICY_TYPE_MAIN; c.event = XFRM_MSG_FLUSHPOLICY; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 4368e7b88469..d6eb16d75243 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -771,7 +771,8 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) { - int dir, err = 0; + int dir, err = 0, cnt = 0; + struct xfrm_policy *dp; write_lock_bh(&xfrm_policy_lock); @@ -789,8 +790,10 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) &net->xfrm.policy_inexact[dir], bydst) { if (pol->type != type) continue; - __xfrm_policy_unlink(pol, dir); + dp = __xfrm_policy_unlink(pol, dir); write_unlock_bh(&xfrm_policy_lock); + if (dp) + cnt++; xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, audit_info->sessionid, @@ -809,8 +812,10 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) bydst) { if (pol->type != type) continue; - __xfrm_policy_unlink(pol, dir); + dp = __xfrm_policy_unlink(pol, dir); write_unlock_bh(&xfrm_policy_lock); + if (dp) + cnt++; xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, @@ -824,6 +829,8 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) } } + if (!cnt) + err = -ESRCH; atomic_inc(&flow_cache_genid); out: write_unlock_bh(&xfrm_policy_lock); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index cd94a9dd1bad..ee04e6bf0e54 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1679,8 +1679,12 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, audit_info.sessionid = NETLINK_CB(skb).sessionid; audit_info.secid = NETLINK_CB(skb).sid; err = xfrm_policy_flush(net, type, &audit_info); - if (err) + if (err) { + if (err == -ESRCH) /* empty table */ + return 0; return err; + } + c.data.type = type; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; -- cgit v1.2.2 From 88af182e389097997c5e2a0b42285b3522796759 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 19 Feb 2010 13:22:59 +0000 Subject: net: Fix sysctl restarts... Yuck. It turns out that when we restart sysctls we were restarting with the values already changed. Which unfortunately meant that the second time through we thought there was no change and skipped all kinds of work, despite the fact that there was indeed a change. I have fixed this the simplest way possible by restoring the changed values when we restart the sysctl write. One of my coworkers spotted this bug when after disabling forwarding on an interface pings were still forwarded. Signed-off-by: Eric W. Biederman Signed-off-by: David S. 
Miller --- net/ipv4/devinet.c | 7 ++++++- net/ipv6/addrconf.c | 16 ++++++++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 040c4f05b653..26dec2be9615 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1317,14 +1317,19 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, { int *valp = ctl->data; int val = *valp; + loff_t pos = *ppos; int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write && *valp != val) { struct net *net = ctl->extra2; if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) { - if (!rtnl_trylock()) + if (!rtnl_trylock()) { + /* Restore the original values before restarting */ + *valp = val; + *ppos = pos; return restart_syscall(); + } if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { inet_forward_change(net); } else if (*valp) { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index de7a194a64ab..143791da062c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -502,8 +502,11 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) if (p == &net->ipv6.devconf_dflt->forwarding) return 0; - if (!rtnl_trylock()) + if (!rtnl_trylock()) { + /* Restore the original values before restarting */ + *p = old; return restart_syscall(); + } if (p == &net->ipv6.devconf_all->forwarding) { __s32 newf = net->ipv6.devconf_all->forwarding; @@ -4028,12 +4031,15 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, { int *valp = ctl->data; int val = *valp; + loff_t pos = *ppos; int ret; ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write) ret = addrconf_fixup_forwarding(ctl, valp, val); + if (ret) + *ppos = pos; return ret; } @@ -4075,8 +4081,11 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int old) if (p == &net->ipv6.devconf_dflt->disable_ipv6) return 0; - if (!rtnl_trylock()) + if (!rtnl_trylock()) { + /* Restore the original values before restarting */ + *p = old; return restart_syscall(); + } if (p == &net->ipv6.devconf_all->disable_ipv6) { __s32 newf = net->ipv6.devconf_all->disable_ipv6; @@ -4095,12 +4104,15 @@ int addrconf_sysctl_disable(ctl_table *ctl, int write, { int *valp = ctl->data; int val = *valp; + loff_t pos = *ppos; int ret; ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write) ret = addrconf_disable_ipv6(ctl, valp, val); + if (ret) + *ppos = pos; return ret; } -- cgit v1.2.2 From b8afe6416101549e877f8470f2a160df69676166 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 19 Feb 2010 13:23:47 +0000 Subject: net-sysfs: Use rtnl_trylock in wireless sysfs methods. The wireless sysfs methods like the rest of the networking sysfs methods are removed with the rtnl_lock held and block until the existing methods stop executing. So use rtnl_trylock and restart_syscall so that the code continues to work. Signed-off-by: Eric W. Biederman Signed-off-by: David S. 
Miller --- net/core/net-sysfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index fbc1c7472c5e..099c753c4213 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -410,7 +410,8 @@ static ssize_t wireless_show(struct device *d, char *buf, const struct iw_statistics *iw; ssize_t ret = -EINVAL; - rtnl_lock(); + if (!rtnl_trylock()) + return restart_syscall(); if (dev_isalive(dev)) { iw = get_wireless_stats(dev); if (iw) -- cgit v1.2.2 From 808f5114a9206fee855117d416440e1071ab375c Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 22 Feb 2010 07:57:18 +0000 Subject: packet: convert socket list to RCU (v3) Convert AF_PACKET to use RCU, eliminating one more reader/writer lock. There is no need for a real sk_del_node_init_rcu(), because sk_del_node_init is doing the equivalent thing to hlst_del_init_rcu already; but added some comments to try and make that obvious. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/packet/af_packet.c | 62 +++++++++++++++++++++++++------------------------- 1 file changed, 31 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 10f7295bcefb..2f0369367ee0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1262,24 +1262,22 @@ static int packet_release(struct socket *sock) net = sock_net(sk); po = pkt_sk(sk); - write_lock_bh(&net->packet.sklist_lock); - sk_del_node_init(sk); + spin_lock_bh(&net->packet.sklist_lock); + sk_del_node_init_rcu(sk); sock_prot_inuse_add(net, sk->sk_prot, -1); - write_unlock_bh(&net->packet.sklist_lock); - - /* - * Unhook packet receive handler. - */ + spin_unlock_bh(&net->packet.sklist_lock); + spin_lock(&po->bind_lock); if (po->running) { /* - * Remove the protocol hook + * Remove from protocol table */ - dev_remove_pack(&po->prot_hook); po->running = 0; po->num = 0; + __dev_remove_pack(&po->prot_hook); __sock_put(sk); } + spin_unlock(&po->bind_lock); packet_flush_mclist(sk); @@ -1291,10 +1289,10 @@ static int packet_release(struct socket *sock) if (po->tx_ring.pg_vec) packet_set_ring(sk, &req, 1, 1); + synchronize_net(); /* * Now the socket is dead. No more input will appear. 
*/ - sock_orphan(sk); sock->sk = NULL; @@ -1478,10 +1476,11 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po->running = 1; } - write_lock_bh(&net->packet.sklist_lock); - sk_add_node(sk, &net->packet.sklist); + spin_lock_bh(&net->packet.sklist_lock); + sk_add_node_rcu(sk, &net->packet.sklist); sock_prot_inuse_add(net, &packet_proto, 1); - write_unlock_bh(&net->packet.sklist_lock); + spin_unlock_bh(&net->packet.sklist_lock); + return 0; out: return err; @@ -2075,8 +2074,8 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void struct net_device *dev = data; struct net *net = dev_net(dev); - read_lock(&net->packet.sklist_lock); - sk_for_each(sk, node, &net->packet.sklist) { + rcu_read_lock(); + sk_for_each_rcu(sk, node, &net->packet.sklist) { struct packet_sock *po = pkt_sk(sk); switch (msg) { @@ -2104,18 +2103,19 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void } break; case NETDEV_UP: - spin_lock(&po->bind_lock); - if (dev->ifindex == po->ifindex && po->num && - !po->running) { - dev_add_pack(&po->prot_hook); - sock_hold(sk); - po->running = 1; + if (dev->ifindex == po->ifindex) { + spin_lock(&po->bind_lock); + if (po->num && !po->running) { + dev_add_pack(&po->prot_hook); + sock_hold(sk); + po->running = 1; + } + spin_unlock(&po->bind_lock); } - spin_unlock(&po->bind_lock); break; } } - read_unlock(&net->packet.sklist_lock); + rcu_read_unlock(); return NOTIFY_DONE; } @@ -2512,24 +2512,24 @@ static struct notifier_block packet_netdev_notifier = { #ifdef CONFIG_PROC_FS static void *packet_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(seq_file_net(seq)->packet.sklist_lock) + __acquires(RCU) { struct net *net = seq_file_net(seq); - read_lock(&net->packet.sklist_lock); - return seq_hlist_start_head(&net->packet.sklist, *pos); + + rcu_read_lock(); + return seq_hlist_start_head_rcu(&net->packet.sklist, *pos); } static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct net *net = seq_file_net(seq); - return seq_hlist_next(v, &net->packet.sklist, pos); + return seq_hlist_next_rcu(v, &net->packet.sklist, pos); } static void packet_seq_stop(struct seq_file *seq, void *v) - __releases(seq_file_net(seq)->packet.sklist_lock) + __releases(RCU) { - struct net *net = seq_file_net(seq); - read_unlock(&net->packet.sklist_lock); + rcu_read_unlock(); } static int packet_seq_show(struct seq_file *seq, void *v) @@ -2581,7 +2581,7 @@ static const struct file_operations packet_seq_fops = { static int __net_init packet_net_init(struct net *net) { - rwlock_init(&net->packet.sklist_lock); + spin_lock_init(&net->packet.sklist_lock); INIT_HLIST_HEAD(&net->packet.sklist); if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops)) -- cgit v1.2.2 From 7f6b9dbd5afbd966a82dcbafc5ed62305eb9d479 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 22 Feb 2010 07:57:19 +0000 Subject: af_key: locking change Get rid of custom locking that was using wait queue, lock, and atomic to basically build a queued mutex. Use RCU for read side. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/key/af_key.c | 76 ++++++++++++-------------------------------------------- 1 file changed, 16 insertions(+), 60 deletions(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index a20d2fa88db9..da2fe5f57619 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -41,9 +41,7 @@ struct netns_pfkey { struct hlist_head table; atomic_t socks_nr; }; -static DECLARE_WAIT_QUEUE_HEAD(pfkey_table_wait); -static DEFINE_RWLOCK(pfkey_table_lock); -static atomic_t pfkey_table_users = ATOMIC_INIT(0); +static DEFINE_MUTEX(pfkey_mutex); struct pfkey_sock { /* struct sock must be the first member of struct pfkey_sock */ @@ -108,50 +106,6 @@ static void pfkey_sock_destruct(struct sock *sk) atomic_dec(&net_pfkey->socks_nr); } -static void pfkey_table_grab(void) -{ - write_lock_bh(&pfkey_table_lock); - - if (atomic_read(&pfkey_table_users)) { - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue_exclusive(&pfkey_table_wait, &wait); - for(;;) { - set_current_state(TASK_UNINTERRUPTIBLE); - if (atomic_read(&pfkey_table_users) == 0) - break; - write_unlock_bh(&pfkey_table_lock); - schedule(); - write_lock_bh(&pfkey_table_lock); - } - - __set_current_state(TASK_RUNNING); - remove_wait_queue(&pfkey_table_wait, &wait); - } -} - -static __inline__ void pfkey_table_ungrab(void) -{ - write_unlock_bh(&pfkey_table_lock); - wake_up(&pfkey_table_wait); -} - -static __inline__ void pfkey_lock_table(void) -{ - /* read_lock() synchronizes us to pfkey_table_grab */ - - read_lock(&pfkey_table_lock); - atomic_inc(&pfkey_table_users); - read_unlock(&pfkey_table_lock); -} - -static __inline__ void pfkey_unlock_table(void) -{ - if (atomic_dec_and_test(&pfkey_table_users)) - wake_up(&pfkey_table_wait); -} - - static const struct proto_ops pfkey_ops; static void pfkey_insert(struct sock *sk) @@ -159,16 +113,16 @@ static void pfkey_insert(struct sock *sk) struct net *net = sock_net(sk); struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); - pfkey_table_grab(); - sk_add_node(sk, &net_pfkey->table); - pfkey_table_ungrab(); + mutex_lock(&pfkey_mutex); + sk_add_node_rcu(sk, &net_pfkey->table); + mutex_unlock(&pfkey_mutex); } static void pfkey_remove(struct sock *sk) { - pfkey_table_grab(); - sk_del_node_init(sk); - pfkey_table_ungrab(); + mutex_lock(&pfkey_mutex); + sk_del_node_init_rcu(sk); + mutex_unlock(&pfkey_mutex); } static struct proto key_proto = { @@ -223,6 +177,8 @@ static int pfkey_release(struct socket *sock) sock_orphan(sk); sock->sk = NULL; skb_queue_purge(&sk->sk_write_queue); + + synchronize_rcu(); sock_put(sk); return 0; @@ -277,8 +233,8 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, if (!skb) return -ENOMEM; - pfkey_lock_table(); - sk_for_each(sk, node, &net_pfkey->table) { + rcu_read_lock(); + sk_for_each_rcu(sk, node, &net_pfkey->table) { struct pfkey_sock *pfk = pfkey_sk(sk); int err2; @@ -309,7 +265,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, if ((broadcast_flags & BROADCAST_REGISTERED) && err) err = err2; } - pfkey_unlock_table(); + rcu_read_unlock(); if (one_sk != NULL) err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk); @@ -3702,8 +3658,8 @@ static void *pfkey_seq_start(struct seq_file *f, loff_t *ppos) struct net *net = seq_file_net(f); struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); - read_lock(&pfkey_table_lock); - return seq_hlist_start_head(&net_pfkey->table, *ppos); + rcu_read_lock(); + return seq_hlist_start_head_rcu(&net_pfkey->table, *ppos); } static void *pfkey_seq_next(struct seq_file *f, 
void *v, loff_t *ppos) @@ -3711,12 +3667,12 @@ static void *pfkey_seq_next(struct seq_file *f, void *v, loff_t *ppos) struct net *net = seq_file_net(f); struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); - return seq_hlist_next(v, &net_pfkey->table, ppos); + return seq_hlist_next_rcu(v, &net_pfkey->table, ppos); } static void pfkey_seq_stop(struct seq_file *f, void *v) { - read_unlock(&pfkey_table_lock); + rcu_read_unlock(); } static const struct seq_operations pfkey_seq_ops = { -- cgit v1.2.2 From bd55775c8dd656fc69b3a42a1c4ab32abb7e8af9 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 22 Feb 2010 16:20:22 -0800 Subject: xfrm: SA lookups signature with mark pass mark to all SA lookups to prepare them for when we add code to have them search. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/core/pktgen.c | 3 ++- net/ipv4/ah4.c | 2 +- net/ipv4/esp4.c | 2 +- net/ipv4/ipcomp.c | 6 ++++-- net/ipv6/ah6.c | 2 +- net/ipv6/esp6.c | 2 +- net/ipv6/ipcomp6.c | 6 ++++-- net/ipv6/xfrm6_input.c | 2 +- net/key/af_key.c | 14 ++++++------ net/xfrm/xfrm_input.c | 2 +- net/xfrm/xfrm_state.c | 58 +++++++++++++++++++++++++++++--------------------- net/xfrm/xfrm_user.c | 17 +++++++++------ 12 files changed, 68 insertions(+), 48 deletions(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 2e692afdc55d..43923811bd6a 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2188,12 +2188,13 @@ static inline int f_pick(struct pktgen_dev *pkt_dev) /* If there was already an IPSEC SA, we keep it as is, else * we go look for it ... */ +#define DUMMY_MARK 0 static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) { struct xfrm_state *x = pkt_dev->flows[flow].x; if (!x) { /*slow path: we dont already have xfrm_state*/ - x = xfrm_stateonly_find(&init_net, + x = xfrm_stateonly_find(&init_net, DUMMY_MARK, (xfrm_address_t *)&pkt_dev->cur_daddr, (xfrm_address_t *)&pkt_dev->cur_saddr, AF_INET, diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 7ed3e4ae93ae..987b47dc69ad 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -393,7 +393,7 @@ static void ah4_err(struct sk_buff *skb, u32 info) icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) return; - x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); + x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); if (!x) return; printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n", diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 1948895beb6d..14ca1f1c3fb0 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -422,7 +422,7 @@ static void esp4_err(struct sk_buff *skb, u32 info) icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) return; - x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); + x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); if (!x) return; NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 83ed71500898..629067571f02 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -36,7 +36,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) return; spi = htonl(ntohs(ipch->cpi)); - x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, + x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET); if (!x) return; @@ -63,6 +63,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) t->props.mode = x->props.mode; 
t->props.saddr.a4 = x->props.saddr.a4; t->props.flags = x->props.flags; + memcpy(&t->mark, &x->mark, sizeof(t->mark)); if (xfrm_init_state(t)) goto error; @@ -87,8 +88,9 @@ static int ipcomp_tunnel_attach(struct xfrm_state *x) struct net *net = xs_net(x); int err = 0; struct xfrm_state *t; + u32 mark = x->mark.v & x->mark.m; - t = xfrm_state_lookup(net, (xfrm_address_t *)&x->id.daddr.a4, + t = xfrm_state_lookup(net, mark, (xfrm_address_t *)&x->id.daddr.a4, x->props.saddr.a4, IPPROTO_IPIP, AF_INET); if (!t) { t = ipcomp_tunnel_create(x); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index c2f300c314be..5ac89025f9de 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -614,7 +614,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, type != ICMPV6_PKT_TOOBIG) return; - x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6); + x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6); if (!x) return; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 668a46b655e6..ee9b93bdd6a2 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -365,7 +365,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, type != ICMPV6_PKT_TOOBIG) return; - x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); + x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); if (!x) return; printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%pI6\n", diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index bb42f39c1db8..85cccd6ed0b7 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -64,7 +64,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return; spi = htonl(ntohs(ipcomph->cpi)); - x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6); + x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6); if (!x) return; @@ -92,6 +92,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) t->props.family = AF_INET6; t->props.mode = x->props.mode; memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr)); + memcpy(&t->mark, &x->mark, sizeof(t->mark)); if (xfrm_init_state(t)) goto error; @@ -114,10 +115,11 @@ static int ipcomp6_tunnel_attach(struct xfrm_state *x) int err = 0; struct xfrm_state *t = NULL; __be32 spi; + u32 mark = x->mark.m & x->mark.v; spi = xfrm6_tunnel_spi_lookup(net, (xfrm_address_t *)&x->props.saddr); if (spi) - t = xfrm_state_lookup(net, (xfrm_address_t *)&x->id.daddr, + t = xfrm_state_lookup(net, mark, (xfrm_address_t *)&x->id.daddr, spi, IPPROTO_IPV6, AF_INET6); if (!t) { t = ipcomp6_tunnel_create(x); diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 9084582d236b..2bc98ede1235 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -101,7 +101,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, break; } - x = xfrm_state_lookup_byaddr(net, dst, src, proto, AF_INET6); + x = xfrm_state_lookup_byaddr(net, skb->mark, dst, src, proto, AF_INET6); if (!x) continue; diff --git a/net/key/af_key.c b/net/key/af_key.c index da2fe5f57619..aae3cd86ccd7 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -43,6 +43,8 @@ struct netns_pfkey { }; static DEFINE_MUTEX(pfkey_mutex); +#define DUMMY_MARK 0 +static struct xfrm_mark dummy_mark = {0, 0}; struct pfkey_sock { /* struct sock must be the first member of struct pfkey_sock */ struct sock sk; 
@@ -647,7 +649,7 @@ static struct xfrm_state *pfkey_xfrm_state_lookup(struct net *net, struct sadb_ if (!xaddr) return NULL; - return xfrm_state_lookup(net, xaddr, sa->sadb_sa_spi, proto, family); + return xfrm_state_lookup(net, DUMMY_MARK, xaddr, sa->sadb_sa_spi, proto, family); } #define PFKEY_ALIGN8(a) (1 + (((a) - 1) | (8 - 1))) @@ -1316,7 +1318,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h } if (hdr->sadb_msg_seq) { - x = xfrm_find_acq_byseq(net, hdr->sadb_msg_seq); + x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq); if (x && xfrm_addr_cmp(&x->id.daddr, xdaddr, family)) { xfrm_state_put(x); x = NULL; @@ -1324,7 +1326,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h } if (!x) - x = xfrm_find_acq(net, mode, reqid, proto, xdaddr, xsaddr, 1, family); + x = xfrm_find_acq(net, &dummy_mark, mode, reqid, proto, xdaddr, xsaddr, 1, family); if (x == NULL) return -ENOENT; @@ -1373,7 +1375,7 @@ static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, struct sadb_msg * if (hdr->sadb_msg_seq == 0 || hdr->sadb_msg_errno == 0) return 0; - x = xfrm_find_acq_byseq(net, hdr->sadb_msg_seq); + x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq); if (x == NULL) return 0; @@ -2572,8 +2574,8 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h return -EINVAL; delete = (hdr->sadb_msg_type == SADB_X_SPDDELETE2); - xp = xfrm_policy_byid(net, XFRM_POLICY_TYPE_MAIN, dir, - pol->sadb_x_policy_id, delete, &err); + xp = xfrm_policy_byid(net, XFRM_POLICY_TYPE_MAIN, + dir, pol->sadb_x_policy_id, delete, &err); if (xp == NULL) return -ENOENT; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index e0009c17d809..45f1c98d4fce 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -152,7 +152,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto drop; } - x = xfrm_state_lookup(net, daddr, spi, nexthdr, family); + x = xfrm_state_lookup(net, skb->mark, daddr, spi, nexthdr, family); if (x == NULL) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES); xfrm_audit_state_notfound(skb, family, spi, seq); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 9fa3322b2a7d..9f8530356b86 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -669,7 +669,7 @@ xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl, return 0; } -static struct xfrm_state *__xfrm_state_lookup(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) +static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) { unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family); struct xfrm_state *x; @@ -689,7 +689,7 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, xfrm_address_t *d return NULL; } -static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) +static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) { unsigned int h = xfrm_src_hash(net, daddr, saddr, family); struct xfrm_state *x; @@ -713,12 +713,14 @@ static inline struct xfrm_state * __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family) { struct net *net = xs_net(x); + u32 mark = x->mark.v & x->mark.m; if (use_spi) - return __xfrm_state_lookup(net, &x->id.daddr, 
x->id.spi, - x->id.proto, family); + return __xfrm_state_lookup(net, mark, &x->id.daddr, + x->id.spi, x->id.proto, family); else - return __xfrm_state_lookup_byaddr(net, &x->id.daddr, + return __xfrm_state_lookup_byaddr(net, mark, + &x->id.daddr, &x->props.saddr, x->id.proto, family); } @@ -783,6 +785,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, int acquire_in_progress = 0; int error = 0; struct xfrm_state *best = NULL; + u32 mark = pol->mark.v & pol->mark.m; to_put = NULL; @@ -819,7 +822,7 @@ found: x = best; if (!x && !error && !acquire_in_progress) { if (tmpl->id.spi && - (x0 = __xfrm_state_lookup(net, daddr, tmpl->id.spi, + (x0 = __xfrm_state_lookup(net, mark, daddr, tmpl->id.spi, tmpl->id.proto, family)) != NULL) { to_put = x0; error = -EEXIST; @@ -833,6 +836,7 @@ found: /* Initialize temporary selector matching only * to current session. */ xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family); + memcpy(&x->mark, &pol->mark, sizeof(x->mark)); error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid); if (error) { @@ -875,7 +879,7 @@ out: } struct xfrm_state * -xfrm_stateonly_find(struct net *net, +xfrm_stateonly_find(struct net *net, u32 mark, xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, u8 mode, u8 proto, u32 reqid) { @@ -971,7 +975,7 @@ void xfrm_state_insert(struct xfrm_state *x) EXPORT_SYMBOL(xfrm_state_insert); /* xfrm_state_lock is held */ -static struct xfrm_state *__find_acq_core(struct net *net, unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) +static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m, unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) { unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family); struct hlist_node *entry; @@ -1026,6 +1030,8 @@ static struct xfrm_state *__find_acq_core(struct net *net, unsigned short family x->props.family = family; x->props.mode = mode; x->props.reqid = reqid; + x->mark.v = m->v; + x->mark.m = m->m; x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; xfrm_state_hold(x); tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); @@ -1042,7 +1048,7 @@ static struct xfrm_state *__find_acq_core(struct net *net, unsigned short family return x; } -static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 seq); +static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq); int xfrm_state_add(struct xfrm_state *x) { @@ -1050,6 +1056,7 @@ int xfrm_state_add(struct xfrm_state *x) struct xfrm_state *x1, *to_put; int family; int err; + u32 mark = x->mark.v & x->mark.m; int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); family = x->props.family; @@ -1067,7 +1074,7 @@ int xfrm_state_add(struct xfrm_state *x) } if (use_spi && x->km.seq) { - x1 = __xfrm_find_acq_byseq(net, x->km.seq); + x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq); if (x1 && ((x1->id.proto != x->id.proto) || xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) { to_put = x1; @@ -1076,8 +1083,8 @@ int xfrm_state_add(struct xfrm_state *x) } if (use_spi && !x1) - x1 = __find_acq_core(net, family, x->props.mode, x->props.reqid, - x->id.proto, + x1 = __find_acq_core(net, &x->mark, family, x->props.mode, + x->props.reqid, x->id.proto, &x->id.daddr, &x->props.saddr, 0); __xfrm_state_bump_genids(x); @@ -1151,6 +1158,8 @@ static struct xfrm_state *xfrm_state_clone(struct 
xfrm_state *orig, int *errp) goto error; } + memcpy(&x->mark, &orig->mark, sizeof(x->mark)); + err = xfrm_init_state(x); if (err) goto error; @@ -1342,41 +1351,41 @@ int xfrm_state_check_expire(struct xfrm_state *x) EXPORT_SYMBOL(xfrm_state_check_expire); struct xfrm_state * -xfrm_state_lookup(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, - unsigned short family) +xfrm_state_lookup(struct net *net, u32 mark, xfrm_address_t *daddr, __be32 spi, + u8 proto, unsigned short family) { struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - x = __xfrm_state_lookup(net, daddr, spi, proto, family); + x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family); spin_unlock_bh(&xfrm_state_lock); return x; } EXPORT_SYMBOL(xfrm_state_lookup); struct xfrm_state * -xfrm_state_lookup_byaddr(struct net *net, +xfrm_state_lookup_byaddr(struct net *net, u32 mark, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) { struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - x = __xfrm_state_lookup_byaddr(net, daddr, saddr, proto, family); + x = __xfrm_state_lookup_byaddr(net, mark, daddr, saddr, proto, family); spin_unlock_bh(&xfrm_state_lock); return x; } EXPORT_SYMBOL(xfrm_state_lookup_byaddr); struct xfrm_state * -xfrm_find_acq(struct net *net, u8 mode, u32 reqid, u8 proto, +xfrm_find_acq(struct net *net, struct xfrm_mark *mark, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create, unsigned short family) { struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - x = __find_acq_core(net, family, mode, reqid, proto, daddr, saddr, create); + x = __find_acq_core(net, mark, family, mode, reqid, proto, daddr, saddr, create); spin_unlock_bh(&xfrm_state_lock); return x; @@ -1423,7 +1432,7 @@ EXPORT_SYMBOL(xfrm_state_sort); /* Silly enough, but I'm lazy to build resolution list */ -static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 seq) +static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq) { int i; @@ -1442,12 +1451,12 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 seq) return NULL; } -struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 seq) +struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq) { struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - x = __xfrm_find_acq_byseq(net, seq); + x = __xfrm_find_acq_byseq(net, mark, seq); spin_unlock_bh(&xfrm_state_lock); return x; } @@ -1474,6 +1483,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) int err = -ENOENT; __be32 minspi = htonl(low); __be32 maxspi = htonl(high); + u32 mark = x->mark.v & x->mark.m; spin_lock_bh(&x->lock); if (x->km.state == XFRM_STATE_DEAD) @@ -1486,7 +1496,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) err = -ENOENT; if (minspi == maxspi) { - x0 = xfrm_state_lookup(net, &x->id.daddr, minspi, x->id.proto, x->props.family); + x0 = xfrm_state_lookup(net, mark, &x->id.daddr, minspi, x->id.proto, x->props.family); if (x0) { xfrm_state_put(x0); goto unlock; @@ -1496,7 +1506,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) u32 spi = 0; for (h=0; hid.daddr, htonl(spi), x->id.proto, x->props.family); + x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family); if (x0 == NULL) { x->id.spi = htonl(spi); break; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index ee04e6bf0e54..331ae731080a 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -31,6 +31,9 @@ #include #endif 
+#define DUMMY_MARK 0 +static struct xfrm_mark dummy_mark = {0, 0}; + static inline int aead_len(struct xfrm_algo_aead *alg) { return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); @@ -530,7 +533,7 @@ static struct xfrm_state *xfrm_user_state_lookup(struct net *net, if (xfrm_id_proto_match(p->proto, IPSEC_PROTO_ANY)) { err = -ESRCH; - x = xfrm_state_lookup(net, &p->daddr, p->spi, p->proto, p->family); + x = xfrm_state_lookup(net, DUMMY_MARK, &p->daddr, p->spi, p->proto, p->family); } else { xfrm_address_t *saddr = NULL; @@ -541,7 +544,7 @@ static struct xfrm_state *xfrm_user_state_lookup(struct net *net, } err = -ESRCH; - x = xfrm_state_lookup_byaddr(net, &p->daddr, saddr, + x = xfrm_state_lookup_byaddr(net, DUMMY_MARK, &p->daddr, saddr, p->proto, p->family); } @@ -958,7 +961,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, x = NULL; if (p->info.seq) { - x = xfrm_find_acq_byseq(net, p->info.seq); + x = xfrm_find_acq_byseq(net, DUMMY_MARK, p->info.seq); if (x && xfrm_addr_cmp(&x->id.daddr, daddr, family)) { xfrm_state_put(x); x = NULL; @@ -966,7 +969,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, } if (!x) - x = xfrm_find_acq(net, p->info.mode, p->info.reqid, + x = xfrm_find_acq(net, &dummy_mark, p->info.mode, p->info.reqid, p->info.id.proto, daddr, &p->info.saddr, 1, family); @@ -1598,7 +1601,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, if (r_skb == NULL) return -ENOMEM; - x = xfrm_state_lookup(net, &id->daddr, id->spi, id->proto, id->family); + x = xfrm_state_lookup(net, DUMMY_MARK, &id->daddr, id->spi, id->proto, id->family); if (x == NULL) { kfree_skb(r_skb); return -ESRCH; @@ -1640,7 +1643,7 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, if (!(nlh->nlmsg_flags&NLM_F_REPLACE)) return err; - x = xfrm_state_lookup(net, &p->sa_id.daddr, p->sa_id.spi, p->sa_id.proto, p->sa_id.family); + x = xfrm_state_lookup(net, DUMMY_MARK, &p->sa_id.daddr, p->sa_id.spi, p->sa_id.proto, p->sa_id.family); if (x == NULL) return -ESRCH; @@ -1767,7 +1770,7 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_user_expire *ue = nlmsg_data(nlh); struct xfrm_usersa_info *p = &ue->state; - x = xfrm_state_lookup(net, &p->id.daddr, p->id.spi, p->id.proto, p->family); + x = xfrm_state_lookup(net, DUMMY_MARK, &p->id.daddr, p->id.spi, p->id.proto, p->family); err = -ENOENT; if (x == NULL) -- cgit v1.2.2 From 3d6acfa7641fd0a35f608b142f61e79f7ed8db43 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 22 Feb 2010 11:32:56 +0000 Subject: xfrm: SA lookups with mark Allow mark to be added to the SA lookup Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_state.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 9f8530356b86..17d5b96f2fc8 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -682,6 +682,8 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, xfrm_ad xfrm_addr_cmp(&x->id.daddr, daddr, family)) continue; + if ((mark & x->mark.m) != x->mark.v) + continue; xfrm_state_hold(x); return x; } @@ -702,6 +704,8 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, xfrm_addr_cmp(&x->props.saddr, saddr, family)) continue; + if ((mark & x->mark.m) != x->mark.v) + continue; xfrm_state_hold(x); return x; } @@ -794,6 +798,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == tmpl->reqid && + (mark & x->mark.m) == x->mark.v && !(x->props.flags & XFRM_STATE_WILDRECV) && xfrm_state_addr_check(x, daddr, saddr, family) && tmpl->mode == x->props.mode && @@ -809,6 +814,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h_wildcard, bydst) { if (x->props.family == family && x->props.reqid == tmpl->reqid && + (mark & x->mark.m) == x->mark.v && !(x->props.flags & XFRM_STATE_WILDRECV) && xfrm_state_addr_check(x, daddr, saddr, family) && tmpl->mode == x->props.mode && @@ -892,6 +898,7 @@ xfrm_stateonly_find(struct net *net, u32 mark, hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == reqid && + (mark & x->mark.m) == x->mark.v && !(x->props.flags & XFRM_STATE_WILDRECV) && xfrm_state_addr_check(x, daddr, saddr, family) && mode == x->props.mode && @@ -954,11 +961,13 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew) struct xfrm_state *x; struct hlist_node *entry; unsigned int h; + u32 mark = xnew->mark.v & xnew->mark.m; h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family); hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == reqid && + (mark & x->mark.m) == x->mark.v && !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) && !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family)) x->genid = xfrm_state_genid; @@ -980,6 +989,7 @@ static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m, unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family); struct hlist_node *entry; struct xfrm_state *x; + u32 mark = m->v & m->m; hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { if (x->props.reqid != reqid || @@ -988,6 +998,7 @@ static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m, x->km.state != XFRM_STATE_ACQ || x->id.spi != 0 || x->id.proto != proto || + (mark & x->mark.m) != x->mark.v || xfrm_addr_cmp(&x->id.daddr, daddr, family) || xfrm_addr_cmp(&x->props.saddr, saddr, family)) continue; @@ -1442,6 +1453,7 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 s hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) { if (x->km.seq == seq && + (mark & x->mark.m) == x->mark.v && x->km.state == XFRM_STATE_ACQ) { xfrm_state_hold(x); return x; -- cgit v1.2.2 From 8ca2e93b557f2a0b35f7769038abf600177e1122 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 22 Feb 2010 11:32:57 +0000 Subject: xfrm: SP lookups signature with mark 
pass mark to all SP lookups to prepare them for when we add code to have them search. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/key/af_key.c | 4 ++-- net/xfrm/xfrm_policy.c | 8 ++++---- net/xfrm/xfrm_user.c | 10 +++++----- 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index aae3cd86ccd7..368707882647 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2326,7 +2326,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg return err; } - xp = xfrm_policy_bysel_ctx(net, XFRM_POLICY_TYPE_MAIN, + xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, XFRM_POLICY_TYPE_MAIN, pol->sadb_x_policy_dir - 1, &sel, pol_ctx, 1, &err); security_xfrm_policy_free(pol_ctx); @@ -2574,7 +2574,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h return -EINVAL; delete = (hdr->sadb_msg_type == SADB_X_SPDDELETE2); - xp = xfrm_policy_byid(net, XFRM_POLICY_TYPE_MAIN, + xp = xfrm_policy_byid(net, DUMMY_MARK, XFRM_POLICY_TYPE_MAIN, dir, pol->sadb_x_policy_id, delete, &err); if (xp == NULL) return -ENOENT; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d6eb16d75243..e67d3ca6e657 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -635,8 +635,8 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) } EXPORT_SYMBOL(xfrm_policy_insert); -struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u8 type, int dir, - struct xfrm_selector *sel, +struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, + int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete, int *err) { @@ -676,8 +676,8 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u8 type, int dir, } EXPORT_SYMBOL(xfrm_policy_bysel_ctx); -struct xfrm_policy *xfrm_policy_byid(struct net *net, u8 type, int dir, u32 id, - int delete, int *err) +struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, + int dir, u32 id, int delete, int *err) { struct xfrm_policy *pol, *ret; struct hlist_head *chain; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 331ae731080a..02a67b4a64dd 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1457,7 +1457,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, return err; if (p->index) - xp = xfrm_policy_byid(net, type, p->dir, p->index, delete, &err); + xp = xfrm_policy_byid(net, DUMMY_MARK, type, p->dir, p->index, delete, &err); else { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_sec_ctx *ctx; @@ -1474,8 +1474,8 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; } - xp = xfrm_policy_bysel_ctx(net, type, p->dir, &p->sel, ctx, - delete, &err); + xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, type, p->dir, + &p->sel, ctx, delete, &err); security_xfrm_policy_free(ctx); } if (xp == NULL) @@ -1712,7 +1712,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, return err; if (p->index) - xp = xfrm_policy_byid(net, type, p->dir, p->index, 0, &err); + xp = xfrm_policy_byid(net, DUMMY_MARK, type, p->dir, p->index, 0, &err); else { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_sec_ctx *ctx; @@ -1729,7 +1729,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; } - xp = xfrm_policy_bysel_ctx(net, type, p->dir, &p->sel, ctx, 0, &err); + xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, type, p->dir, &p->sel, ctx, 
0, &err); security_xfrm_policy_free(ctx); } if (xp == NULL) -- cgit v1.2.2 From 34f8d8846f69f3b5bc3916ba9145e4eebae9394e Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 22 Feb 2010 11:32:58 +0000 Subject: xfrm: SP lookups with mark Allow mark to be used when doing SP lookup Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index e67d3ca6e657..2a6e64652654 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -556,6 +556,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) struct hlist_head *chain; struct hlist_node *entry, *newpos; struct dst_entry *gc_list; + u32 mark = policy->mark.v & policy->mark.m; write_lock_bh(&xfrm_policy_lock); chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); @@ -564,6 +565,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) hlist_for_each_entry(pol, entry, chain, bydst) { if (pol->type == policy->type && !selector_cmp(&pol->selector, &policy->selector) && + (mark & pol->mark.m) == pol->mark.v && xfrm_sec_ctx_match(pol->security, policy->security) && !WARN_ON(delpol)) { if (excl) { @@ -650,6 +652,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, ret = NULL; hlist_for_each_entry(pol, entry, chain, bydst) { if (pol->type == type && + (mark & pol->mark.m) == pol->mark.v && !selector_cmp(sel, &pol->selector) && xfrm_sec_ctx_match(ctx, pol->security)) { xfrm_pol_hold(pol); @@ -692,7 +695,8 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, chain = net->xfrm.policy_byidx + idx_hash(net, id); ret = NULL; hlist_for_each_entry(pol, entry, chain, byidx) { - if (pol->type == type && pol->index == id) { + if (pol->type == type && pol->index == id && + (mark & pol->mark.m) == pol->mark.v) { xfrm_pol_hold(pol); if (delete) { *err = security_xfrm_policy_delete( @@ -916,6 +920,7 @@ static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl, int match, ret = -ESRCH; if (pol->family != family || + (fl->mark & pol->mark.m) != pol->mark.v || pol->type != type) return ret; @@ -1040,6 +1045,10 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc int err = 0; if (match) { + if ((sk->sk_mark & pol->mark.m) != pol->mark.v) { + pol = NULL; + goto out; + } err = security_xfrm_policy_lookup(pol->security, fl->secid, policy_to_flow_dir(dir)); @@ -1052,6 +1061,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc } else pol = NULL; } +out: read_unlock_bh(&xfrm_policy_lock); return pol; } -- cgit v1.2.2 From 6f26b61e177e57a41795355f6222cf817f1212dc Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 22 Feb 2010 11:32:59 +0000 Subject: xfrm: Allow user space config of SAD mark Add ability for netlink userspace to manipulate the SAD and manipulate the mark, retrieve it and get events with a defined mark. MIGRATE may be added later. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_user.c | 72 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 57 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 02a67b4a64dd..303092f7088b 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -32,7 +32,6 @@ #endif #define DUMMY_MARK 0 -static struct xfrm_mark dummy_mark = {0, 0}; static inline int aead_len(struct xfrm_algo_aead *alg) { @@ -449,6 +448,8 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, goto error; } + xfrm_mark_get(attrs, &x->mark); + err = xfrm_init_state(x); if (err) goto error; @@ -529,11 +530,13 @@ static struct xfrm_state *xfrm_user_state_lookup(struct net *net, int *errp) { struct xfrm_state *x = NULL; + struct xfrm_mark m; int err; + u32 mark = xfrm_mark_get(attrs, &m); if (xfrm_id_proto_match(p->proto, IPSEC_PROTO_ANY)) { err = -ESRCH; - x = xfrm_state_lookup(net, DUMMY_MARK, &p->daddr, p->spi, p->proto, p->family); + x = xfrm_state_lookup(net, mark, &p->daddr, p->spi, p->proto, p->family); } else { xfrm_address_t *saddr = NULL; @@ -544,7 +547,8 @@ static struct xfrm_state *xfrm_user_state_lookup(struct net *net, } err = -ESRCH; - x = xfrm_state_lookup_byaddr(net, DUMMY_MARK, &p->daddr, saddr, + x = xfrm_state_lookup_byaddr(net, mark, + &p->daddr, saddr, p->proto, p->family); } @@ -686,6 +690,9 @@ static int copy_to_user_state_extra(struct xfrm_state *x, if (x->encap) NLA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); + if (xfrm_mark_put(skb, &x->mark)) + goto nla_put_failure; + if (x->security && copy_sec_ctx(x->security, skb) < 0) goto nla_put_failure; @@ -950,6 +957,8 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, xfrm_address_t *daddr; int family; int err; + u32 mark; + struct xfrm_mark m; p = nlmsg_data(nlh); err = verify_userspi_info(p); @@ -960,8 +969,10 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, daddr = &p->info.id.daddr; x = NULL; + + mark = xfrm_mark_get(attrs, &m); if (p->info.seq) { - x = xfrm_find_acq_byseq(net, DUMMY_MARK, p->info.seq); + x = xfrm_find_acq_byseq(net, mark, p->info.seq); if (x && xfrm_addr_cmp(&x->id.daddr, daddr, family)) { xfrm_state_put(x); x = NULL; @@ -969,7 +980,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, } if (!x) - x = xfrm_find_acq(net, &dummy_mark, p->info.mode, p->info.reqid, + x = xfrm_find_acq(net, &m, p->info.mode, p->info.reqid, p->info.id.proto, daddr, &p->info.saddr, 1, family); @@ -1474,8 +1485,8 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; } - xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, type, p->dir, - &p->sel, ctx, delete, &err); + xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, type, p->dir, &p->sel, + ctx, delete, &err); security_xfrm_policy_free(ctx); } if (xp == NULL) @@ -1547,6 +1558,7 @@ static inline size_t xfrm_aevent_msgsize(void) return NLMSG_ALIGN(sizeof(struct xfrm_aevent_id)) + nla_total_size(sizeof(struct xfrm_replay_state)) + nla_total_size(sizeof(struct xfrm_lifetime_cur)) + + nla_total_size(sizeof(struct xfrm_mark)) + nla_total_size(4) /* XFRM_AE_RTHR */ + nla_total_size(4); /* XFRM_AE_ETHR */ } @@ -1579,6 +1591,9 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_eve NLA_PUT_U32(skb, XFRMA_ETIMER_THRESH, x->replay_maxage * 10 / HZ); + if (xfrm_mark_put(skb, &x->mark)) + goto nla_put_failure; + return nlmsg_end(skb, nlh); nla_put_failure: @@ -1594,6 +1609,8 @@ static int xfrm_get_ae(struct 
sk_buff *skb, struct nlmsghdr *nlh, struct sk_buff *r_skb; int err; struct km_event c; + u32 mark; + struct xfrm_mark m; struct xfrm_aevent_id *p = nlmsg_data(nlh); struct xfrm_usersa_id *id = &p->sa_id; @@ -1601,7 +1618,9 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, if (r_skb == NULL) return -ENOMEM; - x = xfrm_state_lookup(net, DUMMY_MARK, &id->daddr, id->spi, id->proto, id->family); + mark = xfrm_mark_get(attrs, &m); + + x = xfrm_state_lookup(net, mark, &id->daddr, id->spi, id->proto, id->family); if (x == NULL) { kfree_skb(r_skb); return -ESRCH; @@ -1632,6 +1651,8 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_state *x; struct km_event c; int err = - EINVAL; + u32 mark = 0; + struct xfrm_mark m; struct xfrm_aevent_id *p = nlmsg_data(nlh); struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; @@ -1643,7 +1664,9 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, if (!(nlh->nlmsg_flags&NLM_F_REPLACE)) return err; - x = xfrm_state_lookup(net, DUMMY_MARK, &p->sa_id.daddr, p->sa_id.spi, p->sa_id.proto, p->sa_id.family); + mark = xfrm_mark_get(attrs, &m); + + x = xfrm_state_lookup(net, mark, &p->sa_id.daddr, p->sa_id.spi, p->sa_id.proto, p->sa_id.family); if (x == NULL) return -ESRCH; @@ -1729,7 +1752,8 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; } - xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, type, p->dir, &p->sel, ctx, 0, &err); + xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, type, p->dir, + &p->sel, ctx, 0, &err); security_xfrm_policy_free(ctx); } if (xp == NULL) @@ -1769,8 +1793,10 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, int err; struct xfrm_user_expire *ue = nlmsg_data(nlh); struct xfrm_usersa_info *p = &ue->state; + struct xfrm_mark m; + u32 mark = xfrm_mark_get(attrs, &m);; - x = xfrm_state_lookup(net, DUMMY_MARK, &p->id.daddr, p->id.spi, p->id.proto, p->family); + x = xfrm_state_lookup(net, mark, &p->id.daddr, p->id.spi, p->id.proto, p->family); err = -ENOENT; if (x == NULL) @@ -1804,6 +1830,7 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_user_tmpl *ut; int i; struct nlattr *rt = attrs[XFRMA_TMPL]; + struct xfrm_mark mark; struct xfrm_user_acquire *ua = nlmsg_data(nlh); struct xfrm_state *x = xfrm_state_alloc(net); @@ -1812,6 +1839,8 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, if (!x) goto nomem; + xfrm_mark_get(attrs, &mark); + err = verify_newpolicy_info(&ua->policy); if (err) goto bad_policy; @@ -1824,7 +1853,8 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, memcpy(&x->id, &ua->id, sizeof(ua->id)); memcpy(&x->props.saddr, &ua->saddr, sizeof(ua->saddr)); memcpy(&x->sel, &ua->sel, sizeof(ua->sel)); - + xp->mark.m = x->mark.m = mark.m; + xp->mark.v = x->mark.v = mark.v; ut = nla_data(rt); /* extract the templates and for each call km_key */ for (i = 0; i < xp->xfrm_nr; i++, ut++) { @@ -2084,6 +2114,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_POLICY_TYPE] = { .len = sizeof(struct xfrm_userpolicy_type)}, [XFRMA_MIGRATE] = { .len = sizeof(struct xfrm_user_migrate) }, [XFRMA_KMADDRESS] = { .len = sizeof(struct xfrm_user_kmaddress) }, + [XFRMA_MARK] = { .len = sizeof(struct xfrm_mark) }, }; static struct xfrm_link { @@ -2163,7 +2194,8 @@ static void xfrm_netlink_rcv(struct sk_buff *skb) static inline size_t xfrm_expire_msgsize(void) { - return NLMSG_ALIGN(sizeof(struct 
xfrm_user_expire)); + return NLMSG_ALIGN(sizeof(struct xfrm_user_expire)) + + nla_total_size(sizeof(struct xfrm_mark)); } static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_event *c) @@ -2179,7 +2211,13 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_eve copy_to_user_state(x, &ue->state); ue->hard = (c->data.hard != 0) ? 1 : 0; + if (xfrm_mark_put(skb, &x->mark)) + goto nla_put_failure; + return nlmsg_end(skb, nlh); + +nla_put_failure: + return -EMSGSIZE; } static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c) @@ -2191,8 +2229,10 @@ static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c) if (skb == NULL) return -ENOMEM; - if (build_expire(skb, x, c) < 0) - BUG(); + if (build_expire(skb, x, c) < 0) { + kfree_skb(skb); + return -EMSGSIZE; + } return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); } @@ -2280,6 +2320,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c) if (c->event == XFRM_MSG_DELSA) { len += nla_total_size(headlen); headlen = sizeof(*id); + len += nla_total_size(sizeof(struct xfrm_mark)); } len += NLMSG_ALIGN(headlen); @@ -2350,6 +2391,7 @@ static inline size_t xfrm_acquire_msgsize(struct xfrm_state *x, { return NLMSG_ALIGN(sizeof(struct xfrm_user_acquire)) + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr) + + nla_total_size(sizeof(struct xfrm_mark)) + nla_total_size(xfrm_user_sec_ctx_size(x->security)) + userpolicy_type_attrsize(); } -- cgit v1.2.2 From 295fae568885a93c39a0e29a9455054608b6cc0e Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 22 Feb 2010 11:33:00 +0000 Subject: xfrm: Allow user space manipulation of SPD mark Add ability for netlink userspace to manipulate the SPD and manipulate the mark, retrieve it and get events with a defined mark, etc. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. 
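The netlink side of the mark changes follows a common pairing: reserve room for the optional attribute in the *_msgsize() helper, then emit it while building the message. Below is a reduced, kernel-context sketch of that pairing; example_hdr, example_mark, and EXAMPLE_ATTR_MARK are placeholders, and only the long-standing NLMSG_ALIGN(), nla_total_size(), and nla_put() helpers are assumed.

#include <net/netlink.h>

struct example_hdr {
        __u32 id;
};

struct example_mark {
        __u32 v, m;
};

#define EXAMPLE_ATTR_MARK 1

static inline size_t example_msgsize(void)
{
        /* base payload plus room for one optional mark attribute */
        return NLMSG_ALIGN(sizeof(struct example_hdr)) +
               nla_total_size(sizeof(struct example_mark));
}

static int example_put_mark(struct sk_buff *skb, const struct example_mark *m)
{
        /* emit the attribute only when a mark is actually configured */
        if (m->v | m->m)
                return nla_put(skb, EXAMPLE_ATTR_MARK, sizeof(*m), m);
        return 0;
}

Accounting for the attribute in the size helper matters because the nlmsg_new() allocations above are sized from it; an unreserved attribute is what ends up in the nla_put_failure/-EMSGSIZE paths shown in these hunks.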
Miller --- net/xfrm/xfrm_user.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 303092f7088b..6106b72826d3 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -31,8 +31,6 @@ #include #endif -#define DUMMY_MARK 0 - static inline int aead_len(struct xfrm_algo_aead *alg) { return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); @@ -1234,6 +1232,8 @@ static struct xfrm_policy *xfrm_policy_construct(struct net *net, struct xfrm_us if (err) goto error; + xfrm_mark_get(attrs, &xp->mark); + return xp; error: *errp = err; @@ -1380,10 +1380,13 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr goto nlmsg_failure; if (copy_to_user_policy_type(xp->type, skb) < 0) goto nlmsg_failure; + if (xfrm_mark_put(skb, &xp->mark)) + goto nla_put_failure; nlmsg_end(skb, nlh); return 0; +nla_put_failure: nlmsg_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; @@ -1455,6 +1458,8 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, int err; struct km_event c; int delete; + struct xfrm_mark m; + u32 mark = xfrm_mark_get(attrs, &m); p = nlmsg_data(nlh); delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY; @@ -1468,7 +1473,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, return err; if (p->index) - xp = xfrm_policy_byid(net, DUMMY_MARK, type, p->dir, p->index, delete, &err); + xp = xfrm_policy_byid(net, mark, type, p->dir, p->index, delete, &err); else { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_sec_ctx *ctx; @@ -1485,7 +1490,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; } - xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, type, p->dir, &p->sel, + xp = xfrm_policy_bysel_ctx(net, mark, type, p->dir, &p->sel, ctx, delete, &err); security_xfrm_policy_free(ctx); } @@ -1729,13 +1734,15 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_userpolicy_info *p = &up->pol; u8 type = XFRM_POLICY_TYPE_MAIN; int err = -ENOENT; + struct xfrm_mark m; + u32 mark = xfrm_mark_get(attrs, &m); err = copy_from_user_policy_type(&type, attrs); if (err) return err; if (p->index) - xp = xfrm_policy_byid(net, DUMMY_MARK, type, p->dir, p->index, 0, &err); + xp = xfrm_policy_byid(net, mark, type, p->dir, p->index, 0, &err); else { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_sec_ctx *ctx; @@ -1752,7 +1759,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; } - xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, type, p->dir, + xp = xfrm_policy_bysel_ctx(net, mark, type, p->dir, &p->sel, ctx, 0, &err); security_xfrm_policy_free(ctx); } @@ -2424,9 +2431,12 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, goto nlmsg_failure; if (copy_to_user_policy_type(xp->type, skb) < 0) goto nlmsg_failure; + if (xfrm_mark_put(skb, &xp->mark)) + goto nla_put_failure; return nlmsg_end(skb, nlh); +nla_put_failure: nlmsg_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; @@ -2513,6 +2523,7 @@ static inline size_t xfrm_polexpire_msgsize(struct xfrm_policy *xp) return NLMSG_ALIGN(sizeof(struct xfrm_user_polexpire)) + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr) + nla_total_size(xfrm_user_sec_ctx_size(xp->security)) + + nla_total_size(sizeof(struct xfrm_mark)) + userpolicy_type_attrsize(); } @@ -2535,10 +2546,13 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, goto 
nlmsg_failure; if (copy_to_user_policy_type(xp->type, skb) < 0) goto nlmsg_failure; + if (xfrm_mark_put(skb, &xp->mark)) + goto nla_put_failure; upe->hard = !!hard; return nlmsg_end(skb, nlh); +nla_put_failure: nlmsg_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; @@ -2575,6 +2589,7 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * headlen = sizeof(*id); } len += userpolicy_type_attrsize(); + len += nla_total_size(sizeof(struct xfrm_mark)); len += NLMSG_ALIGN(headlen); skb = nlmsg_new(len, GFP_ATOMIC); @@ -2610,10 +2625,14 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * if (copy_to_user_policy_type(xp->type, skb) < 0) goto nlmsg_failure; + if (xfrm_mark_put(skb, &xp->mark)) + goto nla_put_failure; + nlmsg_end(skb, nlh); return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); +nla_put_failure: nlmsg_failure: kfree_skb(skb); return -1; -- cgit v1.2.2 From 2c08522e5d2f0af2d6f05be558946dcbf8173683 Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Tue, 23 Feb 2010 14:55:21 +0100 Subject: netfilter: xt_recent: fix buffer overflow e->index overflows e->stamps[] every ip_pkt_list_tot packets. Consider the case when ip_pkt_list_tot==1; the first packet received is stored in e->stamps[0] and e->index is initialized to 1. The next received packet timestamp is then stored at e->stamps[1] in recent_entry_update(), a buffer overflow because the maximum e->stamps[] index is 0. Signed-off-by: Tim Gardner Cc: stable@kernel.org Signed-off-by: Patrick McHardy --- net/netfilter/xt_recent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 132cfaa84cdc..1278f0aa7434 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -177,10 +177,10 @@ recent_entry_init(struct recent_table *t, const union nf_inet_addr *addr, static void recent_entry_update(struct recent_table *t, struct recent_entry *e) { + e->index %= ip_pkt_list_tot; e->stamps[e->index++] = jiffies; if (e->index > e->nstamps) e->nstamps = e->index; - e->index %= ip_pkt_list_tot; list_move_tail(&e->lru_list, &t->lru_list); } -- cgit v1.2.2 From 8ccb92ad41cb311e52ad1b1fe77992c7f47a3b63 Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Tue, 23 Feb 2010 14:59:12 +0100 Subject: netfilter: xt_recent: fix false match A rule with a zero hit_count will always match. Signed-off-by: Tim Gardner Cc: stable@kernel.org Signed-off-by: Patrick McHardy --- net/netfilter/xt_recent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 1278f0aa7434..7073dbb8100c 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -267,7 +267,7 @@ recent_mt(const struct sk_buff *skb, const struct xt_match_param *par) for (i = 0; i < e->nstamps; i++) { if (info->seconds && time_after(time, e->stamps[i])) continue; - if (++hits >= info->hit_count) { + if (info->hit_count && ++hits >= info->hit_count) { ret = !ret; break; } -- cgit v1.2.2 From fb977e2ca607a7e74946a1de798f474d1b80b9d6 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Tue, 23 Feb 2010 15:09:53 -0800 Subject: xfrm: clone mark when cloning policy When we clone the SP, we should also clone the mark. Useful for socket based SPs. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. 
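To make the xt_recent overflow fix above concrete: the safe pattern is to reduce the ring index modulo the table size before using it for the store, not afterwards. A self-contained sketch of that ordering, with example_* names that are not the kernel's:

#include <stddef.h>
#include <time.h>

#define EXAMPLE_PKT_LIST_TOT 1  /* the ip_pkt_list_tot == 1 case from the changelog */

struct example_entry {
        time_t stamps[EXAMPLE_PKT_LIST_TOT];
        size_t index;
        size_t nstamps;
};

static void example_entry_update(struct example_entry *e)
{
        e->index %= EXAMPLE_PKT_LIST_TOT;       /* wrap before using the index */
        e->stamps[e->index++] = time(NULL);     /* so this store stays in bounds */
        if (e->index > e->nstamps)
                e->nstamps = e->index;
}

With the old order the first wrap only happened after one out-of-bounds store, which is exactly the single-timestamp case described in the changelog.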
Miller --- net/xfrm/xfrm_policy.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 2a6e64652654..34a5ef8316e7 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1154,6 +1154,7 @@ static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) } newp->lft = old->lft; newp->curlft = old->curlft; + newp->mark = old->mark; newp->action = old->action; newp->flags = old->flags; newp->xfrm_nr = old->xfrm_nr; -- cgit v1.2.2 From c4d49794ff2838038fd9756eae39c39a5a685833 Mon Sep 17 00:00:00 2001 From: Ajit Khaparde Date: Tue, 16 Feb 2010 20:25:43 +0000 Subject: net: bug fix for vlan + gro issue Traffic (tcp) doesnot start on a vlan interface when gro is enabled. Even the tcp handshake was not taking place. This is because, the eth_type_trans call before the netif_receive_skb in napi_gro_finish() resets the skb->dev to napi->dev from the previously set vlan netdev interface. This causes the ip_route_input to drop the incoming packet considering it as a packet coming from a martian source. I could repro this on 2.6.32.7 (stable) and 2.6.33-rc7. With this fix, the traffic starts and the test runs fine on both vlan and non-vlan interfaces. CC: Herbert Xu CC: Patrick McHardy Signed-off-by: Ajit Khaparde Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index be9924f60ec3..ec874218b206 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2761,7 +2761,7 @@ gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, switch (ret) { case GRO_NORMAL: case GRO_HELD: - skb->protocol = eth_type_trans(skb, napi->dev); + skb->protocol = eth_type_trans(skb, skb->dev); if (ret == GRO_HELD) skb_gro_pull(skb, -ETH_HLEN); -- cgit v1.2.2 From 72b2b1dd77e8feb0b7c0b26dee58f2a1e2c9828c Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 24 Feb 2010 18:32:59 +0100 Subject: netfilter: xtables: replace XT_ENTRY_ITERATE macro The macro is replaced by a list.h-like foreach loop. This makes the code much more inspectable. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/arp_tables.c | 151 ++++++++++++++++++++++++------------- net/ipv4/netfilter/ip_tables.c | 160 ++++++++++++++++++++++++++-------------- net/ipv6/netfilter/ip6_tables.c | 160 ++++++++++++++++++++++++++-------------- 3 files changed, 306 insertions(+), 165 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 4db5c1ece0f9..f7338869fc4c 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -641,8 +641,9 @@ static int translate_table(const char *name, const unsigned int *hook_entries, const unsigned int *underflows) { + struct arpt_entry *iter; unsigned int i; - int ret; + int ret = 0; newinfo->size = size; newinfo->number = number; @@ -657,12 +658,13 @@ static int translate_table(const char *name, i = 0; /* Walk through entries, checking offsets. 
*/ - ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size, - check_entry_size_and_hooks, - newinfo, - entry0, - entry0 + size, - hook_entries, underflows, valid_hooks, &i); + xt_entry_foreach(iter, entry0, newinfo->size) { + ret = check_entry_size_and_hooks(iter, newinfo, entry0, + entry0 + size, hook_entries, underflows, + valid_hooks, &i); + if (ret != 0) + break; + } duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); if (ret != 0) return ret; @@ -697,12 +699,16 @@ static int translate_table(const char *name, /* Finally, each sanity check must pass */ i = 0; - ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size, - find_check_entry, name, size, &i); + xt_entry_foreach(iter, entry0, newinfo->size) { + ret = find_check_entry(iter, name, size, &i); + if (ret != 0) + break; + } if (ret != 0) { - ARPT_ENTRY_ITERATE(entry0, newinfo->size, - cleanup_entry, &i); + xt_entry_foreach(iter, entry0, newinfo->size) + if (cleanup_entry(iter, &i) != 0) + break; return ret; } @@ -739,6 +745,7 @@ static inline int set_entry_to_counter(const struct arpt_entry *e, static void get_counters(const struct xt_table_info *t, struct xt_counters counters[]) { + struct arpt_entry *iter; unsigned int cpu; unsigned int i; unsigned int curcpu; @@ -754,22 +761,18 @@ static void get_counters(const struct xt_table_info *t, curcpu = smp_processor_id(); i = 0; - ARPT_ENTRY_ITERATE(t->entries[curcpu], - t->size, - set_entry_to_counter, - counters, - &i); + xt_entry_foreach(iter, t->entries[curcpu], t->size) + if (set_entry_to_counter(iter, counters, &i) != 0) + break; for_each_possible_cpu(cpu) { if (cpu == curcpu) continue; i = 0; xt_info_wrlock(cpu); - ARPT_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_entry_to_counter, - counters, - &i); + xt_entry_foreach(iter, t->entries[cpu], t->size) + if (add_entry_to_counter(iter, counters, &i) != 0) + break; xt_info_wrunlock(cpu); } local_bh_enable(); @@ -899,7 +902,9 @@ static int compat_calc_entry(const struct arpt_entry *e, static int compat_table_info(const struct xt_table_info *info, struct xt_table_info *newinfo) { + struct arpt_entry *iter; void *loc_cpu_entry; + int ret = 0; if (!newinfo || !info) return -EINVAL; @@ -908,9 +913,12 @@ static int compat_table_info(const struct xt_table_info *info, memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; loc_cpu_entry = info->entries[raw_smp_processor_id()]; - return ARPT_ENTRY_ITERATE(loc_cpu_entry, info->size, - compat_calc_entry, info, loc_cpu_entry, - newinfo); + xt_entry_foreach(iter, loc_cpu_entry, info->size) { + ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); + if (ret != 0) + break; + } + return ret; } #endif @@ -1025,6 +1033,7 @@ static int __do_replace(struct net *net, const char *name, struct xt_table_info *oldinfo; struct xt_counters *counters; void *loc_cpu_old_entry; + struct arpt_entry *iter; ret = 0; counters = vmalloc_node(num_counters * sizeof(struct xt_counters), @@ -1068,8 +1077,9 @@ static int __do_replace(struct net *net, const char *name, /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; - ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, - NULL); + xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) + if (cleanup_entry(iter, NULL) != 0) + break; xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, @@ -1095,6 +1105,7 @@ static int do_replace(struct net *net, const void __user *user, struct arpt_replace tmp; struct xt_table_info *newinfo; void 
*loc_cpu_entry; + struct arpt_entry *iter; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; @@ -1130,7 +1141,9 @@ static int do_replace(struct net *net, const void __user *user, return 0; free_newinfo_untrans: - ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + if (cleanup_entry(iter, NULL) != 0) + break; free_newinfo: xt_free_table_info(newinfo); return ret; @@ -1163,6 +1176,7 @@ static int do_add_counters(struct net *net, const void __user *user, const struct xt_table_info *private; int ret = 0; void *loc_cpu_entry; + struct arpt_entry *iter; #ifdef CONFIG_COMPAT struct compat_xt_counters_info compat_tmp; @@ -1220,11 +1234,9 @@ static int do_add_counters(struct net *net, const void __user *user, curcpu = smp_processor_id(); loc_cpu_entry = private->entries[curcpu]; xt_info_wrlock(curcpu); - ARPT_ENTRY_ITERATE(loc_cpu_entry, - private->size, - add_counter_to_entry, - paddc, - &i); + xt_entry_foreach(iter, loc_cpu_entry, private->size) + if (add_counter_to_entry(iter, paddc, &i) != 0) + break; xt_info_wrunlock(curcpu); unlock_up_free: local_bh_enable(); @@ -1388,8 +1400,10 @@ static int translate_compat_table(const char *name, unsigned int i, j; struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; + struct compat_arpt_entry *iter0; + struct arpt_entry *iter1; unsigned int size; - int ret; + int ret = 0; info = *pinfo; entry0 = *pentry0; @@ -1406,11 +1420,13 @@ static int translate_compat_table(const char *name, j = 0; xt_compat_lock(NFPROTO_ARP); /* Walk through entries, checking offsets. */ - ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, - check_compat_entry_size_and_hooks, - info, &size, entry0, - entry0 + total_size, - hook_entries, underflows, &j, name); + xt_entry_foreach(iter0, entry0, total_size) { + ret = check_compat_entry_size_and_hooks(iter0, info, &size, + entry0, entry0 + total_size, hook_entries, underflows, + &j, name); + if (ret != 0) + break; + } if (ret != 0) goto out_unlock; @@ -1451,9 +1467,12 @@ static int translate_compat_table(const char *name, entry1 = newinfo->entries[raw_smp_processor_id()]; pos = entry1; size = total_size; - ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, - compat_copy_entry_from_user, - &pos, &size, name, newinfo, entry1); + xt_entry_foreach(iter0, entry0, total_size) { + ret = compat_copy_entry_from_user(iter0, &pos, + &size, name, newinfo, entry1); + if (ret != 0) + break; + } xt_compat_flush_offsets(NFPROTO_ARP); xt_compat_unlock(NFPROTO_ARP); if (ret) @@ -1464,13 +1483,28 @@ static int translate_compat_table(const char *name, goto free_newinfo; i = 0; - ret = ARPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry, - name, &i); + xt_entry_foreach(iter1, entry1, newinfo->size) { + ret = compat_check_entry(iter1, name, &i); + if (ret != 0) + break; + } if (ret) { + /* + * The first i matches need cleanup_entry (calls ->destroy) + * because they had called ->check already. The other j-i + * entries need only release. 
+ */ + int skip = i; j -= i; - COMPAT_ARPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i, - compat_release_entry, &j); - ARPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i); + xt_entry_foreach(iter0, entry0, newinfo->size) { + if (skip-- > 0) + continue; + if (compat_release_entry(iter0, &j) != 0) + break; + } + xt_entry_foreach(iter1, entry1, newinfo->size) + if (cleanup_entry(iter1, &i) != 0) + break; xt_free_table_info(newinfo); return ret; } @@ -1488,7 +1522,9 @@ static int translate_compat_table(const char *name, free_newinfo: xt_free_table_info(newinfo); out: - COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j); + xt_entry_foreach(iter0, entry0, total_size) + if (compat_release_entry(iter0, &j) != 0) + break; return ret; out_unlock: xt_compat_flush_offsets(NFPROTO_ARP); @@ -1515,6 +1551,7 @@ static int compat_do_replace(struct net *net, void __user *user, struct compat_arpt_replace tmp; struct xt_table_info *newinfo; void *loc_cpu_entry; + struct arpt_entry *iter; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; @@ -1552,7 +1589,9 @@ static int compat_do_replace(struct net *net, void __user *user, return 0; free_newinfo_untrans: - ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + if (cleanup_entry(iter, NULL) != 0) + break; free_newinfo: xt_free_table_info(newinfo); return ret; @@ -1636,6 +1675,7 @@ static int compat_copy_entries_to_user(unsigned int total_size, int ret = 0; void *loc_cpu_entry; unsigned int i = 0; + struct arpt_entry *iter; counters = alloc_counters(table); if (IS_ERR(counters)) @@ -1645,9 +1685,12 @@ static int compat_copy_entries_to_user(unsigned int total_size, loc_cpu_entry = private->entries[raw_smp_processor_id()]; pos = userptr; size = total_size; - ret = ARPT_ENTRY_ITERATE(loc_cpu_entry, total_size, - compat_copy_entry_to_user, - &pos, &size, counters, &i); + xt_entry_foreach(iter, loc_cpu_entry, total_size) { + ret = compat_copy_entry_to_user(iter, &pos, + &size, counters, &i); + if (ret != 0) + break; + } vfree(counters); return ret; } @@ -1843,13 +1886,15 @@ void arpt_unregister_table(struct xt_table *table) struct xt_table_info *private; void *loc_cpu_entry; struct module *table_owner = table->me; + struct arpt_entry *iter; private = xt_unregister_table(table); /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; - ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size, - cleanup_entry, NULL); + xt_entry_foreach(iter, loc_cpu_entry, private->size) + if (cleanup_entry(iter, NULL) != 0) + break; if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index e94c18bdfc68..b43280aad8a2 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -288,6 +288,7 @@ static void trace_packet(const struct sk_buff *skb, const void *table_base; const struct ipt_entry *root; const char *hookname, *chainname, *comment; + const struct ipt_entry *iter; unsigned int rulenum = 0; table_base = private->entries[smp_processor_id()]; @@ -296,10 +297,10 @@ static void trace_packet(const struct sk_buff *skb, hookname = chainname = hooknames[hook]; comment = comments[NF_IP_TRACE_COMMENT_RULE]; - IPT_ENTRY_ITERATE(root, - private->size - private->hook_entry[hook], - get_chainname_rulenum, - e, hookname, &chainname, &comment, &rulenum); + xt_entry_foreach(iter, 
root, private->size - private->hook_entry[hook]) + if (get_chainname_rulenum(iter, e, hookname, + &chainname, &comment, &rulenum) != 0) + break; nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", @@ -826,8 +827,9 @@ translate_table(struct net *net, const unsigned int *hook_entries, const unsigned int *underflows) { + struct ipt_entry *iter; unsigned int i; - int ret; + int ret = 0; newinfo->size = size; newinfo->number = number; @@ -841,12 +843,13 @@ translate_table(struct net *net, duprintf("translate_table: size %u\n", newinfo->size); i = 0; /* Walk through entries, checking offsets. */ - ret = IPT_ENTRY_ITERATE(entry0, newinfo->size, - check_entry_size_and_hooks, - newinfo, - entry0, - entry0 + size, - hook_entries, underflows, valid_hooks, &i); + xt_entry_foreach(iter, entry0, newinfo->size) { + ret = check_entry_size_and_hooks(iter, newinfo, entry0, + entry0 + size, hook_entries, underflows, + valid_hooks, &i); + if (ret != 0) + break; + } if (ret != 0) return ret; @@ -878,12 +881,16 @@ translate_table(struct net *net, /* Finally, each sanity check must pass */ i = 0; - ret = IPT_ENTRY_ITERATE(entry0, newinfo->size, - find_check_entry, net, name, size, &i); + xt_entry_foreach(iter, entry0, newinfo->size) { + ret = find_check_entry(iter, net, name, size, &i); + if (ret != 0) + break; + } if (ret != 0) { - IPT_ENTRY_ITERATE(entry0, newinfo->size, - cleanup_entry, net, &i); + xt_entry_foreach(iter, entry0, newinfo->size) + if (cleanup_entry(iter, net, &i) != 0) + break; return ret; } @@ -923,6 +930,7 @@ static void get_counters(const struct xt_table_info *t, struct xt_counters counters[]) { + struct ipt_entry *iter; unsigned int cpu; unsigned int i; unsigned int curcpu; @@ -938,22 +946,18 @@ get_counters(const struct xt_table_info *t, curcpu = smp_processor_id(); i = 0; - IPT_ENTRY_ITERATE(t->entries[curcpu], - t->size, - set_entry_to_counter, - counters, - &i); + xt_entry_foreach(iter, t->entries[curcpu], t->size) + if (set_entry_to_counter(iter, counters, &i) != 0) + break; for_each_possible_cpu(cpu) { if (cpu == curcpu) continue; i = 0; xt_info_wrlock(cpu); - IPT_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_entry_to_counter, - counters, - &i); + xt_entry_foreach(iter, t->entries[cpu], t->size) + if (add_entry_to_counter(iter, counters, &i) != 0) + break; xt_info_wrunlock(cpu); } local_bh_enable(); @@ -1111,7 +1115,9 @@ static int compat_calc_entry(const struct ipt_entry *e, static int compat_table_info(const struct xt_table_info *info, struct xt_table_info *newinfo) { + struct ipt_entry *iter; void *loc_cpu_entry; + int ret = 0; if (!newinfo || !info) return -EINVAL; @@ -1120,9 +1126,12 @@ static int compat_table_info(const struct xt_table_info *info, memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; loc_cpu_entry = info->entries[raw_smp_processor_id()]; - return IPT_ENTRY_ITERATE(loc_cpu_entry, info->size, - compat_calc_entry, info, loc_cpu_entry, - newinfo); + xt_entry_foreach(iter, loc_cpu_entry, info->size) { + ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); + if (ret != 0) + break; + } + return ret; } #endif @@ -1236,6 +1245,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct xt_table_info *oldinfo; struct xt_counters *counters; void *loc_cpu_old_entry; + struct ipt_entry *iter; ret = 0; counters = vmalloc(num_counters * sizeof(struct xt_counters)); @@ -1278,8 +1288,10 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, /* Decrease 
module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; - IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, - net, NULL); + xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) + if (cleanup_entry(iter, net, NULL) != 0) + break; + xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, sizeof(struct xt_counters) * num_counters) != 0) @@ -1304,6 +1316,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len) struct ipt_replace tmp; struct xt_table_info *newinfo; void *loc_cpu_entry; + struct ipt_entry *iter; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; @@ -1339,7 +1352,9 @@ do_replace(struct net *net, const void __user *user, unsigned int len) return 0; free_newinfo_untrans: - IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, net, NULL); + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + if (cleanup_entry(iter, net, NULL) != 0) + break; free_newinfo: xt_free_table_info(newinfo); return ret; @@ -1373,6 +1388,7 @@ do_add_counters(struct net *net, const void __user *user, const struct xt_table_info *private; int ret = 0; void *loc_cpu_entry; + struct ipt_entry *iter; #ifdef CONFIG_COMPAT struct compat_xt_counters_info compat_tmp; @@ -1430,11 +1446,9 @@ do_add_counters(struct net *net, const void __user *user, curcpu = smp_processor_id(); loc_cpu_entry = private->entries[curcpu]; xt_info_wrlock(curcpu); - IPT_ENTRY_ITERATE(loc_cpu_entry, - private->size, - add_counter_to_entry, - paddc, - &i); + xt_entry_foreach(iter, loc_cpu_entry, private->size) + if (add_counter_to_entry(iter, paddc, &i) != 0) + break; xt_info_wrunlock(curcpu); unlock_up_free: local_bh_enable(); @@ -1720,8 +1734,10 @@ translate_compat_table(struct net *net, unsigned int i, j; struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; + struct compat_ipt_entry *iter0; + struct ipt_entry *iter1; unsigned int size; - int ret; + int ret = 0; info = *pinfo; entry0 = *pentry0; @@ -1738,11 +1754,13 @@ translate_compat_table(struct net *net, j = 0; xt_compat_lock(AF_INET); /* Walk through entries, checking offsets. */ - ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, - check_compat_entry_size_and_hooks, - info, &size, entry0, - entry0 + total_size, - hook_entries, underflows, &j, name); + xt_entry_foreach(iter0, entry0, total_size) { + ret = check_compat_entry_size_and_hooks(iter0, info, &size, + entry0, entry0 + total_size, hook_entries, underflows, + &j, name); + if (ret != 0) + break; + } if (ret != 0) goto out_unlock; @@ -1783,9 +1801,12 @@ translate_compat_table(struct net *net, entry1 = newinfo->entries[raw_smp_processor_id()]; pos = entry1; size = total_size; - ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, - compat_copy_entry_from_user, - &pos, &size, name, newinfo, entry1); + xt_entry_foreach(iter0, entry0, total_size) { + ret = compat_copy_entry_from_user(iter0, &pos, + &size, name, newinfo, entry1); + if (ret != 0) + break; + } xt_compat_flush_offsets(AF_INET); xt_compat_unlock(AF_INET); if (ret) @@ -1796,13 +1817,28 @@ translate_compat_table(struct net *net, goto free_newinfo; i = 0; - ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry, - net, name, &i); + xt_entry_foreach(iter1, entry1, newinfo->size) { + ret = compat_check_entry(iter1, net, name, &i); + if (ret != 0) + break; + } if (ret) { + /* + * The first i matches need cleanup_entry (calls ->destroy) + * because they had called ->check already. The other j-i + * entries need only release. 
+ */ + int skip = i; j -= i; - COMPAT_IPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i, - compat_release_entry, &j); - IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, net, &i); + xt_entry_foreach(iter0, entry0, newinfo->size) { + if (skip-- > 0) + continue; + if (compat_release_entry(iter0, &i) != 0) + break; + } + xt_entry_foreach(iter1, entry1, newinfo->size) + if (cleanup_entry(iter1, net, &i) != 0) + break; xt_free_table_info(newinfo); return ret; } @@ -1820,7 +1856,9 @@ translate_compat_table(struct net *net, free_newinfo: xt_free_table_info(newinfo); out: - COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j); + xt_entry_foreach(iter0, entry0, total_size) + if (compat_release_entry(iter0, &j) != 0) + break; return ret; out_unlock: xt_compat_flush_offsets(AF_INET); @@ -1835,6 +1873,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) struct compat_ipt_replace tmp; struct xt_table_info *newinfo; void *loc_cpu_entry; + struct ipt_entry *iter; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; @@ -1873,7 +1912,9 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return 0; free_newinfo_untrans: - IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, net, NULL); + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + if (cleanup_entry(iter, net, NULL) != 0) + break; free_newinfo: xt_free_table_info(newinfo); return ret; @@ -1922,6 +1963,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, int ret = 0; const void *loc_cpu_entry; unsigned int i = 0; + struct ipt_entry *iter; counters = alloc_counters(table); if (IS_ERR(counters)) @@ -1934,9 +1976,12 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, loc_cpu_entry = private->entries[raw_smp_processor_id()]; pos = userptr; size = total_size; - ret = IPT_ENTRY_ITERATE(loc_cpu_entry, total_size, - compat_copy_entry_to_user, - &pos, &size, counters, &i); + xt_entry_foreach(iter, loc_cpu_entry, total_size) { + ret = compat_copy_entry_to_user(iter, &pos, + &size, counters, &i); + if (ret != 0) + break; + } vfree(counters); return ret; @@ -2137,12 +2182,15 @@ void ipt_unregister_table(struct net *net, struct xt_table *table) struct xt_table_info *private; void *loc_cpu_entry; struct module *table_owner = table->me; + struct ipt_entry *iter; private = xt_unregister_table(table); /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; - IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, net, NULL); + xt_entry_foreach(iter, loc_cpu_entry, private->size) + if (cleanup_entry(iter, net, NULL) != 0) + break; if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 4185099c2943..23926e38d36b 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -318,6 +318,7 @@ static void trace_packet(const struct sk_buff *skb, const void *table_base; const struct ip6t_entry *root; const char *hookname, *chainname, *comment; + const struct ip6t_entry *iter; unsigned int rulenum = 0; table_base = private->entries[smp_processor_id()]; @@ -326,10 +327,10 @@ static void trace_packet(const struct sk_buff *skb, hookname = chainname = hooknames[hook]; comment = comments[NF_IP6_TRACE_COMMENT_RULE]; - IP6T_ENTRY_ITERATE(root, - private->size - private->hook_entry[hook], - get_chainname_rulenum, - e, 
hookname, &chainname, &comment, &rulenum); + xt_entry_foreach(iter, root, private->size - private->hook_entry[hook]) + if (get_chainname_rulenum(iter, e, hookname, + &chainname, &comment, &rulenum) != 0) + break; nf_log_packet(AF_INET6, hook, skb, in, out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", @@ -857,8 +858,9 @@ translate_table(struct net *net, const unsigned int *hook_entries, const unsigned int *underflows) { + struct ip6t_entry *iter; unsigned int i; - int ret; + int ret = 0; newinfo->size = size; newinfo->number = number; @@ -872,12 +874,13 @@ translate_table(struct net *net, duprintf("translate_table: size %u\n", newinfo->size); i = 0; /* Walk through entries, checking offsets. */ - ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size, - check_entry_size_and_hooks, - newinfo, - entry0, - entry0 + size, - hook_entries, underflows, valid_hooks, &i); + xt_entry_foreach(iter, entry0, newinfo->size) { + ret = check_entry_size_and_hooks(iter, newinfo, entry0, + entry0 + size, hook_entries, underflows, + valid_hooks, &i); + if (ret != 0) + break; + } if (ret != 0) return ret; @@ -909,12 +912,16 @@ translate_table(struct net *net, /* Finally, each sanity check must pass */ i = 0; - ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size, - find_check_entry, net, name, size, &i); + xt_entry_foreach(iter, entry0, newinfo->size) { + ret = find_check_entry(iter, net, name, size, &i); + if (ret != 0) + break; + } if (ret != 0) { - IP6T_ENTRY_ITERATE(entry0, newinfo->size, - cleanup_entry, net, &i); + xt_entry_foreach(iter, entry0, newinfo->size) + if (cleanup_entry(iter, net, &i) != 0) + break; return ret; } @@ -954,6 +961,7 @@ static void get_counters(const struct xt_table_info *t, struct xt_counters counters[]) { + struct ip6t_entry *iter; unsigned int cpu; unsigned int i; unsigned int curcpu; @@ -969,22 +977,18 @@ get_counters(const struct xt_table_info *t, curcpu = smp_processor_id(); i = 0; - IP6T_ENTRY_ITERATE(t->entries[curcpu], - t->size, - set_entry_to_counter, - counters, - &i); + xt_entry_foreach(iter, t->entries[curcpu], t->size) + if (set_entry_to_counter(iter, counters, &i) != 0) + break; for_each_possible_cpu(cpu) { if (cpu == curcpu) continue; i = 0; xt_info_wrlock(cpu); - IP6T_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_entry_to_counter, - counters, - &i); + xt_entry_foreach(iter, t->entries[cpu], t->size) + if (add_entry_to_counter(iter, counters, &i) != 0) + break; xt_info_wrunlock(cpu); } local_bh_enable(); @@ -1142,7 +1146,9 @@ static int compat_calc_entry(const struct ip6t_entry *e, static int compat_table_info(const struct xt_table_info *info, struct xt_table_info *newinfo) { + struct ip6t_entry *iter; void *loc_cpu_entry; + int ret = 0; if (!newinfo || !info) return -EINVAL; @@ -1151,9 +1157,12 @@ static int compat_table_info(const struct xt_table_info *info, memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; loc_cpu_entry = info->entries[raw_smp_processor_id()]; - return IP6T_ENTRY_ITERATE(loc_cpu_entry, info->size, - compat_calc_entry, info, loc_cpu_entry, - newinfo); + xt_entry_foreach(iter, loc_cpu_entry, info->size) { + ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); + if (ret != 0) + break; + } + return ret; } #endif @@ -1267,6 +1276,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct xt_table_info *oldinfo; struct xt_counters *counters; const void *loc_cpu_old_entry; + struct ip6t_entry *iter; ret = 0; counters = vmalloc_node(num_counters * sizeof(struct xt_counters), @@ -1310,8 +1320,10 
@@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; - IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, - net, NULL); + xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) + if (cleanup_entry(iter, net, NULL) != 0) + break; + xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, sizeof(struct xt_counters) * num_counters) != 0) @@ -1336,6 +1348,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len) struct ip6t_replace tmp; struct xt_table_info *newinfo; void *loc_cpu_entry; + struct ip6t_entry *iter; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; @@ -1371,7 +1384,9 @@ do_replace(struct net *net, const void __user *user, unsigned int len) return 0; free_newinfo_untrans: - IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, net, NULL); + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + if (cleanup_entry(iter, net, NULL) != 0) + break; free_newinfo: xt_free_table_info(newinfo); return ret; @@ -1405,6 +1420,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, const struct xt_table_info *private; int ret = 0; const void *loc_cpu_entry; + struct ip6t_entry *iter; #ifdef CONFIG_COMPAT struct compat_xt_counters_info compat_tmp; @@ -1463,11 +1479,9 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, curcpu = smp_processor_id(); xt_info_wrlock(curcpu); loc_cpu_entry = private->entries[curcpu]; - IP6T_ENTRY_ITERATE(loc_cpu_entry, - private->size, - add_counter_to_entry, - paddc, - &i); + xt_entry_foreach(iter, loc_cpu_entry, private->size) + if (add_counter_to_entry(iter, paddc, &i) != 0) + break; xt_info_wrunlock(curcpu); unlock_up_free: @@ -1753,8 +1767,10 @@ translate_compat_table(struct net *net, unsigned int i, j; struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; + struct compat_ip6t_entry *iter0; + struct ip6t_entry *iter1; unsigned int size; - int ret; + int ret = 0; info = *pinfo; entry0 = *pentry0; @@ -1771,11 +1787,13 @@ translate_compat_table(struct net *net, j = 0; xt_compat_lock(AF_INET6); /* Walk through entries, checking offsets. 
*/ - ret = COMPAT_IP6T_ENTRY_ITERATE(entry0, total_size, - check_compat_entry_size_and_hooks, - info, &size, entry0, - entry0 + total_size, - hook_entries, underflows, &j, name); + xt_entry_foreach(iter0, entry0, total_size) { + ret = check_compat_entry_size_and_hooks(iter0, info, &size, + entry0, entry0 + total_size, hook_entries, underflows, + &j, name); + if (ret != 0) + break; + } if (ret != 0) goto out_unlock; @@ -1816,9 +1834,12 @@ translate_compat_table(struct net *net, entry1 = newinfo->entries[raw_smp_processor_id()]; pos = entry1; size = total_size; - ret = COMPAT_IP6T_ENTRY_ITERATE(entry0, total_size, - compat_copy_entry_from_user, - &pos, &size, name, newinfo, entry1); + xt_entry_foreach(iter0, entry0, total_size) { + ret = compat_copy_entry_from_user(iter0, &pos, + &size, name, newinfo, entry1); + if (ret != 0) + break; + } xt_compat_flush_offsets(AF_INET6); xt_compat_unlock(AF_INET6); if (ret) @@ -1829,13 +1850,28 @@ translate_compat_table(struct net *net, goto free_newinfo; i = 0; - ret = IP6T_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry, - net, name, &i); + xt_entry_foreach(iter1, entry1, newinfo->size) { + ret = compat_check_entry(iter1, net, name, &i); + if (ret != 0) + break; + } if (ret) { + /* + * The first i matches need cleanup_entry (calls ->destroy) + * because they had called ->check already. The other j-i + * entries need only release. + */ + int skip = i; j -= i; - COMPAT_IP6T_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i, - compat_release_entry, &j); - IP6T_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, net, &i); + xt_entry_foreach(iter0, entry0, newinfo->size) { + if (skip-- > 0) + continue; + if (compat_release_entry(iter0, &j) != 0) + break; + } + xt_entry_foreach(iter1, entry1, newinfo->size) + if (cleanup_entry(iter1, net, &i) != 0) + break; xt_free_table_info(newinfo); return ret; } @@ -1853,7 +1889,9 @@ translate_compat_table(struct net *net, free_newinfo: xt_free_table_info(newinfo); out: - COMPAT_IP6T_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j); + xt_entry_foreach(iter0, entry0, total_size) + if (compat_release_entry(iter0, &j) != 0) + break; return ret; out_unlock: xt_compat_flush_offsets(AF_INET6); @@ -1868,6 +1906,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) struct compat_ip6t_replace tmp; struct xt_table_info *newinfo; void *loc_cpu_entry; + struct ip6t_entry *iter; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; @@ -1906,7 +1945,9 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return 0; free_newinfo_untrans: - IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, net, NULL); + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + if (cleanup_entry(iter, net, NULL) != 0) + break; free_newinfo: xt_free_table_info(newinfo); return ret; @@ -1955,6 +1996,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, int ret = 0; const void *loc_cpu_entry; unsigned int i = 0; + struct ip6t_entry *iter; counters = alloc_counters(table); if (IS_ERR(counters)) @@ -1967,9 +2009,12 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, loc_cpu_entry = private->entries[raw_smp_processor_id()]; pos = userptr; size = total_size; - ret = IP6T_ENTRY_ITERATE(loc_cpu_entry, total_size, - compat_copy_entry_to_user, - &pos, &size, counters, &i); + xt_entry_foreach(iter, loc_cpu_entry, total_size) { + ret = compat_copy_entry_to_user(iter, &pos, + &size, counters, &i); + if (ret != 0) + break; + } 
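For readers following these conversions, xt_entry_foreach-style iteration walks a flat buffer of variable-length rules by advancing each entry's next_offset bytes. A self-contained sketch of the same loop shape, using placeholder example_* names rather than the kernel macro:

#include <stdint.h>
#include <stdio.h>

/* One variable-sized record; next_offset is its total size in bytes (> 0). */
struct example_entry {
        uint16_t next_offset;
        uint16_t rule_id;
};

/* Walk records packed back to back in one buffer of 'size' bytes. */
#define example_entry_foreach(pos, base, size) \
        for ((pos) = (struct example_entry *)(base); \
             (char *)(pos) < (char *)(base) + (size); \
             (pos) = (struct example_entry *)((char *)(pos) + (pos)->next_offset))

static void example_walk(void *blob, unsigned int size)
{
        struct example_entry *iter;

        example_entry_foreach(iter, blob, size)
                printf("rule %u\n", (unsigned int)iter->rule_id);
}

Termination relies on every next_offset being positive and in bounds, which is what the check_entry_size_and_hooks() offset validation above guarantees for real rule blobs.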
vfree(counters); return ret; @@ -2169,12 +2214,15 @@ void ip6t_unregister_table(struct net *net, struct xt_table *table) struct xt_table_info *private; void *loc_cpu_entry; struct module *table_owner = table->me; + struct ip6t_entry *iter; private = xt_unregister_table(table); /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; - IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, net, NULL); + xt_entry_foreach(iter, loc_cpu_entry, private->size) + if (cleanup_entry(iter, net, NULL) != 0) + break; if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); -- cgit v1.2.2 From 0559518b5b99c591226460c0bbf8e6a570c518a8 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 24 Feb 2010 18:33:43 +0100 Subject: netfilter: xtables: optimize call flow around xt_entry_foreach Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/arp_tables.c | 180 +++++++++++++-------------------------- net/ipv4/netfilter/ip_tables.c | 183 ++++++++++++++-------------------------- net/ipv6/netfilter/ip6_tables.c | 179 ++++++++++++++------------------------- 3 files changed, 182 insertions(+), 360 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index f7338869fc4c..5fdedeb46218 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -512,8 +512,7 @@ static inline int check_target(struct arpt_entry *e, const char *name) } static inline int -find_check_entry(struct arpt_entry *e, const char *name, unsigned int size, - unsigned int *i) +find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) { struct arpt_entry_target *t; struct xt_target *target; @@ -538,8 +537,6 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size, ret = check_target(e, name); if (ret) goto err; - - (*i)++; return 0; err: module_put(t->u.kernel.target->me); @@ -568,8 +565,7 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, const unsigned char *limit, const unsigned int *hook_entries, const unsigned int *underflows, - unsigned int valid_hooks, - unsigned int *i) + unsigned int valid_hooks) { unsigned int h; @@ -606,19 +602,14 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, /* Clear counters and comefrom */ e->counters = ((struct xt_counters) { 0, 0 }); e->comefrom = 0; - - (*i)++; return 0; } -static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i) +static inline void cleanup_entry(struct arpt_entry *e) { struct xt_tgdtor_param par; struct arpt_entry_target *t; - if (i && (*i)-- == 0) - return 1; - t = arpt_get_target(e); par.target = t->u.kernel.target; par.targinfo = t->data; @@ -626,7 +617,6 @@ static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i) if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); - return 0; } /* Checks and translates the user-supplied table segment (held in @@ -660,10 +650,10 @@ static int translate_table(const char *name, /* Walk through entries, checking offsets. 
*/ xt_entry_foreach(iter, entry0, newinfo->size) { ret = check_entry_size_and_hooks(iter, newinfo, entry0, - entry0 + size, hook_entries, underflows, - valid_hooks, &i); + entry0 + size, hook_entries, underflows, valid_hooks); if (ret != 0) break; + ++i; } duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); if (ret != 0) @@ -700,15 +690,18 @@ static int translate_table(const char *name, /* Finally, each sanity check must pass */ i = 0; xt_entry_foreach(iter, entry0, newinfo->size) { - ret = find_check_entry(iter, name, size, &i); + ret = find_check_entry(iter, name, size); if (ret != 0) break; + ++i; } if (ret != 0) { - xt_entry_foreach(iter, entry0, newinfo->size) - if (cleanup_entry(iter, &i) != 0) + xt_entry_foreach(iter, entry0, newinfo->size) { + if (i-- == 0) break; + cleanup_entry(iter); + } return ret; } @@ -721,27 +714,6 @@ static int translate_table(const char *name, return ret; } -/* Gets counters. */ -static inline int add_entry_to_counter(const struct arpt_entry *e, - struct xt_counters total[], - unsigned int *i) -{ - ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); - - (*i)++; - return 0; -} - -static inline int set_entry_to_counter(const struct arpt_entry *e, - struct xt_counters total[], - unsigned int *i) -{ - SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); - - (*i)++; - return 0; -} - static void get_counters(const struct xt_table_info *t, struct xt_counters counters[]) { @@ -761,18 +733,22 @@ static void get_counters(const struct xt_table_info *t, curcpu = smp_processor_id(); i = 0; - xt_entry_foreach(iter, t->entries[curcpu], t->size) - if (set_entry_to_counter(iter, counters, &i) != 0) - break; + xt_entry_foreach(iter, t->entries[curcpu], t->size) { + SET_COUNTER(counters[i], iter->counters.bcnt, + iter->counters.pcnt); + ++i; + } for_each_possible_cpu(cpu) { if (cpu == curcpu) continue; i = 0; xt_info_wrlock(cpu); - xt_entry_foreach(iter, t->entries[cpu], t->size) - if (add_entry_to_counter(iter, counters, &i) != 0) - break; + xt_entry_foreach(iter, t->entries[cpu], t->size) { + ADD_COUNTER(counters[i], iter->counters.bcnt, + iter->counters.pcnt); + ++i; + } xt_info_wrunlock(cpu); } local_bh_enable(); @@ -904,7 +880,7 @@ static int compat_table_info(const struct xt_table_info *info, { struct arpt_entry *iter; void *loc_cpu_entry; - int ret = 0; + int ret; if (!newinfo || !info) return -EINVAL; @@ -916,9 +892,9 @@ static int compat_table_info(const struct xt_table_info *info, xt_entry_foreach(iter, loc_cpu_entry, info->size) { ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); if (ret != 0) - break; + return ret; } - return ret; + return 0; } #endif @@ -1078,8 +1054,7 @@ static int __do_replace(struct net *net, const char *name, /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) - if (cleanup_entry(iter, NULL) != 0) - break; + cleanup_entry(iter); xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, @@ -1142,26 +1117,12 @@ static int do_replace(struct net *net, const void __user *user, free_newinfo_untrans: xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) - if (cleanup_entry(iter, NULL) != 0) - break; + cleanup_entry(iter); free_newinfo: xt_free_table_info(newinfo); return ret; } -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. 
*/ -static int -add_counter_to_entry(struct arpt_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - static int do_add_counters(struct net *net, const void __user *user, unsigned int len, int compat) { @@ -1234,9 +1195,10 @@ static int do_add_counters(struct net *net, const void __user *user, curcpu = smp_processor_id(); loc_cpu_entry = private->entries[curcpu]; xt_info_wrlock(curcpu); - xt_entry_foreach(iter, loc_cpu_entry, private->size) - if (add_counter_to_entry(iter, paddc, &i) != 0) - break; + xt_entry_foreach(iter, loc_cpu_entry, private->size) { + ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); + ++i; + } xt_info_wrunlock(curcpu); unlock_up_free: local_bh_enable(); @@ -1249,17 +1211,12 @@ static int do_add_counters(struct net *net, const void __user *user, } #ifdef CONFIG_COMPAT -static inline int -compat_release_entry(struct compat_arpt_entry *e, unsigned int *i) +static inline void compat_release_entry(struct compat_arpt_entry *e) { struct arpt_entry_target *t; - if (i && (*i)-- == 0) - return 1; - t = compat_arpt_get_target(e); module_put(t->u.kernel.target->me); - return 0; } static inline int @@ -1270,7 +1227,6 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, const unsigned char *limit, const unsigned int *hook_entries, const unsigned int *underflows, - unsigned int *i, const char *name) { struct arpt_entry_target *t; @@ -1330,8 +1286,6 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, /* Clear counters and comefrom */ memset(&e->counters, 0, sizeof(e->counters)); e->comefrom = 0; - - (*i)++; return 0; release_target: @@ -1375,19 +1329,6 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, return ret; } -static inline int compat_check_entry(struct arpt_entry *e, const char *name, - unsigned int *i) -{ - int ret; - - ret = check_target(e, name); - if (ret) - return ret; - - (*i)++; - return 0; -} - static int translate_compat_table(const char *name, unsigned int valid_hooks, struct xt_table_info **pinfo, @@ -1423,12 +1364,11 @@ static int translate_compat_table(const char *name, xt_entry_foreach(iter0, entry0, total_size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, entry0 + total_size, hook_entries, underflows, - &j, name); + name); if (ret != 0) - break; + goto out_unlock; + ++j; } - if (ret != 0) - goto out_unlock; ret = -EINVAL; if (j != number) { @@ -1484,9 +1424,10 @@ static int translate_compat_table(const char *name, i = 0; xt_entry_foreach(iter1, entry1, newinfo->size) { - ret = compat_check_entry(iter1, name, &i); + ret = check_target(iter1, name); if (ret != 0) break; + ++i; } if (ret) { /* @@ -1499,12 +1440,15 @@ static int translate_compat_table(const char *name, xt_entry_foreach(iter0, entry0, newinfo->size) { if (skip-- > 0) continue; - if (compat_release_entry(iter0, &j) != 0) + if (j-- == 0) break; + compat_release_entry(iter0); } - xt_entry_foreach(iter1, entry1, newinfo->size) - if (cleanup_entry(iter1, &i) != 0) + xt_entry_foreach(iter1, entry1, newinfo->size) { + if (i-- == 0) break; + cleanup_entry(iter1); + } xt_free_table_info(newinfo); return ret; } @@ -1522,9 +1466,11 @@ static int translate_compat_table(const char *name, free_newinfo: xt_free_table_info(newinfo); out: - xt_entry_foreach(iter0, entry0, total_size) - if (compat_release_entry(iter0, &j) != 0) + xt_entry_foreach(iter0, entry0, total_size) { + if (j-- == 0) break; + compat_release_entry(iter0); + 
} return ret; out_unlock: xt_compat_flush_offsets(NFPROTO_ARP); @@ -1590,8 +1536,7 @@ static int compat_do_replace(struct net *net, void __user *user, free_newinfo_untrans: xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) - if (cleanup_entry(iter, NULL) != 0) - break; + cleanup_entry(iter); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -1625,7 +1570,7 @@ static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr, compat_uint_t *size, struct xt_counters *counters, - unsigned int *i) + unsigned int i) { struct arpt_entry_target *t; struct compat_arpt_entry __user *ce; @@ -1633,14 +1578,12 @@ static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr, compat_uint_t origsize; int ret; - ret = -EFAULT; origsize = *size; ce = (struct compat_arpt_entry __user *)*dstptr; - if (copy_to_user(ce, e, sizeof(struct arpt_entry))) - goto out; - - if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i]))) - goto out; + if (copy_to_user(ce, e, sizeof(struct arpt_entry)) != 0 || + copy_to_user(&ce->counters, &counters[i], + sizeof(counters[i])) != 0) + return -EFAULT; *dstptr += sizeof(struct compat_arpt_entry); *size -= sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); @@ -1650,18 +1593,12 @@ static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr, t = arpt_get_target(e); ret = xt_compat_target_to_user(t, dstptr, size); if (ret) - goto out; - ret = -EFAULT; + return ret; next_offset = e->next_offset - (origsize - *size); - if (put_user(target_offset, &ce->target_offset)) - goto out; - if (put_user(next_offset, &ce->next_offset)) - goto out; - - (*i)++; + if (put_user(target_offset, &ce->target_offset) != 0 || + put_user(next_offset, &ce->next_offset) != 0) + return -EFAULT; return 0; -out: - return ret; } static int compat_copy_entries_to_user(unsigned int total_size, @@ -1687,7 +1624,7 @@ static int compat_copy_entries_to_user(unsigned int total_size, size = total_size; xt_entry_foreach(iter, loc_cpu_entry, total_size) { ret = compat_copy_entry_to_user(iter, &pos, - &size, counters, &i); + &size, counters, i++); if (ret != 0) break; } @@ -1893,8 +1830,7 @@ void arpt_unregister_table(struct xt_table *table) /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; xt_entry_foreach(iter, loc_cpu_entry, private->size) - if (cleanup_entry(iter, NULL) != 0) - break; + cleanup_entry(iter); if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index b43280aad8a2..9c8aa394c51c 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -679,7 +679,7 @@ static int check_target(struct ipt_entry *e, struct net *net, const char *name) static int find_check_entry(struct ipt_entry *e, struct net *net, const char *name, - unsigned int size, unsigned int *i) + unsigned int size) { struct ipt_entry_target *t; struct xt_target *target; @@ -716,8 +716,6 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, ret = check_target(e, net, name); if (ret) goto err; - - (*i)++; return 0; err: module_put(t->u.kernel.target->me); @@ -748,8 +746,7 @@ check_entry_size_and_hooks(struct ipt_entry *e, const unsigned char *limit, const unsigned int *hook_entries, const unsigned int *underflows, - unsigned int valid_hooks, - unsigned int 
*i) + unsigned int valid_hooks) { unsigned int h; @@ -786,20 +783,15 @@ check_entry_size_and_hooks(struct ipt_entry *e, /* Clear counters and comefrom */ e->counters = ((struct xt_counters) { 0, 0 }); e->comefrom = 0; - - (*i)++; return 0; } -static int -cleanup_entry(struct ipt_entry *e, struct net *net, unsigned int *i) +static void +cleanup_entry(struct ipt_entry *e, struct net *net) { struct xt_tgdtor_param par; struct ipt_entry_target *t; - if (i && (*i)-- == 0) - return 1; - /* Cleanup all matches */ IPT_MATCH_ITERATE(e, cleanup_match, net, NULL); t = ipt_get_target(e); @@ -811,7 +803,6 @@ cleanup_entry(struct ipt_entry *e, struct net *net, unsigned int *i) if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); - return 0; } /* Checks and translates the user-supplied table segment (held in @@ -845,13 +836,11 @@ translate_table(struct net *net, /* Walk through entries, checking offsets. */ xt_entry_foreach(iter, entry0, newinfo->size) { ret = check_entry_size_and_hooks(iter, newinfo, entry0, - entry0 + size, hook_entries, underflows, - valid_hooks, &i); + entry0 + size, hook_entries, underflows, valid_hooks); if (ret != 0) - break; + return ret; + ++i; } - if (ret != 0) - return ret; if (i != number) { duprintf("translate_table: %u not %u entries\n", @@ -882,15 +871,18 @@ translate_table(struct net *net, /* Finally, each sanity check must pass */ i = 0; xt_entry_foreach(iter, entry0, newinfo->size) { - ret = find_check_entry(iter, net, name, size, &i); + ret = find_check_entry(iter, net, name, size); if (ret != 0) break; + ++i; } if (ret != 0) { - xt_entry_foreach(iter, entry0, newinfo->size) - if (cleanup_entry(iter, net, &i) != 0) + xt_entry_foreach(iter, entry0, newinfo->size) { + if (i-- == 0) break; + cleanup_entry(iter, net); + } return ret; } @@ -903,29 +895,6 @@ translate_table(struct net *net, return ret; } -/* Gets counters. 
*/ -static inline int -add_entry_to_counter(const struct ipt_entry *e, - struct xt_counters total[], - unsigned int *i) -{ - ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); - - (*i)++; - return 0; -} - -static inline int -set_entry_to_counter(const struct ipt_entry *e, - struct ipt_counters total[], - unsigned int *i) -{ - SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); - - (*i)++; - return 0; -} - static void get_counters(const struct xt_table_info *t, struct xt_counters counters[]) @@ -946,18 +915,22 @@ get_counters(const struct xt_table_info *t, curcpu = smp_processor_id(); i = 0; - xt_entry_foreach(iter, t->entries[curcpu], t->size) - if (set_entry_to_counter(iter, counters, &i) != 0) - break; + xt_entry_foreach(iter, t->entries[curcpu], t->size) { + SET_COUNTER(counters[i], iter->counters.bcnt, + iter->counters.pcnt); + ++i; + } for_each_possible_cpu(cpu) { if (cpu == curcpu) continue; i = 0; xt_info_wrlock(cpu); - xt_entry_foreach(iter, t->entries[cpu], t->size) - if (add_entry_to_counter(iter, counters, &i) != 0) - break; + xt_entry_foreach(iter, t->entries[cpu], t->size) { + ADD_COUNTER(counters[i], iter->counters.bcnt, + iter->counters.pcnt); + ++i; /* macro does multi eval of i */ + } xt_info_wrunlock(cpu); } local_bh_enable(); @@ -1117,7 +1090,7 @@ static int compat_table_info(const struct xt_table_info *info, { struct ipt_entry *iter; void *loc_cpu_entry; - int ret = 0; + int ret; if (!newinfo || !info) return -EINVAL; @@ -1129,9 +1102,9 @@ static int compat_table_info(const struct xt_table_info *info, xt_entry_foreach(iter, loc_cpu_entry, info->size) { ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); if (ret != 0) - break; + return ret; } - return ret; + return 0; } #endif @@ -1289,8 +1262,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) - if (cleanup_entry(iter, net, NULL) != 0) - break; + cleanup_entry(iter, net); xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, @@ -1353,26 +1325,12 @@ do_replace(struct net *net, const void __user *user, unsigned int len) free_newinfo_untrans: xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) - if (cleanup_entry(iter, net, NULL) != 0) - break; + cleanup_entry(iter, net); free_newinfo: xt_free_table_info(newinfo); return ret; } -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. 
*/ -static int -add_counter_to_entry(struct ipt_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - static int do_add_counters(struct net *net, const void __user *user, unsigned int len, int compat) @@ -1446,9 +1404,10 @@ do_add_counters(struct net *net, const void __user *user, curcpu = smp_processor_id(); loc_cpu_entry = private->entries[curcpu]; xt_info_wrlock(curcpu); - xt_entry_foreach(iter, loc_cpu_entry, private->size) - if (add_counter_to_entry(iter, paddc, &i) != 0) - break; + xt_entry_foreach(iter, loc_cpu_entry, private->size) { + ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); + ++i; + } xt_info_wrunlock(curcpu); unlock_up_free: local_bh_enable(); @@ -1476,7 +1435,7 @@ struct compat_ipt_replace { static int compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, unsigned int *size, struct xt_counters *counters, - unsigned int *i) + unsigned int i) { struct ipt_entry_target *t; struct compat_ipt_entry __user *ce; @@ -1484,14 +1443,12 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, compat_uint_t origsize; int ret; - ret = -EFAULT; origsize = *size; ce = (struct compat_ipt_entry __user *)*dstptr; - if (copy_to_user(ce, e, sizeof(struct ipt_entry))) - goto out; - - if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i]))) - goto out; + if (copy_to_user(ce, e, sizeof(struct ipt_entry)) != 0 || + copy_to_user(&ce->counters, &counters[i], + sizeof(counters[i])) != 0) + return -EFAULT; *dstptr += sizeof(struct compat_ipt_entry); *size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); @@ -1499,22 +1456,16 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, ret = IPT_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size); target_offset = e->target_offset - (origsize - *size); if (ret) - goto out; + return ret; t = ipt_get_target(e); ret = xt_compat_target_to_user(t, dstptr, size); if (ret) - goto out; - ret = -EFAULT; + return ret; next_offset = e->next_offset - (origsize - *size); - if (put_user(target_offset, &ce->target_offset)) - goto out; - if (put_user(next_offset, &ce->next_offset)) - goto out; - - (*i)++; + if (put_user(target_offset, &ce->target_offset) != 0 || + put_user(next_offset, &ce->next_offset) != 0) + return -EFAULT; return 0; -out: - return ret; } static int @@ -1551,19 +1502,14 @@ compat_release_match(struct ipt_entry_match *m, unsigned int *i) return 0; } -static int -compat_release_entry(struct compat_ipt_entry *e, unsigned int *i) +static void compat_release_entry(struct compat_ipt_entry *e) { struct ipt_entry_target *t; - if (i && (*i)-- == 0) - return 1; - /* Cleanup all matches */ COMPAT_IPT_MATCH_ITERATE(e, compat_release_match, NULL); t = compat_ipt_get_target(e); module_put(t->u.kernel.target->me); - return 0; } static int @@ -1574,7 +1520,6 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, const unsigned char *limit, const unsigned int *hook_entries, const unsigned int *underflows, - unsigned int *i, const char *name) { struct ipt_entry_target *t; @@ -1640,8 +1585,6 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, /* Clear counters and comefrom */ memset(&e->counters, 0, sizeof(e->counters)); e->comefrom = 0; - - (*i)++; return 0; out: @@ -1691,8 +1634,7 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, } static int -compat_check_entry(struct ipt_entry *e, struct net *net, const char *name, - unsigned int *i) 
+compat_check_entry(struct ipt_entry *e, struct net *net, const char *name) { struct xt_mtchk_param mtpar; unsigned int j; @@ -1711,8 +1653,6 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name, ret = check_target(e, net, name); if (ret) goto cleanup_matches; - - (*i)++; return 0; cleanup_matches: @@ -1737,7 +1677,7 @@ translate_compat_table(struct net *net, struct compat_ipt_entry *iter0; struct ipt_entry *iter1; unsigned int size; - int ret = 0; + int ret; info = *pinfo; entry0 = *pentry0; @@ -1757,12 +1697,11 @@ translate_compat_table(struct net *net, xt_entry_foreach(iter0, entry0, total_size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, entry0 + total_size, hook_entries, underflows, - &j, name); + name); if (ret != 0) - break; + goto out_unlock; + ++j; } - if (ret != 0) - goto out_unlock; ret = -EINVAL; if (j != number) { @@ -1818,9 +1757,10 @@ translate_compat_table(struct net *net, i = 0; xt_entry_foreach(iter1, entry1, newinfo->size) { - ret = compat_check_entry(iter1, net, name, &i); + ret = compat_check_entry(iter1, net, name); if (ret != 0) break; + ++i; } if (ret) { /* @@ -1833,12 +1773,15 @@ translate_compat_table(struct net *net, xt_entry_foreach(iter0, entry0, newinfo->size) { if (skip-- > 0) continue; - if (compat_release_entry(iter0, &i) != 0) + if (j-- == 0) break; + compat_release_entry(iter0); } - xt_entry_foreach(iter1, entry1, newinfo->size) - if (cleanup_entry(iter1, net, &i) != 0) + xt_entry_foreach(iter1, entry1, newinfo->size) { + if (i-- == 0) break; + cleanup_entry(iter1, net); + } xt_free_table_info(newinfo); return ret; } @@ -1856,9 +1799,11 @@ translate_compat_table(struct net *net, free_newinfo: xt_free_table_info(newinfo); out: - xt_entry_foreach(iter0, entry0, total_size) - if (compat_release_entry(iter0, &j) != 0) + xt_entry_foreach(iter0, entry0, total_size) { + if (j-- == 0) break; + compat_release_entry(iter0); + } return ret; out_unlock: xt_compat_flush_offsets(AF_INET); @@ -1913,8 +1858,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) free_newinfo_untrans: xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) - if (cleanup_entry(iter, net, NULL) != 0) - break; + cleanup_entry(iter, net); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -1978,7 +1922,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, size = total_size; xt_entry_foreach(iter, loc_cpu_entry, total_size) { ret = compat_copy_entry_to_user(iter, &pos, - &size, counters, &i); + &size, counters, i++); if (ret != 0) break; } @@ -2189,8 +2133,7 @@ void ipt_unregister_table(struct net *net, struct xt_table *table) /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; xt_entry_foreach(iter, loc_cpu_entry, private->size) - if (cleanup_entry(iter, net, NULL) != 0) - break; + cleanup_entry(iter, net); if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 23926e38d36b..b7e27c19c7ab 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -710,7 +710,7 @@ static int check_target(struct ip6t_entry *e, struct net *net, const char *name) static int find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, - unsigned int size, unsigned int *i) + unsigned int size) { struct ip6t_entry_target *t; struct xt_target *target; @@ -747,8 +747,6 @@ 
find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, ret = check_target(e, net, name); if (ret) goto err; - - (*i)++; return 0; err: module_put(t->u.kernel.target->me); @@ -779,8 +777,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e, const unsigned char *limit, const unsigned int *hook_entries, const unsigned int *underflows, - unsigned int valid_hooks, - unsigned int *i) + unsigned int valid_hooks) { unsigned int h; @@ -817,20 +814,14 @@ check_entry_size_and_hooks(struct ip6t_entry *e, /* Clear counters and comefrom */ e->counters = ((struct xt_counters) { 0, 0 }); e->comefrom = 0; - - (*i)++; return 0; } -static int -cleanup_entry(struct ip6t_entry *e, struct net *net, unsigned int *i) +static void cleanup_entry(struct ip6t_entry *e, struct net *net) { struct xt_tgdtor_param par; struct ip6t_entry_target *t; - if (i && (*i)-- == 0) - return 1; - /* Cleanup all matches */ IP6T_MATCH_ITERATE(e, cleanup_match, net, NULL); t = ip6t_get_target(e); @@ -842,7 +833,6 @@ cleanup_entry(struct ip6t_entry *e, struct net *net, unsigned int *i) if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); - return 0; } /* Checks and translates the user-supplied table segment (held in @@ -876,13 +866,11 @@ translate_table(struct net *net, /* Walk through entries, checking offsets. */ xt_entry_foreach(iter, entry0, newinfo->size) { ret = check_entry_size_and_hooks(iter, newinfo, entry0, - entry0 + size, hook_entries, underflows, - valid_hooks, &i); + entry0 + size, hook_entries, underflows, valid_hooks); if (ret != 0) - break; + return ret; + ++i; } - if (ret != 0) - return ret; if (i != number) { duprintf("translate_table: %u not %u entries\n", @@ -913,15 +901,18 @@ translate_table(struct net *net, /* Finally, each sanity check must pass */ i = 0; xt_entry_foreach(iter, entry0, newinfo->size) { - ret = find_check_entry(iter, net, name, size, &i); + ret = find_check_entry(iter, net, name, size); if (ret != 0) break; + ++i; } if (ret != 0) { - xt_entry_foreach(iter, entry0, newinfo->size) - if (cleanup_entry(iter, net, &i) != 0) + xt_entry_foreach(iter, entry0, newinfo->size) { + if (i-- == 0) break; + cleanup_entry(iter, net); + } return ret; } @@ -934,29 +925,6 @@ translate_table(struct net *net, return ret; } -/* Gets counters. 
*/ -static inline int -add_entry_to_counter(const struct ip6t_entry *e, - struct xt_counters total[], - unsigned int *i) -{ - ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); - - (*i)++; - return 0; -} - -static inline int -set_entry_to_counter(const struct ip6t_entry *e, - struct ip6t_counters total[], - unsigned int *i) -{ - SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); - - (*i)++; - return 0; -} - static void get_counters(const struct xt_table_info *t, struct xt_counters counters[]) @@ -977,18 +945,22 @@ get_counters(const struct xt_table_info *t, curcpu = smp_processor_id(); i = 0; - xt_entry_foreach(iter, t->entries[curcpu], t->size) - if (set_entry_to_counter(iter, counters, &i) != 0) - break; + xt_entry_foreach(iter, t->entries[curcpu], t->size) { + SET_COUNTER(counters[i], iter->counters.bcnt, + iter->counters.pcnt); + ++i; + } for_each_possible_cpu(cpu) { if (cpu == curcpu) continue; i = 0; xt_info_wrlock(cpu); - xt_entry_foreach(iter, t->entries[cpu], t->size) - if (add_entry_to_counter(iter, counters, &i) != 0) - break; + xt_entry_foreach(iter, t->entries[cpu], t->size) { + ADD_COUNTER(counters[i], iter->counters.bcnt, + iter->counters.pcnt); + ++i; + } xt_info_wrunlock(cpu); } local_bh_enable(); @@ -1148,7 +1120,7 @@ static int compat_table_info(const struct xt_table_info *info, { struct ip6t_entry *iter; void *loc_cpu_entry; - int ret = 0; + int ret; if (!newinfo || !info) return -EINVAL; @@ -1160,9 +1132,9 @@ static int compat_table_info(const struct xt_table_info *info, xt_entry_foreach(iter, loc_cpu_entry, info->size) { ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); if (ret != 0) - break; + return ret; } - return ret; + return 0; } #endif @@ -1321,8 +1293,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) - if (cleanup_entry(iter, net, NULL) != 0) - break; + cleanup_entry(iter, net); xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, @@ -1385,26 +1356,12 @@ do_replace(struct net *net, const void __user *user, unsigned int len) free_newinfo_untrans: xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) - if (cleanup_entry(iter, net, NULL) != 0) - break; + cleanup_entry(iter, net); free_newinfo: xt_free_table_info(newinfo); return ret; } -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. 
*/ -static int -add_counter_to_entry(struct ip6t_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - static int do_add_counters(struct net *net, const void __user *user, unsigned int len, int compat) @@ -1479,9 +1436,10 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, curcpu = smp_processor_id(); xt_info_wrlock(curcpu); loc_cpu_entry = private->entries[curcpu]; - xt_entry_foreach(iter, loc_cpu_entry, private->size) - if (add_counter_to_entry(iter, paddc, &i) != 0) - break; + xt_entry_foreach(iter, loc_cpu_entry, private->size) { + ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); + ++i; + } xt_info_wrunlock(curcpu); unlock_up_free: @@ -1510,7 +1468,7 @@ struct compat_ip6t_replace { static int compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr, unsigned int *size, struct xt_counters *counters, - unsigned int *i) + unsigned int i) { struct ip6t_entry_target *t; struct compat_ip6t_entry __user *ce; @@ -1518,14 +1476,12 @@ compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr, compat_uint_t origsize; int ret; - ret = -EFAULT; origsize = *size; ce = (struct compat_ip6t_entry __user *)*dstptr; - if (copy_to_user(ce, e, sizeof(struct ip6t_entry))) - goto out; - - if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i]))) - goto out; + if (copy_to_user(ce, e, sizeof(struct ip6t_entry)) != 0 || + copy_to_user(&ce->counters, &counters[i], + sizeof(counters[i])) != 0) + return -EFAULT; *dstptr += sizeof(struct compat_ip6t_entry); *size -= sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); @@ -1533,22 +1489,16 @@ compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr, ret = IP6T_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size); target_offset = e->target_offset - (origsize - *size); if (ret) - goto out; + return ret; t = ip6t_get_target(e); ret = xt_compat_target_to_user(t, dstptr, size); if (ret) - goto out; - ret = -EFAULT; + return ret; next_offset = e->next_offset - (origsize - *size); - if (put_user(target_offset, &ce->target_offset)) - goto out; - if (put_user(next_offset, &ce->next_offset)) - goto out; - - (*i)++; + if (put_user(target_offset, &ce->target_offset) != 0 || + put_user(next_offset, &ce->next_offset) != 0) + return -EFAULT; return 0; -out: - return ret; } static int @@ -1585,19 +1535,14 @@ compat_release_match(struct ip6t_entry_match *m, unsigned int *i) return 0; } -static int -compat_release_entry(struct compat_ip6t_entry *e, unsigned int *i) +static void compat_release_entry(struct compat_ip6t_entry *e) { struct ip6t_entry_target *t; - if (i && (*i)-- == 0) - return 1; - /* Cleanup all matches */ COMPAT_IP6T_MATCH_ITERATE(e, compat_release_match, NULL); t = compat_ip6t_get_target(e); module_put(t->u.kernel.target->me); - return 0; } static int @@ -1608,7 +1553,6 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, const unsigned char *limit, const unsigned int *hook_entries, const unsigned int *underflows, - unsigned int *i, const char *name) { struct ip6t_entry_target *t; @@ -1674,8 +1618,6 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, /* Clear counters and comefrom */ memset(&e->counters, 0, sizeof(e->counters)); e->comefrom = 0; - - (*i)++; return 0; out: @@ -1725,7 +1667,7 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, } static int compat_check_entry(struct ip6t_entry *e, struct net *net, - const char 
*name, unsigned int *i) + const char *name) { unsigned int j; int ret; @@ -1744,8 +1686,6 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net, ret = check_target(e, net, name); if (ret) goto cleanup_matches; - - (*i)++; return 0; cleanup_matches: @@ -1790,12 +1730,11 @@ translate_compat_table(struct net *net, xt_entry_foreach(iter0, entry0, total_size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, entry0 + total_size, hook_entries, underflows, - &j, name); + name); if (ret != 0) - break; + goto out_unlock; + ++j; } - if (ret != 0) - goto out_unlock; ret = -EINVAL; if (j != number) { @@ -1851,9 +1790,10 @@ translate_compat_table(struct net *net, i = 0; xt_entry_foreach(iter1, entry1, newinfo->size) { - ret = compat_check_entry(iter1, net, name, &i); + ret = compat_check_entry(iter1, net, name); if (ret != 0) break; + ++i; } if (ret) { /* @@ -1866,12 +1806,15 @@ translate_compat_table(struct net *net, xt_entry_foreach(iter0, entry0, newinfo->size) { if (skip-- > 0) continue; - if (compat_release_entry(iter0, &j) != 0) + if (j-- == 0) break; + compat_release_entry(iter0); } - xt_entry_foreach(iter1, entry1, newinfo->size) - if (cleanup_entry(iter1, net, &i) != 0) + xt_entry_foreach(iter1, entry1, newinfo->size) { + if (i-- == 0) break; + cleanup_entry(iter1, net); + } xt_free_table_info(newinfo); return ret; } @@ -1889,9 +1832,11 @@ translate_compat_table(struct net *net, free_newinfo: xt_free_table_info(newinfo); out: - xt_entry_foreach(iter0, entry0, total_size) - if (compat_release_entry(iter0, &j) != 0) + xt_entry_foreach(iter0, entry0, total_size) { + if (j-- == 0) break; + compat_release_entry(iter0); + } return ret; out_unlock: xt_compat_flush_offsets(AF_INET6); @@ -1946,8 +1891,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) free_newinfo_untrans: xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) - if (cleanup_entry(iter, net, NULL) != 0) - break; + cleanup_entry(iter, net); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -2011,7 +1955,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, size = total_size; xt_entry_foreach(iter, loc_cpu_entry, total_size) { ret = compat_copy_entry_to_user(iter, &pos, - &size, counters, &i); + &size, counters, i++); if (ret != 0) break; } @@ -2221,8 +2165,7 @@ void ip6t_unregister_table(struct net *net, struct xt_table *table) /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; xt_entry_foreach(iter, loc_cpu_entry, private->size) - if (cleanup_entry(iter, net, NULL) != 0) - break; + cleanup_entry(iter, net); if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); -- cgit v1.2.2 From dcea992aca82cb08b4674c4c783e325835408d1e Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 24 Feb 2010 18:34:48 +0100 Subject: netfilter: xtables: replace XT_MATCH_ITERATE macro The macro is replaced by a list.h-like foreach loop. This makes the code more inspectable. 
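For illustration only (this sketch is not part of the patch, and the type, field and macro names in it are made up), the pattern change can be reduced to a small standalone C program: the old IPT_MATCH_ITERATE style walks the packed match records through a callback whose non-zero return value doubles as a break signal, while a list.h-like foreach macro open-codes the same walk so that break, goto and ordinary error returns can be used directly at the call site.

  /* Standalone sketch of the callback-vs-foreach pattern.  All names here
   * (toy_*, toy_ematch_foreach) are illustrative, not the kernel's, and the
   * layout is simplified; only the iteration style mirrors the patch. */
  #include <stdio.h>

  struct toy_match {
          unsigned short match_size;      /* total size of this record */
          char name[14];
  };

  struct toy_entry {
          unsigned short target_offset;   /* offset where the match area ends */
          unsigned char elems[2 * sizeof(struct toy_match)];
  };

  /* Old style: callback iterator, a non-zero return aborts the walk. */
  static int toy_match_iterate(struct toy_entry *e,
                               int (*fn)(struct toy_match *, void *), void *arg)
  {
          struct toy_match *m = (struct toy_match *)e->elems;
          int ret;

          while ((unsigned char *)m < (unsigned char *)e + e->target_offset) {
                  ret = fn(m, arg);
                  if (ret != 0)
                          return ret;
                  m = (struct toy_match *)((char *)m + m->match_size);
          }
          return 0;
  }

  /* New style: list.h-like foreach, break/continue work at the call site. */
  #define toy_ematch_foreach(pos, entry)                                      \
          for ((pos) = (struct toy_match *)(entry)->elems;                    \
               (unsigned char *)(pos) <                                       \
                       (unsigned char *)(entry) + (entry)->target_offset;     \
               (pos) = (struct toy_match *)((char *)(pos) + (pos)->match_size))

  static int print_match(struct toy_match *m, void *arg)
  {
          (void)arg;
          printf("callback: %s\n", m->name);
          return 0;       /* returning non-zero would stop the iteration */
  }

  int main(void)
  {
          struct toy_entry e = { .target_offset = sizeof(e) };
          struct toy_match *m = (struct toy_match *)e.elems, *pos;

          m[0] = (struct toy_match){ sizeof(*m), "tcp" };
          m[1] = (struct toy_match){ sizeof(*m), "state" };

          toy_match_iterate(&e, print_match, NULL);       /* old pattern */

          toy_ematch_foreach(pos, &e)                     /* new pattern */
                  printf("foreach:  %s\n", pos->name);
          return 0;
  }

The follow-up patch in this series ("netfilter: xtables: optimize call flow around xt_ematch_foreach") leans on exactly this property: the unsigned int * counter arguments threaded through the old callbacks can be dropped, because callers now count entries and unwind with ordinary loop code.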
Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ip_tables.c | 78 ++++++++++++++++++++++++++++++++--------- net/ipv6/netfilter/ip6_tables.c | 78 ++++++++++++++++++++++++++++++++--------- net/netfilter/xt_TCPMSS.c | 12 ++++--- 3 files changed, 130 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 9c8aa394c51c..3a7fc732b918 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -366,16 +366,21 @@ ipt_do_table(struct sk_buff *skb, do { const struct ipt_entry_target *t; + const struct xt_entry_match *ematch; IP_NF_ASSERT(e); IP_NF_ASSERT(back); if (!ip_packet_match(ip, indev, outdev, - &e->ip, mtpar.fragoff) || - IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) { + &e->ip, mtpar.fragoff)) { + no_match: e = ipt_next_entry(e); continue; } + xt_ematch_foreach(ematch, e) + if (do_match(ematch, skb, &mtpar) != 0) + goto no_match; + ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); t = ipt_get_target(e); @@ -686,6 +691,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, int ret; unsigned int j; struct xt_mtchk_param mtpar; + struct xt_entry_match *ematch; ret = check_entry(e, name); if (ret) @@ -697,7 +703,11 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, mtpar.entryinfo = &e->ip; mtpar.hook_mask = e->comefrom; mtpar.family = NFPROTO_IPV4; - ret = IPT_MATCH_ITERATE(e, find_check_match, &mtpar, &j); + xt_ematch_foreach(ematch, e) { + ret = find_check_match(ematch, &mtpar, &j); + if (ret != 0) + break; + } if (ret != 0) goto cleanup_matches; @@ -720,7 +730,9 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, err: module_put(t->u.kernel.target->me); cleanup_matches: - IPT_MATCH_ITERATE(e, cleanup_match, net, &j); + xt_ematch_foreach(ematch, e) + if (cleanup_match(ematch, net, &j) != 0) + break; return ret; } @@ -791,9 +803,12 @@ cleanup_entry(struct ipt_entry *e, struct net *net) { struct xt_tgdtor_param par; struct ipt_entry_target *t; + struct xt_entry_match *ematch; /* Cleanup all matches */ - IPT_MATCH_ITERATE(e, cleanup_match, net, NULL); + xt_ematch_foreach(ematch, e) + if (cleanup_match(ematch, net, NULL) != 0) + break; t = ipt_get_target(e); par.net = net; @@ -1060,13 +1075,16 @@ static int compat_calc_entry(const struct ipt_entry *e, const struct xt_table_info *info, const void *base, struct xt_table_info *newinfo) { + const struct xt_entry_match *ematch; const struct ipt_entry_target *t; unsigned int entry_offset; int off, i, ret; off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); entry_offset = (void *)e - base; - IPT_MATCH_ITERATE(e, compat_calc_match, &off); + xt_ematch_foreach(ematch, e) + if (compat_calc_match(ematch, &off) != 0) + break; t = ipt_get_target_c(e); off += xt_compat_target_offset(t->u.kernel.target); newinfo->size -= off; @@ -1441,7 +1459,8 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, struct compat_ipt_entry __user *ce; u_int16_t target_offset, next_offset; compat_uint_t origsize; - int ret; + const struct xt_entry_match *ematch; + int ret = 0; origsize = *size; ce = (struct compat_ipt_entry __user *)*dstptr; @@ -1453,7 +1472,11 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, *dstptr += sizeof(struct compat_ipt_entry); *size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); - ret = IPT_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size); + xt_ematch_foreach(ematch, e) { 
+ ret = xt_compat_match_to_user(ematch, dstptr, size); + if (ret != 0) + break; + } target_offset = e->target_offset - (origsize - *size); if (ret) return ret; @@ -1505,9 +1528,12 @@ compat_release_match(struct ipt_entry_match *m, unsigned int *i) static void compat_release_entry(struct compat_ipt_entry *e) { struct ipt_entry_target *t; + struct xt_entry_match *ematch; /* Cleanup all matches */ - COMPAT_IPT_MATCH_ITERATE(e, compat_release_match, NULL); + xt_ematch_foreach(ematch, e) + if (compat_release_match(ematch, NULL) != 0) + break; t = compat_ipt_get_target(e); module_put(t->u.kernel.target->me); } @@ -1522,6 +1548,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, const unsigned int *underflows, const char *name) { + struct xt_entry_match *ematch; struct ipt_entry_target *t; struct xt_target *target; unsigned int entry_offset; @@ -1550,8 +1577,12 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); entry_offset = (void *)e - (void *)base; j = 0; - ret = COMPAT_IPT_MATCH_ITERATE(e, compat_find_calc_match, name, - &e->ip, e->comefrom, &off, &j); + xt_ematch_foreach(ematch, e) { + ret = compat_find_calc_match(ematch, name, + &e->ip, e->comefrom, &off, &j); + if (ret != 0) + break; + } if (ret != 0) goto release_matches; @@ -1590,7 +1621,9 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, out: module_put(t->u.kernel.target->me); release_matches: - IPT_MATCH_ITERATE(e, compat_release_match, &j); + xt_ematch_foreach(ematch, e) + if (compat_release_match(ematch, &j) != 0) + break; return ret; } @@ -1604,6 +1637,7 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, struct ipt_entry *de; unsigned int origsize; int ret, h; + struct xt_entry_match *ematch; ret = 0; origsize = *size; @@ -1614,8 +1648,11 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, *dstptr += sizeof(struct ipt_entry); *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); - ret = COMPAT_IPT_MATCH_ITERATE(e, xt_compat_match_from_user, - dstptr, size); + xt_ematch_foreach(ematch, e) { + ret = xt_compat_match_from_user(ematch, dstptr, size); + if (ret != 0) + break; + } if (ret) return ret; de->target_offset = e->target_offset - (origsize - *size); @@ -1636,9 +1673,10 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, static int compat_check_entry(struct ipt_entry *e, struct net *net, const char *name) { + struct xt_entry_match *ematch; struct xt_mtchk_param mtpar; unsigned int j; - int ret; + int ret = 0; j = 0; mtpar.net = net; @@ -1646,7 +1684,11 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name) mtpar.entryinfo = &e->ip; mtpar.hook_mask = e->comefrom; mtpar.family = NFPROTO_IPV4; - ret = IPT_MATCH_ITERATE(e, check_match, &mtpar, &j); + xt_ematch_foreach(ematch, e) { + ret = check_match(ematch, &mtpar, &j); + if (ret != 0) + break; + } if (ret) goto cleanup_matches; @@ -1656,7 +1698,9 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name) return 0; cleanup_matches: - IPT_MATCH_ITERATE(e, cleanup_match, net, &j); + xt_ematch_foreach(ematch, e) + if (cleanup_match(ematch, net, &j) != 0) + break; return ret; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index b7e27c19c7ab..1537e6bad5d9 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -393,16 +393,21 @@ ip6t_do_table(struct sk_buff *skb, do { const struct 
ip6t_entry_target *t; + const struct xt_entry_match *ematch; IP_NF_ASSERT(e); IP_NF_ASSERT(back); if (!ip6_packet_match(skb, indev, outdev, &e->ipv6, - &mtpar.thoff, &mtpar.fragoff, &hotdrop) || - IP6T_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) { + &mtpar.thoff, &mtpar.fragoff, &hotdrop)) { + no_match: e = ip6t_next_entry(e); continue; } + xt_ematch_foreach(ematch, e) + if (do_match(ematch, skb, &mtpar) != 0) + goto no_match; + ADD_COUNTER(e->counters, ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr), 1); @@ -717,6 +722,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, int ret; unsigned int j; struct xt_mtchk_param mtpar; + struct xt_entry_match *ematch; ret = check_entry(e, name); if (ret) @@ -728,7 +734,11 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, mtpar.entryinfo = &e->ipv6; mtpar.hook_mask = e->comefrom; mtpar.family = NFPROTO_IPV6; - ret = IP6T_MATCH_ITERATE(e, find_check_match, &mtpar, &j); + xt_ematch_foreach(ematch, e) { + ret = find_check_match(ematch, &mtpar, &j); + if (ret != 0) + break; + } if (ret != 0) goto cleanup_matches; @@ -751,7 +761,9 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, err: module_put(t->u.kernel.target->me); cleanup_matches: - IP6T_MATCH_ITERATE(e, cleanup_match, net, &j); + xt_ematch_foreach(ematch, e) + if (cleanup_match(ematch, net, &j) != 0) + break; return ret; } @@ -821,9 +833,12 @@ static void cleanup_entry(struct ip6t_entry *e, struct net *net) { struct xt_tgdtor_param par; struct ip6t_entry_target *t; + struct xt_entry_match *ematch; /* Cleanup all matches */ - IP6T_MATCH_ITERATE(e, cleanup_match, net, NULL); + xt_ematch_foreach(ematch, e) + if (cleanup_match(ematch, net, NULL) != 0) + break; t = ip6t_get_target(e); par.net = net; @@ -1090,13 +1105,16 @@ static int compat_calc_entry(const struct ip6t_entry *e, const struct xt_table_info *info, const void *base, struct xt_table_info *newinfo) { + const struct xt_entry_match *ematch; const struct ip6t_entry_target *t; unsigned int entry_offset; int off, i, ret; off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); entry_offset = (void *)e - base; - IP6T_MATCH_ITERATE(e, compat_calc_match, &off); + xt_ematch_foreach(ematch, e) + if (compat_calc_match(ematch, &off) != 0) + break; t = ip6t_get_target_c(e); off += xt_compat_target_offset(t->u.kernel.target); newinfo->size -= off; @@ -1474,7 +1492,8 @@ compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr, struct compat_ip6t_entry __user *ce; u_int16_t target_offset, next_offset; compat_uint_t origsize; - int ret; + const struct xt_entry_match *ematch; + int ret = 0; origsize = *size; ce = (struct compat_ip6t_entry __user *)*dstptr; @@ -1486,7 +1505,11 @@ compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr, *dstptr += sizeof(struct compat_ip6t_entry); *size -= sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); - ret = IP6T_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size); + xt_ematch_foreach(ematch, e) { + ret = xt_compat_match_to_user(ematch, dstptr, size); + if (ret != 0) + break; + } target_offset = e->target_offset - (origsize - *size); if (ret) return ret; @@ -1538,9 +1561,12 @@ compat_release_match(struct ip6t_entry_match *m, unsigned int *i) static void compat_release_entry(struct compat_ip6t_entry *e) { struct ip6t_entry_target *t; + struct xt_entry_match *ematch; /* Cleanup all matches */ - COMPAT_IP6T_MATCH_ITERATE(e, compat_release_match, NULL); + xt_ematch_foreach(ematch, e) + 
if (compat_release_match(ematch, NULL) != 0) + break; t = compat_ip6t_get_target(e); module_put(t->u.kernel.target->me); } @@ -1555,6 +1581,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, const unsigned int *underflows, const char *name) { + struct xt_entry_match *ematch; struct ip6t_entry_target *t; struct xt_target *target; unsigned int entry_offset; @@ -1583,8 +1610,12 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); entry_offset = (void *)e - (void *)base; j = 0; - ret = COMPAT_IP6T_MATCH_ITERATE(e, compat_find_calc_match, name, - &e->ipv6, e->comefrom, &off, &j); + xt_ematch_foreach(ematch, e) { + ret = compat_find_calc_match(ematch, name, + &e->ipv6, e->comefrom, &off, &j); + if (ret != 0) + break; + } if (ret != 0) goto release_matches; @@ -1623,7 +1654,9 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, out: module_put(t->u.kernel.target->me); release_matches: - IP6T_MATCH_ITERATE(e, compat_release_match, &j); + xt_ematch_foreach(ematch, e) + if (compat_release_match(ematch, &j) != 0) + break; return ret; } @@ -1637,6 +1670,7 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, struct ip6t_entry *de; unsigned int origsize; int ret, h; + struct xt_entry_match *ematch; ret = 0; origsize = *size; @@ -1647,8 +1681,11 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, *dstptr += sizeof(struct ip6t_entry); *size += sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); - ret = COMPAT_IP6T_MATCH_ITERATE(e, xt_compat_match_from_user, - dstptr, size); + xt_ematch_foreach(ematch, e) { + ret = xt_compat_match_from_user(ematch, dstptr, size); + if (ret != 0) + break; + } if (ret) return ret; de->target_offset = e->target_offset - (origsize - *size); @@ -1670,8 +1707,9 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net, const char *name) { unsigned int j; - int ret; + int ret = 0; struct xt_mtchk_param mtpar; + struct xt_entry_match *ematch; j = 0; mtpar.net = net; @@ -1679,7 +1717,11 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net, mtpar.entryinfo = &e->ipv6; mtpar.hook_mask = e->comefrom; mtpar.family = NFPROTO_IPV6; - ret = IP6T_MATCH_ITERATE(e, check_match, &mtpar, &j); + xt_ematch_foreach(ematch, e) { + ret = check_match(ematch, &mtpar, &j); + if (ret != 0) + break; + } if (ret) goto cleanup_matches; @@ -1689,7 +1731,9 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net, return 0; cleanup_matches: - IP6T_MATCH_ITERATE(e, cleanup_match, net, &j); + xt_ematch_foreach(ematch, e) + if (cleanup_match(ematch, net, &j) != 0) + break; return ret; } diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 6f21b4377dbb..0e357ac9a2a8 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -239,6 +239,7 @@ static bool tcpmss_tg4_check(const struct xt_tgchk_param *par) { const struct xt_tcpmss_info *info = par->targinfo; const struct ipt_entry *e = par->entryinfo; + const struct xt_entry_match *ematch; if (info->mss == XT_TCPMSS_CLAMP_PMTU && (par->hook_mask & ~((1 << NF_INET_FORWARD) | @@ -248,8 +249,9 @@ static bool tcpmss_tg4_check(const struct xt_tgchk_param *par) "FORWARD, OUTPUT and POSTROUTING hooks\n"); return false; } - if (IPT_MATCH_ITERATE(e, find_syn_match)) - return true; + xt_ematch_foreach(ematch, e) + if (find_syn_match(ematch)) + return true; printk("xt_TCPMSS: Only works on TCP SYN packets\n"); return false; } @@ -259,6 
+261,7 @@ static bool tcpmss_tg6_check(const struct xt_tgchk_param *par) { const struct xt_tcpmss_info *info = par->targinfo; const struct ip6t_entry *e = par->entryinfo; + const struct xt_entry_match *ematch; if (info->mss == XT_TCPMSS_CLAMP_PMTU && (par->hook_mask & ~((1 << NF_INET_FORWARD) | @@ -268,8 +271,9 @@ static bool tcpmss_tg6_check(const struct xt_tgchk_param *par) "FORWARD, OUTPUT and POSTROUTING hooks\n"); return false; } - if (IP6T_MATCH_ITERATE(e, find_syn_match)) - return true; + xt_ematch_foreach(ematch, e) + if (find_syn_match(ematch)) + return true; printk("xt_TCPMSS: Only works on TCP SYN packets\n"); return false; } -- cgit v1.2.2 From 6bdb331bc6910d1ccb74dc9852fc858c5916c927 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 24 Feb 2010 18:35:37 +0100 Subject: netfilter: xtables: optimize call flow around xt_ematch_foreach Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ip_tables.c | 93 ++++++++++++++--------------------------- net/ipv6/netfilter/ip6_tables.c | 93 ++++++++++++++--------------------------- 2 files changed, 62 insertions(+), 124 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 3a7fc732b918..36edc7d5f284 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -572,14 +572,10 @@ mark_source_chains(const struct xt_table_info *newinfo, return 1; } -static int -cleanup_match(struct ipt_entry_match *m, struct net *net, unsigned int *i) +static void cleanup_match(struct ipt_entry_match *m, struct net *net) { struct xt_mtdtor_param par; - if (i && (*i)-- == 0) - return 1; - par.net = net; par.match = m->u.kernel.match; par.matchinfo = m->data; @@ -587,7 +583,6 @@ cleanup_match(struct ipt_entry_match *m, struct net *net, unsigned int *i) if (par.match->destroy != NULL) par.match->destroy(&par); module_put(par.match->me); - return 0; } static int @@ -612,8 +607,7 @@ check_entry(const struct ipt_entry *e, const char *name) } static int -check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, - unsigned int *i) +check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par) { const struct ipt_ip *ip = par->entryinfo; int ret; @@ -628,13 +622,11 @@ check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, par.match->name); return ret; } - ++*i; return 0; } static int -find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, - unsigned int *i) +find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par) { struct xt_match *match; int ret; @@ -648,7 +640,7 @@ find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, } m->u.kernel.match = match; - ret = check_match(m, par, i); + ret = check_match(m, par); if (ret) goto err; @@ -704,12 +696,11 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, mtpar.hook_mask = e->comefrom; mtpar.family = NFPROTO_IPV4; xt_ematch_foreach(ematch, e) { - ret = find_check_match(ematch, &mtpar, &j); + ret = find_check_match(ematch, &mtpar); if (ret != 0) - break; + goto cleanup_matches; + ++j; } - if (ret != 0) - goto cleanup_matches; t = ipt_get_target(e); target = try_then_request_module(xt_find_target(AF_INET, @@ -730,9 +721,11 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, err: module_put(t->u.kernel.target->me); cleanup_matches: - xt_ematch_foreach(ematch, e) - if (cleanup_match(ematch, net, &j) != 0) + xt_ematch_foreach(ematch, e) { + if (j-- == 0) break; + cleanup_match(ematch, net); + 
} return ret; } @@ -807,8 +800,7 @@ cleanup_entry(struct ipt_entry *e, struct net *net) /* Cleanup all matches */ xt_ematch_foreach(ematch, e) - if (cleanup_match(ematch, net, NULL) != 0) - break; + cleanup_match(ematch, net); t = ipt_get_target(e); par.net = net; @@ -1064,13 +1056,6 @@ static int compat_standard_to_user(void __user *dst, const void *src) return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; } -static inline int -compat_calc_match(const struct ipt_entry_match *m, int *size) -{ - *size += xt_compat_match_offset(m->u.kernel.match); - return 0; -} - static int compat_calc_entry(const struct ipt_entry *e, const struct xt_table_info *info, const void *base, struct xt_table_info *newinfo) @@ -1083,8 +1068,7 @@ static int compat_calc_entry(const struct ipt_entry *e, off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); entry_offset = (void *)e - base; xt_ematch_foreach(ematch, e) - if (compat_calc_match(ematch, &off) != 0) - break; + off += xt_compat_match_offset(ematch->u.kernel.match); t = ipt_get_target_c(e); off += xt_compat_target_offset(t->u.kernel.target); newinfo->size -= off; @@ -1475,11 +1459,9 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, xt_ematch_foreach(ematch, e) { ret = xt_compat_match_to_user(ematch, dstptr, size); if (ret != 0) - break; + return ret; } target_offset = e->target_offset - (origsize - *size); - if (ret) - return ret; t = ipt_get_target(e); ret = xt_compat_target_to_user(t, dstptr, size); if (ret) @@ -1496,7 +1478,7 @@ compat_find_calc_match(struct ipt_entry_match *m, const char *name, const struct ipt_ip *ip, unsigned int hookmask, - int *size, unsigned int *i) + int *size) { struct xt_match *match; @@ -1510,18 +1492,6 @@ compat_find_calc_match(struct ipt_entry_match *m, } m->u.kernel.match = match; *size += xt_compat_match_offset(match); - - (*i)++; - return 0; -} - -static int -compat_release_match(struct ipt_entry_match *m, unsigned int *i) -{ - if (i && (*i)-- == 0) - return 1; - - module_put(m->u.kernel.match->me); return 0; } @@ -1532,8 +1502,7 @@ static void compat_release_entry(struct compat_ipt_entry *e) /* Cleanup all matches */ xt_ematch_foreach(ematch, e) - if (compat_release_match(ematch, NULL) != 0) - break; + module_put(ematch->u.kernel.match->me); t = compat_ipt_get_target(e); module_put(t->u.kernel.target->me); } @@ -1579,12 +1548,11 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, j = 0; xt_ematch_foreach(ematch, e) { ret = compat_find_calc_match(ematch, name, - &e->ip, e->comefrom, &off, &j); + &e->ip, e->comefrom, &off); if (ret != 0) - break; + goto release_matches; + ++j; } - if (ret != 0) - goto release_matches; t = compat_ipt_get_target(e); target = try_then_request_module(xt_find_target(AF_INET, @@ -1621,9 +1589,11 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, out: module_put(t->u.kernel.target->me); release_matches: - xt_ematch_foreach(ematch, e) - if (compat_release_match(ematch, &j) != 0) + xt_ematch_foreach(ematch, e) { + if (j-- == 0) break; + module_put(ematch->u.kernel.match->me); + } return ret; } @@ -1651,10 +1621,8 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, xt_ematch_foreach(ematch, e) { ret = xt_compat_match_from_user(ematch, dstptr, size); if (ret != 0) - break; + return ret; } - if (ret) - return ret; de->target_offset = e->target_offset - (origsize - *size); t = compat_ipt_get_target(e); target = t->u.kernel.target; @@ -1685,12 +1653,11 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const 
char *name) mtpar.hook_mask = e->comefrom; mtpar.family = NFPROTO_IPV4; xt_ematch_foreach(ematch, e) { - ret = check_match(ematch, &mtpar, &j); + ret = check_match(ematch, &mtpar); if (ret != 0) - break; + goto cleanup_matches; + ++j; } - if (ret) - goto cleanup_matches; ret = check_target(e, net, name); if (ret) @@ -1698,9 +1665,11 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name) return 0; cleanup_matches: - xt_ematch_foreach(ematch, e) - if (cleanup_match(ematch, net, &j) != 0) + xt_ematch_foreach(ematch, e) { + if (j-- == 0) break; + cleanup_match(ematch, net); + } return ret; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 1537e6bad5d9..c5a963e4b545 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -603,14 +603,10 @@ mark_source_chains(const struct xt_table_info *newinfo, return 1; } -static int -cleanup_match(struct ip6t_entry_match *m, struct net *net, unsigned int *i) +static void cleanup_match(struct ip6t_entry_match *m, struct net *net) { struct xt_mtdtor_param par; - if (i && (*i)-- == 0) - return 1; - par.net = net; par.match = m->u.kernel.match; par.matchinfo = m->data; @@ -618,7 +614,6 @@ cleanup_match(struct ip6t_entry_match *m, struct net *net, unsigned int *i) if (par.match->destroy != NULL) par.match->destroy(&par); module_put(par.match->me); - return 0; } static int @@ -642,8 +637,7 @@ check_entry(const struct ip6t_entry *e, const char *name) return 0; } -static int check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par, - unsigned int *i) +static int check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par) { const struct ip6t_ip6 *ipv6 = par->entryinfo; int ret; @@ -658,13 +652,11 @@ static int check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par, par.match->name); return ret; } - ++*i; return 0; } static int -find_check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par, - unsigned int *i) +find_check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par) { struct xt_match *match; int ret; @@ -678,7 +670,7 @@ find_check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par, } m->u.kernel.match = match; - ret = check_match(m, par, i); + ret = check_match(m, par); if (ret) goto err; @@ -735,12 +727,11 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, mtpar.hook_mask = e->comefrom; mtpar.family = NFPROTO_IPV6; xt_ematch_foreach(ematch, e) { - ret = find_check_match(ematch, &mtpar, &j); + ret = find_check_match(ematch, &mtpar); if (ret != 0) - break; + goto cleanup_matches; + ++j; } - if (ret != 0) - goto cleanup_matches; t = ip6t_get_target(e); target = try_then_request_module(xt_find_target(AF_INET6, @@ -761,9 +752,11 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, err: module_put(t->u.kernel.target->me); cleanup_matches: - xt_ematch_foreach(ematch, e) - if (cleanup_match(ematch, net, &j) != 0) + xt_ematch_foreach(ematch, e) { + if (j-- == 0) break; + cleanup_match(ematch, net); + } return ret; } @@ -837,8 +830,7 @@ static void cleanup_entry(struct ip6t_entry *e, struct net *net) /* Cleanup all matches */ xt_ematch_foreach(ematch, e) - if (cleanup_match(ematch, net, NULL) != 0) - break; + cleanup_match(ematch, net); t = ip6t_get_target(e); par.net = net; @@ -1094,13 +1086,6 @@ static int compat_standard_to_user(void __user *dst, const void *src) return copy_to_user(dst, &cv, sizeof(cv)) ? 
-EFAULT : 0; } -static inline int -compat_calc_match(const struct ip6t_entry_match *m, int *size) -{ - *size += xt_compat_match_offset(m->u.kernel.match); - return 0; -} - static int compat_calc_entry(const struct ip6t_entry *e, const struct xt_table_info *info, const void *base, struct xt_table_info *newinfo) @@ -1113,8 +1098,7 @@ static int compat_calc_entry(const struct ip6t_entry *e, off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); entry_offset = (void *)e - base; xt_ematch_foreach(ematch, e) - if (compat_calc_match(ematch, &off) != 0) - break; + off += xt_compat_match_offset(ematch->u.kernel.match); t = ip6t_get_target_c(e); off += xt_compat_target_offset(t->u.kernel.target); newinfo->size -= off; @@ -1508,11 +1492,9 @@ compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr, xt_ematch_foreach(ematch, e) { ret = xt_compat_match_to_user(ematch, dstptr, size); if (ret != 0) - break; + return ret; } target_offset = e->target_offset - (origsize - *size); - if (ret) - return ret; t = ip6t_get_target(e); ret = xt_compat_target_to_user(t, dstptr, size); if (ret) @@ -1529,7 +1511,7 @@ compat_find_calc_match(struct ip6t_entry_match *m, const char *name, const struct ip6t_ip6 *ipv6, unsigned int hookmask, - int *size, unsigned int *i) + int *size) { struct xt_match *match; @@ -1543,18 +1525,6 @@ compat_find_calc_match(struct ip6t_entry_match *m, } m->u.kernel.match = match; *size += xt_compat_match_offset(match); - - (*i)++; - return 0; -} - -static int -compat_release_match(struct ip6t_entry_match *m, unsigned int *i) -{ - if (i && (*i)-- == 0) - return 1; - - module_put(m->u.kernel.match->me); return 0; } @@ -1565,8 +1535,7 @@ static void compat_release_entry(struct compat_ip6t_entry *e) /* Cleanup all matches */ xt_ematch_foreach(ematch, e) - if (compat_release_match(ematch, NULL) != 0) - break; + module_put(ematch->u.kernel.match->me); t = compat_ip6t_get_target(e); module_put(t->u.kernel.target->me); } @@ -1612,12 +1581,11 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, j = 0; xt_ematch_foreach(ematch, e) { ret = compat_find_calc_match(ematch, name, - &e->ipv6, e->comefrom, &off, &j); + &e->ipv6, e->comefrom, &off); if (ret != 0) - break; + goto release_matches; + ++j; } - if (ret != 0) - goto release_matches; t = compat_ip6t_get_target(e); target = try_then_request_module(xt_find_target(AF_INET6, @@ -1654,9 +1622,11 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, out: module_put(t->u.kernel.target->me); release_matches: - xt_ematch_foreach(ematch, e) - if (compat_release_match(ematch, &j) != 0) + xt_ematch_foreach(ematch, e) { + if (j-- == 0) break; + module_put(ematch->u.kernel.match->me); + } return ret; } @@ -1684,10 +1654,8 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, xt_ematch_foreach(ematch, e) { ret = xt_compat_match_from_user(ematch, dstptr, size); if (ret != 0) - break; + return ret; } - if (ret) - return ret; de->target_offset = e->target_offset - (origsize - *size); t = compat_ip6t_get_target(e); target = t->u.kernel.target; @@ -1718,12 +1686,11 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net, mtpar.hook_mask = e->comefrom; mtpar.family = NFPROTO_IPV6; xt_ematch_foreach(ematch, e) { - ret = check_match(ematch, &mtpar, &j); + ret = check_match(ematch, &mtpar); if (ret != 0) - break; + goto cleanup_matches; + ++j; } - if (ret) - goto cleanup_matches; ret = check_target(e, net, name); if (ret) @@ -1731,9 +1698,11 @@ static int compat_check_entry(struct 
ip6t_entry *e, struct net *net, return 0; cleanup_matches: - xt_ematch_foreach(ematch, e) - if (cleanup_match(ematch, net, &j) != 0) + xt_ematch_foreach(ematch, e) { + if (j-- == 0) break; + cleanup_match(ematch, net); + } return ret; } -- cgit v1.2.2 From 0f234214d15fa914436d304ecf5c3e43449e79f9 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 24 Feb 2010 18:36:04 +0100 Subject: netfilter: xtables: reduce arguments to translate_table Just pass in the entire repl struct. In case of a new table (e.g. ip6t_register_table), the repldata has been previously filled with table->name and table->size already (in ip6t_alloc_initial_table). Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/arp_tables.c | 42 +++++++++++++++-------------------------- net/ipv4/netfilter/ip_tables.c | 42 +++++++++++++++-------------------------- net/ipv6/netfilter/ip6_tables.c | 42 +++++++++++++++-------------------------- 3 files changed, 45 insertions(+), 81 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 5fdedeb46218..57098dcda294 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -622,21 +622,15 @@ static inline void cleanup_entry(struct arpt_entry *e) /* Checks and translates the user-supplied table segment (held in * newinfo). */ -static int translate_table(const char *name, - unsigned int valid_hooks, - struct xt_table_info *newinfo, - void *entry0, - unsigned int size, - unsigned int number, - const unsigned int *hook_entries, - const unsigned int *underflows) +static int translate_table(struct xt_table_info *newinfo, void *entry0, + const struct arpt_replace *repl) { struct arpt_entry *iter; unsigned int i; int ret = 0; - newinfo->size = size; - newinfo->number = number; + newinfo->size = repl->size; + newinfo->number = repl->num_entries; /* Init all hooks to impossible value. */ for (i = 0; i < NF_ARP_NUMHOOKS; i++) { @@ -650,7 +644,8 @@ static int translate_table(const char *name, /* Walk through entries, checking offsets. 
*/ xt_entry_foreach(iter, entry0, newinfo->size) { ret = check_entry_size_and_hooks(iter, newinfo, entry0, - entry0 + size, hook_entries, underflows, valid_hooks); + entry0 + repl->size, repl->hook_entry, repl->underflow, + repl->valid_hooks); if (ret != 0) break; ++i; @@ -659,30 +654,30 @@ static int translate_table(const char *name, if (ret != 0) return ret; - if (i != number) { + if (i != repl->num_entries) { duprintf("translate_table: %u not %u entries\n", - i, number); + i, repl->num_entries); return -EINVAL; } /* Check hooks all assigned */ for (i = 0; i < NF_ARP_NUMHOOKS; i++) { /* Only hooks which are valid */ - if (!(valid_hooks & (1 << i))) + if (!(repl->valid_hooks & (1 << i))) continue; if (newinfo->hook_entry[i] == 0xFFFFFFFF) { duprintf("Invalid hook entry %u %u\n", - i, hook_entries[i]); + i, repl->hook_entry[i]); return -EINVAL; } if (newinfo->underflow[i] == 0xFFFFFFFF) { duprintf("Invalid underflow %u %u\n", - i, underflows[i]); + i, repl->underflow[i]); return -EINVAL; } } - if (!mark_source_chains(newinfo, valid_hooks, entry0)) { + if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) { duprintf("Looping hook\n"); return -ELOOP; } @@ -690,7 +685,7 @@ static int translate_table(const char *name, /* Finally, each sanity check must pass */ i = 0; xt_entry_foreach(iter, entry0, newinfo->size) { - ret = find_check_entry(iter, name, size); + ret = find_check_entry(iter, repl->name, repl->size); if (ret != 0) break; ++i; @@ -1101,9 +1096,7 @@ static int do_replace(struct net *net, const void __user *user, goto free_newinfo; } - ret = translate_table(tmp.name, tmp.valid_hooks, - newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, - tmp.hook_entry, tmp.underflow); + ret = translate_table(newinfo, loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; @@ -1795,12 +1788,7 @@ struct xt_table *arpt_register_table(struct net *net, loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; memcpy(loc_cpu_entry, repl->entries, repl->size); - ret = translate_table(table->name, table->valid_hooks, - newinfo, loc_cpu_entry, repl->size, - repl->num_entries, - repl->hook_entry, - repl->underflow); - + ret = translate_table(newinfo, loc_cpu_entry, repl); duprintf("arpt_register_table: translate table gives %d\n", ret); if (ret != 0) goto out_free; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 36edc7d5f284..c92f4e541cf6 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -815,22 +815,15 @@ cleanup_entry(struct ipt_entry *e, struct net *net) /* Checks and translates the user-supplied table segment (held in newinfo) */ static int -translate_table(struct net *net, - const char *name, - unsigned int valid_hooks, - struct xt_table_info *newinfo, - void *entry0, - unsigned int size, - unsigned int number, - const unsigned int *hook_entries, - const unsigned int *underflows) +translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, + const struct ipt_replace *repl) { struct ipt_entry *iter; unsigned int i; int ret = 0; - newinfo->size = size; - newinfo->number = number; + newinfo->size = repl->size; + newinfo->number = repl->num_entries; /* Init all hooks to impossible value. */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { @@ -843,42 +836,43 @@ translate_table(struct net *net, /* Walk through entries, checking offsets. 
*/ xt_entry_foreach(iter, entry0, newinfo->size) { ret = check_entry_size_and_hooks(iter, newinfo, entry0, - entry0 + size, hook_entries, underflows, valid_hooks); + entry0 + repl->size, repl->hook_entry, repl->underflow, + repl->valid_hooks); if (ret != 0) return ret; ++i; } - if (i != number) { + if (i != repl->num_entries) { duprintf("translate_table: %u not %u entries\n", - i, number); + i, repl->num_entries); return -EINVAL; } /* Check hooks all assigned */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { /* Only hooks which are valid */ - if (!(valid_hooks & (1 << i))) + if (!(repl->valid_hooks & (1 << i))) continue; if (newinfo->hook_entry[i] == 0xFFFFFFFF) { duprintf("Invalid hook entry %u %u\n", - i, hook_entries[i]); + i, repl->hook_entry[i]); return -EINVAL; } if (newinfo->underflow[i] == 0xFFFFFFFF) { duprintf("Invalid underflow %u %u\n", - i, underflows[i]); + i, repl->underflow[i]); return -EINVAL; } } - if (!mark_source_chains(newinfo, valid_hooks, entry0)) + if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) return -ELOOP; /* Finally, each sanity check must pass */ i = 0; xt_entry_foreach(iter, entry0, newinfo->size) { - ret = find_check_entry(iter, net, name, size); + ret = find_check_entry(iter, net, repl->name, repl->size); if (ret != 0) break; ++i; @@ -1311,9 +1305,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_table(net, tmp.name, tmp.valid_hooks, - newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, - tmp.hook_entry, tmp.underflow); + ret = translate_table(net, newinfo, loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; @@ -2112,11 +2104,7 @@ struct xt_table *ipt_register_table(struct net *net, loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; memcpy(loc_cpu_entry, repl->entries, repl->size); - ret = translate_table(net, table->name, table->valid_hooks, - newinfo, loc_cpu_entry, repl->size, - repl->num_entries, - repl->hook_entry, - repl->underflow); + ret = translate_table(net, newinfo, loc_cpu_entry, repl); if (ret != 0) goto out_free; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index c5a963e4b545..f7042869198e 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -845,22 +845,15 @@ static void cleanup_entry(struct ip6t_entry *e, struct net *net) /* Checks and translates the user-supplied table segment (held in newinfo) */ static int -translate_table(struct net *net, - const char *name, - unsigned int valid_hooks, - struct xt_table_info *newinfo, - void *entry0, - unsigned int size, - unsigned int number, - const unsigned int *hook_entries, - const unsigned int *underflows) +translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, + const struct ip6t_replace *repl) { struct ip6t_entry *iter; unsigned int i; int ret = 0; - newinfo->size = size; - newinfo->number = number; + newinfo->size = repl->size; + newinfo->number = repl->num_entries; /* Init all hooks to impossible value. */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { @@ -873,42 +866,43 @@ translate_table(struct net *net, /* Walk through entries, checking offsets. 
*/ xt_entry_foreach(iter, entry0, newinfo->size) { ret = check_entry_size_and_hooks(iter, newinfo, entry0, - entry0 + size, hook_entries, underflows, valid_hooks); + entry0 + repl->size, repl->hook_entry, repl->underflow, + repl->valid_hooks); if (ret != 0) return ret; ++i; } - if (i != number) { + if (i != repl->num_entries) { duprintf("translate_table: %u not %u entries\n", - i, number); + i, repl->num_entries); return -EINVAL; } /* Check hooks all assigned */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { /* Only hooks which are valid */ - if (!(valid_hooks & (1 << i))) + if (!(repl->valid_hooks & (1 << i))) continue; if (newinfo->hook_entry[i] == 0xFFFFFFFF) { duprintf("Invalid hook entry %u %u\n", - i, hook_entries[i]); + i, repl->hook_entry[i]); return -EINVAL; } if (newinfo->underflow[i] == 0xFFFFFFFF) { duprintf("Invalid underflow %u %u\n", - i, underflows[i]); + i, repl->underflow[i]); return -EINVAL; } } - if (!mark_source_chains(newinfo, valid_hooks, entry0)) + if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) return -ELOOP; /* Finally, each sanity check must pass */ i = 0; xt_entry_foreach(iter, entry0, newinfo->size) { - ret = find_check_entry(iter, net, name, size); + ret = find_check_entry(iter, net, repl->name, repl->size); if (ret != 0) break; ++i; @@ -1342,9 +1336,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_table(net, tmp.name, tmp.valid_hooks, - newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, - tmp.hook_entry, tmp.underflow); + ret = translate_table(net, newinfo, loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; @@ -2145,11 +2137,7 @@ struct xt_table *ip6t_register_table(struct net *net, loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; memcpy(loc_cpu_entry, repl->entries, repl->size); - ret = translate_table(net, table->name, table->valid_hooks, - newinfo, loc_cpu_entry, repl->size, - repl->num_entries, - repl->hook_entry, - repl->underflow); + ret = translate_table(net, newinfo, loc_cpu_entry, repl); if (ret != 0) goto out_free; -- cgit v1.2.2 From a898def29e4119bc01ebe7ca97423181f4c0ea2d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 22 Feb 2010 17:04:49 -0800 Subject: net: Add checking to rcu_dereference() primitives Update rcu_dereference() primitives to use new lockdep-based checking. The rcu_dereference() in __in6_dev_get() may be protected either by rcu_read_lock() or RTNL, per Eric Dumazet. The rcu_dereference() in __sk_free() is protected by the fact that it is never reached if an update could change it. Check for this by using rcu_dereference_check() to verify that the struct sock's ->sk_wmem_alloc counter is zero. Acked-by: Eric Dumazet Acked-by: David S. Miller Signed-off-by: Paul E. 
McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1266887105-1528-5-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- net/core/dev.c | 2 +- net/core/filter.c | 6 +++--- net/core/rtnetlink.c | 8 ++++++++ net/core/sock.c | 3 ++- net/decnet/dn_route.c | 14 +++++++------- net/ipv4/route.c | 14 +++++++------- net/packet/af_packet.c | 2 +- 7 files changed, 29 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index ec874218b206..bb1f1da2b8a7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2041,7 +2041,7 @@ gso: rcu_read_lock_bh(); txq = dev_pick_tx(dev, skb); - q = rcu_dereference(txq->qdisc); + q = rcu_dereference_bh(txq->qdisc); #ifdef CONFIG_NET_CLS_ACT skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); diff --git a/net/core/filter.c b/net/core/filter.c index 08db7b9143a3..3541aa48d21d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -86,7 +86,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) return err; rcu_read_lock_bh(); - filter = rcu_dereference(sk->sk_filter); + filter = rcu_dereference_bh(sk->sk_filter); if (filter) { unsigned int pkt_len = sk_run_filter(skb, filter->insns, filter->len); @@ -521,7 +521,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) } rcu_read_lock_bh(); - old_fp = rcu_dereference(sk->sk_filter); + old_fp = rcu_dereference_bh(sk->sk_filter); rcu_assign_pointer(sk->sk_filter, fp); rcu_read_unlock_bh(); @@ -536,7 +536,7 @@ int sk_detach_filter(struct sock *sk) struct sk_filter *filter; rcu_read_lock_bh(); - filter = rcu_dereference(sk->sk_filter); + filter = rcu_dereference_bh(sk->sk_filter); if (filter) { rcu_assign_pointer(sk->sk_filter, NULL); sk_filter_delayed_uncharge(sk, filter); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 794bcb897ff0..4c7d3f635ba7 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -89,6 +89,14 @@ int rtnl_is_locked(void) } EXPORT_SYMBOL(rtnl_is_locked); +#ifdef CONFIG_PROVE_LOCKING +int lockdep_rtnl_is_held(void) +{ + return lockdep_is_held(&rtnl_mutex); +} +EXPORT_SYMBOL(lockdep_rtnl_is_held); +#endif /* #ifdef CONFIG_PROVE_LOCKING */ + static struct rtnl_link *rtnl_msg_handlers[NPROTO]; static inline int rtm_msgindex(int msgtype) diff --git a/net/core/sock.c b/net/core/sock.c index e1f6f225f012..305cba401ae6 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1073,7 +1073,8 @@ static void __sk_free(struct sock *sk) if (sk->sk_destruct) sk->sk_destruct(sk); - filter = rcu_dereference(sk->sk_filter); + filter = rcu_dereference_check(sk->sk_filter, + atomic_read(&sk->sk_wmem_alloc) == 0); if (filter) { sk_filter_uncharge(sk, filter); rcu_assign_pointer(sk->sk_filter, NULL); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index a03284061a31..a7bf03ca0a36 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1155,8 +1155,8 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *fl if (!(flags & MSG_TRYHARD)) { rcu_read_lock_bh(); - for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt; - rt = rcu_dereference(rt->u.dst.dn_next)) { + for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt; + rt = rcu_dereference_bh(rt->u.dst.dn_next)) { if ((flp->fld_dst == rt->fl.fld_dst) && (flp->fld_src == rt->fl.fld_src) && (flp->mark == 
rt->fl.mark) && @@ -1618,9 +1618,9 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb) if (h > s_h) s_idx = 0; rcu_read_lock_bh(); - for(rt = rcu_dereference(dn_rt_hash_table[h].chain), idx = 0; + for(rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0; rt; - rt = rcu_dereference(rt->u.dst.dn_next), idx++) { + rt = rcu_dereference_bh(rt->u.dst.dn_next), idx++) { if (idx < s_idx) continue; skb_dst_set(skb, dst_clone(&rt->u.dst)); @@ -1654,12 +1654,12 @@ static struct dn_route *dn_rt_cache_get_first(struct seq_file *seq) for(s->bucket = dn_rt_hash_mask; s->bucket >= 0; --s->bucket) { rcu_read_lock_bh(); - rt = dn_rt_hash_table[s->bucket].chain; + rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain); if (rt) break; rcu_read_unlock_bh(); } - return rcu_dereference(rt); + return rt; } static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_route *rt) @@ -1674,7 +1674,7 @@ static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_rou rcu_read_lock_bh(); rt = dn_rt_hash_table[s->bucket].chain; } - return rcu_dereference(rt); + return rcu_dereference_bh(rt); } static void *dn_rt_cache_seq_start(struct seq_file *seq, loff_t *pos) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d62b05d33384..4f11faa5c824 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -287,12 +287,12 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq) if (!rt_hash_table[st->bucket].chain) continue; rcu_read_lock_bh(); - r = rcu_dereference(rt_hash_table[st->bucket].chain); + r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); while (r) { if (dev_net(r->u.dst.dev) == seq_file_net(seq) && r->rt_genid == st->genid) return r; - r = rcu_dereference(r->u.dst.rt_next); + r = rcu_dereference_bh(r->u.dst.rt_next); } rcu_read_unlock_bh(); } @@ -314,7 +314,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq, rcu_read_lock_bh(); r = rt_hash_table[st->bucket].chain; } - return rcu_dereference(r); + return rcu_dereference_bh(r); } static struct rtable *rt_cache_get_next(struct seq_file *seq, @@ -2689,8 +2689,8 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); rcu_read_lock_bh(); - for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; - rth = rcu_dereference(rth->u.dst.rt_next)) { + for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; + rth = rcu_dereference_bh(rth->u.dst.rt_next)) { if (rth->fl.fl4_dst == flp->fl4_dst && rth->fl.fl4_src == flp->fl4_src && rth->fl.iif == 0 && @@ -3008,8 +3008,8 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!rt_hash_table[h].chain) continue; rcu_read_lock_bh(); - for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; - rt = rcu_dereference(rt->u.dst.rt_next), idx++) { + for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt; + rt = rcu_dereference_bh(rt->u.dst.rt_next), idx++) { if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx) continue; if (rt_is_expired(rt)) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index f126d18dbdc4..939471ef8d50 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -508,7 +508,7 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk, struct sk_filter *filter; rcu_read_lock_bh(); - filter = rcu_dereference(sk->sk_filter); + filter = rcu_dereference_bh(sk->sk_filter); if (filter != NULL) res = sk_run_filter(skb, filter->insns, filter->len); rcu_read_unlock_bh(); -- 
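The conversion in the patch above is mechanical: readers that took rcu_read_lock_bh() now fetch pointers with rcu_dereference_bh(), and paths protected by something other than an RCU read lock state that protection explicitly via rcu_dereference_check(), so lockdep can verify the claim. A minimal stand-alone sketch of both idioms follows; the item structure, the global pointer and the update-side spinlock are invented for illustration and are not part of the patch.

    #include <linux/rcupdate.h>
    #include <linux/spinlock.h>
    #include <linux/lockdep.h>

    struct item {
        int value;
    };

    static struct item *cur_item;           /* RCU-protected pointer, invented */
    static DEFINE_SPINLOCK(item_lock);      /* update-side lock, also invented */

    /* Updater: publish a new item while holding the update-side lock. */
    static void set_item(struct item *new_item)
    {
        spin_lock(&item_lock);
        rcu_assign_pointer(cur_item, new_item);
        spin_unlock(&item_lock);
    }

    /* Reader in BH-disabled context: pairs rcu_read_lock_bh() with
     * rcu_dereference_bh(), as the converted code above now does. */
    static int read_value_bh(void)
    {
        struct item *p;
        int val = -1;

        rcu_read_lock_bh();
        p = rcu_dereference_bh(cur_item);
        if (p)
            val = p->value;
        rcu_read_unlock_bh();
        return val;
    }

    /* Reader that is serialized by the update-side lock instead of an RCU
     * read lock; the condition lets lockdep verify that claim. */
    static int read_value_locked(void)
    {
        struct item *p;
        int val = -1;

        spin_lock(&item_lock);
        p = rcu_dereference_check(cur_item, lockdep_is_held(&item_lock));
        if (p)
            val = p->value;
        spin_unlock(&item_lock);
        return val;
    }

When the checking config option is disabled the condition is not evaluated and the checked variants behave like plain rcu_dereference(), so documenting the locking rule this way is essentially free in production builds.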
cgit v1.2.2 From 45bb00609022ecf1d97e083666c68c74d237b799 Mon Sep 17 00:00:00 2001 From: Ulrich Weber Date: Thu, 25 Feb 2010 23:28:58 +0000 Subject: ipv6: Remove IPV6_ADDR_RESERVED RFC 4291 section 2.4 states that all uncategorized addresses should be considered as Global Unicast. This will remove IPV6_ADDR_RESERVED completely and return IPV6_ADDR_UNICAST in ipv6_addr_type() instead. Signed-off-by: Ulrich Weber Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 3 +-- net/ipv6/addrconf_core.c | 2 +- net/ipv6/route.c | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1b327f15e7e7..88fd8c5877ee 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -992,8 +992,7 @@ struct ipv6_saddr_dst { static inline int ipv6_saddr_preferred(int type) { - if (type & (IPV6_ADDR_MAPPED|IPV6_ADDR_COMPATv4| - IPV6_ADDR_LOOPBACK|IPV6_ADDR_RESERVED)) + if (type & (IPV6_ADDR_MAPPED|IPV6_ADDR_COMPATv4|IPV6_ADDR_LOOPBACK)) return 1; return 0; } diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 3f82e9542eda..6b03826552e1 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -72,7 +72,7 @@ int __ipv6_addr_type(const struct in6_addr *addr) IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); /* addr-select 3.3 */ } - return (IPV6_ADDR_RESERVED | + return (IPV6_ADDR_UNICAST | IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); /* addr-select 3.4 */ } EXPORT_SYMBOL(__ipv6_addr_type); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 88c0a5c49ae8..b08879e97f22 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1873,7 +1873,7 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) switch (ipstats_mib_noroutes) { case IPSTATS_MIB_INNOROUTES: type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); - if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) { + if (type == IPV6_ADDR_ANY) { IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); break; -- cgit v1.2.2 From 914c8ad2d18b62ad1420f518c0cab0b0b90ab308 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 24 Feb 2010 23:57:04 +0000 Subject: af_packet: do not accept mc address smaller then dev->addr_len in packet_mc_add() There is no point of accepting an address of smaller length than dev->addr_len here. Therefore change this for stonger check. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/packet/af_packet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2f0369367ee0..e2d1def70841 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1734,7 +1734,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) goto done; err = -EINVAL; - if (mreq->mr_alen > dev->addr_len) + if (mreq->mr_alen != dev->addr_len) goto done; err = -ENOBUFS; -- cgit v1.2.2 From 4edb246626be6e031950205c885bdf29fb2ff1eb Mon Sep 17 00:00:00 2001 From: "Williams, Mitch A" Date: Wed, 24 Feb 2010 21:59:56 +0000 Subject: rtnetlink: clean up SR-IOV config interface This patch consists of a few minor cleanups to the SR-IOV configurion code in rtnetlink. - Remove unneccesary lock - Remove unneccesary casts - Return correct error code for no driver support These changes are based on comments from Patrick McHardy Signed-off-by: Mitch Williams Signed-off-by: Jeff Kirsher Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 42da96a4eeee..4dd4c3cdc442 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -930,10 +930,9 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, if (tb[IFLA_VF_MAC]) { struct ifla_vf_mac *ivm; ivm = nla_data(tb[IFLA_VF_MAC]); - write_lock_bh(&dev_base_lock); + err = -EOPNOTSUPP; if (ops->ndo_set_vf_mac) err = ops->ndo_set_vf_mac(dev, ivm->vf, ivm->mac); - write_unlock_bh(&dev_base_lock); if (err < 0) goto errout; modified = 1; @@ -942,12 +941,11 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, if (tb[IFLA_VF_VLAN]) { struct ifla_vf_vlan *ivv; ivv = nla_data(tb[IFLA_VF_VLAN]); - write_lock_bh(&dev_base_lock); + err = -EOPNOTSUPP; if (ops->ndo_set_vf_vlan) err = ops->ndo_set_vf_vlan(dev, ivv->vf, - (u16)ivv->vlan, - (u8)ivv->qos); - write_unlock_bh(&dev_base_lock); + ivv->vlan, + ivv->qos); if (err < 0) goto errout; modified = 1; @@ -957,10 +955,9 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, if (tb[IFLA_VF_TX_RATE]) { struct ifla_vf_tx_rate *ivt; ivt = nla_data(tb[IFLA_VF_TX_RATE]); - write_lock_bh(&dev_base_lock); + err = -EOPNOTSUPP; if (ops->ndo_set_vf_tx_rate) err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, ivt->rate); - write_unlock_bh(&dev_base_lock); if (err < 0) goto errout; modified = 1; -- cgit v1.2.2 From e5e26d75f490d7d41f25a4b39ed6db1713beb417 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 24 Feb 2010 14:01:38 +0000 Subject: netdev: use list_first_entry macro Use list_first_entry macro; no longer any need to use 'next' directly in list to find first entry. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/dev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 1968980f513a..eb7f1a4fefc6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3018,7 +3018,7 @@ static void net_rx_action(struct softirq_action *h) * entries to the tail of this list, and only ->poll() * calls can remove this head entry from the list. */ - n = list_entry(list->next, struct napi_struct, poll_list); + n = list_first_entry(list, struct napi_struct, poll_list); have = netpoll_poll_lock(n); @@ -4882,7 +4882,7 @@ static void rollback_registered_many(struct list_head *head) } /* Process any work delayed until the end of the batch */ - dev = list_entry(head->next, struct net_device, unreg_list); + dev = list_first_entry(head, struct net_device, unreg_list); call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); synchronize_net(); @@ -5268,7 +5268,7 @@ void netdev_run_todo(void) while (!list_empty(&list)) { struct net_device *dev - = list_entry(list.next, struct net_device, todo_list); + = list_first_entry(&list, struct net_device, todo_list); list_del(&dev->todo_list); if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { -- cgit v1.2.2 From c79c5ffdce14abb4de3878c5aa8c3c6e5093c69b Mon Sep 17 00:00:00 2001 From: Peter Waskiewicz Date: Fri, 26 Feb 2010 01:54:20 +0000 Subject: ethtool: Add n-tuple string length to drvinfo and return it The drvinfo struct should include the number of strings that get_rx_ntuple will return. It will be variable if an underlying driver implements its own get_rx_ntuple routine, so userspace needs to know how much data is coming. 
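From a driver's point of view the new count is just one more case in its get_sset_count() callback; ethtool_get_drvinfo() copies whatever the driver reports. A hedged sketch of such a callback is below: the foo_* names and the counts are made up, and note that this particular drvinfo change is reverted a few commits further down because it alters the user-visible struct layout.

    #include <linux/errno.h>
    #include <linux/netdevice.h>
    #include <linux/ethtool.h>

    /* Invented per-driver counts -- purely illustrative. */
    #define FOO_NUM_STATS           12
    #define FOO_NUM_PRIV_FLAGS       2
    #define FOO_NUM_NTUPLE_STRINGS  32

    static int foo_get_sset_count(struct net_device *dev, int sset)
    {
        switch (sset) {
        case ETH_SS_STATS:
            return FOO_NUM_STATS;
        case ETH_SS_PRIV_FLAGS:
            return FOO_NUM_PRIV_FLAGS;
        case ETH_SS_NTUPLE_FILTERS:
            return FOO_NUM_NTUPLE_STRINGS;
        default:
            return -EOPNOTSUPP;
        }
    }

    static const struct ethtool_ops foo_ethtool_ops = {
        .get_sset_count = foo_get_sset_count,
        /* .get_strings, .get_rx_ntuple, ... would be filled in here */
    };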
Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- net/core/ethtool.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 31b1eddc1b84..1c94f48e27b5 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -224,6 +224,9 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use rc = ops->get_sset_count(dev, ETH_SS_PRIV_FLAGS); if (rc >= 0) info.n_priv_flags = rc; + rc = ops->get_sset_count(dev, ETH_SS_NTUPLE_FILTERS); + if (rc >= 0) + info.n_ntuples = rc; } if (ops->get_regs_len) info.regdump_len = ops->get_regs_len(dev); -- cgit v1.2.2 From 6e17d45ae310758ab30623a42ad070858c9a48de Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 24 Feb 2010 22:49:15 +0000 Subject: net: add addr len check to dev_mc_add Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/dev_mcast.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index 9e2fa39f22a3..fd91569e2394 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -96,6 +96,8 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) int err; netif_addr_lock_bh(dev); + if (alen != dev->addr_len) + return -EINVAL; err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl); if (!err) __dev_set_rx_mode(dev); -- cgit v1.2.2 From 14f3ad6f4a12495b32b0dd743bc7179f36658208 Mon Sep 17 00:00:00 2001 From: Ulrich Weber Date: Fri, 26 Feb 2010 04:34:49 -0800 Subject: ipv6: Use 1280 as min MTU for ipv6 forwarding Clients will set their MTU to 1280 if they receive a ICMPV6_PKT_TOOBIG message with an MTU less than 1280. To allow encapsulating of packets over a 1280 link we should always accept packets with a size of 1280 for forwarding even if the path has a lower MTU and fragment the encapsulated packets afterwards. In case a forwarded packet is not going to be encapsulated a ICMPV6_PKT_TOOBIG msg will still be send by ip6_fragment() with the correct MTU. Signed-off-by: Ulrich Weber Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1a5fe9ad1947..dabf108ad811 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -402,6 +402,7 @@ int ip6_forward(struct sk_buff *skb) struct ipv6hdr *hdr = ipv6_hdr(skb); struct inet6_skb_parm *opt = IP6CB(skb); struct net *net = dev_net(dst->dev); + u32 mtu; if (net->ipv6.devconf_all->forwarding == 0) goto error; @@ -508,10 +509,14 @@ int ip6_forward(struct sk_buff *skb) } } - if (skb->len > dst_mtu(dst)) { + mtu = dst_mtu(dst); + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + + if (skb->len > mtu) { /* Again, force OUTPUT device used as source address */ skb->dev = dst->dev; - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst)); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); IP6_INC_STATS_BH(net, @@ -621,8 +626,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) mtu = ip6_skb_dst_mtu(skb); /* We must not fragment if the socket is set to force MTU discovery - * or if the skb it not generated by a local socket. (This last - * check should be redundant, but it's free.) + * or if the skb it not generated by a local socket. 
*/ if (!skb->local_df) { skb->dev = skb_dst(skb)->dev; -- cgit v1.2.2 From 738b0343e73604750feb107e063c28b3ca36cb84 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 26 Feb 2010 05:12:02 -0800 Subject: Revert "ethtool: Add n-tuple string length to drvinfo and return it" This reverts commit c79c5ffdce14abb4de3878c5aa8c3c6e5093c69b. As Jeff points out we can't break the user visible interface like this, we need to add this into the reserved[] thing. Signed-off-by: David S. Miller --- net/core/ethtool.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 1c94f48e27b5..31b1eddc1b84 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -224,9 +224,6 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use rc = ops->get_sset_count(dev, ETH_SS_PRIV_FLAGS); if (rc >= 0) info.n_priv_flags = rc; - rc = ops->get_sset_count(dev, ETH_SS_NTUPLE_FILTERS); - if (rc >= 0) - info.n_ntuples = rc; } if (ops->get_regs_len) info.regdump_len = ops->get_regs_len(dev); -- cgit v1.2.2 From 51f0bc78680edccb6574ef56bd32f9e2939c8a5a Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 26 Feb 2010 17:45:14 +0100 Subject: IPVS: ip_vs_lblcr: use list headA Use list_head rather than a custom list implementation. Signed-off-by: Simon Horman Signed-off-by: Patrick McHardy --- net/netfilter/ipvs/ip_vs_lblcr.c | 44 +++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index f7476b95ab46..caa58fa1438a 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -45,6 +45,7 @@ #include #include #include +#include /* for sysctl */ #include @@ -85,25 +86,25 @@ static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ; /* * IPVS destination set structure and operations */ -struct ip_vs_dest_list { - struct ip_vs_dest_list *next; /* list link */ +struct ip_vs_dest_set_elem { + struct list_head list; /* list link */ struct ip_vs_dest *dest; /* destination server */ }; struct ip_vs_dest_set { atomic_t size; /* set size */ unsigned long lastmod; /* last modified time */ - struct ip_vs_dest_list *list; /* destination list */ + struct list_head list; /* destination list */ rwlock_t lock; /* lock for this list */ }; -static struct ip_vs_dest_list * +static struct ip_vs_dest_set_elem * ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) { - struct ip_vs_dest_list *e; + struct ip_vs_dest_set_elem *e; - for (e=set->list; e!=NULL; e=e->next) { + list_for_each_entry(e, &set->list, list) { if (e->dest == dest) /* already existed */ return NULL; @@ -118,9 +119,7 @@ ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) atomic_inc(&dest->refcnt); e->dest = dest; - /* link it to the list */ - e->next = set->list; - set->list = e; + list_add(&e->list, &set->list); atomic_inc(&set->size); set->lastmod = jiffies; @@ -130,34 +129,33 @@ ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) static void ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) { - struct ip_vs_dest_list *e, **ep; + struct ip_vs_dest_set_elem *e; - for (ep=&set->list, e=*ep; e!=NULL; e=*ep) { + list_for_each_entry(e, &set->list, list) { if (e->dest == dest) { /* HIT */ - *ep = e->next; atomic_dec(&set->size); set->lastmod = jiffies; atomic_dec(&e->dest->refcnt); + list_del(&e->list); kfree(e); break; } - ep = &e->next; } } 
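The conversion in this hunk is the kernel's standard list_head idiom: embed a struct list_head in each element, walk with list_for_each_entry(), and switch to the _safe variant whenever the walk frees elements, as the eraseall path shown next does. A self-contained sketch of the same pattern with an invented element type:

    #include <linux/list.h>
    #include <linux/slab.h>
    #include <linux/errno.h>

    struct dest_elem {
        struct list_head list;  /* embedded link, as in ip_vs_dest_set_elem */
        int id;
    };

    static LIST_HEAD(dest_list);

    static int dest_add(int id)
    {
        struct dest_elem *e = kmalloc(sizeof(*e), GFP_KERNEL);

        if (!e)
            return -ENOMEM;
        e->id = id;
        list_add(&e->list, &dest_list);
        return 0;
    }

    static void dest_del(int id)
    {
        struct dest_elem *e;

        list_for_each_entry(e, &dest_list, list) {
            if (e->id == id) {
                list_del(&e->list);
                kfree(e);
                break;          /* walk stops here, so the plain iterator is safe */
            }
        }
    }

    static void dest_flush(void)
    {
        struct dest_elem *e, *tmp;

        /* _safe variant: every element is freed while iterating */
        list_for_each_entry_safe(e, tmp, &dest_list, list) {
            list_del(&e->list);
            kfree(e);
        }
    }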
static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set) { - struct ip_vs_dest_list *e, **ep; + struct ip_vs_dest_set_elem *e, *ep; write_lock(&set->lock); - for (ep=&set->list, e=*ep; e!=NULL; e=*ep) { - *ep = e->next; + list_for_each_entry_safe(e, ep, &set->list, list) { /* * We don't kfree dest because it is refered either * by its service or by the trash dest list. */ atomic_dec(&e->dest->refcnt); + list_del(&e->list); kfree(e); } write_unlock(&set->lock); @@ -166,7 +164,7 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set) /* get weighted least-connection node in the destination set */ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) { - register struct ip_vs_dest_list *e; + register struct ip_vs_dest_set_elem *e; struct ip_vs_dest *dest, *least; int loh, doh; @@ -174,7 +172,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) return NULL; /* select the first destination server, whose weight > 0 */ - for (e=set->list; e!=NULL; e=e->next) { + list_for_each_entry(e, &set->list, list) { least = e->dest; if (least->flags & IP_VS_DEST_F_OVERLOAD) continue; @@ -190,7 +188,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) /* find the destination with the weighted least load */ nextstage: - for (e=e->next; e!=NULL; e=e->next) { + list_for_each_entry(e, &set->list, list) { dest = e->dest; if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; @@ -220,7 +218,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) /* get weighted most-connection node in the destination set */ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) { - register struct ip_vs_dest_list *e; + register struct ip_vs_dest_set_elem *e; struct ip_vs_dest *dest, *most; int moh, doh; @@ -228,7 +226,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) return NULL; /* select the first destination server, whose weight > 0 */ - for (e=set->list; e!=NULL; e=e->next) { + list_for_each_entry(e, &set->list, list) { most = e->dest; if (atomic_read(&most->weight) > 0) { moh = atomic_read(&most->activeconns) * 50 @@ -240,7 +238,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) /* find the destination with the weighted most load */ nextstage: - for (e=e->next; e!=NULL; e=e->next) { + list_for_each_entry(e, &set->list, list) { dest = e->dest; doh = atomic_read(&dest->activeconns) * 50 + atomic_read(&dest->inactconns); @@ -389,7 +387,7 @@ ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr, /* initilize its dest set */ atomic_set(&(en->set.size), 0); - en->set.list = NULL; + INIT_LIST_HEAD(&en->set.list); rwlock_init(&en->set.lock); ip_vs_lblcr_hash(tbl, en); -- cgit v1.2.2 From a49c65037146bfb2fe300b8277b10b4479fea5fc Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 26 Feb 2010 17:48:40 +0100 Subject: netfilter: nfnetlink_log: fix silly refcount leak Quick fix for memory/module refcount leak. Reference count of listener instance never reaches 0. Start/stop of ulogd2 is enough to trigger this bug! Now, refcounting there looks very fishy in particular this code: if (!try_module_get(THIS_MODULE)) { ... and creation of listener instance with refcount 2, so it may very well be ripped and redone. 
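Whatever the longer-term cleanup looks like, the immediate fix is the classic asymmetric-exit rule: a reference taken at the top of a function must be dropped on every path out, so early exits should funnel through the label that does the put. The sketch below shows that pattern with an invented, kref-based object; it is not the nfnetlink_log code, whose one-line diff follows.

    #include <linux/kernel.h>
    #include <linux/kref.h>
    #include <linux/slab.h>
    #include <linux/errno.h>

    /* Invented object with a kref -- not the nfnetlink_log structures. */
    struct inst {
        struct kref ref;
        /* ... payload ... */
    };

    static void inst_release(struct kref *ref)
    {
        kfree(container_of(ref, struct inst, ref));
    }

    static struct inst *inst_create(void)
    {
        struct inst *inst = kzalloc(sizeof(*inst), GFP_KERNEL);

        if (inst)
            kref_init(&inst->ref);  /* caller owns this initial reference */
        return inst;
    }

    /*
     * The reference taken at the top must be dropped on every exit,
     * including the early "destroy" exit; jumping past the put is the
     * kind of leak the one-line change below fixes in nfulnl_recv_config().
     */
    static int inst_handle_cmd(struct inst *inst, bool destroy)
    {
        kref_get(&inst->ref);       /* stands in for the lookup reference */

        if (destroy) {
            /* tear-down work would go here */
            goto out_put;           /* still holding the lookup reference */
        }

        /* normal (re)configuration work would go here */

    out_put:
        kref_put(&inst->ref, inst_release);
        return 0;
    }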
:-) Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- net/netfilter/nfnetlink_log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 285e9029a9ff..d9b8fb8ab340 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -768,7 +768,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, } instance_destroy(inst); - goto out; + goto out_put; default: ret = -ENOTSUPP; break; -- cgit v1.2.2 From 6b4ff2d7675511a31980fa5379808660e1261f90 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 26 Feb 2010 17:53:31 +0100 Subject: netfilter: xtables: restore indentation Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/arp_tables.c | 23 ++++++++++++++--------- net/ipv4/netfilter/ip_tables.c | 25 +++++++++++++++---------- net/ipv6/netfilter/ip6_tables.c | 25 +++++++++++++++---------- 3 files changed, 44 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 57098dcda294..f07d77f65751 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -644,8 +644,10 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, /* Walk through entries, checking offsets. */ xt_entry_foreach(iter, entry0, newinfo->size) { ret = check_entry_size_and_hooks(iter, newinfo, entry0, - entry0 + repl->size, repl->hook_entry, repl->underflow, - repl->valid_hooks); + entry0 + repl->size, + repl->hook_entry, + repl->underflow, + repl->valid_hooks); if (ret != 0) break; ++i; @@ -730,7 +732,7 @@ static void get_counters(const struct xt_table_info *t, i = 0; xt_entry_foreach(iter, t->entries[curcpu], t->size) { SET_COUNTER(counters[i], iter->counters.bcnt, - iter->counters.pcnt); + iter->counters.pcnt); ++i; } @@ -741,7 +743,7 @@ static void get_counters(const struct xt_table_info *t, xt_info_wrlock(cpu); xt_entry_foreach(iter, t->entries[cpu], t->size) { ADD_COUNTER(counters[i], iter->counters.bcnt, - iter->counters.pcnt); + iter->counters.pcnt); ++i; } xt_info_wrunlock(cpu); @@ -1356,8 +1358,11 @@ static int translate_compat_table(const char *name, /* Walk through entries, checking offsets. */ xt_entry_foreach(iter0, entry0, total_size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, - entry0, entry0 + total_size, hook_entries, underflows, - name); + entry0, + entry0 + total_size, + hook_entries, + underflows, + name); if (ret != 0) goto out_unlock; ++j; @@ -1401,8 +1406,8 @@ static int translate_compat_table(const char *name, pos = entry1; size = total_size; xt_entry_foreach(iter0, entry0, total_size) { - ret = compat_copy_entry_from_user(iter0, &pos, - &size, name, newinfo, entry1); + ret = compat_copy_entry_from_user(iter0, &pos, &size, + name, newinfo, entry1); if (ret != 0) break; } @@ -1617,7 +1622,7 @@ static int compat_copy_entries_to_user(unsigned int total_size, size = total_size; xt_entry_foreach(iter, loc_cpu_entry, total_size) { ret = compat_copy_entry_to_user(iter, &pos, - &size, counters, i++); + &size, counters, i++); if (ret != 0) break; } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index c92f4e541cf6..b29c66df8d1f 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -836,8 +836,10 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, /* Walk through entries, checking offsets. 
*/ xt_entry_foreach(iter, entry0, newinfo->size) { ret = check_entry_size_and_hooks(iter, newinfo, entry0, - entry0 + repl->size, repl->hook_entry, repl->underflow, - repl->valid_hooks); + entry0 + repl->size, + repl->hook_entry, + repl->underflow, + repl->valid_hooks); if (ret != 0) return ret; ++i; @@ -918,7 +920,7 @@ get_counters(const struct xt_table_info *t, i = 0; xt_entry_foreach(iter, t->entries[curcpu], t->size) { SET_COUNTER(counters[i], iter->counters.bcnt, - iter->counters.pcnt); + iter->counters.pcnt); ++i; } @@ -929,7 +931,7 @@ get_counters(const struct xt_table_info *t, xt_info_wrlock(cpu); xt_entry_foreach(iter, t->entries[cpu], t->size) { ADD_COUNTER(counters[i], iter->counters.bcnt, - iter->counters.pcnt); + iter->counters.pcnt); ++i; /* macro does multi eval of i */ } xt_info_wrunlock(cpu); @@ -1540,7 +1542,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, j = 0; xt_ematch_foreach(ematch, e) { ret = compat_find_calc_match(ematch, name, - &e->ip, e->comefrom, &off); + &e->ip, e->comefrom, &off); if (ret != 0) goto release_matches; ++j; @@ -1701,8 +1703,11 @@ translate_compat_table(struct net *net, /* Walk through entries, checking offsets. */ xt_entry_foreach(iter0, entry0, total_size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, - entry0, entry0 + total_size, hook_entries, underflows, - name); + entry0, + entry0 + total_size, + hook_entries, + underflows, + name); if (ret != 0) goto out_unlock; ++j; @@ -1746,8 +1751,8 @@ translate_compat_table(struct net *net, pos = entry1; size = total_size; xt_entry_foreach(iter0, entry0, total_size) { - ret = compat_copy_entry_from_user(iter0, &pos, - &size, name, newinfo, entry1); + ret = compat_copy_entry_from_user(iter0, &pos, &size, + name, newinfo, entry1); if (ret != 0) break; } @@ -1927,7 +1932,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, size = total_size; xt_entry_foreach(iter, loc_cpu_entry, total_size) { ret = compat_copy_entry_to_user(iter, &pos, - &size, counters, i++); + &size, counters, i++); if (ret != 0) break; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index f7042869198e..9210e312edf1 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -866,8 +866,10 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, /* Walk through entries, checking offsets. 
*/ xt_entry_foreach(iter, entry0, newinfo->size) { ret = check_entry_size_and_hooks(iter, newinfo, entry0, - entry0 + repl->size, repl->hook_entry, repl->underflow, - repl->valid_hooks); + entry0 + repl->size, + repl->hook_entry, + repl->underflow, + repl->valid_hooks); if (ret != 0) return ret; ++i; @@ -948,7 +950,7 @@ get_counters(const struct xt_table_info *t, i = 0; xt_entry_foreach(iter, t->entries[curcpu], t->size) { SET_COUNTER(counters[i], iter->counters.bcnt, - iter->counters.pcnt); + iter->counters.pcnt); ++i; } @@ -959,7 +961,7 @@ get_counters(const struct xt_table_info *t, xt_info_wrlock(cpu); xt_entry_foreach(iter, t->entries[cpu], t->size) { ADD_COUNTER(counters[i], iter->counters.bcnt, - iter->counters.pcnt); + iter->counters.pcnt); ++i; } xt_info_wrunlock(cpu); @@ -1573,7 +1575,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, j = 0; xt_ematch_foreach(ematch, e) { ret = compat_find_calc_match(ematch, name, - &e->ipv6, e->comefrom, &off); + &e->ipv6, e->comefrom, &off); if (ret != 0) goto release_matches; ++j; @@ -1734,8 +1736,11 @@ translate_compat_table(struct net *net, /* Walk through entries, checking offsets. */ xt_entry_foreach(iter0, entry0, total_size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, - entry0, entry0 + total_size, hook_entries, underflows, - name); + entry0, + entry0 + total_size, + hook_entries, + underflows, + name); if (ret != 0) goto out_unlock; ++j; @@ -1779,8 +1784,8 @@ translate_compat_table(struct net *net, pos = entry1; size = total_size; xt_entry_foreach(iter0, entry0, total_size) { - ret = compat_copy_entry_from_user(iter0, &pos, - &size, name, newinfo, entry1); + ret = compat_copy_entry_from_user(iter0, &pos, &size, + name, newinfo, entry1); if (ret != 0) break; } @@ -1960,7 +1965,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, size = total_size; xt_entry_foreach(iter, loc_cpu_entry, total_size) { ret = compat_copy_entry_to_user(iter, &pos, - &size, counters, i++); + &size, counters, i++); if (ret != 0) break; } -- cgit v1.2.2 From b446918b77c717a34eaa853dfab55f579d330551 Mon Sep 17 00:00:00 2001 From: Helmut Schaa Date: Wed, 24 Feb 2010 14:19:37 +0100 Subject: mac80211: use listen interval 5 as default Currently if a driver does not set hw.max_listen_interval a listen interval of 1 is negotiated with the AP. Thus, the AP could drop buffered frames for us after just one beacon interval which can easily happen with the current powersave and scan implementation. To avoid this issue increase the default interval to 5 which should be a reasonable safe default. Signed-off-by: Helmut Schaa Signed-off-by: John W. 
Linville --- net/mac80211/main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index ec8f767ba95b..06c33b68d8e5 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -558,8 +558,12 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) debugfs_hw_add(local); + /* + * if the driver doesn't specify a max listen interval we + * use 5 which should be a safe default + */ if (local->hw.max_listen_interval == 0) - local->hw.max_listen_interval = 1; + local->hw.max_listen_interval = 5; local->hw.conf.listen_interval = local->hw.max_listen_interval; -- cgit v1.2.2 From 0e0a228398cc967c922759be36c69d32e4f62701 Mon Sep 17 00:00:00 2001 From: Juuso Oikarinen Date: Fri, 26 Feb 2010 08:13:41 +0200 Subject: mac80211: fix direct probe loop on ieee80211_work_purge If authentication has already been performed when the WLAN interface is stopped, (sometimes) the ieee80211_work_purge would corrupt some ieee80211_work-structures. The outcome is this (cleaned up): [ 2252.398681] WARNING: at net/mac80211/work.c:995 ieee80211_work_purge [ 2252.466430] Backtrace: [ 2252.529266] (ieee80211_work_purge+0x0/0xcc [mac80211]) [ 2252.546875] (ieee80211_stop+0x0/0x4c0 [mac80211]) Additionally, one would get this, going on regarless of the WLAN interface state, going on forever: [ 2252.859985] wlan0: direct probe to 00:90:4c:60:04:00 (try -996717525) [ 2253.055419] wlan0: direct probe to 00:90:4c:60:04:00 (try -996717524) [ 2253.250610] wlan0: direct probe to 00:90:4c:60:04:00 (try -996717523) [ 2253.446014] wlan0: direct probe to 00:90:4c:60:04:00 (try -996717522) [ 2253.641357] wlan0: direct probe to 00:90:4c:60:04:00 (try -996717521) Signed-off-by: Juuso Oikarinen Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/work.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/work.c b/net/mac80211/work.c index 7e708d5c88b4..1e1ea3007b06 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -869,6 +869,7 @@ static void ieee80211_work_work(struct work_struct *work) break; case IEEE80211_WORK_ABORT: rma = WORK_ACT_TIMEOUT; + break; case IEEE80211_WORK_DIRECT_PROBE: rma = ieee80211_direct_probe(wk); break; -- cgit v1.2.2 From ab1b18f70a007ea6caeb007d269abb75b131a410 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Sat, 27 Feb 2010 09:33:40 +1100 Subject: sunrpc: remove unnecessary svc_xprt_put The 'struct svc_deferred_req's on the xpt_deferred queue do not own a reference to the owning xprt. This is seen in svc_revisit which is where things are added to this queue. dr->xprt is set to NULL and the reference to the xprt it put. So when this list is cleaned up in svc_delete_xprt, we mustn't put the reference. Also, replace the 'for' with a 'while' which is arguably simpler and more likely to compile efficiently. Cc: Tom Tucker Signed-off-by: NeilBrown Cc: stable@kernel.org Signed-off-by: J. 
Bruce Fields --- net/sunrpc/svc_xprt.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index d7ec5caf998c..09838300dac4 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -896,11 +896,8 @@ void svc_delete_xprt(struct svc_xprt *xprt) if (test_bit(XPT_TEMP, &xprt->xpt_flags)) serv->sv_tmpcnt--; - for (dr = svc_deferred_dequeue(xprt); dr; - dr = svc_deferred_dequeue(xprt)) { - svc_xprt_put(xprt); + while ((dr = svc_deferred_dequeue(xprt)) != NULL) kfree(dr); - } svc_xprt_put(xprt); spin_unlock_bh(&serv->sv_lock); -- cgit v1.2.2 From 10de05afe01c12cedc42eb9ce05b111eed6c8210 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 26 Feb 2010 06:34:50 +0000 Subject: rtnetlink: ignore NETDEV_PRE_UP notifier in rtnetlink_event() Commit 3b8bcfd (net: introduce pre-up netdev notifier) added a new notifier which is run before a device is set UP for use by cfg80211. The patch missed to add the new notifier to the ignore list in rtnetlink_event(), so we currently get an unnecessary netlink notification before a device is set UP. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4dd4c3cdc442..b7c7dfd86507 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1432,6 +1432,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_DOWN: rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); break; + case NETDEV_PRE_UP: case NETDEV_POST_INIT: case NETDEV_REGISTER: case NETDEV_CHANGE: -- cgit v1.2.2 From a2835763e130c343ace5320c20d33c281e7097b7 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 26 Feb 2010 06:34:51 +0000 Subject: rtnetlink: handle rtnl_link netlink notifications manually In order to support specifying device flags during device creation, we must be able to roll back device registration in case setting the flags fails without sending any notifications related to the device to userspace. This patch changes rollback_registered_many() and register_netdevice() to manually send netlink notifications for devices not handled by rtnl_link and allows to defer notifications for devices handled by rtnl_link until setup is complete. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/core/dev.c | 8 +++++++- net/core/rtnetlink.c | 4 +--- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index eb7f1a4fefc6..75332b089529 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4865,6 +4865,10 @@ static void rollback_registered_many(struct list_head *head) */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + if (!dev->rtnl_link_ops || + dev->rtnl_link_state == RTNL_LINK_INITIALIZED) + rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); + /* * Flush the unicast and multicast chains */ @@ -5091,7 +5095,9 @@ int register_netdevice(struct net_device *dev) * Prevent userspace races by waiting until the network * device is fully setup before sending notifications. 
*/ - rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); + if (!dev->rtnl_link_ops || + dev->rtnl_link_state == RTNL_LINK_INITIALIZED) + rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); out: return ret; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b7c7dfd86507..020e43bfef5f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1425,9 +1425,6 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi struct net_device *dev = ptr; switch (event) { - case NETDEV_UNREGISTER: - rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); - break; case NETDEV_UP: case NETDEV_DOWN: rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); @@ -1437,6 +1434,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_REGISTER: case NETDEV_CHANGE: case NETDEV_GOING_DOWN: + case NETDEV_UNREGISTER: case NETDEV_UNREGISTER_BATCH: break; default: -- cgit v1.2.2 From bd38081160bb3d036db98472e537b6a7dd4da51a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 26 Feb 2010 06:34:53 +0000 Subject: dev: support deferring device flag change notifications Split dev_change_flags() into two functions: __dev_change_flags() to perform the actual changes and __dev_notify_flags() to invoke netdevice notifiers. This will be used by rtnl_link to defer netlink notifications until the device has been fully configured. This changes ordering of some operations, in particular: - netlink notifications are sent after all changes have been performed. As a side effect this surpresses one unnecessary netlink message when the IFF_UP and other flags are changed simultaneously. - The NETDEV_UP/NETDEV_DOWN and NETDEV_CHANGE notifiers are invoked after all changes have been performed. Their relative is unchanged. - net_dmaengine_put() is invoked before the NETDEV_DOWN notifier instead of afterwards. This should not make any difference since both RX and TX are already shut down at this point. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/core/dev.c | 163 ++++++++++++++++++++++++++++++++------------------- net/core/rtnetlink.c | 2 - 2 files changed, 104 insertions(+), 61 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 75332b089529..e5972f7f7e1b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1113,32 +1113,13 @@ void dev_load(struct net *net, const char *name) } EXPORT_SYMBOL(dev_load); -/** - * dev_open - prepare an interface for use. - * @dev: device to open - * - * Takes a device from down to up state. The device's private open - * function is invoked and then the multicast lists are loaded. Finally - * the device is moved into the up state and a %NETDEV_UP message is - * sent to the netdev notifier chain. - * - * Calling this function on an active interface is a nop. On a failure - * a negative errno code is returned. - */ -int dev_open(struct net_device *dev) +static int __dev_open(struct net_device *dev) { const struct net_device_ops *ops = dev->netdev_ops; int ret; ASSERT_RTNL(); - /* - * Is it already up? - */ - - if (dev->flags & IFF_UP) - return 0; - /* * Is it even present? */ @@ -1187,36 +1168,57 @@ int dev_open(struct net_device *dev) * Wakeup transmit queue engine */ dev_activate(dev); - - /* - * ... and announce new interface. - */ - call_netdevice_notifiers(NETDEV_UP, dev); } return ret; } -EXPORT_SYMBOL(dev_open); /** - * dev_close - shutdown an interface. - * @dev: device to shutdown + * dev_open - prepare an interface for use. + * @dev: device to open * - * This function moves an active device into down state. 
A - * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device - * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier - * chain. + * Takes a device from down to up state. The device's private open + * function is invoked and then the multicast lists are loaded. Finally + * the device is moved into the up state and a %NETDEV_UP message is + * sent to the netdev notifier chain. + * + * Calling this function on an active interface is a nop. On a failure + * a negative errno code is returned. */ -int dev_close(struct net_device *dev) +int dev_open(struct net_device *dev) +{ + int ret; + + /* + * Is it already up? + */ + if (dev->flags & IFF_UP) + return 0; + + /* + * Open device + */ + ret = __dev_open(dev); + if (ret < 0) + return ret; + + /* + * ... and announce new interface. + */ + rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); + call_netdevice_notifiers(NETDEV_UP, dev); + + return ret; +} +EXPORT_SYMBOL(dev_open); + +static int __dev_close(struct net_device *dev) { const struct net_device_ops *ops = dev->netdev_ops; - ASSERT_RTNL(); + ASSERT_RTNL(); might_sleep(); - if (!(dev->flags & IFF_UP)) - return 0; - /* * Tell people we are going down, so that they can * prepare to death, when device is still operating. @@ -1252,14 +1254,34 @@ int dev_close(struct net_device *dev) dev->flags &= ~IFF_UP; /* - * Tell people we are down + * Shutdown NET_DMA */ - call_netdevice_notifiers(NETDEV_DOWN, dev); + net_dmaengine_put(); + + return 0; +} + +/** + * dev_close - shutdown an interface. + * @dev: device to shutdown + * + * This function moves an active device into down state. A + * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device + * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier + * chain. + */ +int dev_close(struct net_device *dev) +{ + if (!(dev->flags & IFF_UP)) + return 0; + + __dev_close(dev); /* - * Shutdown NET_DMA + * Tell people we are down */ - net_dmaengine_put(); + rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); + call_netdevice_notifiers(NETDEV_DOWN, dev); return 0; } @@ -4299,18 +4321,10 @@ unsigned dev_get_flags(const struct net_device *dev) } EXPORT_SYMBOL(dev_get_flags); -/** - * dev_change_flags - change device settings - * @dev: device - * @flags: device state flags - * - * Change settings on device based state flags. The flags are - * in the userspace exported format. - */ -int dev_change_flags(struct net_device *dev, unsigned flags) +int __dev_change_flags(struct net_device *dev, unsigned int flags) { - int ret, changes; int old_flags = dev->flags; + int ret; ASSERT_RTNL(); @@ -4341,17 +4355,12 @@ int dev_change_flags(struct net_device *dev, unsigned flags) ret = 0; if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */ - ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev); + ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev); if (!ret) dev_set_rx_mode(dev); } - if (dev->flags & IFF_UP && - ((old_flags ^ dev->flags) & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | - IFF_VOLATILE))) - call_netdevice_notifiers(NETDEV_CHANGE, dev); - if ((flags ^ dev->gflags) & IFF_PROMISC) { int inc = (flags & IFF_PROMISC) ? 
1 : -1; @@ -4370,11 +4379,47 @@ int dev_change_flags(struct net_device *dev, unsigned flags) dev_set_allmulti(dev, inc); } - /* Exclude state transition flags, already notified */ - changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING); + return ret; +} + +void __dev_notify_flags(struct net_device *dev, unsigned int old_flags) +{ + unsigned int changes = dev->flags ^ old_flags; + + if (changes & IFF_UP) { + if (dev->flags & IFF_UP) + call_netdevice_notifiers(NETDEV_UP, dev); + else + call_netdevice_notifiers(NETDEV_DOWN, dev); + } + + if (dev->flags & IFF_UP && + (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) + call_netdevice_notifiers(NETDEV_CHANGE, dev); +} + +/** + * dev_change_flags - change device settings + * @dev: device + * @flags: device state flags + * + * Change settings on device based state flags. The flags are + * in the userspace exported format. + */ +int dev_change_flags(struct net_device *dev, unsigned flags) +{ + int ret, changes; + int old_flags = dev->flags; + + ret = __dev_change_flags(dev, flags); + if (ret < 0) + return ret; + + changes = old_flags ^ dev->flags; if (changes) rtmsg_ifinfo(RTM_NEWLINK, dev, changes); + __dev_notify_flags(dev, old_flags); return ret; } EXPORT_SYMBOL(dev_change_flags); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 020e43bfef5f..c21ec4236dd0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1427,8 +1427,6 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi switch (event) { case NETDEV_UP: case NETDEV_DOWN: - rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); - break; case NETDEV_PRE_UP: case NETDEV_POST_INIT: case NETDEV_REGISTER: -- cgit v1.2.2 From 3729d5021257b283f7fce33d957893162ccb2c9d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 26 Feb 2010 06:34:54 +0000 Subject: rtnetlink: support specifying device flags on device creation commit e8469ed959c373c2ff9e6f488aa5a14971aebe1f Author: Patrick McHardy Date: Tue Feb 23 20:41:30 2010 +0100 Support specifying the initial device flags when creating a device though rtnl_link. Devices allocated by rtnl_create_link() are marked as INITIALIZING in order to surpress netlink registration notifications. To complete setup, rtnl_configure_link() must be called, which performs the device flag changes and invokes the deferred notifiers if everything went well. 
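Condensed, creation is now a two-phase affair: allocate and register the device while notifications are suppressed, then apply the requested flags and emit the deferred RTM_NEWLINK. The sketch below paraphrases the rtnl_newlink() flow from the diff further down with error handling trimmed; it is an illustration, not the verbatim code, and the author's own ip(8) examples follow right after it.

    #include <linux/err.h>
    #include <linux/netdevice.h>
    #include <linux/rtnetlink.h>
    #include <net/rtnetlink.h>

    /* Sketch of the two-phase rtnl_link creation flow (caller holds RTNL). */
    static int create_and_configure(struct net *src_net, struct net *net,
                                    char *ifname,
                                    const struct rtnl_link_ops *ops,
                                    struct nlattr *tb[],
                                    const struct ifinfomsg *ifm)
    {
        struct net_device *dev;
        int err;

        /* Phase 1: the device starts out RTNL_LINK_INITIALIZING, so
         * register_netdevice() sends no RTM_NEWLINK yet. */
        dev = rtnl_create_link(src_net, net, ifname, ops, tb);
        if (IS_ERR(dev))
            return PTR_ERR(dev);

        err = register_netdevice(dev);
        if (err < 0) {
            free_netdev(dev);
            return err;
        }

        /* Phase 2: apply the requested IFF_* flags and send the deferred
         * notifications; roll back the registration if that fails. */
        err = rtnl_configure_link(dev, ifm);
        if (err < 0)
            unregister_netdevice(dev);
        return err;
    }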
Two examples: # add macvlan to eth0 # $ ip link add link eth0 up allmulticast on type macvlan [LINK]11: macvlan0@eth0: mtu 1500 qdisc noqueue state UNKNOWN link/ether 26:f8:84:02:f9:2a brd ff:ff:ff:ff:ff:ff [ROUTE]ff00::/8 dev macvlan0 table local metric 256 mtu 1500 advmss 1440 hoplimit 0 [ROUTE]fe80::/64 dev macvlan0 proto kernel metric 256 mtu 1500 advmss 1440 hoplimit 0 [LINK]11: macvlan0@eth0: mtu 1500 link/ether 26:f8:84:02:f9:2a [ADDR]11: macvlan0 inet6 fe80::24f8:84ff:fe02:f92a/64 scope link valid_lft forever preferred_lft forever [ROUTE]local fe80::24f8:84ff:fe02:f92a via :: dev lo table local proto none metric 0 mtu 16436 advmss 16376 hoplimit 0 [ROUTE]default via fe80::215:e9ff:fef0:10f8 dev macvlan0 proto kernel metric 1024 mtu 1500 advmss 1440 hoplimit 0 [NEIGH]fe80::215:e9ff:fef0:10f8 dev macvlan0 lladdr 00:15:e9:f0:10:f8 router STALE [ROUTE]2001:6f8:974::/64 dev macvlan0 proto kernel metric 256 expires 0sec mtu 1500 advmss 1440 hoplimit 0 [PREFIX]prefix 2001:6f8:974::/64 dev macvlan0 onlink autoconf valid 14400 preferred 131084 [ADDR]11: macvlan0 inet6 2001:6f8:974:0:24f8:84ff:fe02:f92a/64 scope global dynamic valid_lft 86399sec preferred_lft 14399sec # add VLAN to eth1, eth1 is down # $ ip link add link eth1 up type vlan id 1000 RTNETLINK answers: Network is down Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 52 +++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index c21ec4236dd0..d1472a423323 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -549,6 +549,19 @@ static void set_operstate(struct net_device *dev, unsigned char transition) } } +static unsigned int rtnl_dev_combine_flags(const struct net_device *dev, + const struct ifinfomsg *ifm) +{ + unsigned int flags = ifm->ifi_flags; + + /* bugwards compatibility: ifi_change == 0 is treated as ~0 */ + if (ifm->ifi_change) + flags = (flags & ifm->ifi_change) | + (dev->flags & ~ifm->ifi_change); + + return flags; +} + static void copy_rtnl_link_stats(struct rtnl_link_stats *a, const struct net_device_stats *b) { @@ -904,13 +917,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, } if (ifm->ifi_flags || ifm->ifi_change) { - unsigned int flags = ifm->ifi_flags; - - /* bugwards compatibility: ifi_change == 0 is treated as ~0 */ - if (ifm->ifi_change) - flags = (flags & ifm->ifi_change) | - (dev->flags & ~ifm->ifi_change); - err = dev_change_flags(dev, flags); + err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm)); if (err < 0) goto errout; } @@ -1053,6 +1060,26 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return 0; } +int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm) +{ + unsigned int old_flags; + int err; + + old_flags = dev->flags; + if (ifm && (ifm->ifi_flags || ifm->ifi_change)) { + err = __dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm)); + if (err < 0) + return err; + } + + dev->rtnl_link_state = RTNL_LINK_INITIALIZED; + rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); + + __dev_notify_flags(dev, old_flags); + return 0; +} +EXPORT_SYMBOL(rtnl_configure_link); + struct net_device *rtnl_create_link(struct net *src_net, struct net *net, char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[]) { @@ -1074,6 +1101,7 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net, dev_net_set(dev, net); dev->rtnl_link_ops = ops; + 
dev->rtnl_link_state = RTNL_LINK_INITIALIZING; dev->real_num_tx_queues = real_num_queues; if (strchr(dev->name, '%')) { @@ -1203,7 +1231,7 @@ replay: if (!(nlh->nlmsg_flags & NLM_F_CREATE)) return -ENODEV; - if (ifm->ifi_index || ifm->ifi_flags || ifm->ifi_change) + if (ifm->ifi_index) return -EOPNOTSUPP; if (tb[IFLA_MAP] || tb[IFLA_MASTER] || tb[IFLA_PROTINFO]) return -EOPNOTSUPP; @@ -1234,9 +1262,15 @@ replay: err = ops->newlink(net, dev, tb, data); else err = register_netdevice(dev); - if (err < 0 && !IS_ERR(dev)) + if (err < 0 && !IS_ERR(dev)) { free_netdev(dev); + goto out; + } + err = rtnl_configure_link(dev, ifm); + if (err < 0) + unregister_netdevice(dev); +out: put_net(dest_net); return err; } -- cgit v1.2.2 From c13854cef4751000b968d4e8ac95796562d5b96f Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 8 Feb 2010 15:27:07 +0100 Subject: Bluetooth: Convert controller hdev->type to hdev->bus The hdev->type is misnamed and should be actually hdev->bus instead. So convert it now. Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 8 ++++---- net/bluetooth/hci_sysfs.c | 16 ++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 94ba34982021..4b62ed01ddc6 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -797,7 +797,7 @@ int hci_get_dev_info(void __user *arg) strcpy(di.name, hdev->name); di.bdaddr = hdev->bdaddr; - di.type = hdev->type; + di.type = hdev->bus; di.flags = hdev->flags; di.pkt_type = hdev->pkt_type; di.acl_mtu = hdev->acl_mtu; @@ -869,8 +869,8 @@ int hci_register_dev(struct hci_dev *hdev) struct list_head *head = &hci_dev_list, *p; int i, id = 0; - BT_DBG("%p name %s type %d owner %p", hdev, hdev->name, - hdev->type, hdev->owner); + BT_DBG("%p name %s bus %d owner %p", hdev, hdev->name, + hdev->bus, hdev->owner); if (!hdev->open || !hdev->close || !hdev->destruct) return -EINVAL; @@ -946,7 +946,7 @@ int hci_unregister_dev(struct hci_dev *hdev) { int i; - BT_DBG("%p name %s type %d", hdev, hdev->name, hdev->type); + BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); write_lock_bh(&hci_dev_list_lock); list_del(&hdev->list); diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 2bc6f6a8de68..9b5f376813b7 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -166,9 +166,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn) queue_work(bt_workq, &conn->work_del); } -static inline char *host_typetostr(int type) +static inline char *host_bustostr(int bus) { - switch (type) { + switch (bus) { case HCI_VIRTUAL: return "VIRTUAL"; case HCI_USB: @@ -188,10 +188,10 @@ static inline char *host_typetostr(int type) } } -static ssize_t show_type(struct device *dev, struct device_attribute *attr, char *buf) +static ssize_t show_bus(struct device *dev, struct device_attribute *attr, char *buf) { struct hci_dev *hdev = dev_get_drvdata(dev); - return sprintf(buf, "%s\n", host_typetostr(hdev->type)); + return sprintf(buf, "%s\n", host_bustostr(hdev->bus)); } static ssize_t show_name(struct device *dev, struct device_attribute *attr, char *buf) @@ -355,7 +355,7 @@ static ssize_t store_sniff_min_interval(struct device *dev, struct device_attrib return count; } -static DEVICE_ATTR(type, S_IRUGO, show_type, NULL); +static DEVICE_ATTR(bus, S_IRUGO, show_bus, NULL); static DEVICE_ATTR(name, S_IRUGO, show_name, NULL); static DEVICE_ATTR(class, S_IRUGO, show_class, NULL); static DEVICE_ATTR(address, S_IRUGO, show_address, 
NULL); @@ -373,7 +373,7 @@ static DEVICE_ATTR(sniff_min_interval, S_IRUGO | S_IWUSR, show_sniff_min_interval, store_sniff_min_interval); static struct attribute *bt_host_attrs[] = { - &dev_attr_type.attr, + &dev_attr_bus.attr, &dev_attr_name.attr, &dev_attr_class.attr, &dev_attr_address.attr, @@ -414,7 +414,7 @@ int hci_register_sysfs(struct hci_dev *hdev) struct device *dev = &hdev->dev; int err; - BT_DBG("%p name %s type %d", hdev, hdev->name, hdev->type); + BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); dev->type = &bt_host; dev->class = bt_class; @@ -433,7 +433,7 @@ int hci_register_sysfs(struct hci_dev *hdev) void hci_unregister_sysfs(struct hci_dev *hdev) { - BT_DBG("%p name %s type %d", hdev, hdev->name, hdev->type); + BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); device_del(&hdev->dev); } -- cgit v1.2.2 From ca325f698996c1a0770a67f41e7dc97a007d8bc2 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 8 Feb 2010 16:22:31 +0100 Subject: Bluetooth: Convert inquiry cache to use debugfs instead of sysfs The output of the inquiry cache is only useful for debugging purposes and so move it into debugfs. Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_sysfs.c | 92 +++++++++++++++++++++++++++++++---------------- 1 file changed, 62 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 9b5f376813b7..f9d93f9cbcd2 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -9,6 +10,9 @@ struct class *bt_class = NULL; EXPORT_SYMBOL_GPL(bt_class); +struct dentry *bt_debugfs = NULL; +EXPORT_SYMBOL_GPL(bt_debugfs); + static struct workqueue_struct *bt_workq; static inline char *link_typetostr(int type) @@ -251,32 +255,6 @@ static ssize_t show_hci_revision(struct device *dev, struct device_attribute *at return sprintf(buf, "%d\n", hdev->hci_rev); } -static ssize_t show_inquiry_cache(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct hci_dev *hdev = dev_get_drvdata(dev); - struct inquiry_cache *cache = &hdev->inq_cache; - struct inquiry_entry *e; - int n = 0; - - hci_dev_lock_bh(hdev); - - for (e = cache->list; e; e = e->next) { - struct inquiry_data *data = &e->data; - bdaddr_t bdaddr; - baswap(&bdaddr, &data->bdaddr); - n += sprintf(buf + n, "%s %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n", - batostr(&bdaddr), - data->pscan_rep_mode, data->pscan_period_mode, - data->pscan_mode, data->dev_class[2], - data->dev_class[1], data->dev_class[0], - __le16_to_cpu(data->clock_offset), - data->rssi, data->ssp_mode, e->timestamp); - } - - hci_dev_unlock_bh(hdev); - return n; -} - static ssize_t show_idle_timeout(struct device *dev, struct device_attribute *attr, char *buf) { struct hci_dev *hdev = dev_get_drvdata(dev); @@ -363,7 +341,6 @@ static DEVICE_ATTR(features, S_IRUGO, show_features, NULL); static DEVICE_ATTR(manufacturer, S_IRUGO, show_manufacturer, NULL); static DEVICE_ATTR(hci_version, S_IRUGO, show_hci_version, NULL); static DEVICE_ATTR(hci_revision, S_IRUGO, show_hci_revision, NULL); -static DEVICE_ATTR(inquiry_cache, S_IRUGO, show_inquiry_cache, NULL); static DEVICE_ATTR(idle_timeout, S_IRUGO | S_IWUSR, show_idle_timeout, store_idle_timeout); @@ -381,7 +358,6 @@ static struct attribute *bt_host_attrs[] = { &dev_attr_manufacturer.attr, &dev_attr_hci_version.attr, &dev_attr_hci_revision.attr, - &dev_attr_inquiry_cache.attr, &dev_attr_idle_timeout.attr, &dev_attr_sniff_max_interval.attr, 
&dev_attr_sniff_min_interval.attr, @@ -409,6 +385,46 @@ static struct device_type bt_host = { .release = bt_host_release, }; +static int inquiry_cache_open(struct inode *inode, struct file *file) +{ + file->private_data = inode->i_private; + return 0; +} + +static ssize_t inquiry_cache_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + struct inquiry_cache *cache = &hdev->inq_cache; + struct inquiry_entry *e; + char buf[4096]; + int n = 0; + + hci_dev_lock_bh(hdev); + + for (e = cache->list; e; e = e->next) { + struct inquiry_data *data = &e->data; + bdaddr_t bdaddr; + baswap(&bdaddr, &data->bdaddr); + n += sprintf(buf + n, "%s %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n", + batostr(&bdaddr), + data->pscan_rep_mode, data->pscan_period_mode, + data->pscan_mode, data->dev_class[2], + data->dev_class[1], data->dev_class[0], + __le16_to_cpu(data->clock_offset), + data->rssi, data->ssp_mode, e->timestamp); + } + + hci_dev_unlock_bh(hdev); + + return simple_read_from_buffer(userbuf, count, ppos, buf, n); +} + +static const struct file_operations inquiry_cache_fops = { + .open = inquiry_cache_open, + .read = inquiry_cache_read, +}; + int hci_register_sysfs(struct hci_dev *hdev) { struct device *dev = &hdev->dev; @@ -428,6 +444,16 @@ int hci_register_sysfs(struct hci_dev *hdev) if (err < 0) return err; + if (!bt_debugfs) + return 0; + + hdev->debugfs = debugfs_create_dir(hdev->name, bt_debugfs); + if (!hdev->debugfs) + return 0; + + debugfs_create_file("inquiry_cache", 0444, hdev->debugfs, + hdev, &inquiry_cache_fops); + return 0; } @@ -435,6 +461,8 @@ void hci_unregister_sysfs(struct hci_dev *hdev) { BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); + debugfs_remove_recursive(hdev->debugfs); + device_del(&hdev->dev); } @@ -444,6 +472,8 @@ int __init bt_sysfs_init(void) if (!bt_workq) return -ENOMEM; + bt_debugfs = debugfs_create_dir("bluetooth", NULL); + bt_class = class_create(THIS_MODULE, "bluetooth"); if (IS_ERR(bt_class)) { destroy_workqueue(bt_workq); @@ -455,7 +485,9 @@ int __init bt_sysfs_init(void) void bt_sysfs_cleanup(void) { - destroy_workqueue(bt_workq); - class_destroy(bt_class); + + debugfs_remove_recursive(bt_debugfs); + + destroy_workqueue(bt_workq); } -- cgit v1.2.2 From 943da25d95c7e8fd8c39dbf09e030f5da46f5d85 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 13 Feb 2010 02:28:41 +0100 Subject: Bluetooth: Add controller types for BR/EDR and 802.11 AMP With the Bluetooth 3.0 specification and the introduction of alternate MAC/PHY (AMP) support, it is required to differentiate between primary BR/EDR controllers and 802.11 AMP controllers. So introduce a special type inside HCI device for differentiation. For now all AMP controllers will be treated as raw devices until an AMP manager has been implemented. 
Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 6 +++++- net/bluetooth/hci_sysfs.c | 20 ++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 4b62ed01ddc6..4ad23192c7a5 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -491,6 +491,10 @@ int hci_dev_open(__u16 dev) if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) set_bit(HCI_RAW, &hdev->flags); + /* Treat all non BR/EDR controllers as raw devices for now */ + if (hdev->dev_type != HCI_BREDR) + set_bit(HCI_RAW, &hdev->flags); + if (hdev->open(hdev)) { ret = -EIO; goto done; @@ -797,7 +801,7 @@ int hci_get_dev_info(void __user *arg) strcpy(di.name, hdev->name); di.bdaddr = hdev->bdaddr; - di.type = hdev->bus; + di.type = (hdev->bus & 0x0f) | (hdev->dev_type << 4); di.flags = hdev->flags; di.pkt_type = hdev->pkt_type; di.acl_mtu = hdev->acl_mtu; diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index f9d93f9cbcd2..1a79a6c7e30e 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -192,12 +192,30 @@ static inline char *host_bustostr(int bus) } } +static inline char *host_typetostr(int type) +{ + switch (type) { + case HCI_BREDR: + return "BR/EDR"; + case HCI_80211: + return "802.11"; + default: + return "UNKNOWN"; + } +} + static ssize_t show_bus(struct device *dev, struct device_attribute *attr, char *buf) { struct hci_dev *hdev = dev_get_drvdata(dev); return sprintf(buf, "%s\n", host_bustostr(hdev->bus)); } +static ssize_t show_type(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hci_dev *hdev = dev_get_drvdata(dev); + return sprintf(buf, "%s\n", host_typetostr(hdev->dev_type)); +} + static ssize_t show_name(struct device *dev, struct device_attribute *attr, char *buf) { struct hci_dev *hdev = dev_get_drvdata(dev); @@ -334,6 +352,7 @@ static ssize_t store_sniff_min_interval(struct device *dev, struct device_attrib } static DEVICE_ATTR(bus, S_IRUGO, show_bus, NULL); +static DEVICE_ATTR(type, S_IRUGO, show_type, NULL); static DEVICE_ATTR(name, S_IRUGO, show_name, NULL); static DEVICE_ATTR(class, S_IRUGO, show_class, NULL); static DEVICE_ATTR(address, S_IRUGO, show_address, NULL); @@ -351,6 +370,7 @@ static DEVICE_ATTR(sniff_min_interval, S_IRUGO | S_IWUSR, static struct attribute *bt_host_attrs[] = { &dev_attr_bus.attr, + &dev_attr_type.attr, &dev_attr_name.attr, &dev_attr_class.attr, &dev_attr_address.attr, -- cgit v1.2.2 From 705e5711b61e9622b2d88850f38c219014aa0780 Mon Sep 17 00:00:00 2001 From: Stephen Coe Date: Tue, 16 Feb 2010 11:29:44 -0500 Subject: Bluetooth: Add SCO fallback for unsupported feature error The Bluetooth SIG PTS test case: TC_AG_ACS_BV_10_I, rejects eSCO with "Unsupported Feature or Parameter Value" (0x11). This patch adds case for SCO fallback. 
2007-09-20 12:20:37.787747 > HCI Event: Number of Completed Packets (0x13) plen 5 handle 38 packets 1 2007-09-20 12:20:37.842154 < HCI Command: Setup Synchronous Connection (0x01|0x0028) plen 17 handle 38 voice setting 0x0060 2007-09-20 12:20:37.847037 > HCI Event: Command Status (0x0f) plen 4 Setup Synchronous Connection (0x01|0x0028) status 0x00 ncmd 1 2007-09-20 12:20:37.855233 > HCI Event: Max Slots Change (0x1b) plen 3 handle 38 slots 1 2007-09-20 12:20:39.913354 > HCI Event: Synchronous Connect Complete (0x2c) plen 17 status 0x11 handle 38 bdaddr 00:16:93:01:01:7A type eSCO Error: Unsupported Feature or Parameter Value 2007-09-20 12:20:39.922629 > HCI Event: Max Slots Change (0x1b) plen 3 handle 38 slots 5 2007-09-20 12:20:58.126886 < ACL data: handle 38 flags 0x02 dlen 8 L2CAP(d): cid 0x0041 len 4 [psm 0] 0000: 0b 53 01 b8 .S.. 2007-09-20 12:20:58.130138 > HCI Event: Number of Completed Packets (0x13) plen 5 handle 38 packets 1 Signed-off-by: Stephen Coe Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_event.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 592da5c909c1..6c57fc71c7e2 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1698,6 +1698,7 @@ static inline void hci_sync_conn_complete_evt(struct hci_dev *hdev, struct sk_bu hci_conn_add_sysfs(conn); break; + case 0x11: /* Unsupported Feature or Parameter Value */ case 0x1c: /* SCO interval rejected */ case 0x1a: /* Unsupported Remote Feature */ case 0x1f: /* Unspecified error */ -- cgit v1.2.2 From f6e623a65cb301088bd04794043e82bfc996c512 Mon Sep 17 00:00:00 2001 From: Johann Felix Soden Date: Mon, 15 Feb 2010 22:23:48 +0100 Subject: Bluetooth: Fix out of scope variable access in hci_sock_cmsg() The pointer data can point to the variable ctv. Access to data happens when ctv is already out of scope. Signed-off-by: Johann Felix Soden Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_sock.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 688cfebfbee0..38f08f6b86f6 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -329,6 +329,9 @@ static inline void hci_sock_cmsg(struct sock *sk, struct msghdr *msg, struct sk_ } if (mask & HCI_CMSG_TSTAMP) { +#ifdef CONFIG_COMPAT + struct compat_timeval ctv; +#endif struct timeval tv; void *data; int len; @@ -339,7 +342,6 @@ static inline void hci_sock_cmsg(struct sock *sk, struct msghdr *msg, struct sk_ len = sizeof(tv); #ifdef CONFIG_COMPAT if (msg->msg_flags & MSG_CMSG_COMPAT) { - struct compat_timeval ctv; ctv.tv_sec = tv.tv_sec; ctv.tv_usec = tv.tv_usec; data = &ctv; -- cgit v1.2.2 From 87557c18ac36241b596984589a0889c5c4bf916c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 27 Feb 2010 19:41:39 +0000 Subject: bridge: Do br_pass_frame_up after other ports At the moment we deliver to the local bridge port via the function br_pass_frame_up before all other ports. There is no requirement for this. For the purpose of IGMP snooping, it would be more convenient if we did the local port last. Therefore this patch rearranges the bridge input processing so that the local bridge port gets to see the packet last (if at all). Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/bridge/br_input.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 5ee1a3682bf2..9589937e1c0a 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -73,9 +73,6 @@ int br_handle_frame_finish(struct sk_buff *skb) if (skb2 == skb) skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2) - br_pass_frame_up(br, skb2); - if (skb) { if (dst) br_forward(dst->dst, skb); @@ -83,6 +80,9 @@ int br_handle_frame_finish(struct sk_buff *skb) br_flood_forward(br, skb); } + if (skb2) + br_pass_frame_up(br, skb2); + out: return 0; drop: -- cgit v1.2.2 From 68b7c895be336b19f4c38d7cb500132fabba0afd Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 27 Feb 2010 19:41:40 +0000 Subject: bridge: Allow tail-call on br_pass_frame_up This patch allows tail-call on the call to br_pass_frame_up in br_handle_frame_finish. This is now possible because of the previous patch to call br_pass_frame_up last. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_input.c | 12 +++++++----- net/bridge/br_private.h | 6 ++++++ 2 files changed, 13 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 9589937e1c0a..be5ab8df6661 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -20,9 +20,9 @@ /* Bridge group multicast address 802.1d (pg 51). */ const u8 br_group_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; -static void br_pass_frame_up(struct net_bridge *br, struct sk_buff *skb) +static int br_pass_frame_up(struct sk_buff *skb) { - struct net_device *indev, *brdev = br->dev; + struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; brdev->stats.rx_packets++; brdev->stats.rx_bytes += skb->len; @@ -30,8 +30,8 @@ static void br_pass_frame_up(struct net_bridge *br, struct sk_buff *skb) indev = skb->dev; skb->dev = brdev; - NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL, - netif_receive_skb); + return NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL, + netif_receive_skb); } /* note: already called with rcu_read_lock (preempt_disabled) */ @@ -53,6 +53,8 @@ int br_handle_frame_finish(struct sk_buff *skb) if (p->state == BR_STATE_LEARNING) goto drop; + BR_INPUT_SKB_CB(skb)->brdev = br->dev; + /* The packet skb2 goes to the local host (NULL to skip). */ skb2 = NULL; @@ -81,7 +83,7 @@ int br_handle_frame_finish(struct sk_buff *skb) } if (skb2) - br_pass_frame_up(br, skb2); + return br_pass_frame_up(skb2); out: return 0; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 1f0c4f44b765..16513793156e 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -131,6 +131,12 @@ struct net_bridge struct kobject *ifobj; }; +struct br_input_skb_cb { + struct net_device *brdev; +}; + +#define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb) + extern struct notifier_block br_device_notifier; extern const u8 br_group_address[ETH_ALEN]; -- cgit v1.2.2 From b33084be192ee1e347d98bb5c9e38a53d98d35e2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 27 Feb 2010 19:41:41 +0000 Subject: bridge: Avoid unnecessary clone on forward path When the packet is delivered to the local bridge device we may end up cloning it unnecessarily if no bridge port can receive the packet in br_flood. This patch avoids this by moving the skb_clone into br_flood. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/bridge/br_forward.c | 33 ++++++++++++++++++++++----------- net/bridge/br_input.c | 5 +---- net/bridge/br_private.h | 3 ++- 3 files changed, 25 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index bc1704ac6cd9..6cd50c6e57cf 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -105,8 +105,9 @@ void br_forward(const struct net_bridge_port *to, struct sk_buff *skb) /* called under bridge lock */ static void br_flood(struct net_bridge *br, struct sk_buff *skb, - void (*__packet_hook)(const struct net_bridge_port *p, - struct sk_buff *skb)) + struct sk_buff *skb0, + void (*__packet_hook)(const struct net_bridge_port *p, + struct sk_buff *skb)) { struct net_bridge_port *p; struct net_bridge_port *prev; @@ -120,8 +121,7 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb, if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) { br->dev->stats.tx_dropped++; - kfree_skb(skb); - return; + goto out; } __packet_hook(prev, skb2); @@ -131,23 +131,34 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb, } } - if (prev != NULL) { - __packet_hook(prev, skb); - return; + if (!prev) + goto out; + + if (skb0) { + skb = skb_clone(skb, GFP_ATOMIC); + if (!skb) { + br->dev->stats.tx_dropped++; + goto out; + } } + __packet_hook(prev, skb); + return; - kfree_skb(skb); +out: + if (!skb0) + kfree_skb(skb); } /* called with rcu_read_lock */ void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb) { - br_flood(br, skb, __br_deliver); + br_flood(br, skb, NULL, __br_deliver); } /* called under bridge lock */ -void br_flood_forward(struct net_bridge *br, struct sk_buff *skb) +void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, + struct sk_buff *skb2) { - br_flood(br, skb, __br_forward); + br_flood(br, skb, skb2, __br_forward); } diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index be5ab8df6661..edfdaef44296 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -72,14 +72,11 @@ int br_handle_frame_finish(struct sk_buff *skb) skb = NULL; } - if (skb2 == skb) - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb) { if (dst) br_forward(dst->dst, skb); else - br_flood_forward(br, skb); + br_flood_forward(br, skb, skb2); } if (skb2) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 16513793156e..fad5a2669d34 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -180,7 +180,8 @@ extern void br_forward(const struct net_bridge_port *to, struct sk_buff *skb); extern int br_forward_finish(struct sk_buff *skb); extern void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb); -extern void br_flood_forward(struct net_bridge *br, struct sk_buff *skb); +extern void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, + struct sk_buff *skb2); /* br_if.c */ extern void br_port_carrier_check(struct net_bridge_port *p); -- cgit v1.2.2 From 6088a539d8d1666dca6979b5759bf966ee9124ef Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 27 Feb 2010 19:41:42 +0000 Subject: bridge: Use BR_INPUT_SKB_CB on xmit path this patch makes BR_INPUT_SKB_CB available on the xmit path so that we could avoid passing the br pointer around for the purpose of collecting device statistics. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/bridge/br_device.c | 2 ++ net/bridge/br_forward.c | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 1a99c4e04e85..be356293caa1 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -26,6 +26,8 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) const unsigned char *dest = skb->data; struct net_bridge_fdb_entry *dst; + BR_INPUT_SKB_CB(skb)->brdev = dev; + dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 6cd50c6e57cf..2e1cb434f6cd 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -111,6 +111,7 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb, { struct net_bridge_port *p; struct net_bridge_port *prev; + struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; prev = NULL; @@ -120,7 +121,7 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb, struct sk_buff *skb2; if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) { - br->dev->stats.tx_dropped++; + dev->stats.tx_dropped++; goto out; } @@ -137,7 +138,7 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb, if (skb0) { skb = skb_clone(skb, GFP_ATOMIC); if (!skb) { - br->dev->stats.tx_dropped++; + dev->stats.tx_dropped++; goto out; } } -- cgit v1.2.2 From 025d89c27f54c69cd0e51666d88aada33666bb6c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 27 Feb 2010 19:41:43 +0000 Subject: bridge: Split may_deliver/deliver_clone out of br_flood This patch moves the main loop body in br_flood into the function may_deliver. The code that clones an skb and delivers it is moved into the deliver_clone function. This allows this to be reused by the future multicast forward function. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_forward.c | 69 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 2e1cb434f6cd..86cd0712d63e 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -11,6 +11,7 @@ * 2 of the License, or (at your option) any later version. 
*/ +#include #include #include #include @@ -103,6 +104,44 @@ void br_forward(const struct net_bridge_port *to, struct sk_buff *skb) kfree_skb(skb); } +static int deliver_clone(struct net_bridge_port *prev, struct sk_buff *skb, + void (*__packet_hook)(const struct net_bridge_port *p, + struct sk_buff *skb)) +{ + skb = skb_clone(skb, GFP_ATOMIC); + if (!skb) { + struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; + + dev->stats.tx_dropped++; + return -ENOMEM; + } + + __packet_hook(prev, skb); + return 0; +} + +static struct net_bridge_port *maybe_deliver( + struct net_bridge_port *prev, struct net_bridge_port *p, + struct sk_buff *skb, + void (*__packet_hook)(const struct net_bridge_port *p, + struct sk_buff *skb)) +{ + int err; + + if (!should_deliver(p, skb)) + return prev; + + if (!prev) + goto out; + + err = deliver_clone(prev, skb, __packet_hook); + if (err) + return ERR_PTR(err); + +out: + return p; +} + /* called under bridge lock */ static void br_flood(struct net_bridge *br, struct sk_buff *skb, struct sk_buff *skb0, @@ -111,38 +150,22 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb, { struct net_bridge_port *p; struct net_bridge_port *prev; - struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; prev = NULL; list_for_each_entry_rcu(p, &br->port_list, list) { - if (should_deliver(p, skb)) { - if (prev != NULL) { - struct sk_buff *skb2; - - if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) { - dev->stats.tx_dropped++; - goto out; - } - - __packet_hook(prev, skb2); - } - - prev = p; - } + prev = maybe_deliver(prev, p, skb, __packet_hook); + if (IS_ERR(prev)) + goto out; } if (!prev) goto out; - if (skb0) { - skb = skb_clone(skb, GFP_ATOMIC); - if (!skb) { - dev->stats.tx_dropped++; - goto out; - } - } - __packet_hook(prev, skb); + if (skb0) + deliver_clone(prev, skb, __packet_hook); + else + __packet_hook(prev, skb); return; out: -- cgit v1.2.2 From eb1d16414339a6e113d89e2cca2556005d7ce919 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 27 Feb 2010 19:41:45 +0000 Subject: bridge: Add core IGMP snooping support This patch adds the core functionality of IGMP snooping support without actually hooking it up. So this patch should be a no-op as far as the bridge's external behaviour is concerned. All the new code and data is controlled by the Kconfig option BRIDGE_IGMP_SNOOPING. A run-time toggle is also available. The multicast switching is done using an hash table that is lockless on the read-side through RCU. On the write-side the new multicast_lock is used for all operations. The hash table supports dynamic growth/rehashing. The hash table will be rehashed if any chain length exceeds a preset limit. If rehashing does not reduce the maximum chain length then snooping will be disabled. These features may be added in future (in no particular order): * IGMPv3 source support * Non-querier router detection * IPv6 Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/Kconfig | 12 + net/bridge/Makefile | 2 + net/bridge/br_multicast.c | 1135 +++++++++++++++++++++++++++++++++++++++++++++ net/bridge/br_private.h | 139 ++++++ 4 files changed, 1288 insertions(+) create mode 100644 net/bridge/br_multicast.c (limited to 'net') diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig index e143ca678881..78dd5497210a 100644 --- a/net/bridge/Kconfig +++ b/net/bridge/Kconfig @@ -31,3 +31,15 @@ config BRIDGE will be called bridge. If unsure, say N. 
+ +config BRIDGE_IGMP_SNOOPING + bool "IGMP snooping" + default y + ---help--- + If you say Y here, then the Ethernet bridge will be able selectively + forward multicast traffic based on IGMP traffic received from each + port. + + Say N to exclude this support and reduce the binary size. + + If unsure, say Y. diff --git a/net/bridge/Makefile b/net/bridge/Makefile index f444c12cde5a..d0359ea8ee79 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -12,4 +12,6 @@ bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o bridge-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o +bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o + obj-$(CONFIG_BRIDGE_NF_EBTABLES) += netfilter/ diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c new file mode 100644 index 000000000000..746b5a611aae --- /dev/null +++ b/net/bridge/br_multicast.c @@ -0,0 +1,1135 @@ +/* + * Bridge multicast support. + * + * Copyright (c) 2010 Herbert Xu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "br_private.h" + +static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb, __be32 ip) +{ + return jhash_1word(mdb->secret, (u32)ip) & (mdb->max - 1); +} + +static struct net_bridge_mdb_entry *__br_mdb_ip_get( + struct net_bridge_mdb_htable *mdb, __be32 dst, int hash) +{ + struct net_bridge_mdb_entry *mp; + struct hlist_node *p; + + hlist_for_each_entry(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { + if (dst == mp->addr) + return mp; + } + + return NULL; +} + +static struct net_bridge_mdb_entry *br_mdb_ip_get( + struct net_bridge_mdb_htable *mdb, __be32 dst) +{ + return __br_mdb_ip_get(mdb, dst, br_ip_hash(mdb, dst)); +} + +struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, + struct sk_buff *skb) +{ + struct net_bridge_mdb_htable *mdb = br->mdb; + + if (!mdb || br->multicast_disabled) + return NULL; + + switch (skb->protocol) { + case htons(ETH_P_IP): + if (BR_INPUT_SKB_CB(skb)->igmp) + break; + return br_mdb_ip_get(mdb, ip_hdr(skb)->daddr); + } + + return NULL; +} + +static void br_mdb_free(struct rcu_head *head) +{ + struct net_bridge_mdb_htable *mdb = + container_of(head, struct net_bridge_mdb_htable, rcu); + struct net_bridge_mdb_htable *old = mdb->old; + + mdb->old = NULL; + kfree(old->mhash); + kfree(old); +} + +static int br_mdb_copy(struct net_bridge_mdb_htable *new, + struct net_bridge_mdb_htable *old, + int elasticity) +{ + struct net_bridge_mdb_entry *mp; + struct hlist_node *p; + int maxlen; + int len; + int i; + + for (i = 0; i < old->max; i++) + hlist_for_each_entry(mp, p, &old->mhash[i], hlist[old->ver]) + hlist_add_head(&mp->hlist[new->ver], + &new->mhash[br_ip_hash(new, mp->addr)]); + + if (!elasticity) + return 0; + + maxlen = 0; + for (i = 0; i < new->max; i++) { + len = 0; + hlist_for_each_entry(mp, p, &new->mhash[i], hlist[new->ver]) + len++; + if (len > maxlen) + maxlen = len; + } + + return maxlen > elasticity ? 
-EINVAL : 0; +} + +static void br_multicast_free_pg(struct rcu_head *head) +{ + struct net_bridge_port_group *p = + container_of(head, struct net_bridge_port_group, rcu); + + kfree(p); +} + +static void br_multicast_free_group(struct rcu_head *head) +{ + struct net_bridge_mdb_entry *mp = + container_of(head, struct net_bridge_mdb_entry, rcu); + + kfree(mp); +} + +static void br_multicast_group_expired(unsigned long data) +{ + struct net_bridge_mdb_entry *mp = (void *)data; + struct net_bridge *br = mp->br; + struct net_bridge_mdb_htable *mdb; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || timer_pending(&mp->timer)) + goto out; + + if (!hlist_unhashed(&mp->mglist)) + hlist_del_init(&mp->mglist); + + if (mp->ports) + goto out; + + mdb = br->mdb; + hlist_del_rcu(&mp->hlist[mdb->ver]); + mdb->size--; + + del_timer(&mp->query_timer); + call_rcu_bh(&mp->rcu, br_multicast_free_group); + +out: + spin_unlock(&br->multicast_lock); +} + +static void br_multicast_del_pg(struct net_bridge *br, + struct net_bridge_port_group *pg) +{ + struct net_bridge_mdb_htable *mdb = br->mdb; + struct net_bridge_mdb_entry *mp; + struct net_bridge_port_group *p; + struct net_bridge_port_group **pp; + + mp = br_mdb_ip_get(mdb, pg->addr); + if (WARN_ON(!mp)) + return; + + for (pp = &mp->ports; (p = *pp); pp = &p->next) { + if (p != pg) + continue; + + *pp = p->next; + hlist_del_init(&p->mglist); + del_timer(&p->timer); + del_timer(&p->query_timer); + call_rcu_bh(&p->rcu, br_multicast_free_pg); + + if (!mp->ports && hlist_unhashed(&mp->mglist) && + netif_running(br->dev)) + mod_timer(&mp->timer, jiffies); + + return; + } + + WARN_ON(1); +} + +static void br_multicast_port_group_expired(unsigned long data) +{ + struct net_bridge_port_group *pg = (void *)data; + struct net_bridge *br = pg->port->br; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || timer_pending(&pg->timer) || + hlist_unhashed(&pg->mglist)) + goto out; + + br_multicast_del_pg(br, pg); + +out: + spin_unlock(&br->multicast_lock); +} + +static int br_mdb_rehash(struct net_bridge_mdb_htable **mdbp, int max, + int elasticity) +{ + struct net_bridge_mdb_htable *old = *mdbp; + struct net_bridge_mdb_htable *mdb; + int err; + + mdb = kmalloc(sizeof(*mdb), GFP_ATOMIC); + if (!mdb) + return -ENOMEM; + + mdb->max = max; + mdb->old = old; + + mdb->mhash = kzalloc(max * sizeof(*mdb->mhash), GFP_ATOMIC); + if (!mdb->mhash) { + kfree(mdb); + return -ENOMEM; + } + + mdb->size = old ? old->size : 0; + mdb->ver = old ? 
old->ver ^ 1 : 0; + + if (!old || elasticity) + get_random_bytes(&mdb->secret, sizeof(mdb->secret)); + else + mdb->secret = old->secret; + + if (!old) + goto out; + + err = br_mdb_copy(mdb, old, elasticity); + if (err) { + kfree(mdb->mhash); + kfree(mdb); + return err; + } + + call_rcu_bh(&mdb->rcu, br_mdb_free); + +out: + rcu_assign_pointer(*mdbp, mdb); + + return 0; +} + +static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br, + __be32 group) +{ + struct sk_buff *skb; + struct igmphdr *ih; + struct ethhdr *eth; + struct iphdr *iph; + + skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*iph) + + sizeof(*ih) + 4); + if (!skb) + goto out; + + skb->protocol = htons(ETH_P_IP); + + skb_reset_mac_header(skb); + eth = eth_hdr(skb); + + memcpy(eth->h_source, br->dev->dev_addr, 6); + eth->h_dest[0] = 1; + eth->h_dest[1] = 0; + eth->h_dest[2] = 0x5e; + eth->h_dest[3] = 0; + eth->h_dest[4] = 0; + eth->h_dest[5] = 1; + eth->h_proto = htons(ETH_P_IP); + skb_put(skb, sizeof(*eth)); + + skb_set_network_header(skb, skb->len); + iph = ip_hdr(skb); + + iph->version = 4; + iph->ihl = 6; + iph->tos = 0xc0; + iph->tot_len = htons(sizeof(*iph) + sizeof(*ih) + 4); + iph->id = 0; + iph->frag_off = htons(IP_DF); + iph->ttl = 1; + iph->protocol = IPPROTO_IGMP; + iph->saddr = 0; + iph->daddr = htonl(INADDR_ALLHOSTS_GROUP); + ((u8 *)&iph[1])[0] = IPOPT_RA; + ((u8 *)&iph[1])[1] = 4; + ((u8 *)&iph[1])[2] = 0; + ((u8 *)&iph[1])[3] = 0; + ip_send_check(iph); + skb_put(skb, 24); + + skb_set_transport_header(skb, skb->len); + ih = igmp_hdr(skb); + ih->type = IGMP_HOST_MEMBERSHIP_QUERY; + ih->code = (group ? br->multicast_last_member_interval : + br->multicast_query_response_interval) / + (HZ / IGMP_TIMER_SCALE); + ih->group = group; + ih->csum = 0; + ih->csum = ip_compute_csum((void *)ih, sizeof(struct igmphdr)); + skb_put(skb, sizeof(*ih)); + + __skb_pull(skb, sizeof(*eth)); + +out: + return skb; +} + +static void br_multicast_send_group_query(struct net_bridge_mdb_entry *mp) +{ + struct net_bridge *br = mp->br; + struct sk_buff *skb; + + skb = br_multicast_alloc_query(br, mp->addr); + if (!skb) + goto timer; + + netif_rx(skb); + +timer: + if (++mp->queries_sent < br->multicast_last_member_count) + mod_timer(&mp->query_timer, + jiffies + br->multicast_last_member_interval); +} + +static void br_multicast_group_query_expired(unsigned long data) +{ + struct net_bridge_mdb_entry *mp = (void *)data; + struct net_bridge *br = mp->br; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || hlist_unhashed(&mp->mglist) || + mp->queries_sent >= br->multicast_last_member_count) + goto out; + + br_multicast_send_group_query(mp); + +out: + spin_unlock(&br->multicast_lock); +} + +static void br_multicast_send_port_group_query(struct net_bridge_port_group *pg) +{ + struct net_bridge_port *port = pg->port; + struct net_bridge *br = port->br; + struct sk_buff *skb; + + skb = br_multicast_alloc_query(br, pg->addr); + if (!skb) + goto timer; + + br_deliver(port, skb); + +timer: + if (++pg->queries_sent < br->multicast_last_member_count) + mod_timer(&pg->query_timer, + jiffies + br->multicast_last_member_interval); +} + +static void br_multicast_port_group_query_expired(unsigned long data) +{ + struct net_bridge_port_group *pg = (void *)data; + struct net_bridge_port *port = pg->port; + struct net_bridge *br = port->br; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || hlist_unhashed(&pg->mglist) || + pg->queries_sent >= br->multicast_last_member_count) + goto out; + + 
br_multicast_send_port_group_query(pg); + +out: + spin_unlock(&br->multicast_lock); +} + +static struct net_bridge_mdb_entry *br_multicast_get_group( + struct net_bridge *br, struct net_bridge_port *port, __be32 group, + int hash) +{ + struct net_bridge_mdb_htable *mdb = br->mdb; + struct net_bridge_mdb_entry *mp; + struct hlist_node *p; + unsigned count = 0; + unsigned max; + int elasticity; + int err; + + hlist_for_each_entry(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { + count++; + if (unlikely(group == mp->addr)) { + return mp; + } + } + + elasticity = 0; + max = mdb->max; + + if (unlikely(count > br->hash_elasticity && count)) { + if (net_ratelimit()) + printk(KERN_INFO "%s: Multicast hash table " + "chain limit reached: %s\n", + br->dev->name, port ? port->dev->name : + br->dev->name); + + elasticity = br->hash_elasticity; + } + + if (mdb->size >= max) { + max *= 2; + if (unlikely(max >= br->hash_max)) { + printk(KERN_WARNING "%s: Multicast hash table maximum " + "reached, disabling snooping: %s, %d\n", + br->dev->name, port ? port->dev->name : + br->dev->name, + max); + err = -E2BIG; +disable: + br->multicast_disabled = 1; + goto err; + } + } + + if (max > mdb->max || elasticity) { + if (mdb->old) { + if (net_ratelimit()) + printk(KERN_INFO "%s: Multicast hash table " + "on fire: %s\n", + br->dev->name, port ? port->dev->name : + br->dev->name); + err = -EEXIST; + goto err; + } + + err = br_mdb_rehash(&br->mdb, max, elasticity); + if (err) { + printk(KERN_WARNING "%s: Cannot rehash multicast " + "hash table, disabling snooping: " + "%s, %d, %d\n", + br->dev->name, port ? port->dev->name : + br->dev->name, + mdb->size, err); + goto disable; + } + + err = -EAGAIN; + goto err; + } + + return NULL; + +err: + mp = ERR_PTR(err); + return mp; +} + +static struct net_bridge_mdb_entry *br_multicast_new_group( + struct net_bridge *br, struct net_bridge_port *port, __be32 group) +{ + struct net_bridge_mdb_htable *mdb = br->mdb; + struct net_bridge_mdb_entry *mp; + int hash; + + if (!mdb) { + if (br_mdb_rehash(&br->mdb, BR_HASH_SIZE, 0)) + return NULL; + goto rehash; + } + + hash = br_ip_hash(mdb, group); + mp = br_multicast_get_group(br, port, group, hash); + switch (PTR_ERR(mp)) { + case 0: + break; + + case -EAGAIN: +rehash: + mdb = br->mdb; + hash = br_ip_hash(mdb, group); + break; + + default: + goto out; + } + + mp = kzalloc(sizeof(*mp), GFP_ATOMIC); + if (unlikely(!mp)) + goto out; + + mp->br = br; + mp->addr = group; + setup_timer(&mp->timer, br_multicast_group_expired, + (unsigned long)mp); + setup_timer(&mp->query_timer, br_multicast_group_query_expired, + (unsigned long)mp); + + hlist_add_head_rcu(&mp->hlist[mdb->ver], &mdb->mhash[hash]); + mdb->size++; + +out: + return mp; +} + +static int br_multicast_add_group(struct net_bridge *br, + struct net_bridge_port *port, __be32 group) +{ + struct net_bridge_mdb_entry *mp; + struct net_bridge_port_group *p; + struct net_bridge_port_group **pp; + unsigned long now = jiffies; + int err; + + if (ipv4_is_local_multicast(group)) + return 0; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || + (port && port->state == BR_STATE_DISABLED)) + goto out; + + mp = br_multicast_new_group(br, port, group); + err = PTR_ERR(mp); + if (unlikely(IS_ERR(mp) || !mp)) + goto err; + + if (!port) { + hlist_add_head(&mp->mglist, &br->mglist); + mod_timer(&mp->timer, now + br->multicast_membership_interval); + goto out; + } + + for (pp = &mp->ports; (p = *pp); pp = &p->next) { + if (p->port == port) + goto found; + if ((unsigned long)p->port < 
(unsigned long)port) + break; + } + + p = kzalloc(sizeof(*p), GFP_ATOMIC); + err = -ENOMEM; + if (unlikely(!p)) + goto err; + + p->addr = group; + p->port = port; + p->next = *pp; + hlist_add_head(&p->mglist, &port->mglist); + setup_timer(&p->timer, br_multicast_port_group_expired, + (unsigned long)p); + setup_timer(&p->query_timer, br_multicast_port_group_query_expired, + (unsigned long)p); + + rcu_assign_pointer(*pp, p); + +found: + mod_timer(&p->timer, now + br->multicast_membership_interval); +out: + err = 0; + +err: + spin_unlock(&br->multicast_lock); + return err; +} + +static void br_multicast_router_expired(unsigned long data) +{ + struct net_bridge_port *port = (void *)data; + struct net_bridge *br = port->br; + + spin_lock(&br->multicast_lock); + if (port->multicast_router != 1 || + timer_pending(&port->multicast_router_timer) || + hlist_unhashed(&port->rlist)) + goto out; + + hlist_del_init_rcu(&port->rlist); + +out: + spin_unlock(&br->multicast_lock); +} + +static void br_multicast_local_router_expired(unsigned long data) +{ +} + +static void br_multicast_send_query(struct net_bridge *br, + struct net_bridge_port *port, u32 sent) +{ + unsigned long time; + struct sk_buff *skb; + + if (!netif_running(br->dev) || br->multicast_disabled || + timer_pending(&br->multicast_querier_timer)) + return; + + skb = br_multicast_alloc_query(br, 0); + if (!skb) + goto timer; + + if (port) { + __skb_push(skb, sizeof(struct ethhdr)); + skb->dev = port->dev; + NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, + dev_queue_xmit); + } else + netif_rx(skb); + +timer: + time = jiffies; + time += sent < br->multicast_startup_query_count ? + br->multicast_startup_query_interval : + br->multicast_query_interval; + mod_timer(port ? &port->multicast_query_timer : + &br->multicast_query_timer, time); +} + +static void br_multicast_port_query_expired(unsigned long data) +{ + struct net_bridge_port *port = (void *)data; + struct net_bridge *br = port->br; + + spin_lock(&br->multicast_lock); + if (port && (port->state == BR_STATE_DISABLED || + port->state == BR_STATE_BLOCKING)) + goto out; + + if (port->multicast_startup_queries_sent < + br->multicast_startup_query_count) + port->multicast_startup_queries_sent++; + + br_multicast_send_query(port->br, port, + port->multicast_startup_queries_sent); + +out: + spin_unlock(&br->multicast_lock); +} + +void br_multicast_add_port(struct net_bridge_port *port) +{ + port->multicast_router = 1; + + setup_timer(&port->multicast_router_timer, br_multicast_router_expired, + (unsigned long)port); + setup_timer(&port->multicast_query_timer, + br_multicast_port_query_expired, (unsigned long)port); +} + +void br_multicast_del_port(struct net_bridge_port *port) +{ + del_timer_sync(&port->multicast_router_timer); +} + +void br_multicast_enable_port(struct net_bridge_port *port) +{ + struct net_bridge *br = port->br; + + spin_lock(&br->multicast_lock); + if (br->multicast_disabled || !netif_running(br->dev)) + goto out; + + port->multicast_startup_queries_sent = 0; + + if (try_to_del_timer_sync(&port->multicast_query_timer) >= 0 || + del_timer(&port->multicast_query_timer)) + mod_timer(&port->multicast_query_timer, jiffies); + +out: + spin_unlock(&br->multicast_lock); +} + +void br_multicast_disable_port(struct net_bridge_port *port) +{ + struct net_bridge *br = port->br; + struct net_bridge_port_group *pg; + struct hlist_node *p, *n; + + spin_lock(&br->multicast_lock); + hlist_for_each_entry_safe(pg, p, n, &port->mglist, mglist) + br_multicast_del_pg(br, pg); + + if 
(!hlist_unhashed(&port->rlist)) + hlist_del_init_rcu(&port->rlist); + del_timer(&port->multicast_router_timer); + del_timer(&port->multicast_query_timer); + spin_unlock(&br->multicast_lock); +} + +static int br_multicast_igmp3_report(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb) +{ + struct igmpv3_report *ih; + struct igmpv3_grec *grec; + int i; + int len; + int num; + int type; + int err = 0; + __be32 group; + + if (!pskb_may_pull(skb, sizeof(*ih))) + return -EINVAL; + + ih = igmpv3_report_hdr(skb); + num = ntohs(ih->ngrec); + len = sizeof(*ih); + + for (i = 0; i < num; i++) { + len += sizeof(*grec); + if (!pskb_may_pull(skb, len)) + return -EINVAL; + + grec = (void *)(skb->data + len); + group = grec->grec_mca; + type = grec->grec_type; + + len += grec->grec_nsrcs * 4; + if (!pskb_may_pull(skb, len)) + return -EINVAL; + + /* We treat this as an IGMPv2 report for now. */ + switch (type) { + case IGMPV3_MODE_IS_INCLUDE: + case IGMPV3_MODE_IS_EXCLUDE: + case IGMPV3_CHANGE_TO_INCLUDE: + case IGMPV3_CHANGE_TO_EXCLUDE: + case IGMPV3_ALLOW_NEW_SOURCES: + case IGMPV3_BLOCK_OLD_SOURCES: + break; + + default: + continue; + } + + err = br_multicast_add_group(br, port, group); + if (err) + break; + } + + return err; +} + +static void br_multicast_mark_router(struct net_bridge *br, + struct net_bridge_port *port) +{ + unsigned long now = jiffies; + struct hlist_node *p; + struct hlist_node **h; + + if (!port) { + if (br->multicast_router == 1) + mod_timer(&br->multicast_router_timer, + now + br->multicast_querier_interval); + return; + } + + if (port->multicast_router != 1) + return; + + if (!hlist_unhashed(&port->rlist)) + goto timer; + + for (h = &br->router_list.first; + (p = *h) && + (unsigned long)container_of(p, struct net_bridge_port, rlist) > + (unsigned long)port; + h = &p->next) + ; + + port->rlist.pprev = h; + port->rlist.next = p; + rcu_assign_pointer(*h, &port->rlist); + if (p) + p->pprev = &port->rlist.next; + +timer: + mod_timer(&port->multicast_router_timer, + now + br->multicast_querier_interval); +} + +static void br_multicast_query_received(struct net_bridge *br, + struct net_bridge_port *port, + __be32 saddr) +{ + if (saddr) + mod_timer(&br->multicast_querier_timer, + jiffies + br->multicast_querier_interval); + else if (timer_pending(&br->multicast_querier_timer)) + return; + + br_multicast_mark_router(br, port); +} + +static int br_multicast_query(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb) +{ + struct iphdr *iph = ip_hdr(skb); + struct igmphdr *ih = igmp_hdr(skb); + struct net_bridge_mdb_entry *mp; + struct igmpv3_query *ih3; + struct net_bridge_port_group *p; + struct net_bridge_port_group **pp; + unsigned long max_delay; + unsigned long now = jiffies; + __be32 group; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || + (port && port->state == BR_STATE_DISABLED)) + goto out; + + br_multicast_query_received(br, port, iph->saddr); + + group = ih->group; + + if (skb->len == sizeof(*ih)) { + max_delay = ih->code * (HZ / IGMP_TIMER_SCALE); + + if (!max_delay) { + max_delay = 10 * HZ; + group = 0; + } + } else { + if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) + return -EINVAL; + + ih3 = igmpv3_query_hdr(skb); + if (ih3->nsrcs) + return 0; + + max_delay = ih3->code ? 
1 : + IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE); + } + + if (!group) + goto out; + + mp = br_mdb_ip_get(br->mdb, group); + if (!mp) + goto out; + + max_delay *= br->multicast_last_member_count; + + if (!hlist_unhashed(&mp->mglist) && + (timer_pending(&mp->timer) ? + time_after(mp->timer.expires, now + max_delay) : + try_to_del_timer_sync(&mp->timer) >= 0)) + mod_timer(&mp->timer, now + max_delay); + + for (pp = &mp->ports; (p = *pp); pp = &p->next) { + if (timer_pending(&p->timer) ? + time_after(p->timer.expires, now + max_delay) : + try_to_del_timer_sync(&p->timer) >= 0) + mod_timer(&mp->timer, now + max_delay); + } + +out: + spin_unlock(&br->multicast_lock); + return 0; +} + +static void br_multicast_leave_group(struct net_bridge *br, + struct net_bridge_port *port, + __be32 group) +{ + struct net_bridge_mdb_htable *mdb; + struct net_bridge_mdb_entry *mp; + struct net_bridge_port_group *p; + unsigned long now; + unsigned long time; + + if (ipv4_is_local_multicast(group)) + return; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || + (port && port->state == BR_STATE_DISABLED) || + timer_pending(&br->multicast_querier_timer)) + goto out; + + mdb = br->mdb; + mp = br_mdb_ip_get(mdb, group); + if (!mp) + goto out; + + now = jiffies; + time = now + br->multicast_last_member_count * + br->multicast_last_member_interval; + + if (!port) { + if (!hlist_unhashed(&mp->mglist) && + (timer_pending(&mp->timer) ? + time_after(mp->timer.expires, time) : + try_to_del_timer_sync(&mp->timer) >= 0)) { + mod_timer(&mp->timer, time); + + mp->queries_sent = 0; + mod_timer(&mp->query_timer, now); + } + + goto out; + } + + for (p = mp->ports; p; p = p->next) { + if (p->port != port) + continue; + + if (!hlist_unhashed(&p->mglist) && + (timer_pending(&p->timer) ? + time_after(p->timer.expires, time) : + try_to_del_timer_sync(&p->timer) >= 0)) { + mod_timer(&p->timer, time); + + p->queries_sent = 0; + mod_timer(&p->query_timer, now); + } + + break; + } + +out: + spin_unlock(&br->multicast_lock); +} + +static int br_multicast_ipv4_rcv(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb) +{ + struct sk_buff *skb2 = skb; + struct iphdr *iph; + struct igmphdr *ih; + unsigned len; + unsigned offset; + int err; + + BR_INPUT_SKB_CB(skb)->igmp = 0; + BR_INPUT_SKB_CB(skb)->mrouters_only = 0; + + /* We treat OOM as packet loss for now. 
*/ + if (!pskb_may_pull(skb, sizeof(*iph))) + return -EINVAL; + + iph = ip_hdr(skb); + + if (iph->ihl < 5 || iph->version != 4) + return -EINVAL; + + if (!pskb_may_pull(skb, ip_hdrlen(skb))) + return -EINVAL; + + iph = ip_hdr(skb); + + if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) + return -EINVAL; + + if (iph->protocol != IPPROTO_IGMP) + return 0; + + len = ntohs(iph->tot_len); + if (skb->len < len || len < ip_hdrlen(skb)) + return -EINVAL; + + if (skb->len > len) { + skb2 = skb_clone(skb, GFP_ATOMIC); + if (!skb2) + return -ENOMEM; + + err = pskb_trim_rcsum(skb2, len); + if (err) + return err; + } + + len -= ip_hdrlen(skb2); + offset = skb_network_offset(skb2) + ip_hdrlen(skb2); + __skb_pull(skb2, offset); + skb_reset_transport_header(skb2); + + err = -EINVAL; + if (!pskb_may_pull(skb2, sizeof(*ih))) + goto out; + + iph = ip_hdr(skb2); + + switch (skb2->ip_summed) { + case CHECKSUM_COMPLETE: + if (!csum_fold(skb2->csum)) + break; + /* fall through */ + case CHECKSUM_NONE: + skb2->csum = 0; + if (skb_checksum_complete(skb2)) + return -EINVAL; + } + + err = 0; + + BR_INPUT_SKB_CB(skb)->igmp = 1; + ih = igmp_hdr(skb2); + + switch (ih->type) { + case IGMP_HOST_MEMBERSHIP_REPORT: + case IGMPV2_HOST_MEMBERSHIP_REPORT: + BR_INPUT_SKB_CB(skb2)->mrouters_only = 1; + err = br_multicast_add_group(br, port, ih->group); + break; + case IGMPV3_HOST_MEMBERSHIP_REPORT: + err = br_multicast_igmp3_report(br, port, skb2); + break; + case IGMP_HOST_MEMBERSHIP_QUERY: + err = br_multicast_query(br, port, skb2); + break; + case IGMP_HOST_LEAVE_MESSAGE: + br_multicast_leave_group(br, port, ih->group); + break; + } + +out: + __skb_push(skb2, offset); + if (skb2 != skb) + kfree_skb(skb2); + return err; +} + +int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, + struct sk_buff *skb) +{ + if (br->multicast_disabled) + return 0; + + switch (skb->protocol) { + case htons(ETH_P_IP): + return br_multicast_ipv4_rcv(br, port, skb); + } + + return 0; +} + +static void br_multicast_query_expired(unsigned long data) +{ + struct net_bridge *br = (void *)data; + + spin_lock(&br->multicast_lock); + if (br->multicast_startup_queries_sent < + br->multicast_startup_query_count) + br->multicast_startup_queries_sent++; + + br_multicast_send_query(br, NULL, br->multicast_startup_queries_sent); + + spin_unlock(&br->multicast_lock); +} + +void br_multicast_init(struct net_bridge *br) +{ + br->hash_elasticity = 4; + br->hash_max = 512; + + br->multicast_router = 1; + br->multicast_last_member_count = 2; + br->multicast_startup_query_count = 2; + + br->multicast_last_member_interval = HZ; + br->multicast_query_response_interval = 10 * HZ; + br->multicast_startup_query_interval = 125 * HZ / 4; + br->multicast_query_interval = 125 * HZ; + br->multicast_querier_interval = 255 * HZ; + br->multicast_membership_interval = 260 * HZ; + + spin_lock_init(&br->multicast_lock); + setup_timer(&br->multicast_router_timer, + br_multicast_local_router_expired, 0); + setup_timer(&br->multicast_querier_timer, + br_multicast_local_router_expired, 0); + setup_timer(&br->multicast_query_timer, br_multicast_query_expired, + (unsigned long)br); +} + +void br_multicast_open(struct net_bridge *br) +{ + br->multicast_startup_queries_sent = 0; + + if (br->multicast_disabled) + return; + + mod_timer(&br->multicast_query_timer, jiffies); +} + +void br_multicast_stop(struct net_bridge *br) +{ + struct net_bridge_mdb_htable *mdb; + struct net_bridge_mdb_entry *mp; + struct hlist_node *p, *n; + u32 ver; + int i; + + 
del_timer_sync(&br->multicast_router_timer); + del_timer_sync(&br->multicast_querier_timer); + del_timer_sync(&br->multicast_query_timer); + + spin_lock_bh(&br->multicast_lock); + mdb = br->mdb; + if (!mdb) + goto out; + + br->mdb = NULL; + + ver = mdb->ver; + for (i = 0; i < mdb->max; i++) { + hlist_for_each_entry_safe(mp, p, n, &mdb->mhash[i], + hlist[ver]) { + del_timer(&mp->timer); + del_timer(&mp->query_timer); + call_rcu_bh(&mp->rcu, br_multicast_free_group); + } + } + + if (mdb->old) { + spin_unlock_bh(&br->multicast_lock); + synchronize_rcu_bh(); + spin_lock_bh(&br->multicast_lock); + WARN_ON(mdb->old); + } + + mdb->old = mdb; + call_rcu_bh(&mdb->rcu, br_mdb_free); + +out: + spin_unlock_bh(&br->multicast_lock); +} diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index fad5a2669d34..44345c9afdd3 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -57,6 +57,41 @@ struct net_bridge_fdb_entry unsigned char is_static; }; +struct net_bridge_port_group { + struct net_bridge_port *port; + struct net_bridge_port_group *next; + struct hlist_node mglist; + struct rcu_head rcu; + struct timer_list timer; + struct timer_list query_timer; + __be32 addr; + u32 queries_sent; +}; + +struct net_bridge_mdb_entry +{ + struct hlist_node hlist[2]; + struct hlist_node mglist; + struct net_bridge *br; + struct net_bridge_port_group *ports; + struct rcu_head rcu; + struct timer_list timer; + struct timer_list query_timer; + __be32 addr; + u32 queries_sent; +}; + +struct net_bridge_mdb_htable +{ + struct hlist_head *mhash; + struct rcu_head rcu; + struct net_bridge_mdb_htable *old; + u32 size; + u32 max; + u32 secret; + u32 ver; +}; + struct net_bridge_port { struct net_bridge *br; @@ -84,6 +119,15 @@ struct net_bridge_port unsigned long flags; #define BR_HAIRPIN_MODE 0x00000001 + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + u32 multicast_startup_queries_sent; + unsigned char multicast_router; + struct timer_list multicast_router_timer; + struct timer_list multicast_query_timer; + struct hlist_head mglist; + struct hlist_node rlist; +#endif }; struct net_bridge @@ -124,6 +168,35 @@ struct net_bridge unsigned char topology_change; unsigned char topology_change_detected; +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + unsigned char multicast_router; + + u8 multicast_disabled:1; + + u32 hash_elasticity; + u32 hash_max; + + u32 multicast_last_member_count; + u32 multicast_startup_queries_sent; + u32 multicast_startup_query_count; + + unsigned long multicast_last_member_interval; + unsigned long multicast_membership_interval; + unsigned long multicast_querier_interval; + unsigned long multicast_query_interval; + unsigned long multicast_query_response_interval; + unsigned long multicast_startup_query_interval; + + spinlock_t multicast_lock; + struct net_bridge_mdb_htable *mdb; + struct hlist_head router_list; + struct hlist_head mglist; + + struct timer_list multicast_router_timer; + struct timer_list multicast_querier_timer; + struct timer_list multicast_query_timer; +#endif + struct timer_list hello_timer; struct timer_list tcn_timer; struct timer_list topology_change_timer; @@ -133,6 +206,8 @@ struct net_bridge struct br_input_skb_cb { struct net_device *brdev; + int igmp; + int mrouters_only; }; #define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb) @@ -204,6 +279,70 @@ extern struct sk_buff *br_handle_frame(struct net_bridge_port *p, extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); extern int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, 
void __user *arg); +/* br_multicast.c */ +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING +extern int br_multicast_rcv(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb); +extern struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, + struct sk_buff *skb); +extern void br_multicast_add_port(struct net_bridge_port *port); +extern void br_multicast_del_port(struct net_bridge_port *port); +extern void br_multicast_enable_port(struct net_bridge_port *port); +extern void br_multicast_disable_port(struct net_bridge_port *port); +extern void br_multicast_init(struct net_bridge *br); +extern void br_multicast_open(struct net_bridge *br); +extern void br_multicast_stop(struct net_bridge *br); +#else +static inline int br_multicast_rcv(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb) +{ + return 0; +} + +static inline struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, + struct sk_buff *skb) +{ + return NULL; +} + +static inline void br_multicast_add_port(struct net_bridge_port *port) +{ +} + +static inline void br_multicast_del_port(struct net_bridge_port *port) +{ +} + +static inline void br_multicast_enable_port(struct net_bridge_port *port) +{ +} + +static inline void br_multicast_disable_port(struct net_bridge_port *port) +{ +} + +static inline void br_multicast_init(struct net_bridge *br) +{ +} + +static inline void br_multicast_open(struct net_bridge *br) +{ +} + +static inline void br_multicast_stop(struct net_bridge *br) +{ +} +#endif + +static inline bool br_multicast_is_router(struct net_bridge *br) +{ + return br->multicast_router == 2 || + (br->multicast_router == 1 && + timer_pending(&br->multicast_router_timer)); +} + /* br_netfilter.c */ #ifdef CONFIG_BRIDGE_NETFILTER extern int br_netfilter_init(void); -- cgit v1.2.2 From 5cb5e947d8f82011e3d7e7017e9d10db9a40ae9f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 27 Feb 2010 19:41:46 +0000 Subject: bridge: Add multicast forwarding functions This patch adds code to perform selective multicast forwarding. We forward multicast traffic to a set of ports plus all multicast router ports. In order to avoid duplications among these two sets of ports, we order all ports by the numeric value of their pointers. The two lists are then walked in lock-step to eliminate duplicates. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_forward.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++ net/bridge/br_private.h | 15 +++++++++++ 2 files changed, 82 insertions(+) (limited to 'net') diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 86cd0712d63e..d61e6f741125 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -186,3 +186,70 @@ void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, { br_flood(br, skb, skb2, __br_forward); } + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING +/* called with rcu_read_lock */ +static void br_multicast_flood(struct net_bridge_mdb_entry *mdst, + struct sk_buff *skb, struct sk_buff *skb0, + void (*__packet_hook)( + const struct net_bridge_port *p, + struct sk_buff *skb)) +{ + struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_port *port; + struct net_bridge_port *lport, *rport; + struct net_bridge_port *prev; + struct net_bridge_port_group *p; + struct hlist_node *rp; + + prev = NULL; + + rp = br->router_list.first; + p = mdst ? mdst->ports : NULL; + while (p || rp) { + lport = p ? p->port : NULL; + rport = rp ? 
hlist_entry(rp, struct net_bridge_port, rlist) : + NULL; + + port = (unsigned long)lport > (unsigned long)rport ? + lport : rport; + + prev = maybe_deliver(prev, port, skb, __packet_hook); + if (IS_ERR(prev)) + goto out; + + if ((unsigned long)lport >= (unsigned long)port) + p = p->next; + if ((unsigned long)rport >= (unsigned long)port) + rp = rp->next; + } + + if (!prev) + goto out; + + if (skb0) + deliver_clone(prev, skb, __packet_hook); + else + __packet_hook(prev, skb); + return; + +out: + if (!skb0) + kfree_skb(skb); +} + +/* called with rcu_read_lock */ +void br_multicast_deliver(struct net_bridge_mdb_entry *mdst, + struct sk_buff *skb) +{ + br_multicast_flood(mdst, skb, NULL, __br_deliver); +} + +/* called with rcu_read_lock */ +void br_multicast_forward(struct net_bridge_mdb_entry *mdst, + struct sk_buff *skb, struct sk_buff *skb2) +{ + br_multicast_flood(mdst, skb, skb2, __br_forward); +} +#endif diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 44345c9afdd3..c85943c2b23f 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -293,6 +293,10 @@ extern void br_multicast_disable_port(struct net_bridge_port *port); extern void br_multicast_init(struct net_bridge *br); extern void br_multicast_open(struct net_bridge *br); extern void br_multicast_stop(struct net_bridge *br); +extern void br_multicast_deliver(struct net_bridge_mdb_entry *mdst, + struct sk_buff *skb); +extern void br_multicast_forward(struct net_bridge_mdb_entry *mdst, + struct sk_buff *skb, struct sk_buff *skb2); #else static inline int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, @@ -334,6 +338,17 @@ static inline void br_multicast_open(struct net_bridge *br) static inline void br_multicast_stop(struct net_bridge *br) { } + +static inline void br_multicast_deliver(struct net_bridge_mdb_entry *mdst, + struct sk_buff *skb) +{ +} + +static inline void br_multicast_forward(struct net_bridge_mdb_entry *mdst, + struct sk_buff *skb, + struct sk_buff *skb2) +{ +} #endif static inline bool br_multicast_is_router(struct net_bridge *br) -- cgit v1.2.2 From 3fe2d7c70b747d5d968f4e8fa210676d49d40059 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 28 Feb 2010 00:49:38 -0800 Subject: bridge: Add multicast start/stop hooks This patch hooks up the bridge start/stop and add/delete/disable port functions to the new multicast module. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/bridge/br_device.c | 6 +++++- net/bridge/br_if.c | 4 ++++ net/bridge/br_stp.c | 2 ++ net/bridge/br_stp_if.c | 1 + 4 files changed, 12 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index be356293caa1..91dffe7574d6 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -51,6 +51,7 @@ static int br_dev_open(struct net_device *dev) br_features_recompute(br); netif_start_queue(dev); br_stp_enable_bridge(br); + br_multicast_open(br); return 0; } @@ -61,7 +62,10 @@ static void br_dev_set_multicast_list(struct net_device *dev) static int br_dev_stop(struct net_device *dev) { - br_stp_disable_bridge(netdev_priv(dev)); + struct net_bridge *br = netdev_priv(dev); + + br_stp_disable_bridge(br); + br_multicast_stop(br); netif_stop_queue(dev); diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index bc2b1badab88..b6a3872f5681 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -147,6 +147,8 @@ static void del_nbp(struct net_bridge_port *p) rcu_assign_pointer(dev->br_port, NULL); + br_multicast_del_port(p); + kobject_uevent(&p->kobj, KOBJ_REMOVE); kobject_del(&p->kobj); @@ -207,6 +209,7 @@ static struct net_device *new_bridge_dev(struct net *net, const char *name) br_netfilter_rtable_init(br); br_stp_timer_init(br); + br_multicast_init(br); return dev; } @@ -258,6 +261,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, br_init_port(p); p->state = BR_STATE_DISABLED; br_stp_port_timer_init(p); + br_multicast_add_port(p); return p; } diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index fd3f8d6c0998..edcf14b560f6 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -386,6 +386,8 @@ static void br_make_forwarding(struct net_bridge_port *p) else p->state = BR_STATE_LEARNING; + br_multicast_enable_port(p); + br_log_state(p); if (br->forward_delay != 0) diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 9a52ac5b4525..d527119e9f54 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -108,6 +108,7 @@ void br_stp_disable_port(struct net_bridge_port *p) del_timer(&p->hold_timer); br_fdb_delete_by_port(br, p, 0); + br_multicast_disable_port(p); br_configuration_update(br); -- cgit v1.2.2 From c4fcb78cf8ae55667809e54e54872a21025dd073 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 27 Feb 2010 19:41:48 +0000 Subject: bridge: Add multicast data-path hooks This patch finally hooks up the multicast snooping module to the data path. In particular, all multicast packets passing through the bridge are fed into the module and switched by it. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/bridge/br_device.c | 15 ++++++++++++--- net/bridge/br_input.c | 18 +++++++++++++++++- 2 files changed, 29 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 91dffe7574d6..eb7062d2e9e5 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -25,6 +25,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) struct net_bridge *br = netdev_priv(dev); const unsigned char *dest = skb->data; struct net_bridge_fdb_entry *dst; + struct net_bridge_mdb_entry *mdst; BR_INPUT_SKB_CB(skb)->brdev = dev; @@ -34,13 +35,21 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN); - if (dest[0] & 1) - br_flood_deliver(br, skb); - else if ((dst = __br_fdb_get(br, dest)) != NULL) + if (dest[0] & 1) { + if (br_multicast_rcv(br, NULL, skb)) + goto out; + + mdst = br_mdb_get(br, skb); + if (mdst || BR_INPUT_SKB_CB(skb)->mrouters_only) + br_multicast_deliver(mdst, skb); + else + br_flood_deliver(br, skb); + } else if ((dst = __br_fdb_get(br, dest)) != NULL) br_deliver(dst->dst, skb); else br_flood_deliver(br, skb); +out: return NETDEV_TX_OK; } diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index edfdaef44296..53b39851d87d 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -41,6 +41,7 @@ int br_handle_frame_finish(struct sk_buff *skb) struct net_bridge_port *p = rcu_dereference(skb->dev->br_port); struct net_bridge *br; struct net_bridge_fdb_entry *dst; + struct net_bridge_mdb_entry *mdst; struct sk_buff *skb2; if (!p || p->state == BR_STATE_DISABLED) @@ -50,6 +51,10 @@ int br_handle_frame_finish(struct sk_buff *skb) br = p->br; br_fdb_update(br, p, eth_hdr(skb)->h_source); + if (is_multicast_ether_addr(dest) && + br_multicast_rcv(br, p, skb)) + goto drop; + if (p->state == BR_STATE_LEARNING) goto drop; @@ -64,8 +69,19 @@ int br_handle_frame_finish(struct sk_buff *skb) dst = NULL; if (is_multicast_ether_addr(dest)) { + mdst = br_mdb_get(br, skb); + if (mdst || BR_INPUT_SKB_CB(skb)->mrouters_only) { + if ((mdst && !hlist_unhashed(&mdst->mglist)) || + br_multicast_is_router(br)) + skb2 = skb; + br_multicast_forward(mdst, skb, skb2); + skb = NULL; + if (!skb2) + goto out; + } else + skb2 = skb; + br->dev->stats.multicast++; - skb2 = skb; } else if ((dst = __br_fdb_get(br, dest)) && dst->is_local) { skb2 = skb; /* Do not forward the packet since it's local. */ -- cgit v1.2.2 From 0909e11758bd28848aeb6646e021ec1e031a3f0f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 27 Feb 2010 19:41:49 +0000 Subject: bridge: Add multicast_router sysfs entries This patch allows the user to forcibly enable/disable ports as having multicast routers attached. A port with a multicast router will receive all multicast traffic. The value 0 disables it completely. The default is 1 which lets the system automatically detect the presence of routers (currently this is limited to picking up queries), and 2 means that the port will always receive all multicast traffic. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/bridge/br_multicast.c | 105 +++++++++++++++++++++++++++++++++++++++------- net/bridge/br_private.h | 3 ++ net/bridge/br_sysfs_br.c | 21 ++++++++++ net/bridge/br_sysfs_if.c | 18 ++++++++ 4 files changed, 133 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 746b5a611aae..674224b6729d 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -746,12 +746,30 @@ static int br_multicast_igmp3_report(struct net_bridge *br, return err; } +static void br_multicast_add_router(struct net_bridge *br, + struct net_bridge_port *port) +{ + struct hlist_node *p; + struct hlist_node **h; + + for (h = &br->router_list.first; + (p = *h) && + (unsigned long)container_of(p, struct net_bridge_port, rlist) > + (unsigned long)port; + h = &p->next) + ; + + port->rlist.pprev = h; + port->rlist.next = p; + rcu_assign_pointer(*h, &port->rlist); + if (p) + p->pprev = &port->rlist.next; +} + static void br_multicast_mark_router(struct net_bridge *br, struct net_bridge_port *port) { unsigned long now = jiffies; - struct hlist_node *p; - struct hlist_node **h; if (!port) { if (br->multicast_router == 1) @@ -766,18 +784,7 @@ static void br_multicast_mark_router(struct net_bridge *br, if (!hlist_unhashed(&port->rlist)) goto timer; - for (h = &br->router_list.first; - (p = *h) && - (unsigned long)container_of(p, struct net_bridge_port, rlist) > - (unsigned long)port; - h = &p->next) - ; - - port->rlist.pprev = h; - port->rlist.next = p; - rcu_assign_pointer(*h, &port->rlist); - if (p) - p->pprev = &port->rlist.next; + br_multicast_add_router(br, port); timer: mod_timer(&port->multicast_router_timer, @@ -1133,3 +1140,73 @@ void br_multicast_stop(struct net_bridge *br) out: spin_unlock_bh(&br->multicast_lock); } + +int br_multicast_set_router(struct net_bridge *br, unsigned long val) +{ + int err = -ENOENT; + + spin_lock_bh(&br->multicast_lock); + if (!netif_running(br->dev)) + goto unlock; + + switch (val) { + case 0: + case 2: + del_timer(&br->multicast_router_timer); + /* fall through */ + case 1: + br->multicast_router = val; + err = 0; + break; + + default: + err = -EINVAL; + break; + } + +unlock: + spin_unlock_bh(&br->multicast_lock); + + return err; +} + +int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val) +{ + struct net_bridge *br = p->br; + int err = -ENOENT; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || p->state == BR_STATE_DISABLED) + goto unlock; + + switch (val) { + case 0: + case 1: + case 2: + p->multicast_router = val; + err = 0; + + if (val < 2 && !hlist_unhashed(&p->rlist)) + hlist_del_init_rcu(&p->rlist); + + if (val == 1) + break; + + del_timer(&p->multicast_router_timer); + + if (val == 0) + break; + + br_multicast_add_router(br, p); + break; + + default: + err = -EINVAL; + break; + } + +unlock: + spin_unlock(&br->multicast_lock); + + return err; +} diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index c85943c2b23f..dcdfafbe4b17 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -297,6 +297,9 @@ extern void br_multicast_deliver(struct net_bridge_mdb_entry *mdst, struct sk_buff *skb); extern void br_multicast_forward(struct net_bridge_mdb_entry *mdst, struct sk_buff *skb, struct sk_buff *skb2); +extern int br_multicast_set_router(struct net_bridge *br, unsigned long val); +extern int br_multicast_set_port_router(struct net_bridge_port *p, + unsigned long val); #else static inline int br_multicast_rcv(struct net_bridge 
*br, struct net_bridge_port *port, diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index bee4f300d0c8..cb742016db21 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -345,6 +345,24 @@ static ssize_t store_flush(struct device *d, } static DEVICE_ATTR(flush, S_IWUSR, NULL, store_flush); +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING +static ssize_t show_multicast_router(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%d\n", br->multicast_router); +} + +static ssize_t store_multicast_router(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_multicast_set_router); +} +static DEVICE_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router, + store_multicast_router); +#endif + static struct attribute *bridge_attrs[] = { &dev_attr_forward_delay.attr, &dev_attr_hello_time.attr, @@ -364,6 +382,9 @@ static struct attribute *bridge_attrs[] = { &dev_attr_gc_timer.attr, &dev_attr_group_addr.attr, &dev_attr_flush.attr, +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + &dev_attr_multicast_router.attr, +#endif NULL }; diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 820643a3ba9c..696596cd3384 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -159,6 +159,21 @@ static ssize_t store_hairpin_mode(struct net_bridge_port *p, unsigned long v) static BRPORT_ATTR(hairpin_mode, S_IRUGO | S_IWUSR, show_hairpin_mode, store_hairpin_mode); +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING +static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) +{ + return sprintf(buf, "%d\n", p->multicast_router); +} + +static ssize_t store_multicast_router(struct net_bridge_port *p, + unsigned long v) +{ + return br_multicast_set_port_router(p, v); +} +static BRPORT_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router, + store_multicast_router); +#endif + static struct brport_attribute *brport_attrs[] = { &brport_attr_path_cost, &brport_attr_priority, @@ -176,6 +191,9 @@ static struct brport_attribute *brport_attrs[] = { &brport_attr_hold_timer, &brport_attr_flush, &brport_attr_hairpin_mode, +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + &brport_attr_multicast_router, +#endif NULL }; -- cgit v1.2.2 From 561f1103a2b70de7e06e1e7fd072a5b142a4278c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 27 Feb 2010 19:41:50 +0000 Subject: bridge: Add multicast_snooping sysfs toggle This patch allows the user to disable IGMP snooping completely through a sysfs toggle. It also allows the user to reenable snooping when it has been automatically disabled due to hash collisions. If the collisions have not been resolved however the system will refuse to reenable snooping. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/bridge/br_multicast.c | 61 +++++++++++++++++++++++++++++++++++++++++++---- net/bridge/br_private.h | 1 + net/bridge/br_sysfs_br.c | 18 ++++++++++++++ 3 files changed, 75 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 674224b6729d..c7a1095ed84a 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -656,6 +656,15 @@ void br_multicast_del_port(struct net_bridge_port *port) del_timer_sync(&port->multicast_router_timer); } +static void __br_multicast_enable_port(struct net_bridge_port *port) +{ + port->multicast_startup_queries_sent = 0; + + if (try_to_del_timer_sync(&port->multicast_query_timer) >= 0 || + del_timer(&port->multicast_query_timer)) + mod_timer(&port->multicast_query_timer, jiffies); +} + void br_multicast_enable_port(struct net_bridge_port *port) { struct net_bridge *br = port->br; @@ -664,11 +673,7 @@ void br_multicast_enable_port(struct net_bridge_port *port) if (br->multicast_disabled || !netif_running(br->dev)) goto out; - port->multicast_startup_queries_sent = 0; - - if (try_to_del_timer_sync(&port->multicast_query_timer) >= 0 || - del_timer(&port->multicast_query_timer)) - mod_timer(&port->multicast_query_timer, jiffies); + __br_multicast_enable_port(port); out: spin_unlock(&br->multicast_lock); @@ -1210,3 +1215,49 @@ unlock: return err; } + +int br_multicast_toggle(struct net_bridge *br, unsigned long val) +{ + struct net_bridge_port *port; + int err = -ENOENT; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev)) + goto unlock; + + err = 0; + if (br->multicast_disabled == !val) + goto unlock; + + br->multicast_disabled = !val; + if (br->multicast_disabled) + goto unlock; + + if (br->mdb) { + if (br->mdb->old) { + err = -EEXIST; +rollback: + br->multicast_disabled = !!val; + goto unlock; + } + + err = br_mdb_rehash(&br->mdb, br->mdb->max, + br->hash_elasticity); + if (err) + goto rollback; + } + + br_multicast_open(br); + list_for_each_entry(port, &br->port_list, list) { + if (port->state == BR_STATE_DISABLED || + port->state == BR_STATE_BLOCKING) + continue; + + __br_multicast_enable_port(port); + } + +unlock: + spin_unlock(&br->multicast_lock); + + return err; +} diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index dcdfafbe4b17..bf162fa6b23b 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -300,6 +300,7 @@ extern void br_multicast_forward(struct net_bridge_mdb_entry *mdst, extern int br_multicast_set_router(struct net_bridge *br, unsigned long val); extern int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val); +extern int br_multicast_toggle(struct net_bridge *br, unsigned long val); #else static inline int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index cb742016db21..0ab288332fc5 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -361,6 +361,23 @@ static ssize_t store_multicast_router(struct device *d, } static DEVICE_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router, store_multicast_router); + +static ssize_t show_multicast_snooping(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%d\n", !br->multicast_disabled); +} + +static ssize_t store_multicast_snooping(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, 
br_multicast_toggle); +} +static DEVICE_ATTR(multicast_snooping, S_IRUGO | S_IWUSR, + show_multicast_snooping, store_multicast_snooping); #endif static struct attribute *bridge_attrs[] = { @@ -384,6 +401,7 @@ static struct attribute *bridge_attrs[] = { &dev_attr_flush.attr, #ifdef CONFIG_BRIDGE_IGMP_SNOOPING &dev_attr_multicast_router.attr, + &dev_attr_multicast_snooping.attr, #endif NULL }; -- cgit v1.2.2 From b195167fcf089dbdc650bb874084555035f07f98 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 27 Feb 2010 19:41:51 +0000 Subject: bridge: Add hash elasticity/max sysfs entries This patch allows the user to control the hash elasticity/max parameters. The elasticity setting does not take effect until the next new multicast group is added. At which point it is checked and if after rehashing it still can't be satisfied then snooping will be disabled. The max setting on the other hand takes effect immediately. It must be a power of two and cannot be set to a value less than the current number of multicast group entries. This is the only way to shrink the multicast hash. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 41 +++++++++++++++++++++++++++++++++++++++++ net/bridge/br_private.h | 1 + net/bridge/br_sysfs_br.c | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index c7a1095ed84a..2559fb539836 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -1261,3 +1262,43 @@ unlock: return err; } + +int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val) +{ + int err = -ENOENT; + u32 old; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev)) + goto unlock; + + err = -EINVAL; + if (!is_power_of_2(val)) + goto unlock; + if (br->mdb && val < br->mdb->size) + goto unlock; + + err = 0; + + old = br->hash_max; + br->hash_max = val; + + if (br->mdb) { + if (br->mdb->old) { + err = -EEXIST; +rollback: + br->hash_max = old; + goto unlock; + } + + err = br_mdb_rehash(&br->mdb, br->hash_max, + br->hash_elasticity); + if (err) + goto rollback; + } + +unlock: + spin_unlock(&br->multicast_lock); + + return err; +} diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index bf162fa6b23b..9191198c4f1b 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -301,6 +301,7 @@ extern int br_multicast_set_router(struct net_bridge *br, unsigned long val); extern int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val); extern int br_multicast_toggle(struct net_bridge *br, unsigned long val); +extern int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val); #else static inline int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 0ab288332fc5..d2ee53b3ad7d 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -378,6 +378,43 @@ static ssize_t store_multicast_snooping(struct device *d, } static DEVICE_ATTR(multicast_snooping, S_IRUGO | S_IWUSR, show_multicast_snooping, store_multicast_snooping); + +static ssize_t show_hash_elasticity(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br->hash_elasticity); +} + +static int set_elasticity(struct net_bridge *br, unsigned long val) +{ + 
br->hash_elasticity = val; + return 0; +} + +static ssize_t store_hash_elasticity(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, set_elasticity); +} +static DEVICE_ATTR(hash_elasticity, S_IRUGO | S_IWUSR, show_hash_elasticity, + store_hash_elasticity); + +static ssize_t show_hash_max(struct device *d, struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br->hash_max); +} + +static ssize_t store_hash_max(struct device *d, struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_multicast_set_hash_max); +} +static DEVICE_ATTR(hash_max, S_IRUGO | S_IWUSR, show_hash_max, + store_hash_max); #endif static struct attribute *bridge_attrs[] = { @@ -402,6 +439,8 @@ static struct attribute *bridge_attrs[] = { #ifdef CONFIG_BRIDGE_IGMP_SNOOPING &dev_attr_multicast_router.attr, &dev_attr_multicast_snooping.attr, + &dev_attr_hash_elasticity.attr, + &dev_attr_hash_max.attr, #endif NULL }; -- cgit v1.2.2 From d902eee43f1951b358d7347d9165c6af21cf7b1b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 27 Feb 2010 19:41:52 +0000 Subject: bridge: Add multicast count/interval sysfs entries This patch allows the user to the IGMP parameters related to the snooping function of the bridge. This includes various time values and retransmission limits. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_sysfs_br.c | 203 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 203 insertions(+) (limited to 'net') diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index d2ee53b3ad7d..dd321e39e621 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -415,6 +415,201 @@ static ssize_t store_hash_max(struct device *d, struct device_attribute *attr, } static DEVICE_ATTR(hash_max, S_IRUGO | S_IWUSR, show_hash_max, store_hash_max); + +static ssize_t show_multicast_last_member_count(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br->multicast_last_member_count); +} + +static int set_last_member_count(struct net_bridge *br, unsigned long val) +{ + br->multicast_last_member_count = val; + return 0; +} + +static ssize_t store_multicast_last_member_count(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, set_last_member_count); +} +static DEVICE_ATTR(multicast_last_member_count, S_IRUGO | S_IWUSR, + show_multicast_last_member_count, + store_multicast_last_member_count); + +static ssize_t show_multicast_startup_query_count( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br->multicast_startup_query_count); +} + +static int set_startup_query_count(struct net_bridge *br, unsigned long val) +{ + br->multicast_startup_query_count = val; + return 0; +} + +static ssize_t store_multicast_startup_query_count( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_startup_query_count); +} +static DEVICE_ATTR(multicast_startup_query_count, S_IRUGO | S_IWUSR, + show_multicast_startup_query_count, + store_multicast_startup_query_count); + +static ssize_t show_multicast_last_member_interval( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct 
net_bridge *br = to_bridge(d); + return sprintf(buf, "%lu\n", + jiffies_to_clock_t(br->multicast_last_member_interval)); +} + +static int set_last_member_interval(struct net_bridge *br, unsigned long val) +{ + br->multicast_last_member_interval = clock_t_to_jiffies(val); + return 0; +} + +static ssize_t store_multicast_last_member_interval( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_last_member_interval); +} +static DEVICE_ATTR(multicast_last_member_interval, S_IRUGO | S_IWUSR, + show_multicast_last_member_interval, + store_multicast_last_member_interval); + +static ssize_t show_multicast_membership_interval( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%lu\n", + jiffies_to_clock_t(br->multicast_membership_interval)); +} + +static int set_membership_interval(struct net_bridge *br, unsigned long val) +{ + br->multicast_membership_interval = clock_t_to_jiffies(val); + return 0; +} + +static ssize_t store_multicast_membership_interval( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_membership_interval); +} +static DEVICE_ATTR(multicast_membership_interval, S_IRUGO | S_IWUSR, + show_multicast_membership_interval, + store_multicast_membership_interval); + +static ssize_t show_multicast_querier_interval(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%lu\n", + jiffies_to_clock_t(br->multicast_querier_interval)); +} + +static int set_querier_interval(struct net_bridge *br, unsigned long val) +{ + br->multicast_querier_interval = clock_t_to_jiffies(val); + return 0; +} + +static ssize_t store_multicast_querier_interval(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, set_querier_interval); +} +static DEVICE_ATTR(multicast_querier_interval, S_IRUGO | S_IWUSR, + show_multicast_querier_interval, + store_multicast_querier_interval); + +static ssize_t show_multicast_query_interval(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%lu\n", + jiffies_to_clock_t(br->multicast_query_interval)); +} + +static int set_query_interval(struct net_bridge *br, unsigned long val) +{ + br->multicast_query_interval = clock_t_to_jiffies(val); + return 0; +} + +static ssize_t store_multicast_query_interval(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, set_query_interval); +} +static DEVICE_ATTR(multicast_query_interval, S_IRUGO | S_IWUSR, + show_multicast_query_interval, + store_multicast_query_interval); + +static ssize_t show_multicast_query_response_interval( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf( + buf, "%lu\n", + jiffies_to_clock_t(br->multicast_query_response_interval)); +} + +static int set_query_response_interval(struct net_bridge *br, unsigned long val) +{ + br->multicast_query_response_interval = clock_t_to_jiffies(val); + return 0; +} + +static ssize_t store_multicast_query_response_interval( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_query_response_interval); +} +static 
DEVICE_ATTR(multicast_query_response_interval, S_IRUGO | S_IWUSR, + show_multicast_query_response_interval, + store_multicast_query_response_interval); + +static ssize_t show_multicast_startup_query_interval( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf( + buf, "%lu\n", + jiffies_to_clock_t(br->multicast_startup_query_interval)); +} + +static int set_startup_query_interval(struct net_bridge *br, unsigned long val) +{ + br->multicast_startup_query_interval = clock_t_to_jiffies(val); + return 0; +} + +static ssize_t store_multicast_startup_query_interval( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_startup_query_interval); +} +static DEVICE_ATTR(multicast_startup_query_interval, S_IRUGO | S_IWUSR, + show_multicast_startup_query_interval, + store_multicast_startup_query_interval); #endif static struct attribute *bridge_attrs[] = { @@ -441,6 +636,14 @@ static struct attribute *bridge_attrs[] = { &dev_attr_multicast_snooping.attr, &dev_attr_hash_elasticity.attr, &dev_attr_hash_max.attr, + &dev_attr_multicast_last_member_count.attr, + &dev_attr_multicast_startup_query_count.attr, + &dev_attr_multicast_last_member_interval.attr, + &dev_attr_multicast_membership_interval.attr, + &dev_attr_multicast_querier_interval.attr, + &dev_attr_multicast_query_interval.attr, + &dev_attr_multicast_query_response_interval.attr, + &dev_attr_multicast_startup_query_interval.attr, #endif NULL }; -- cgit v1.2.2 From 024c378f0b719d43b01b875caefa19f2612e103a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 28 Feb 2010 00:51:42 -0800 Subject: bridge: Make IGMP snooping depend upon BRIDGE. Signed-off-by: David S. Miller --- net/bridge/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig index 78dd5497210a..19a6b9629c51 100644 --- a/net/bridge/Kconfig +++ b/net/bridge/Kconfig @@ -34,6 +34,7 @@ config BRIDGE config BRIDGE_IGMP_SNOOPING bool "IGMP snooping" + depends on BRIDGE default y ---help--- If you say Y here, then the Ethernet bridge will be able selectively -- cgit v1.2.2 From cf0aa4e07c32b0c211c24742aa015c0e7a135293 Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Sat, 27 Feb 2010 19:45:37 +0000 Subject: netlink: Adding inode field to /proc/net/netlink The Inode field in /proc/net/{tcp,udp,packet,raw,...} is useful to know the types of file descriptors associated to a process. Actually lsof utility uses the field. Unfortunately, unlike /proc/net/{tcp,udp,packet,raw,...}, /proc/net/netlink doesn't have the field. This patch adds the field to /proc/net/netlink. Signed-off-by: Masatake YAMATO Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/netlink/af_netlink.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 4c5972ba8c78..320d0423a240 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1978,12 +1978,12 @@ static int netlink_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) seq_puts(seq, "sk Eth Pid Groups " - "Rmem Wmem Dump Locks Drops\n"); + "Rmem Wmem Dump Locks Drops Inode\n"); else { struct sock *s = v; struct netlink_sock *nlk = nlk_sk(s); - seq_printf(seq, "%p %-3d %-6d %08x %-8d %-8d %p %-8d %-8d\n", + seq_printf(seq, "%p %-3d %-6d %08x %-8d %-8d %p %-8d %-8d %-8lu\n", s, s->sk_protocol, nlk->pid, @@ -1992,7 +1992,8 @@ static int netlink_seq_show(struct seq_file *seq, void *v) sk_wmem_alloc_get(s), nlk->cb, atomic_read(&s->sk_refcnt), - atomic_read(&s->sk_drops) + atomic_read(&s->sk_drops), + sock_i_ino(s) ); } -- cgit v1.2.2 From 9675478bbafed08848bf8d7e28400d5e46330b23 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Fri, 26 Feb 2010 21:43:38 +0000 Subject: ethtool: do not set some flags, if others failed NETIF_F_NTUPLE flag setting introduced a bug: non-ntuple flags like LRO may be successfully set, before ioctl(2) returns failure to userspace. The set-flags operation should be all-or-none, rather than leaving things in an inconsistent state prior to reporting failure to userspace. Signed-off-by: Jeff Garzik Signed-off-by: David S. Miller --- net/core/ethtool.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 31b1eddc1b84..0f2f82185ec4 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -135,21 +135,23 @@ u32 ethtool_op_get_flags(struct net_device *dev) int ethtool_op_set_flags(struct net_device *dev, u32 data) { const struct ethtool_ops *ops = dev->ethtool_ops; + unsigned long features = dev->features; if (data & ETH_FLAG_LRO) - dev->features |= NETIF_F_LRO; + features |= NETIF_F_LRO; else - dev->features &= ~NETIF_F_LRO; + features &= ~NETIF_F_LRO; if (data & ETH_FLAG_NTUPLE) { if (!ops->set_rx_ntuple) return -EOPNOTSUPP; - dev->features |= NETIF_F_NTUPLE; + features |= NETIF_F_NTUPLE; } else { /* safe to clear regardless */ - dev->features &= ~NETIF_F_NTUPLE; + features &= ~NETIF_F_NTUPLE; } + dev->features = features; return 0; } -- cgit v1.2.2 From f5822754ea006563e1bf0a1f43faaad49c0d8bb2 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 28 Feb 2010 16:32:51 -0500 Subject: Revert "sunrpc: fix peername failed on closed listener" This reverts commit b292cf9ce70d221c3f04ff62db5ab13d9a249ca8. The commit that it attempted to patch up, b0401d725334a94d57335790b8ac2404144748ee, was fundamentally wrong, and will also be reverted. Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 09838300dac4..818c4c365b28 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -706,8 +706,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) spin_unlock_bh(&pool->sp_lock); len = 0; - if (test_bit(XPT_LISTENER, &xprt->xpt_flags) && - !test_bit(XPT_CLOSE, &xprt->xpt_flags)) { + if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { struct svc_xprt *newxpt; newxpt = xprt->xpt_ops->xpo_accept(xprt); if (newxpt) { -- cgit v1.2.2 From 1b644b6e6f6160ae35ce4b52c2ca89ed3e356e18 Mon Sep 17 00:00:00 2001 From: "J. 
Bruce Fields" Date: Sun, 28 Feb 2010 16:33:31 -0500 Subject: Revert "sunrpc: move the close processing after do recvfrom method" This reverts commit b0401d725334a94d57335790b8ac2404144748ee, which moved svc_delete_xprt() outside of XPT_BUSY, and allowed it to be called after svc_xpt_recived(), removing its last reference and destroying it after it had already been queued for future processing. Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 818c4c365b28..8f0f1fb3dc52 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -706,7 +706,10 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) spin_unlock_bh(&pool->sp_lock); len = 0; - if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { + if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { + dprintk("svc_recv: found XPT_CLOSE\n"); + svc_delete_xprt(xprt); + } else if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { struct svc_xprt *newxpt; newxpt = xprt->xpt_ops->xpo_accept(xprt); if (newxpt) { @@ -732,7 +735,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) svc_xprt_received(newxpt); } svc_xprt_received(xprt); - } else if (!test_bit(XPT_CLOSE, &xprt->xpt_flags)) { + } else { dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", rqstp, pool->sp_id, xprt, atomic_read(&xprt->xpt_ref.refcount)); @@ -745,11 +748,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) dprintk("svc: got len=%d\n", len); } - if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { - dprintk("svc_recv: found XPT_CLOSE\n"); - svc_delete_xprt(xprt); - } - /* No data, incomplete (TCP) read, or accept() */ if (len == 0 || len == -EAGAIN) { rqstp->rq_res.len = 0; -- cgit v1.2.2 From 76dadd76c265a0cdb5a76aa4eef03fcc9639b388 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 28 Feb 2010 01:20:36 +0000 Subject: scm: Only support SCM_RIGHTS on unix domain sockets. We use scm_send and scm_recv on both unix domain and netlink sockets, but only unix domain sockets support everything required for file descriptor passing, so error if someone attempts to pass file descriptors over netlink sockets. Cc: stable@kernel.org Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/core/scm.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/scm.c b/net/core/scm.c index b7ba91b074b3..9b264634acfd 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -156,6 +156,8 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) switch (cmsg->cmsg_type) { case SCM_RIGHTS: + if (!sock->ops || sock->ops->family != PF_UNIX) + goto error; err=scm_fp_copy(cmsg, &p->fp); if (err<0) goto error; -- cgit v1.2.2 From 301e99ce4a2f42a317129230fd42e6cd874c64b0 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Sun, 28 Feb 2010 22:01:05 -0500 Subject: nfsd: ensure sockets are closed on error One the changes in commit d7979ae4a "svc: Move close processing to a single place" is: err_delete: - svc_delete_socket(svsk); + set_bit(SK_CLOSE, &svsk->sk_flags); return -EAGAIN; This is insufficient. The recvfrom methods must always call svc_xprt_received on completion so that the socket gets re-queued if there is any more work to do. This particular path did not make that call because it actually destroyed the svsk, making requeue pointless. 
When the svc_delete_socket was changed to just set a bit, we should have added a call to svc_xprt_received. This is the problem that b0401d7253 attempted to fix, incorrectly. Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 9e093910c479..a29f259204e6 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -968,6 +968,7 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) return len; err_delete: set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); + svc_xprt_received(&svsk->sk_xprt); err_again: return -EAGAIN; } -- cgit v1.2.2 From 85b3526932645269f67fee3583fa2aa500ab4f34 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Mon, 1 Mar 2010 09:53:04 +0000 Subject: bridge: Fix build error when IGMP_SNOOPING is not enabled Fix the following build error when IGMP_SNOOPING is not enabled. In file included from net/bridge/br.c:24: net/bridge/br_private.h: In function 'br_multicast_is_router': net/bridge/br_private.h:361: error: 'struct net_bridge' has no member named 'multicast_router' net/bridge/br_private.h:362: error: 'struct net_bridge' has no member named 'multicast_router' net/bridge/br_private.h:363: error: 'struct net_bridge' has no member named 'multicast_router_timer' Signed-off-by: Sridhar Samudrala Acked-by: Randy Dunlap Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_private.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 9191198c4f1b..1cf2cef78584 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -302,6 +302,13 @@ extern int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val); extern int br_multicast_toggle(struct net_bridge *br, unsigned long val); extern int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val); + +static inline bool br_multicast_is_router(struct net_bridge *br) +{ + return br->multicast_router == 2 || + (br->multicast_router == 1 && + timer_pending(&br->multicast_router_timer)); +} #else static inline int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, @@ -354,14 +361,11 @@ static inline void br_multicast_forward(struct net_bridge_mdb_entry *mdst, struct sk_buff *skb2) { } -#endif - static inline bool br_multicast_is_router(struct net_bridge *br) { - return br->multicast_router == 2 || - (br->multicast_router == 1 && - timer_pending(&br->multicast_router_timer)); + return 0; } +#endif /* br_netfilter.c */ #ifdef CONFIG_BRIDGE_NETFILTER -- cgit v1.2.2 From 9fcfe0c83c3b04a759cde6b8c5f961237f17808b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 2 Mar 2010 13:06:21 -0500 Subject: SUNRPC: Handle EINVAL error returns from the TCP connect operation This can, for instance, happen if the user specifies a link local IPv6 address. Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- net/sunrpc/xprtsock.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 721bafd95a0f..712412982cee 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1912,6 +1912,11 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt, case -EALREADY: xprt_clear_connecting(xprt); return; + case -EINVAL: + /* Happens, for instance, if the user specified a link + * local IPv6 address without a scope-id. 
+ */ + goto out; } out_eagain: status = -EAGAIN; -- cgit v1.2.2 From 3082a2b7b1af1b1508c1c3fa589566064f926f40 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 16 Feb 2010 16:36:25 -0500 Subject: rfkill: Add support for KEY_RFKILL Add support for handling KEY_RFKILL in the rfkill input module. This simply toggles the state of all rfkill devices. The comment in rfkill.h is also updated to reflect that RFKILL_TYPE_ALL may be used inside the kernel. Signed-off-by: Matthew Garrett Acked-by: Marcel Holtmann Signed-off-by: John W. Linville --- net/rfkill/input.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/rfkill/input.c b/net/rfkill/input.c index a7295ad5f9cb..3713d7ecab96 100644 --- a/net/rfkill/input.c +++ b/net/rfkill/input.c @@ -212,6 +212,9 @@ static void rfkill_event(struct input_handle *handle, unsigned int type, case KEY_WIMAX: rfkill_schedule_toggle(RFKILL_TYPE_WIMAX); break; + case KEY_RFKILL: + rfkill_schedule_toggle(RFKILL_TYPE_ALL); + break; } } else if (type == EV_SW && code == SW_RFKILL_ALL) rfkill_schedule_evsw_rfkillall(data); @@ -294,6 +297,11 @@ static const struct input_device_id rfkill_ids[] = { .evbit = { BIT_MASK(EV_KEY) }, .keybit = { [BIT_WORD(KEY_WIMAX)] = BIT_MASK(KEY_WIMAX) }, }, + { + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT, + .evbit = { BIT_MASK(EV_KEY) }, + .keybit = { [BIT_WORD(KEY_RFKILL)] = BIT_MASK(KEY_RFKILL) }, + }, { .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_SWBIT, .evbit = { BIT(EV_SW) }, -- cgit v1.2.2 From 9c87ba6734422034fccb938da1039ed63da1395c Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sun, 28 Feb 2010 12:13:46 +0200 Subject: mac80211: Fix reassociation processing (within ESS roaming) Commit e1dd33f60ced091114e4aacf141e0d03b88d3e13 changed cfg80211 to allow association commands while in associated state to enable support for roaming within an ESS. However, this was not enough to resolve all cases with mac80211 which needs some additional handling of the reassociation case to clear internal state with the BSS that was in use previously. This patch makes ieee80211_mgd_assoc() accept a valid reassociation command and clean the association state with the previous BSS. This fixes roaming between BSSes in an ESS when using wpa_supplicant with -Dnl80211. Signed-off-by: Jouni Malinen Cc: stable@kernel.org Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 41812a15eea0..5a268761e4c5 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1893,8 +1893,20 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, mutex_lock(&ifmgd->mtx); if (ifmgd->associated) { - mutex_unlock(&ifmgd->mtx); - return -EALREADY; + if (!req->prev_bssid || + memcmp(req->prev_bssid, ifmgd->associated->bssid, + ETH_ALEN)) { + /* + * We are already associated and the request was not a + * reassociation request from the current BSS, so + * reject it. + */ + mutex_unlock(&ifmgd->mtx); + return -EALREADY; + } + + /* Trying to reassociate - clear previous association state */ + ieee80211_set_disassoc(sdata); } mutex_unlock(&ifmgd->mtx); -- cgit v1.2.2 From ccdb357ccb77cc4cbe4f7abee9efd19957f0753a Mon Sep 17 00:00:00 2001 From: "J. 
Bruce Fields" Date: Tue, 2 Mar 2010 15:49:21 -0500 Subject: svcrpc: treat uid's as unsigned We should consistently treat uid's as unsigned--it's confusing when the display of uid's in the cache contents isn't consistent with their representation in upcalls. Signed-off-by: J. Bruce Fields --- net/sunrpc/svcauth_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 97f0e9e12024..afdcb0459a83 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -624,7 +624,7 @@ static int unix_gid_show(struct seq_file *m, else glen = 0; - seq_printf(m, "%d %d:", ug->uid, glen); + seq_printf(m, "%u %d:", ug->uid, glen); for (i = 0; i < glen; i++) seq_printf(m, " %d", GROUP_AT(ug->gi, i)); seq_printf(m, "\n"); -- cgit v1.2.2 From 87c1e12b5eeb7b30b4b41291bef8e0b41fc3dde9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 2 Mar 2010 02:51:56 +0000 Subject: ipsec: Fix bogus bundle flowi When I merged the bundle creation code, I introduced a bogus flowi value in the bundle. Instead of getting from the caller, it was instead set to the flow in the route object, which is totally different. The end result is that the bundles we created never match, and we instead end up with an ever growing bundle list. Thanks to Jamal for find this problem. Reported-by: Jamal Hadi Salim Signed-off-by: Herbert Xu Acked-by: Steffen Klassert Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/ipv4/xfrm4_policy.c | 5 +++-- net/ipv6/xfrm6_policy.c | 3 ++- net/xfrm/xfrm_policy.c | 7 ++++--- 3 files changed, 9 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 67107d63c1cd..e4a1483fba77 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -91,11 +91,12 @@ static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, return 0; } -static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) +static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + struct flowi *fl) { struct rtable *rt = (struct rtable *)xdst->route; - xdst->u.rt.fl = rt->fl; + xdst->u.rt.fl = *fl; xdst->u.dst.dev = dev; dev_hold(dev); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index dbdc696f5fc5..ae181651c75a 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -116,7 +116,8 @@ static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, return 0; } -static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) +static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + struct flowi *fl) { struct rt6_info *rt = (struct rt6_info*)xdst->route; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 34a5ef8316e7..843e066649cb 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1372,7 +1372,8 @@ static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, return err; } -static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) +static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + struct flowi *fl) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(xdst->u.dst.ops->family); @@ -1381,7 +1382,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) if (!afinfo) return -EINVAL; - err = afinfo->fill_dst(xdst, dev); + err = afinfo->fill_dst(xdst, dev, fl); xfrm_policy_put_afinfo(afinfo); @@ -1486,7 +1487,7 @@ static struct 
dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev; - err = xfrm_fill_dst(xdst, dev); + err = xfrm_fill_dst(xdst, dev, fl); if (err) goto free_dst; -- cgit v1.2.2 From 1162563f82b434e3099c9e6c1bbdba846d792f0d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 2 Mar 2010 20:40:01 +0000 Subject: af_packet: move strict addr_len check right before dev_[mc/unicast]_[add/del] My previous patch 914c8ad2d18b62ad1420f518c0cab0b0b90ab308 incorrectly changed the length check in packet_mc_add to be more strict. The problem is that userspace is not filling this field (and it stays zeroed) in case of setting PACKET_MR_PROMISC or PACKET_MR_ALLMULTI. So move the strict check to the point in path where the addr_len must be set correctly. Signed-off-by: Jiri Pirko Reported-by: Pavel Roskin Signed-off-by: David S. Miller --- net/packet/af_packet.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 031a5e6fb4aa..1612d417d10c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1688,6 +1688,8 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, { switch (i->type) { case PACKET_MR_MULTICAST: + if (i->alen != dev->addr_len) + return -EINVAL; if (what > 0) return dev_mc_add(dev, i->addr, i->alen, 0); else @@ -1700,6 +1702,8 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, return dev_set_allmulti(dev, what); break; case PACKET_MR_UNICAST: + if (i->alen != dev->addr_len) + return -EINVAL; if (what > 0) return dev_unicast_add(dev, i->addr); else @@ -1734,7 +1738,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) goto done; err = -EINVAL; - if (mreq->mr_alen != dev->addr_len) + if (mreq->mr_alen > dev->addr_len) goto done; err = -ENOBUFS; -- cgit v1.2.2 From d4612cb86ed8db8956b6b19435f8a30de6c67ffe Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 2 Mar 2010 15:48:23 +0000 Subject: Bluetooth: Use single_open() for inquiry cache within debugfs The inquiry cache information in debugfs should be using seq_file support and not allocating memory on the stack for the string. Since the usage of these information is really seldom, using single_open() for it is good enough. Signed-off-by: Marcel Holtmann Signed-off-by: David S. 
Miller --- net/bluetooth/hci_sysfs.c | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 1a79a6c7e30e..cafb55b0cea5 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -405,20 +406,11 @@ static struct device_type bt_host = { .release = bt_host_release, }; -static int inquiry_cache_open(struct inode *inode, struct file *file) -{ - file->private_data = inode->i_private; - return 0; -} - -static ssize_t inquiry_cache_read(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) +static int inquiry_cache_show(struct seq_file *f, void *p) { - struct hci_dev *hdev = file->private_data; + struct hci_dev *hdev = f->private; struct inquiry_cache *cache = &hdev->inq_cache; struct inquiry_entry *e; - char buf[4096]; - int n = 0; hci_dev_lock_bh(hdev); @@ -426,23 +418,30 @@ static ssize_t inquiry_cache_read(struct file *file, char __user *userbuf, struct inquiry_data *data = &e->data; bdaddr_t bdaddr; baswap(&bdaddr, &data->bdaddr); - n += sprintf(buf + n, "%s %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n", - batostr(&bdaddr), - data->pscan_rep_mode, data->pscan_period_mode, - data->pscan_mode, data->dev_class[2], - data->dev_class[1], data->dev_class[0], - __le16_to_cpu(data->clock_offset), - data->rssi, data->ssp_mode, e->timestamp); + seq_printf(f, "%s %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n", + batostr(&bdaddr), + data->pscan_rep_mode, data->pscan_period_mode, + data->pscan_mode, data->dev_class[2], + data->dev_class[1], data->dev_class[0], + __le16_to_cpu(data->clock_offset), + data->rssi, data->ssp_mode, e->timestamp); } hci_dev_unlock_bh(hdev); - return simple_read_from_buffer(userbuf, count, ppos, buf, n); + return 0; +} + +static int inquiry_cache_open(struct inode *inode, struct file *file) +{ + return single_open(file, inquiry_cache_show, inode->i_private); } static const struct file_operations inquiry_cache_fops = { - .open = inquiry_cache_open, - .read = inquiry_cache_read, + .open = inquiry_cache_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, }; int hci_register_sysfs(struct hci_dev *hdev) -- cgit v1.2.2 From 1cd4efddc4512ccbd9fe317f688f361605ca0c88 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 3 Mar 2010 01:23:22 -0800 Subject: bridge: depends on INET br_multicast calls ip_send_check(), so it should depend on INET. built-in: br_multicast.c:(.text+0x88cf4): undefined reference to `ip_send_check' or modular: ERROR: "ip_send_check" [net/bridge/bridge.ko] undefined! Signed-off-by: Randy Dunlap Signed-off-by: David S. 
Miller --- net/bridge/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig index 19a6b9629c51..d115d5cea5b6 100644 --- a/net/bridge/Kconfig +++ b/net/bridge/Kconfig @@ -35,6 +35,7 @@ config BRIDGE config BRIDGE_IGMP_SNOOPING bool "IGMP snooping" depends on BRIDGE + depends on INET default y ---help--- If you say Y here, then the Ethernet bridge will be able selectively -- cgit v1.2.2 From fc7bed8c802de3b064a56a43ec8574aa8d412de3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 25 Jan 2010 18:30:38 -0500 Subject: Don't bother with d_genocide in rpc_pipe kill_litter_super() from ->kill_sb() will take care of the junk --- net/sunrpc/rpc_pipe.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 49278f830367..9ac493fcc873 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -999,19 +999,14 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) inode = rpc_get_inode(sb, S_IFDIR | 0755); if (!inode) return -ENOMEM; - root = d_alloc_root(inode); + sb->s_root = root = d_alloc_root(inode); if (!root) { iput(inode); return -ENOMEM; } if (rpc_populate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF, NULL)) - goto out; - sb->s_root = root; + return -ENOMEM; return 0; -out: - d_genocide(root); - dput(root); - return -ENOMEM; } static int -- cgit v1.2.2 From 4fa004373133ece3d9b1c0a7e243b0e53760b165 Mon Sep 17 00:00:00 2001 From: Sujith Date: Mon, 1 Mar 2010 14:42:57 +0530 Subject: mac80211: Fix HT rate control configuration Handling HT configuration changes involved setting the channel with the new HT parameters and then issuing a rate_update() notification to the driver. This behavior changed after the off-channel changes. Now, the channel is not updated with the new HT params in enable_ht() - instead, it is now done when the scan work terminates. This results in the driver depending on stale information, defaulting to non-HT mode always. Fix this by passing the new channel type to the driver. Cc: stable@kernel.org Signed-off-by: Sujith Signed-off-by: John W. 
Linville --- net/mac80211/mlme.c | 3 ++- net/mac80211/rate.h | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5a268761e4c5..0ab284c32135 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -177,7 +177,8 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, sta = sta_info_get(sdata, bssid); if (sta) rate_control_rate_update(local, sband, sta, - IEEE80211_RC_HT_CHANGED); + IEEE80211_RC_HT_CHANGED, + local->oper_channel_type); rcu_read_unlock(); } diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index b6108bca96d4..065a96190e32 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -66,7 +66,8 @@ static inline void rate_control_rate_init(struct sta_info *sta) static inline void rate_control_rate_update(struct ieee80211_local *local, struct ieee80211_supported_band *sband, - struct sta_info *sta, u32 changed) + struct sta_info *sta, u32 changed, + enum nl80211_channel_type oper_chan_type) { struct rate_control_ref *ref = local->rate_ctrl; struct ieee80211_sta *ista = &sta->sta; @@ -74,7 +75,7 @@ static inline void rate_control_rate_update(struct ieee80211_local *local, if (ref && ref->ops->rate_update) ref->ops->rate_update(ref->priv, sband, ista, - priv_sta, changed); + priv_sta, changed, oper_chan_type); } static inline void *rate_control_alloc_sta(struct rate_control_ref *ref, -- cgit v1.2.2 From 122e4519cd5c224d4b8e681d368132b643e28f60 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 2 Mar 2010 13:32:44 +0000 Subject: IPv6: addrconf dad timer unnecessary bh_disable Timer code runs in bottom half, so there is no need for using _bh form of locking. Also check if device is not ready to avoid race with address that is no longer active. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 88fd8c5877ee..e6cba9c45c6c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2850,9 +2850,9 @@ static void addrconf_dad_timer(unsigned long data) struct inet6_dev *idev = ifp->idev; struct in6_addr mcaddr; - read_lock_bh(&idev->lock); - if (idev->dead) { - read_unlock_bh(&idev->lock); + read_lock(&idev->lock); + if (idev->dead || !(idev->if_flags & IF_READY)) { + read_unlock(&idev->lock); goto out; } @@ -2864,7 +2864,7 @@ static void addrconf_dad_timer(unsigned long data) ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); spin_unlock(&ifp->lock); - read_unlock_bh(&idev->lock); + read_unlock(&idev->lock); addrconf_dad_completed(ifp); @@ -2874,7 +2874,7 @@ static void addrconf_dad_timer(unsigned long data) ifp->probes--; addrconf_mod_timer(ifp, AC_DAD, ifp->idev->nd_parms->retrans_time); spin_unlock(&ifp->lock); - read_unlock_bh(&idev->lock); + read_unlock(&idev->lock); /* send a neighbour solicitation for our addr */ addrconf_addr_solict_mult(&ifp->addr, &mcaddr); -- cgit v1.2.2 From 5b2a19539c5f59c5a038d213ede723f0245d97cf Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 2 Mar 2010 13:32:45 +0000 Subject: IPv6: addrconf timer race The Router Solicitation timer races with device state changes because it doesn't lock the device. Use local variable to avoid one repeated dereference. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e6cba9c45c6c..5f582f385abb 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2739,28 +2739,29 @@ static int addrconf_ifdown(struct net_device *dev, int how) static void addrconf_rs_timer(unsigned long data) { struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data; + struct inet6_dev *idev = ifp->idev; - if (ifp->idev->cnf.forwarding) + read_lock(&idev->lock); + if (idev->dead || !(idev->if_flags & IF_READY)) goto out; - if (ifp->idev->if_flags & IF_RA_RCVD) { - /* - * Announcement received after solicitation - * was sent - */ + if (idev->cnf.forwarding) + goto out; + + /* Announcement received after solicitation was sent */ + if (idev->if_flags & IF_RA_RCVD) goto out; - } spin_lock(&ifp->lock); - if (ifp->probes++ < ifp->idev->cnf.rtr_solicits) { + if (ifp->probes++ < idev->cnf.rtr_solicits) { /* The wait after the last probe can be shorter */ addrconf_mod_timer(ifp, AC_RS, - (ifp->probes == ifp->idev->cnf.rtr_solicits) ? - ifp->idev->cnf.rtr_solicit_delay : - ifp->idev->cnf.rtr_solicit_interval); + (ifp->probes == idev->cnf.rtr_solicits) ? + idev->cnf.rtr_solicit_delay : + idev->cnf.rtr_solicit_interval); spin_unlock(&ifp->lock); - ndisc_send_rs(ifp->idev->dev, &ifp->addr, &in6addr_linklocal_allrouters); + ndisc_send_rs(idev->dev, &ifp->addr, &in6addr_linklocal_allrouters); } else { spin_unlock(&ifp->lock); /* @@ -2768,10 +2769,11 @@ static void addrconf_rs_timer(unsigned long data) * assumption any longer. */ printk(KERN_DEBUG "%s: no IPv6 routers present\n", - ifp->idev->dev->name); + idev->dev->name); } out: + read_unlock(&idev->lock); in6_ifa_put(ifp); } -- cgit v1.2.2 From 84e8b803f1e16f3a2b8b80f80a63fa2f2f8a9be6 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 2 Mar 2010 13:32:46 +0000 Subject: IPv6: addrconf notify when address is unavailable My recent change in net-next to retain permanent addresses caused regression. Device refcount would not go to zero when device was unregistered because left over anycast reference would hold ipv6 dev reference which would hold device references... The correct procedure is to call notify chain when address is no longer available for use. When interface comes back DAD timer will notify back that address is available. Also, link local addresses should be purged when interface is brought down. The address might be changed. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 46 +++++++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5f582f385abb..7a4bf7671285 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2649,11 +2649,11 @@ static int addrconf_ifdown(struct net_device *dev, int how) write_lock_bh(&addrconf_hash_lock); while ((ifa = *bifa) != NULL) { if (ifa->idev == idev && - (how || !(ifa->flags&IFA_F_PERMANENT))) { + (how || !(ifa->flags&IFA_F_PERMANENT) || + ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) { *bifa = ifa->lst_next; ifa->lst_next = NULL; - addrconf_del_timer(ifa); - in6_ifa_put(ifa); + __in6_ifa_put(ifa); continue; } bifa = &ifa->lst_next; @@ -2691,28 +2691,40 @@ static int addrconf_ifdown(struct net_device *dev, int how) #endif bifa = &idev->addr_list; while ((ifa = *bifa) != NULL) { - if (how == 0 && (ifa->flags&IFA_F_PERMANENT)) { - /* Retain permanent address on admin down */ + addrconf_del_timer(ifa); + + /* If just doing link down, and address is permanent + and not link-local, then retain it. */ + if (how == 0 && + (ifa->flags&IFA_F_PERMANENT) && + !(ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) { bifa = &ifa->if_next; - /* Restart DAD if needed when link comes back up */ - if ( !((dev->flags&(IFF_NOARP|IFF_LOOPBACK)) || - idev->cnf.accept_dad <= 0 || - (ifa->flags & IFA_F_NODAD))) - ifa->flags |= IFA_F_TENTATIVE; + /* If not doing DAD on this address, just keep it. */ + if ((dev->flags&(IFF_NOARP|IFF_LOOPBACK)) || + idev->cnf.accept_dad <= 0 || + (ifa->flags & IFA_F_NODAD)) + continue; + + /* If it was tentative already, no need to notify */ + if (ifa->flags & IFA_F_TENTATIVE) + continue; + + /* Flag it for later restoration when link comes up */ + ifa->flags |= IFA_F_TENTATIVE; + in6_ifa_hold(ifa); } else { *bifa = ifa->if_next; ifa->if_next = NULL; - ifa->dead = 1; - write_unlock_bh(&idev->lock); + } + write_unlock_bh(&idev->lock); - __ipv6_ifa_notify(RTM_DELADDR, ifa); - atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); - in6_ifa_put(ifa); + __ipv6_ifa_notify(RTM_DELADDR, ifa); + atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); + in6_ifa_put(ifa); - write_lock_bh(&idev->lock); - } + write_lock_bh(&idev->lock); } write_unlock_bh(&idev->lock); -- cgit v1.2.2 From 8f37ada5b5f6bfb4d251a7f510f249cb855b77b3 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 3 Mar 2010 08:19:59 +0000 Subject: IPv6: fix race between cleanup and add/delete address This solves a potential race problem during the cleanup process. The issue is that addrconf_ifdown() needs to traverse address list, but then drop lock to call the notifier. The version in -next could get confused if add/delete happened during this window. Original code (2.6.32 and earlier) was okay because all addresses were always deleted. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 7a4bf7671285..6cf3ee14ace3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2615,7 +2615,7 @@ static void addrconf_bonding_change(struct net_device *dev, unsigned long event) static int addrconf_ifdown(struct net_device *dev, int how) { struct inet6_dev *idev; - struct inet6_ifaddr *ifa, **bifa; + struct inet6_ifaddr *ifa, *keep_list, **bifa; struct net *net = dev_net(dev); int i; @@ -2689,8 +2689,12 @@ static int addrconf_ifdown(struct net_device *dev, int how) write_lock_bh(&idev->lock); } #endif - bifa = &idev->addr_list; - while ((ifa = *bifa) != NULL) { + keep_list = NULL; + bifa = &keep_list; + while ((ifa = idev->addr_list) != NULL) { + idev->addr_list = ifa->if_next; + ifa->if_next = NULL; + addrconf_del_timer(ifa); /* If just doing link down, and address is permanent @@ -2698,6 +2702,9 @@ static int addrconf_ifdown(struct net_device *dev, int how) if (how == 0 && (ifa->flags&IFA_F_PERMANENT) && !(ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) { + + /* Move to holding list */ + *bifa = ifa; bifa = &ifa->if_next; /* If not doing DAD on this address, just keep it. */ @@ -2714,8 +2721,6 @@ static int addrconf_ifdown(struct net_device *dev, int how) ifa->flags |= IFA_F_TENTATIVE; in6_ifa_hold(ifa); } else { - *bifa = ifa->if_next; - ifa->if_next = NULL; ifa->dead = 1; } write_unlock_bh(&idev->lock); @@ -2726,6 +2731,9 @@ static int addrconf_ifdown(struct net_device *dev, int how) write_lock_bh(&idev->lock); } + + idev->addr_list = keep_list; + write_unlock_bh(&idev->lock); /* Step 5: Discard multicast list */ -- cgit v1.2.2 From 6d55cb91a0020ac0d78edcad61efd6c8cf5785a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Wed, 3 Mar 2010 04:01:13 +0000 Subject: gre: fix hard header destination address checking ipgre_header() can be called with zero daddr when the gre device is configured as multipoint tunnel and still has the NOARP flag set (which is typically cleared by the userspace arp daemon). If the NOARP packets are not dropped, ipgre_tunnel_xmit() will take rt->rt_gateway (= NBMA IP) and use that for route look up (and may lead to bogus xfrm acquires). The multicast address check is removed as sending to multicast group should be ok. In fact, if gre device has a multicast address as destination ipgre_header is always called with multicast address. Signed-off-by: Timo Teras Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index c0c5274d0271..f47c9f76754b 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1144,12 +1144,9 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, if (saddr) memcpy(&iph->saddr, saddr, 4); - - if (daddr) { + if (daddr) memcpy(&iph->daddr, daddr, 4); - return t->hlen; - } - if (iph->daddr && !ipv4_is_multicast(iph->daddr)) + if (iph->daddr) return t->hlen; return -t->hlen; -- cgit v1.2.2 From d0021b252eaf65ca07ed14f0d66425dd9ccab9a6 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 3 Mar 2010 08:31:23 +0000 Subject: tipc: Fix oops on send prior to entering networked mode (v3) Fix TIPC to disallow sending to remote addresses prior to entering NET_MODE user programs can oops the kernel by sending datagrams via AF_TIPC prior to entering networked mode. 
The following backtrace has been observed: ID: 13459 TASK: ffff810014640040 CPU: 0 COMMAND: "tipc-client" [exception RIP: tipc_node_select_next_hop+90] RIP: ffffffff8869d3c3 RSP: ffff81002d9a5ab8 RFLAGS: 00010202 RAX: 0000000000000001 RBX: 0000000000000001 RCX: 0000000000000001 RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000001001001 RBP: 0000000001001001 R8: 0074736575716552 R9: 0000000000000000 R10: ffff81003fbd0680 R11: 00000000000000c8 R12: 0000000000000008 R13: 0000000000000001 R14: 0000000000000001 R15: ffff810015c6ca00 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 RIP: 0000003cbd8d49a3 RSP: 00007fffc84e0be8 RFLAGS: 00010206 RAX: 000000000000002c RBX: ffffffff8005d116 RCX: 0000000000000000 RDX: 0000000000000008 RSI: 00007fffc84e0c00 RDI: 0000000000000003 RBP: 0000000000000000 R8: 00007fffc84e0c10 R9: 0000000000000010 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007fffc84e0d10 R14: 0000000000000000 R15: 00007fffc84e0c30 ORIG_RAX: 000000000000002c CS: 0033 SS: 002b What happens is that, when the tipc module is inserted it enters a standalone node mode in which communication to its own address is allowed <0.0.0> but not to other addresses, since the appropriate data structures have not been allocated yet (specifically the tipc_net pointer). There is nothing stopping a client from trying to send such a message however, and if that happens, we attempt to dereference tipc_net.zones while the pointer is still NULL, and explode. The fix is pretty straightforward. Since these oopses all arise from the dereference of global pointers prior to their assignment to allocated values, and since these allocations are small (about 2k total), let's convert these pointers to static arrays of the appropriate size. All the accesses to these bits consider 0/NULL to be a non-match when searching, so all the lookups still work properly, and there is no longer a chance of a bad dereference anywhere. As a bonus, this lets us eliminate the setup/teardown routines for those pointers, and eliminates the need to perform any locking around them to prevent access while they're being allocated/freed. I've updated the tipc_net structure to behave this way to fix the exact reported problem, and also fixed up the tipc_bearers and media_list arrays to fix an obvious similar problem that arises from issuing tipc-config commands to manipulate bearers/links prior to entering networked mode. I've tested this for a few hours by running the sanity tests and stress test with the tipcutils suite, and nothing has fallen over. There have been a few lockdep warnings, but those were there before, and can be addressed later, as they didn't actually result in any deadlock. Signed-off-by: Neil Horman CC: Allan Stephens CC: David S. Miller CC: tipc-discussion@lists.sourceforge.net bearer.c | 37 ++++++------------------------------- bearer.h | 2 +- net.c | 25 ++++--------------------- 3 files changed, 11 insertions(+), 53 deletions(-) Signed-off-by: David S. 
Miller --- net/tipc/bearer.c | 37 ++++++------------------------------- net/tipc/bearer.h | 2 +- net/tipc/net.c | 25 ++++--------------------- 3 files changed, 11 insertions(+), 53 deletions(-) (limited to 'net') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 327011fcc407..78091375ca12 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -45,10 +45,10 @@ #define MAX_ADDR_STR 32 -static struct media *media_list = NULL; +static struct media media_list[MAX_MEDIA]; static u32 media_count = 0; -struct bearer *tipc_bearers = NULL; +struct bearer tipc_bearers[MAX_BEARERS]; /** * media_name_valid - validate media name @@ -108,9 +108,11 @@ int tipc_register_media(u32 media_type, int res = -EINVAL; write_lock_bh(&tipc_net_lock); - if (!media_list) - goto exit; + if (tipc_mode != TIPC_NET_MODE) { + warn("Media <%s> rejected, not in networked mode yet\n", name); + goto exit; + } if (!media_name_valid(name)) { warn("Media <%s> rejected, illegal name\n", name); goto exit; @@ -660,33 +662,10 @@ int tipc_disable_bearer(const char *name) -int tipc_bearer_init(void) -{ - int res; - - write_lock_bh(&tipc_net_lock); - tipc_bearers = kcalloc(MAX_BEARERS, sizeof(struct bearer), GFP_ATOMIC); - media_list = kcalloc(MAX_MEDIA, sizeof(struct media), GFP_ATOMIC); - if (tipc_bearers && media_list) { - res = 0; - } else { - kfree(tipc_bearers); - kfree(media_list); - tipc_bearers = NULL; - media_list = NULL; - res = -ENOMEM; - } - write_unlock_bh(&tipc_net_lock); - return res; -} - void tipc_bearer_stop(void) { u32 i; - if (!tipc_bearers) - return; - for (i = 0; i < MAX_BEARERS; i++) { if (tipc_bearers[i].active) tipc_bearers[i].publ.blocked = 1; @@ -695,10 +674,6 @@ void tipc_bearer_stop(void) if (tipc_bearers[i].active) bearer_disable(tipc_bearers[i].publ.name); } - kfree(tipc_bearers); - kfree(media_list); - tipc_bearers = NULL; - media_list = NULL; media_count = 0; } diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index ca5734892713..000228e93f9e 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -114,7 +114,7 @@ struct bearer_name { struct link; -extern struct bearer *tipc_bearers; +extern struct bearer tipc_bearers[]; void tipc_media_addr_printf(struct print_buf *pb, struct tipc_media_addr *a); struct sk_buff *tipc_media_get_names(void); diff --git a/net/tipc/net.c b/net/tipc/net.c index 7906608bf510..f25b1cdb64eb 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -116,7 +116,8 @@ */ DEFINE_RWLOCK(tipc_net_lock); -struct network tipc_net = { NULL }; +struct _zone *tipc_zones[256] = { NULL, }; +struct network tipc_net = { tipc_zones }; struct tipc_node *tipc_net_select_remote_node(u32 addr, u32 ref) { @@ -158,28 +159,12 @@ void tipc_net_send_external_routes(u32 dest) } } -static int net_init(void) -{ - memset(&tipc_net, 0, sizeof(tipc_net)); - tipc_net.zones = kcalloc(tipc_max_zones + 1, sizeof(struct _zone *), GFP_ATOMIC); - if (!tipc_net.zones) { - return -ENOMEM; - } - return 0; -} - static void net_stop(void) { u32 z_num; - if (!tipc_net.zones) - return; - - for (z_num = 1; z_num <= tipc_max_zones; z_num++) { + for (z_num = 1; z_num <= tipc_max_zones; z_num++) tipc_zone_delete(tipc_net.zones[z_num]); - } - kfree(tipc_net.zones); - tipc_net.zones = NULL; } static void net_route_named_msg(struct sk_buff *buf) @@ -282,9 +267,7 @@ int tipc_net_start(u32 addr) tipc_named_reinit(); tipc_port_reinit(); - if ((res = tipc_bearer_init()) || - (res = net_init()) || - (res = tipc_cltr_init()) || + if ((res = tipc_cltr_init()) || (res = tipc_bclink_init())) { return res; } -- cgit v1.2.2 From 
f75580c4afb72c156746b3fc1ec977b1a85d3dee Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 15 Feb 2010 17:27:00 +0000 Subject: net/9p: Add multi channel support. This is needed for supporting multiple mount points. We can find out the device names to be used with mount by checking /sys/devices/virtio-pci/virtio*/device file if the device file have value 9 then the specific virtio device can be used for mounting. ex: #cat /sys/devices/virtio-pci/virtio1/device 9 now we can mount using # mount -t 9p -o trans=virtio virtio1 /mnt/ Signed-off-by: Aneesh Kumar K.V Signed-off-by: Eric Van Hensbergen --- net/9p/trans_virtio.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index cb50f4ae5eef..df924e5657d3 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -296,13 +296,15 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args) mutex_lock(&virtio_9p_lock); while (index < MAX_9P_CHAN) { - if (chan->initialized && !chan->inuse) { - chan->inuse = true; - break; - } else { - index++; - chan = &channels[index]; + if (chan->initialized && + !strcmp(devname, dev_name(&chan->vdev->dev))) { + if (!chan->inuse) { + chan->inuse = true; + break; + } } + index++; + chan = &channels[index]; } mutex_unlock(&virtio_9p_lock); -- cgit v1.2.2 From 37c1209d413242d9560e343c040777049a8dd869 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 15 Feb 2010 17:27:01 +0000 Subject: net/9p: Remove MAX_9P_CHAN limit Use a list to track the channel instead of statically allocated array Signed-off-by: Aneesh Kumar K.V Signed-off-by: Eric Van Hensbergen --- net/9p/trans_virtio.c | 70 ++++++++++++++++++++------------------------------- 1 file changed, 27 insertions(+), 43 deletions(-) (limited to 'net') diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index df924e5657d3..05918d3cb40d 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -49,8 +49,6 @@ /* a single mutex to manage channel initialization and attachment */ static DEFINE_MUTEX(virtio_9p_lock); -/* global which tracks highest initialized channel */ -static int chan_index; /** * struct virtio_chan - per-instance transport information @@ -68,8 +66,7 @@ static int chan_index; * */ -static struct virtio_chan { - bool initialized; +struct virtio_chan { bool inuse; spinlock_t lock; @@ -80,7 +77,11 @@ static struct virtio_chan { /* Scatterlist: can be too big for stack. */ struct scatterlist sg[VIRTQUEUE_NUM]; -} channels[MAX_9P_CHAN]; + + struct list_head chan_list; +}; + +static struct list_head virtio_chan_list; /* How many bytes left in this page. */ static unsigned int rest_of_page(void *data) @@ -217,9 +218,7 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req) * p9_virtio_probe - probe for existence of 9P virtio channels * @vdev: virtio device to probe * - * This probes for existing virtio channels. At present only - * a single channel is in use, so in the future more work may need - * to be done here. + * This probes for existing virtio channels. 
* */ @@ -227,16 +226,10 @@ static int p9_virtio_probe(struct virtio_device *vdev) { int err; struct virtio_chan *chan; - int index; - mutex_lock(&virtio_9p_lock); - index = chan_index++; - chan = &channels[index]; - mutex_unlock(&virtio_9p_lock); - - if (chan_index > MAX_9P_CHAN) { - printk(KERN_ERR "9p: virtio: Maximum channels exceeded\n"); - BUG(); + chan = kmalloc(sizeof(struct virtio_chan), GFP_KERNEL); + if (!chan) { + printk(KERN_ERR "9p: Failed to allocate virtio 9P channel\n"); err = -ENOMEM; goto fail; } @@ -255,15 +248,15 @@ static int p9_virtio_probe(struct virtio_device *vdev) sg_init_table(chan->sg, VIRTQUEUE_NUM); chan->inuse = false; - chan->initialized = true; + mutex_lock(&virtio_9p_lock); + list_add_tail(&chan->chan_list, &virtio_chan_list); + mutex_unlock(&virtio_9p_lock); return 0; out_free_vq: vdev->config->del_vqs(vdev); + kfree(chan); fail: - mutex_lock(&virtio_9p_lock); - chan_index--; - mutex_unlock(&virtio_9p_lock); return err; } @@ -280,35 +273,27 @@ fail: * We use a simple reference count mechanism to ensure that only a single * mount has a channel open at a time. * - * Bugs: doesn't allow identification of a specific channel - * to allocate, channels are allocated sequentially. This was - * a pragmatic decision to get things rolling, but ideally some - * way of identifying the channel to attach to would be nice - * if we are going to support multiple channels. - * */ static int p9_virtio_create(struct p9_client *client, const char *devname, char *args) { - struct virtio_chan *chan = channels; - int index = 0; + struct virtio_chan *chan; + int found = 0; mutex_lock(&virtio_9p_lock); - while (index < MAX_9P_CHAN) { - if (chan->initialized && - !strcmp(devname, dev_name(&chan->vdev->dev))) { + list_for_each_entry(chan, &virtio_chan_list, chan_list) { + if (!strcmp(devname, dev_name(&chan->vdev->dev))) { if (!chan->inuse) { chan->inuse = true; + found = 1; break; } } - index++; - chan = &channels[index]; } mutex_unlock(&virtio_9p_lock); - if (index >= MAX_9P_CHAN) { + if (!found) { printk(KERN_ERR "9p: no channels available\n"); return -ENODEV; } @@ -331,11 +316,13 @@ static void p9_virtio_remove(struct virtio_device *vdev) struct virtio_chan *chan = vdev->priv; BUG_ON(chan->inuse); + vdev->config->del_vqs(vdev); + + mutex_lock(&virtio_9p_lock); + list_del(&chan->chan_list); + mutex_unlock(&virtio_9p_lock); + kfree(chan); - if (chan->initialized) { - vdev->config->del_vqs(vdev); - chan->initialized = false; - } } static struct virtio_device_id id_table[] = { @@ -366,10 +353,7 @@ static struct p9_trans_module p9_virtio_trans = { /* The standard init function */ static int __init p9_virtio_init(void) { - int count; - - for (count = 0; count < MAX_9P_CHAN; count++) - channels[count].initialized = false; + INIT_LIST_HEAD(&virtio_chan_list); v9fs_register_trans(&p9_virtio_trans); return register_virtio_driver(&p9_virtio_drv); -- cgit v1.2.2 From c1a7c2262035e83a8f70ebe1328b3451702cf51b Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 15 Feb 2010 17:27:02 +0000 Subject: net/9p: Handle mount errors correctly. 
With this patch we have # mount -t 9p -o trans=virtio virtio2 /mnt/ # mount -t 9p -o trans=virtio virtio2 /mnt/ mount: virtio2 already mounted or /mnt/ busy mount: according to mtab, virtio2 is already mounted on /mnt # mount -t 9p -o trans=virtio virtio3 /mnt/ -o debug=0xfff mount: special device virtio3 does not exist Signed-off-by: Aneesh Kumar K.V Signed-off-by: Eric Van Hensbergen --- net/9p/trans_virtio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 05918d3cb40d..0aaed4819379 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -279,6 +279,7 @@ static int p9_virtio_create(struct p9_client *client, const char *devname, char *args) { struct virtio_chan *chan; + int ret = -ENOENT; int found = 0; mutex_lock(&virtio_9p_lock); @@ -289,13 +290,14 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args) found = 1; break; } + ret = -EBUSY; } } mutex_unlock(&virtio_9p_lock); if (!found) { printk(KERN_ERR "9p: no channels available\n"); - return -ENODEV; + return ret; } client->trans = (void *)chan; -- cgit v1.2.2 From 0fb80abd911a7cb1e6548b5279568dc1e8949702 Mon Sep 17 00:00:00 2001 From: Sripathi Kodi Date: Fri, 5 Mar 2010 18:49:11 +0000 Subject: 9P2010.L handshake: Add mount option Add new mount V9FS mount option to specify protocol version This patch adds a new mount option to specify protocol version. With this option it is possible to use "-o version=" switch to specify 9P protocol version to use. Valid options for version are: 9p2000 9p2000.u 9p2010.L Signed-off-by: Sripathi Kodi Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 09d4f1e2e4a8..3b5f3c94a6eb 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -46,6 +46,7 @@ enum { Opt_msize, Opt_trans, Opt_legacy, + Opt_version, Opt_err, }; @@ -53,9 +54,30 @@ static const match_table_t tokens = { {Opt_msize, "msize=%u"}, {Opt_legacy, "noextend"}, {Opt_trans, "trans=%s"}, + {Opt_version, "version=%s"}, {Opt_err, NULL}, }; +/* Interpret mount option for protocol version */ +static unsigned char get_protocol_version(const substring_t *name) +{ + unsigned char version = -EINVAL; + if (!strncmp("9p2000", name->from, name->to-name->from)) { + version = p9_proto_legacy; + P9_DPRINTK(P9_DEBUG_9P, "Protocol version: Legacy\n"); + } else if (!strncmp("9p2000.u", name->from, name->to-name->from)) { + version = p9_proto_2000u; + P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2000.u\n"); + } else if (!strncmp("9p2010.L", name->from, name->to-name->from)) { + version = p9_proto_2010L; + P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2010.L\n"); + } else { + P9_DPRINTK(P9_DEBUG_ERROR, "Unknown protocol version %s. 
", + name->from); + } + return version; +} + static struct p9_req_t * p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); @@ -120,6 +142,12 @@ static int parse_opts(char *opts, struct p9_client *clnt) case Opt_legacy: clnt->dotu = 0; break; + case Opt_version: + ret = get_protocol_version(&args[0]); + if (ret == -EINVAL) + goto free_and_return; + clnt->proto_version = ret; + break; default: continue; } -- cgit v1.2.2 From 342fee1d5c7dfa05f4e14ec1e583df4553b09776 Mon Sep 17 00:00:00 2001 From: Sripathi Kodi Date: Fri, 5 Mar 2010 18:50:14 +0000 Subject: 9P2010.L handshake: Remove "dotu" variable Removes 'dotu' variable and make everything dependent on 'proto_version' field. Signed-off-by: Sripathi Kodi Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 65 +++++++++++++++++++++++++++++------------------- net/9p/protocol.c | 74 ++++++++++++++++++++++++++++++------------------------- net/9p/protocol.h | 6 ++--- 3 files changed, 83 insertions(+), 62 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 3b5f3c94a6eb..9994676e57da 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -58,6 +58,18 @@ static const match_table_t tokens = { {Opt_err, NULL}, }; +inline int p9_is_proto_dotl(struct p9_client *clnt) +{ + return (clnt->proto_version == p9_proto_2010L); +} +EXPORT_SYMBOL(p9_is_proto_dotl); + +inline int p9_is_proto_dotu(struct p9_client *clnt) +{ + return (clnt->proto_version == p9_proto_2000u); +} +EXPORT_SYMBOL(p9_is_proto_dotu); + /* Interpret mount option for protocol version */ static unsigned char get_protocol_version(const substring_t *name) { @@ -97,7 +109,7 @@ static int parse_opts(char *opts, struct p9_client *clnt) int option; int ret = 0; - clnt->dotu = 1; + clnt->proto_version = p9_proto_2000u; clnt->msize = 8192; if (!opts) @@ -140,7 +152,7 @@ static int parse_opts(char *opts, struct p9_client *clnt) } break; case Opt_legacy: - clnt->dotu = 0; + clnt->proto_version = p9_proto_legacy; break; case Opt_version: ret = get_protocol_version(&args[0]); @@ -438,14 +450,15 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) int ecode; char *ename; - err = p9pdu_readf(req->rc, c->dotu, "s?d", &ename, &ecode); + err = p9pdu_readf(req->rc, c->proto_version, "s?d", + &ename, &ecode); if (err) { P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); return err; } - if (c->dotu) + if (p9_is_proto_dotu(c)) err = -ecode; if (!err || !IS_ERR_VALUE(err)) @@ -543,7 +556,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) /* marshall the data */ p9pdu_prepare(req->tc, tag, type); va_start(ap, fmt); - err = p9pdu_vwritef(req->tc, c->dotu, fmt, ap); + err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap); va_end(ap); p9pdu_finalize(req->tc); @@ -655,14 +668,14 @@ int p9_client_version(struct p9_client *c) char *version; int msize; - P9_DPRINTK(P9_DEBUG_9P, ">>> TVERSION msize %d extended %d\n", - c->msize, c->dotu); + P9_DPRINTK(P9_DEBUG_9P, ">>> TVERSION msize %d protocol %d\n", + c->msize, c->proto_version); req = p9_client_rpc(c, P9_TVERSION, "ds", c->msize, - c->dotu ? "9P2000.u" : "9P2000"); + p9_is_proto_dotu(c) ? 
"9P2000.u" : "9P2000"); if (IS_ERR(req)) return PTR_ERR(req); - err = p9pdu_readf(req->rc, c->dotu, "ds", &msize, &version); + err = p9pdu_readf(req->rc, c->proto_version, "ds", &msize, &version); if (err) { P9_DPRINTK(P9_DEBUG_9P, "version error %d\n", err); p9pdu_dump(1, req->rc); @@ -670,10 +683,10 @@ int p9_client_version(struct p9_client *c) } P9_DPRINTK(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version); - if (!memcmp(version, "9P2000.u", 8)) - c->dotu = 1; - else if (!memcmp(version, "9P2000", 6)) - c->dotu = 0; + if (!strncmp(version, "9P2000.u", 8)) + c->proto_version = p9_proto_2000u; + else if (!strncmp(version, "9P2000", 6)) + c->proto_version = p9_proto_legacy; else { err = -EREMOTEIO; goto error; @@ -728,8 +741,8 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) goto put_trans; } - P9_DPRINTK(P9_DEBUG_MUX, "clnt %p trans %p msize %d dotu %d\n", - clnt, clnt->trans_mod, clnt->msize, clnt->dotu); + P9_DPRINTK(P9_DEBUG_MUX, "clnt %p trans %p msize %d protocol %d\n", + clnt, clnt->trans_mod, clnt->msize, clnt->proto_version); err = clnt->trans_mod->create(clnt, dev_name, options); if (err) @@ -812,7 +825,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "Q", &qid); + err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid); if (err) { p9pdu_dump(1, req->rc); p9_free_req(clnt, req); @@ -861,7 +874,7 @@ p9_client_auth(struct p9_client *clnt, char *uname, u32 n_uname, char *aname) goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "Q", &qid); + err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid); if (err) { p9pdu_dump(1, req->rc); p9_free_req(clnt, req); @@ -919,7 +932,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "R", &nwqids, &wqids); + err = p9pdu_readf(req->rc, clnt->proto_version, "R", &nwqids, &wqids); if (err) { p9pdu_dump(1, req->rc); p9_free_req(clnt, req); @@ -980,7 +993,7 @@ int p9_client_open(struct p9_fid *fid, int mode) goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "Qd", &qid, &iounit); + err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit); if (err) { p9pdu_dump(1, req->rc); goto free_and_error; @@ -1025,7 +1038,7 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "Qd", &qid, &iounit); + err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit); if (err) { p9pdu_dump(1, req->rc); goto free_and_error; @@ -1126,7 +1139,7 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "D", &count, &dataptr); + err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr); if (err) { p9pdu_dump(1, req->rc); goto free_and_error; @@ -1187,7 +1200,7 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "d", &count); + err = p9pdu_readf(req->rc, clnt->proto_version, "d", &count); if (err) { p9pdu_dump(1, req->rc); goto free_and_error; @@ -1227,7 +1240,7 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "wS", &ignored, ret); + err = p9pdu_readf(req->rc, clnt->proto_version, "wS", &ignored, ret); if (err) { p9pdu_dump(1, req->rc); p9_free_req(clnt, req); @@ -1254,7 +1267,7 @@ error: } EXPORT_SYMBOL(p9_client_stat); -static 
int p9_client_statsize(struct p9_wstat *wst, int optional) +static int p9_client_statsize(struct p9_wstat *wst, int proto_version) { int ret; @@ -1273,7 +1286,7 @@ static int p9_client_statsize(struct p9_wstat *wst, int optional) if (wst->muid) ret += strlen(wst->muid); - if (optional) { + if (proto_version == p9_proto_2000u) { ret += 2+4+4+4; /* extension[s] n_uid[4] n_gid[4] n_muid[4] */ if (wst->extension) ret += strlen(wst->extension); @@ -1290,7 +1303,7 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) err = 0; clnt = fid->clnt; - wst->size = p9_client_statsize(wst, clnt->dotu); + wst->size = p9_client_statsize(wst, clnt->proto_version); P9_DPRINTK(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid); P9_DPRINTK(P9_DEBUG_9P, " sz=%x type=%x dev=%x qid=%x.%llx.%x\n" diff --git a/net/9p/protocol.c b/net/9p/protocol.c index fc70147c771e..94f5a8f65e9c 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -52,7 +52,7 @@ #endif static int -p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...); +p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); #ifdef CONFIG_NET_9P_DEBUG void @@ -144,7 +144,8 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size) */ static int -p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) +p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, + va_list ap) { const char *ptr; int errcode = 0; @@ -194,7 +195,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) int16_t len; int size; - errcode = p9pdu_readf(pdu, optional, "w", &len); + errcode = p9pdu_readf(pdu, proto_version, + "w", &len); if (errcode) break; @@ -217,7 +219,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) struct p9_qid *qid = va_arg(ap, struct p9_qid *); - errcode = p9pdu_readf(pdu, optional, "bdq", + errcode = p9pdu_readf(pdu, proto_version, "bdq", &qid->type, &qid->version, &qid->path); } @@ -230,7 +232,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) stbuf->n_uid = stbuf->n_gid = stbuf->n_muid = -1; errcode = - p9pdu_readf(pdu, optional, + p9pdu_readf(pdu, proto_version, "wwdQdddqssss?sddd", &stbuf->size, &stbuf->type, &stbuf->dev, &stbuf->qid, @@ -250,7 +252,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) void **data = va_arg(ap, void **); errcode = - p9pdu_readf(pdu, optional, "d", count); + p9pdu_readf(pdu, proto_version, "d", count); if (!errcode) { *count = MIN(*count, @@ -263,8 +265,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) int16_t *nwname = va_arg(ap, int16_t *); char ***wnames = va_arg(ap, char ***); - errcode = - p9pdu_readf(pdu, optional, "w", nwname); + errcode = p9pdu_readf(pdu, proto_version, + "w", nwname); if (!errcode) { *wnames = kmalloc(sizeof(char *) * *nwname, @@ -278,7 +280,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) for (i = 0; i < *nwname; i++) { errcode = - p9pdu_readf(pdu, optional, + p9pdu_readf(pdu, + proto_version, "s", &(*wnames)[i]); if (errcode) @@ -306,7 +309,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) *wqids = NULL; errcode = - p9pdu_readf(pdu, optional, "w", nwqid); + p9pdu_readf(pdu, proto_version, "w", nwqid); if (!errcode) { *wqids = kmalloc(*nwqid * @@ -321,7 +324,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) for (i = 0; i < *nwqid; i++) { errcode = - 
p9pdu_readf(pdu, optional, + p9pdu_readf(pdu, + proto_version, "Q", &(*wqids)[i]); if (errcode) @@ -336,7 +340,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) } break; case '?': - if (!optional) + if (proto_version != p9_proto_2000u) return 0; break; default: @@ -352,7 +356,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) } int -p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) +p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, + va_list ap) { const char *ptr; int errcode = 0; @@ -389,7 +394,8 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) if (sptr) len = MIN(strlen(sptr), USHORT_MAX); - errcode = p9pdu_writef(pdu, optional, "w", len); + errcode = p9pdu_writef(pdu, proto_version, + "w", len); if (!errcode && pdu_write(pdu, sptr, len)) errcode = -EFAULT; } @@ -398,7 +404,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) const struct p9_qid *qid = va_arg(ap, const struct p9_qid *); errcode = - p9pdu_writef(pdu, optional, "bdq", + p9pdu_writef(pdu, proto_version, "bdq", qid->type, qid->version, qid->path); } break; @@ -406,7 +412,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) const struct p9_wstat *stbuf = va_arg(ap, const struct p9_wstat *); errcode = - p9pdu_writef(pdu, optional, + p9pdu_writef(pdu, proto_version, "wwdQdddqssss?sddd", stbuf->size, stbuf->type, stbuf->dev, &stbuf->qid, @@ -421,8 +427,8 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) int32_t count = va_arg(ap, int32_t); const void *data = va_arg(ap, const void *); - errcode = - p9pdu_writef(pdu, optional, "d", count); + errcode = p9pdu_writef(pdu, proto_version, "d", + count); if (!errcode && pdu_write(pdu, data, count)) errcode = -EFAULT; } @@ -431,8 +437,8 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) int32_t count = va_arg(ap, int32_t); const char __user *udata = va_arg(ap, const void __user *); - errcode = - p9pdu_writef(pdu, optional, "d", count); + errcode = p9pdu_writef(pdu, proto_version, "d", + count); if (!errcode && pdu_write_u(pdu, udata, count)) errcode = -EFAULT; } @@ -441,14 +447,15 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) int16_t nwname = va_arg(ap, int); const char **wnames = va_arg(ap, const char **); - errcode = - p9pdu_writef(pdu, optional, "w", nwname); + errcode = p9pdu_writef(pdu, proto_version, "w", + nwname); if (!errcode) { int i; for (i = 0; i < nwname; i++) { errcode = - p9pdu_writef(pdu, optional, + p9pdu_writef(pdu, + proto_version, "s", wnames[i]); if (errcode) @@ -462,14 +469,15 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) struct p9_qid *wqids = va_arg(ap, struct p9_qid *); - errcode = - p9pdu_writef(pdu, optional, "w", nwqid); + errcode = p9pdu_writef(pdu, proto_version, "w", + nwqid); if (!errcode) { int i; for (i = 0; i < nwqid; i++) { errcode = - p9pdu_writef(pdu, optional, + p9pdu_writef(pdu, + proto_version, "Q", &wqids[i]); if (errcode) @@ -479,7 +487,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) } break; case '?': - if (!optional) + if (proto_version != p9_proto_2000u) return 0; break; default: @@ -494,32 +502,32 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) return errcode; } -int p9pdu_readf(struct p9_fcall *pdu, int optional, const char *fmt, 
...) +int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...) { va_list ap; int ret; va_start(ap, fmt); - ret = p9pdu_vreadf(pdu, optional, fmt, ap); + ret = p9pdu_vreadf(pdu, proto_version, fmt, ap); va_end(ap); return ret; } static int -p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...) +p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...) { va_list ap; int ret; va_start(ap, fmt); - ret = p9pdu_vwritef(pdu, optional, fmt, ap); + ret = p9pdu_vwritef(pdu, proto_version, fmt, ap); va_end(ap); return ret; } -int p9stat_read(char *buf, int len, struct p9_wstat *st, int dotu) +int p9stat_read(char *buf, int len, struct p9_wstat *st, int proto_version) { struct p9_fcall fake_pdu; int ret; @@ -529,7 +537,7 @@ int p9stat_read(char *buf, int len, struct p9_wstat *st, int dotu) fake_pdu.sdata = buf; fake_pdu.offset = 0; - ret = p9pdu_readf(&fake_pdu, dotu, "S", st); + ret = p9pdu_readf(&fake_pdu, proto_version, "S", st); if (ret) { P9_DPRINTK(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret); p9pdu_dump(1, &fake_pdu); diff --git a/net/9p/protocol.h b/net/9p/protocol.h index ccde462e7ac5..2431c0f38d56 100644 --- a/net/9p/protocol.h +++ b/net/9p/protocol.h @@ -25,9 +25,9 @@ * */ -int -p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap); -int p9pdu_readf(struct p9_fcall *pdu, int optional, const char *fmt, ...); +int p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, + va_list ap); +int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type); int p9pdu_finalize(struct p9_fcall *pdu); void p9pdu_dump(int, struct p9_fcall *); -- cgit v1.2.2 From c5a7697da9775f7a0e122fa23180becc311772d1 Mon Sep 17 00:00:00 2001 From: Sripathi Kodi Date: Fri, 5 Mar 2010 18:51:04 +0000 Subject: 9P2010.L handshake: .L protocol negotiation This patch adds 9P2010.L protocol negotiation with the server Signed-off-by: Sripathi Kodi Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 9994676e57da..bde9f3d38c57 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -670,8 +670,25 @@ int p9_client_version(struct p9_client *c) P9_DPRINTK(P9_DEBUG_9P, ">>> TVERSION msize %d protocol %d\n", c->msize, c->proto_version); - req = p9_client_rpc(c, P9_TVERSION, "ds", c->msize, - p9_is_proto_dotu(c) ? 
"9P2000.u" : "9P2000"); + + switch (c->proto_version) { + case p9_proto_2010L: + req = p9_client_rpc(c, P9_TVERSION, "ds", + c->msize, "9P2010.L"); + break; + case p9_proto_2000u: + req = p9_client_rpc(c, P9_TVERSION, "ds", + c->msize, "9P2000.u"); + break; + case p9_proto_legacy: + req = p9_client_rpc(c, P9_TVERSION, "ds", + c->msize, "9P2000"); + break; + default: + return -EINVAL; + break; + } + if (IS_ERR(req)) return PTR_ERR(req); @@ -683,7 +700,9 @@ int p9_client_version(struct p9_client *c) } P9_DPRINTK(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version); - if (!strncmp(version, "9P2000.u", 8)) + if (!strncmp(version, "9P2010.L", 8)) + c->proto_version = p9_proto_2010L; + else if (!strncmp(version, "9P2000.u", 8)) c->proto_version = p9_proto_2000u; else if (!strncmp(version, "9P2000", 6)) c->proto_version = p9_proto_legacy; -- cgit v1.2.2 From 8eae939f1400326b06d0c9afe53d2a484a326871 Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Thu, 4 Mar 2010 18:01:40 +0000 Subject: net: add limit for socket backlog We got system OOM while running some UDP netperf testing on the loopback device. The case is multiple senders sent stream UDP packets to a single receiver via loopback on local host. Of course, the receiver is not able to handle all the packets in time. But we surprisingly found that these packets were not discarded due to the receiver's sk->sk_rcvbuf limit. Instead, they are kept queuing to sk->sk_backlog and finally ate up all the memory. We believe this is a secure hole that a none privileged user can crash the system. The root cause for this problem is, when the receiver is doing __release_sock() (i.e. after userspace recv, kernel udp_recvmsg -> skb_free_datagram_locked -> release_sock), it moves skbs from backlog to sk_receive_queue with the softirq enabled. In the above case, multiple busy senders will almost make it an endless loop. The skbs in the backlog end up eat all the system memory. The issue is not only for UDP. Any protocols using socket backlog is potentially affected. The patch adds limit for socket backlog so that the backlog size cannot be expanded endlessly. Reported-by: Alex Shi Cc: David Miller Cc: Arnaldo Carvalho de Melo Cc: Alexey Kuznetsov Cc: Patrick McHardy Cc: Vlad Yasevich Cc: Sridhar Samudrala Cc: Jon Maloy Cc: Allan Stephens Cc: Andrew Hendry Signed-off-by: Zhu Yi Signed-off-by: Eric Dumazet Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/core/sock.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index fcd397a762ff..6e22dc973d23 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -340,8 +340,12 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) rc = sk_backlog_rcv(sk, skb); mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); - } else - sk_add_backlog(sk, skb); + } else if (sk_add_backlog_limited(sk, skb)) { + bh_unlock_sock(sk); + atomic_inc(&sk->sk_drops); + goto discard_and_relse; + } + bh_unlock_sock(sk); out: sock_put(sk); @@ -1139,6 +1143,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) sock_lock_init(newsk); bh_lock_sock(newsk); newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; + newsk->sk_backlog.len = 0; atomic_set(&newsk->sk_rmem_alloc, 0); /* @@ -1542,6 +1547,12 @@ static void __release_sock(struct sock *sk) bh_lock_sock(sk); } while ((skb = sk->sk_backlog.head) != NULL); + + /* + * Doing the zeroing here guarantee we can not loop forever + * while a wild producer attempts to flood us. + */ + sk->sk_backlog.len = 0; } /** @@ -1874,6 +1885,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_allocation = GFP_KERNEL; sk->sk_rcvbuf = sysctl_rmem_default; sk->sk_sndbuf = sysctl_wmem_default; + sk->sk_backlog.limit = sk->sk_rcvbuf << 1; sk->sk_state = TCP_CLOSE; sk_set_socket(sk, sock); -- cgit v1.2.2 From 6b03a53a5ab7ccf2d5d69f96cf1c739c4d2a8fb9 Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Thu, 4 Mar 2010 18:01:41 +0000 Subject: tcp: use limited socket backlog Make tcp adapt to the limited socket backlog change. Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: "Pekka Savola (ipv6)" Cc: Patrick McHardy Signed-off-by: Zhu Yi Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 6 ++++-- net/ipv6/tcp_ipv6.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c3588b4fd979..4baf1943b1bd 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1682,8 +1682,10 @@ process: if (!tcp_prequeue(sk, skb)) ret = tcp_v4_do_rcv(sk, skb); } - } else - sk_add_backlog(sk, skb); + } else if (sk_add_backlog_limited(sk, skb)) { + bh_unlock_sock(sk); + goto discard_and_relse; + } bh_unlock_sock(sk); sock_put(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6963a6b6763e..c4ea9d5cbfaa 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1740,8 +1740,10 @@ process: if (!tcp_prequeue(sk, skb)) ret = tcp_v6_do_rcv(sk, skb); } - } else - sk_add_backlog(sk, skb); + } else if (sk_add_backlog_limited(sk, skb)) { + bh_unlock_sock(sk); + goto discard_and_relse; + } bh_unlock_sock(sk); sock_put(sk); -- cgit v1.2.2 From 55349790d7cbf0d381873a7ece1dcafcffd4aaa9 Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Thu, 4 Mar 2010 18:01:42 +0000 Subject: udp: use limited socket backlog Make udp adapt to the limited socket backlog change. Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: "Pekka Savola (ipv6)" Cc: Patrick McHardy Signed-off-by: Zhu Yi Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/udp.c | 6 ++++-- net/ipv6/udp.c | 28 ++++++++++++++++++---------- 2 files changed, 22 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 608a5446d05b..e7eb47f338d4 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1371,8 +1371,10 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) bh_lock_sock(sk); if (!sock_owned_by_user(sk)) rc = __udp_queue_rcv_skb(sk, skb); - else - sk_add_backlog(sk, skb); + else if (sk_add_backlog_limited(sk, skb)) { + bh_unlock_sock(sk); + goto drop; + } bh_unlock_sock(sk); return rc; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 52b8347ae3b2..64804912b093 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -583,16 +583,20 @@ static void flush_stack(struct sock **stack, unsigned int count, bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb1); - else - sk_add_backlog(sk, skb1); + else if (sk_add_backlog_limited(sk, skb1)) { + kfree_skb(skb1); + bh_unlock_sock(sk); + goto drop; + } bh_unlock_sock(sk); - } else { - atomic_inc(&sk->sk_drops); - UDP6_INC_STATS_BH(sock_net(sk), - UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk)); - UDP6_INC_STATS_BH(sock_net(sk), - UDP_MIB_INERRORS, IS_UDPLITE(sk)); + continue; } +drop: + atomic_inc(&sk->sk_drops); + UDP6_INC_STATS_BH(sock_net(sk), + UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk)); + UDP6_INC_STATS_BH(sock_net(sk), + UDP_MIB_INERRORS, IS_UDPLITE(sk)); } } /* @@ -754,8 +758,12 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb); - else - sk_add_backlog(sk, skb); + else if (sk_add_backlog_limited(sk, skb)) { + atomic_inc(&sk->sk_drops); + bh_unlock_sock(sk); + sock_put(sk); + goto discard; + } bh_unlock_sock(sk); sock_put(sk); return 0; -- cgit v1.2.2 From 79545b681961d7001c1f4c3eb9ffb87bed4485db Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Thu, 4 Mar 2010 18:01:43 +0000 Subject: llc: use limited socket backlog Make llc adapt to the limited socket backlog change. Cc: Arnaldo Carvalho de Melo Signed-off-by: Zhu Yi Acked-by: Eric Dumazet Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/llc/llc_conn.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index a8dde9b010da..c0539ffdb272 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -827,7 +827,8 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb) else { dprintk("%s: adding to backlog...\n", __func__); llc_set_backlog_type(skb, LLC_PACKET); - sk_add_backlog(sk, skb); + if (sk_add_backlog_limited(sk, skb)) + goto drop_unlock; } out: bh_unlock_sock(sk); -- cgit v1.2.2 From 50b1a782f845140f4138f14a1ce8a4a6dd0cc82f Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Thu, 4 Mar 2010 18:01:44 +0000 Subject: sctp: use limited socket backlog Make sctp adapt to the limited socket backlog change. Cc: Vlad Yasevich Cc: Sridhar Samudrala Signed-off-by: Zhu Yi Signed-off-by: David S. 
Miller --- net/sctp/input.c | 42 +++++++++++++++++++++++++++--------------- net/sctp/socket.c | 3 +++ 2 files changed, 30 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/sctp/input.c b/net/sctp/input.c index c0c973e67add..cbc063665e6b 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -75,7 +75,7 @@ static struct sctp_association *__sctp_lookup_association( const union sctp_addr *peer, struct sctp_transport **pt); -static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb); +static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb); /* Calculate the SCTP checksum of an SCTP packet. */ @@ -265,8 +265,13 @@ int sctp_rcv(struct sk_buff *skb) } if (sock_owned_by_user(sk)) { + if (sctp_add_backlog(sk, skb)) { + sctp_bh_unlock_sock(sk); + sctp_chunk_free(chunk); + skb = NULL; /* sctp_chunk_free already freed the skb */ + goto discard_release; + } SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_BACKLOG); - sctp_add_backlog(sk, skb); } else { SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_SOFTIRQ); sctp_inq_push(&chunk->rcvr->inqueue, chunk); @@ -336,8 +341,10 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) sctp_bh_lock_sock(sk); if (sock_owned_by_user(sk)) { - sk_add_backlog(sk, skb); - backloged = 1; + if (sk_add_backlog_limited(sk, skb)) + sctp_chunk_free(chunk); + else + backloged = 1; } else sctp_inq_push(inqueue, chunk); @@ -362,22 +369,27 @@ done: return 0; } -static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb) +static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb) { struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk; struct sctp_ep_common *rcvr = chunk->rcvr; + int ret; - /* Hold the assoc/ep while hanging on the backlog queue. - * This way, we know structures we need will not disappear from us - */ - if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type) - sctp_association_hold(sctp_assoc(rcvr)); - else if (SCTP_EP_TYPE_SOCKET == rcvr->type) - sctp_endpoint_hold(sctp_ep(rcvr)); - else - BUG(); + ret = sk_add_backlog_limited(sk, skb); + if (!ret) { + /* Hold the assoc/ep while hanging on the backlog queue. + * This way, we know structures we need will not disappear + * from us + */ + if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type) + sctp_association_hold(sctp_assoc(rcvr)); + else if (SCTP_EP_TYPE_SOCKET == rcvr->type) + sctp_endpoint_hold(sctp_ep(rcvr)); + else + BUG(); + } + return ret; - sk_add_backlog(sk, skb); } /* Handle icmp frag needed error. */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f6d1e59c4151..dfc5c127efd4 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3720,6 +3720,9 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) SCTP_DBG_OBJCNT_INC(sock); percpu_counter_inc(&sctp_sockets_allocated); + /* Set socket backlog limit. */ + sk->sk_backlog.limit = sysctl_sctp_rmem[1]; + local_bh_disable(); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); local_bh_enable(); -- cgit v1.2.2 From 53eecb1be5ae499d399d2923933937a9ea1a284f Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Thu, 4 Mar 2010 18:01:45 +0000 Subject: tipc: use limited socket backlog Make tipc adapt to the limited socket backlog change. Cc: Jon Maloy Cc: Allan Stephens Signed-off-by: Zhu Yi Acked-by: Eric Dumazet Acked-by: Allan Stephens Signed-off-by: David S. 
Miller --- net/tipc/socket.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 1ea64f09cc45..22bfbc33a8ac 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1322,8 +1322,10 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf) if (!sock_owned_by_user(sk)) { res = filter_rcv(sk, buf); } else { - sk_add_backlog(sk, buf); - res = TIPC_OK; + if (sk_add_backlog_limited(sk, buf)) + res = TIPC_ERR_OVERLOAD; + else + res = TIPC_OK; } bh_unlock_sock(sk); -- cgit v1.2.2 From 2499849ee8f513e795b9f2c19a42d6356e4943a4 Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Thu, 4 Mar 2010 18:01:46 +0000 Subject: x25: use limited socket backlog Make x25 adapt to the limited socket backlog change. Cc: Andrew Hendry Signed-off-by: Zhu Yi Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/x25/x25_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index 3e1efe534645..a9da0dc26f4f 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -53,7 +53,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb) if (!sock_owned_by_user(sk)) { queued = x25_process_rx_frame(sk, skb); } else { - sk_add_backlog(sk, skb); + queued = !sk_add_backlog_limited(sk, skb); } bh_unlock_sock(sk); sock_put(sk); -- cgit v1.2.2 From a3a858ff18a72a8d388e31ab0d98f7e944841a62 Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Thu, 4 Mar 2010 18:01:47 +0000 Subject: net: backlog functions rename sk_add_backlog -> __sk_add_backlog sk_add_backlog_limited -> sk_add_backlog Signed-off-by: Zhu Yi Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock.c | 2 +- net/dccp/minisocks.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_minisocks.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/udp.c | 4 ++-- net/llc/llc_c_ac.c | 2 +- net/llc/llc_conn.c | 2 +- net/sctp/input.c | 4 ++-- net/tipc/socket.c | 2 +- net/x25/x25_dev.c | 2 +- 12 files changed, 14 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 6e22dc973d23..61a65a2e0455 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -340,7 +340,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) rc = sk_backlog_rcv(sk, skb); mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); - } else if (sk_add_backlog_limited(sk, skb)) { + } else if (sk_add_backlog(sk, skb)) { bh_unlock_sock(sk); atomic_inc(&sk->sk_drops); goto discard_and_relse; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index af226a063141..0d508c359fa9 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -254,7 +254,7 @@ int dccp_child_process(struct sock *parent, struct sock *child, * in main socket hash table and lock on listening * socket does not protect us more. 
*/ - sk_add_backlog(child, skb); + __sk_add_backlog(child, skb); } bh_unlock_sock(child); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4baf1943b1bd..1915f7dc30e6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1682,7 +1682,7 @@ process: if (!tcp_prequeue(sk, skb)) ret = tcp_v4_do_rcv(sk, skb); } - } else if (sk_add_backlog_limited(sk, skb)) { + } else if (sk_add_backlog(sk, skb)) { bh_unlock_sock(sk); goto discard_and_relse; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f206ee5dda80..4199bc6915c5 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -728,7 +728,7 @@ int tcp_child_process(struct sock *parent, struct sock *child, * in main socket hash table and lock on listening * socket does not protect us more. */ - sk_add_backlog(child, skb); + __sk_add_backlog(child, skb); } bh_unlock_sock(child); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e7eb47f338d4..7af756d0f931 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1371,7 +1371,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) bh_lock_sock(sk); if (!sock_owned_by_user(sk)) rc = __udp_queue_rcv_skb(sk, skb); - else if (sk_add_backlog_limited(sk, skb)) { + else if (sk_add_backlog(sk, skb)) { bh_unlock_sock(sk); goto drop; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c4ea9d5cbfaa..2c378b1bd5cf 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1740,7 +1740,7 @@ process: if (!tcp_prequeue(sk, skb)) ret = tcp_v6_do_rcv(sk, skb); } - } else if (sk_add_backlog_limited(sk, skb)) { + } else if (sk_add_backlog(sk, skb)) { bh_unlock_sock(sk); goto discard_and_relse; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 64804912b093..3c0c9c755c92 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -583,7 +583,7 @@ static void flush_stack(struct sock **stack, unsigned int count, bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb1); - else if (sk_add_backlog_limited(sk, skb1)) { + else if (sk_add_backlog(sk, skb1)) { kfree_skb(skb1); bh_unlock_sock(sk); goto drop; @@ -758,7 +758,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb); - else if (sk_add_backlog_limited(sk, skb)) { + else if (sk_add_backlog(sk, skb)) { atomic_inc(&sk->sk_drops); bh_unlock_sock(sk); sock_put(sk); diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 019c780512e8..86d6985b9d49 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -1437,7 +1437,7 @@ static void llc_process_tmr_ev(struct sock *sk, struct sk_buff *skb) llc_conn_state_process(sk, skb); else { llc_set_backlog_type(skb, LLC_EVENT); - sk_add_backlog(sk, skb); + __sk_add_backlog(sk, skb); } } } diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index c0539ffdb272..a12144da7974 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -827,7 +827,7 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb) else { dprintk("%s: adding to backlog...\n", __func__); llc_set_backlog_type(skb, LLC_PACKET); - if (sk_add_backlog_limited(sk, skb)) + if (sk_add_backlog(sk, skb)) goto drop_unlock; } out: diff --git a/net/sctp/input.c b/net/sctp/input.c index cbc063665e6b..3d74b264ea22 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -341,7 +341,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) sctp_bh_lock_sock(sk); if (sock_owned_by_user(sk)) { - if (sk_add_backlog_limited(sk, skb)) + if (sk_add_backlog(sk, skb)) sctp_chunk_free(chunk); 
else backloged = 1; @@ -375,7 +375,7 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb) struct sctp_ep_common *rcvr = chunk->rcvr; int ret; - ret = sk_add_backlog_limited(sk, skb); + ret = sk_add_backlog(sk, skb); if (!ret) { /* Hold the assoc/ep while hanging on the backlog queue. * This way, we know structures we need will not disappear diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 22bfbc33a8ac..4b235fc1c70f 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1322,7 +1322,7 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf) if (!sock_owned_by_user(sk)) { res = filter_rcv(sk, buf); } else { - if (sk_add_backlog_limited(sk, buf)) + if (sk_add_backlog(sk, buf)) res = TIPC_ERR_OVERLOAD; else res = TIPC_OK; diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index a9da0dc26f4f..52e304212241 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -53,7 +53,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb) if (!sock_owned_by_user(sk)) { queued = x25_process_rx_frame(sk, skb); } else { - queued = !sk_add_backlog_limited(sk, skb); + queued = !sk_add_backlog(sk, skb); } bh_unlock_sock(sk); sock_put(sk); -- cgit v1.2.2 From 723b2f57ad83ee7087acf9a95e8e289414b1f521 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Wed, 3 Mar 2010 22:51:50 +0000 Subject: ethtool: Add direct access to ops->get_sset_count This patch is an alternative approach for accessing string counts, vs. the drvinfo indirect approach. This way the drvinfo space doesn't run out, and we don't break ABI later. Signed-off-by: Jeff Garzik Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- net/core/ethtool.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 0f2f82185ec4..70075c47ada8 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -214,6 +214,10 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use info.cmd = ETHTOOL_GDRVINFO; ops->get_drvinfo(dev, &info); + /* + * this method of obtaining string set info is deprecated; + * consider using ETHTOOL_GSSET_INFO instead + */ if (ops->get_sset_count) { int rc; @@ -237,6 +241,71 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use return 0; } +/* + * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() + */ +static noinline int ethtool_get_sset_info(struct net_device *dev, + void __user *useraddr) +{ + struct ethtool_sset_info info; + const struct ethtool_ops *ops = dev->ethtool_ops; + u64 sset_mask; + int i, idx = 0, n_bits = 0, ret, rc; + u32 *info_buf = NULL; + + if (!ops->get_sset_count) + return -EOPNOTSUPP; + + if (copy_from_user(&info, useraddr, sizeof(info))) + return -EFAULT; + + /* store copy of mask, because we zero struct later on */ + sset_mask = info.sset_mask; + if (!sset_mask) + return 0; + + /* calculate size of return buffer */ + for (i = 0; i < 64; i++) + if (sset_mask & (1ULL << i)) + n_bits++; + + memset(&info, 0, sizeof(info)); + info.cmd = ETHTOOL_GSSET_INFO; + + info_buf = kzalloc(n_bits * sizeof(u32), GFP_USER); + if (!info_buf) + return -ENOMEM; + + /* + * fill return buffer based on input bitmask and successful + * get_sset_count return + */ + for (i = 0; i < 64; i++) { + if (!(sset_mask & (1ULL << i))) + continue; + + rc = ops->get_sset_count(dev, i); + if (rc >= 0) { + info.sset_mask |= (1ULL << i); + info_buf[idx++] = rc; + } + 
} + + ret = -EFAULT; + if (copy_to_user(useraddr, &info, sizeof(info))) + goto out; + + useraddr += offsetof(struct ethtool_sset_info, data); + if (copy_to_user(useraddr, info_buf, idx * sizeof(u32))) + goto out; + + ret = 0; + +out: + kfree(info_buf); + return ret; +} + /* * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() */ @@ -1471,6 +1540,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GRXNTUPLE: rc = ethtool_get_rx_ntuple(dev, useraddr); break; + case ETHTOOL_GSSET_INFO: + rc = ethtool_get_sset_info(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } -- cgit v1.2.2 From d17792ebdf90289c9fd1bce888076d3d60ecd53b Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Thu, 4 Mar 2010 08:21:53 +0000 Subject: ethtool: Add direct access to ops->get_sset_count On 03/04/2010 09:26 AM, Ben Hutchings wrote: > On Thu, 2010-03-04 at 00:51 -0800, Jeff Kirsher wrote: >> From: Jeff Garzik >> >> This patch is an alternative approach for accessing string >> counts, vs. the drvinfo indirect approach. This way the drvinfo >> space doesn't run out, and we don't break ABI later. > [...] >> --- a/net/core/ethtool.c >> +++ b/net/core/ethtool.c >> @@ -214,6 +214,10 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use >> info.cmd = ETHTOOL_GDRVINFO; >> ops->get_drvinfo(dev,&info); >> >> + /* >> + * this method of obtaining string set info is deprecated; >> + * consider using ETHTOOL_GSSET_INFO instead >> + */ > > This comment belongs on the interface (ethtool.h) not the > implementation. Debatable -- the current comment is located at the callsite of ops->get_sset_count(), which is where an implementor might think to add a new call. Not all the numeric fields in ethtool_drvinfo are obtained from ->get_sset_count(). Hence the "some" in the attached patch to include/linux/ethtool.h, addressing your comment. > [...] >> +static noinline int ethtool_get_sset_info(struct net_device *dev, >> + void __user *useraddr) >> +{ > [...] >> + /* calculate size of return buffer */ >> + for (i = 0; i< 64; i++) >> + if (sset_mask& (1ULL<< i)) >> + n_bits++; > [...] > > We have a function for this: > > n_bits = hweight64(sset_mask); Agreed. I've attached a follow-up patch, which should enable my/Jeff's kernel patch to be applied, followed by this one. Signed-off-by: Jeff Garzik Signed-off-by: David S. Miller --- net/core/ethtool.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 70075c47ada8..33d2ded50f84 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -17,6 +17,7 @@ #include #include #include +#include #include /* @@ -216,7 +217,7 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use /* * this method of obtaining string set info is deprecated; - * consider using ETHTOOL_GSSET_INFO instead + * Use ETHTOOL_GSSET_INFO instead. 
*/ if (ops->get_sset_count) { int rc; @@ -265,9 +266,7 @@ static noinline int ethtool_get_sset_info(struct net_device *dev, return 0; /* calculate size of return buffer */ - for (i = 0; i < 64; i++) - if (sset_mask & (1ULL << i)) - n_bits++; + n_bits = hweight64(sset_mask); memset(&info, 0, sizeof(info)); info.cmd = ETHTOOL_GSSET_INFO; -- cgit v1.2.2 From 72c3368856c543ace033f6a5b9a3edf1f4043236 Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Fri, 5 Mar 2010 13:42:43 -0800 Subject: nodemask.h: remove macro any_online_node The macro any_online_node() is prone to producing sparse warnings due to the local symbol 'node'. Since all the in-tree users are really requesting the first online node (the mask argument is either NODE_MASK_ALL or node_online_map) just use the first_online_node macro and remove the any_online_node macro since there are no users. Signed-off-by: H Hartley Sweeten Acked-by: David Rientjes Reviewed-by: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: Lee Schermerhorn Acked-by: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Dave Hansen Cc: Milton Miller Cc: Nathan Fontenot Cc: Geoff Levand Cc: Grant Likely Cc: J. Bruce Fields Cc: Neil Brown Cc: Trond Myklebust Cc: David S. Miller Cc: Benny Halevy Cc: Chuck Lever Cc: Ricardo Labiaga Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/sunrpc/svc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 6dcf8c9c784c..8420a4205b76 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -133,7 +133,7 @@ svc_pool_map_choose_mode(void) return SVC_POOL_PERNODE; } - node = any_online_node(node_online_map); + node = first_online_node; if (nr_cpus_node(node) > 2) { /* * Non-trivial SMP, or CONFIG_NUMA on -- cgit v1.2.2 From 02a780c014c40973cbe71d04cec7a24e6629995f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 6 Mar 2010 01:14:09 +0000 Subject: bridge: cleanup: remove unneed check We dereference "port" on the lines immediately before and immediately after the test so port should hopefully never be null here. Signed-off-by: Dan Carpenter Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 2559fb539836..8b258872eba8 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -627,8 +627,8 @@ static void br_multicast_port_query_expired(unsigned long data) struct net_bridge *br = port->br; spin_lock(&br->multicast_lock); - if (port && (port->state == BR_STATE_DISABLED || - port->state == BR_STATE_BLOCKING)) + if (port->state == BR_STATE_DISABLED || + port->state == BR_STATE_BLOCKING) goto out; if (port->multicast_startup_queries_sent < -- cgit v1.2.2 From 72150e9b7fec217fbd646a29ea2f65a3d4d55ea9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 6 Mar 2010 01:04:45 +0000 Subject: sock.c: potential null dereference We test that "prot->rsk_prot" is non-null right before we dereference it on this line. Signed-off-by: Dan Carpenter Signed-off-by: David S. 
Miller --- net/core/sock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 61a65a2e0455..c5812bbc2cc9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2288,7 +2288,8 @@ out_free_request_sock_slab: prot->rsk_prot->slab = NULL; } out_free_request_sock_slab_name: - kfree(prot->rsk_prot->slab_name); + if (prot->rsk_prot) + kfree(prot->rsk_prot->slab_name); out_free_sock_slab: kmem_cache_destroy(prot->slab); prot->slab = NULL; -- cgit v1.2.2 From 0c9a2ac1f8a2e55b3382dfc27256878a58ea49e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?YOSHIFUJI=20Hideaki=20/=20=E5=90=89=E8=97=A4=E8=8B=B1?= =?UTF-8?q?=E6=98=8E?= Date: Sun, 7 Mar 2010 00:14:44 +0000 Subject: ipv6: Optmize translation between IPV6_PREFER_SRC_xxx and RT6_LOOKUP_F_xxx. IPV6_PREFER_SRC_xxx definitions: | #define IPV6_PREFER_SRC_TMP 0x0001 | #define IPV6_PREFER_SRC_PUBLIC 0x0002 | #define IPV6_PREFER_SRC_COA 0x0004 RT6_LOOKUP_F_xxx definitions: | #define RT6_LOOKUP_F_SRCPREF_TMP 0x00000008 | #define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010 | #define RT6_LOOKUP_F_SRCPREF_COA 0x00000020 So, we can translate between these two groups by shift operation instead of multiple 'if's. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/fib6_rules.c | 11 ++--------- net/ipv6/route.c | 11 ++--------- 2 files changed, 4 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 551882b9dfd6..5e463c43fcc2 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -84,18 +84,11 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, if ((rule->flags & FIB_RULE_FIND_SADDR) && r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) { struct in6_addr saddr; - unsigned int srcprefs = 0; - - if (flags & RT6_LOOKUP_F_SRCPREF_TMP) - srcprefs |= IPV6_PREFER_SRC_TMP; - if (flags & RT6_LOOKUP_F_SRCPREF_PUBLIC) - srcprefs |= IPV6_PREFER_SRC_PUBLIC; - if (flags & RT6_LOOKUP_F_SRCPREF_COA) - srcprefs |= IPV6_PREFER_SRC_COA; if (ipv6_dev_get_saddr(net, ip6_dst_idev(&rt->u.dst)->dev, - &flp->fl6_dst, srcprefs, + &flp->fl6_dst, + rt6_flags2srcprefs(flags), &saddr)) goto again; if (!ipv6_prefix_equal(&saddr, &r->src.addr, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b08879e97f22..52cd3eff31dc 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -819,15 +819,8 @@ struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, if (!ipv6_addr_any(&fl->fl6_src)) flags |= RT6_LOOKUP_F_HAS_SADDR; - else if (sk) { - unsigned int prefs = inet6_sk(sk)->srcprefs; - if (prefs & IPV6_PREFER_SRC_TMP) - flags |= RT6_LOOKUP_F_SRCPREF_TMP; - if (prefs & IPV6_PREFER_SRC_PUBLIC) - flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC; - if (prefs & IPV6_PREFER_SRC_COA) - flags |= RT6_LOOKUP_F_SRCPREF_COA; - } + else if (sk) + flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output); } -- cgit v1.2.2 From 49f5fcfd4ac3df24aa66520e1c5f37db5dfa8c10 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 5 Mar 2010 21:07:39 +0000 Subject: bridge: Use RCU list primitive in __br_mdb_ip_get As Paul McKenney correctly pointed out, __br_mdb_ip_get needs to use the RCU list walking primitive in order to work correctly on platforms where data-dependency ordering is not guaranteed. Reported-by: Paul E. McKenney Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 8b258872eba8..a1ffe1582c9a 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -38,7 +38,7 @@ static struct net_bridge_mdb_entry *__br_mdb_ip_get( struct net_bridge_mdb_entry *mp; struct hlist_node *p; - hlist_for_each_entry(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { + hlist_for_each_entry_rcu(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { if (dst == mp->addr) return mp; } -- cgit v1.2.2 From 10cc2b50eb4b01ca4dc014af2094d28b4ebe20d7 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 5 Mar 2010 21:03:35 +0000 Subject: bridge: Fix RCU race in br_multicast_stop Thanks to Paul McKenney for pointing out that it is incorrect to use synchronize_rcu_bh to ensure that pending callbacks have completed. Instead we should use rcu_barrier_bh. Reported-by: Paul E. McKenney Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index a1ffe1582c9a..12ce1eaa4f3e 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1135,7 +1135,7 @@ void br_multicast_stop(struct net_bridge *br) if (mdb->old) { spin_unlock_bh(&br->multicast_lock); - synchronize_rcu_bh(); + rcu_barrier_bh(); spin_lock_bh(&br->multicast_lock); WARN_ON(mdb->old); } -- cgit v1.2.2 From 28812fe11a21826ba4c97c6c7971a619987cd912 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 5 Jan 2010 12:48:07 +0100 Subject: driver-core: Add attribute argument to class_attribute show/store Passing the attribute to the low level IO functions allows all kinds of cleanups, by sharing low level IO code without requiring a separate function for every piece of data. Drivers can also extend the attributes with their own data fields and use that in the low level function. This makes the class attributes the same as sysdev_class attributes and plain attributes. This will allow further cleanups in drivers. Full tree sweep converting all users. 
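(Illustration, not part of the patch that follows: with the new show(class, attr, buf) signature a driver can embed the generic struct class_attribute in a private structure and recover it with container_of(), so a single callback serves several attributes instead of one function per value. The wrapper type and field names below are hypothetical, a minimal sketch of the kind of cleanup the message describes.)

struct my_class_attr {
	struct class_attribute attr;
	int *value;			/* per-attribute payload */
};

static ssize_t my_shared_show(struct class *class,
			      struct class_attribute *attr, char *buf)
{
	/* Recover the private wrapper from the generic attribute pointer. */
	struct my_class_attr *mca =
		container_of(attr, struct my_class_attr, attr);

	return sprintf(buf, "%d\n", *mca->value);
}
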
Signed-off-by: Andi Kleen Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/l2cap.c | 4 +++- net/bluetooth/rfcomm/core.c | 4 +++- net/bluetooth/rfcomm/sock.c | 4 +++- net/bluetooth/sco.c | 4 +++- 4 files changed, 12 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 400efa26ddba..4db7ae2fe07d 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3937,7 +3937,9 @@ drop: return 0; } -static ssize_t l2cap_sysfs_show(struct class *dev, char *buf) +static ssize_t l2cap_sysfs_show(struct class *dev, + struct class_attribute *attr, + char *buf) { struct sock *sk; struct hlist_node *node; diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 89f4a59eb82b..db8a68e1a5ba 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -2098,7 +2098,9 @@ static struct hci_cb rfcomm_cb = { .security_cfm = rfcomm_security_cfm }; -static ssize_t rfcomm_dlc_sysfs_show(struct class *dev, char *buf) +static ssize_t rfcomm_dlc_sysfs_show(struct class *dev, + struct class_attribute *attr, + char *buf) { struct rfcomm_session *s; struct list_head *pp, *p; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 4b5968dda673..ca87d6ac6a20 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -1061,7 +1061,9 @@ done: return result; } -static ssize_t rfcomm_sock_sysfs_show(struct class *dev, char *buf) +static ssize_t rfcomm_sock_sysfs_show(struct class *dev, + struct class_attribute *attr, + char *buf) { struct sock *sk; struct hlist_node *node; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index dd8f6ec57dce..f93b939539bc 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -953,7 +953,9 @@ drop: return 0; } -static ssize_t sco_sysfs_show(struct class *dev, char *buf) +static ssize_t sco_sysfs_show(struct class *dev, + struct class_attribute *attr, + char *buf) { struct sock *sk; struct hlist_node *node; -- cgit v1.2.2 From 52cf25d0ab7f78eeecc59ac652ed5090f69b619e Mon Sep 17 00:00:00 2001 From: Emese Revfy Date: Tue, 19 Jan 2010 02:58:23 +0100 Subject: Driver core: Constify struct sysfs_ops in struct kobj_type Constify struct sysfs_ops. This is part of the ops structure constification effort started by Arjan van de Ven et al. Benefits of this constification: * prevents modification of data that is shared (referenced) by many other structure instances at runtime * detects/prevents accidental (but not intentional) modification attempts on archs that enforce read-only kernel data at runtime * potentially better optimized code as the compiler can assume that the const data cannot be changed * the compiler/linker move const data into .rodata and therefore exclude them from false sharing Signed-off-by: Emese Revfy Acked-by: David Teigland Acked-by: Matt Domsch Acked-by: Maciej Sosnowski Acked-by: Hans J. 
Koch Acked-by: Pekka Enberg Acked-by: Jens Axboe Acked-by: Stephen Hemminger Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_private.h | 2 +- net/bridge/br_sysfs_if.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 1cf2cef78584..fef0384e3c0b 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -423,7 +423,7 @@ extern void br_ifinfo_notify(int event, struct net_bridge_port *port); #ifdef CONFIG_SYSFS /* br_sysfs_if.c */ -extern struct sysfs_ops brport_sysfs_ops; +extern const struct sysfs_ops brport_sysfs_ops; extern int br_sysfs_addif(struct net_bridge_port *p); /* br_sysfs_br.c */ diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 696596cd3384..0b9916489d6b 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -238,7 +238,7 @@ static ssize_t brport_store(struct kobject * kobj, return ret; } -struct sysfs_ops brport_sysfs_ops = { +const struct sysfs_ops brport_sysfs_ops = { .show = brport_show, .store = brport_store, }; -- cgit v1.2.2 From 0898f99a267f89a7dc72cc687955f17613a711b8 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Mon, 8 Mar 2010 13:15:59 +0100 Subject: netfilter: ebt_ip6: Use ipv6_masked_addr_cmp() Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: Bart De Schuymer Signed-off-by: Patrick McHardy --- net/bridge/netfilter/ebt_ip6.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c index bbf2534ef026..4644cc9c0579 100644 --- a/net/bridge/netfilter/ebt_ip6.c +++ b/net/bridge/netfilter/ebt_ip6.c @@ -35,8 +35,6 @@ ebt_ip6_mt(const struct sk_buff *skb, const struct xt_match_param *par) struct ipv6hdr _ip6h; const struct tcpudphdr *pptr; struct tcpudphdr _ports; - struct in6_addr tmp_addr; - int i; ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h); if (ih6 == NULL) @@ -44,18 +42,10 @@ ebt_ip6_mt(const struct sk_buff *skb, const struct xt_match_param *par) if (info->bitmask & EBT_IP6_TCLASS && FWINV(info->tclass != ipv6_get_dsfield(ih6), EBT_IP6_TCLASS)) return false; - for (i = 0; i < 4; i++) - tmp_addr.in6_u.u6_addr32[i] = ih6->saddr.in6_u.u6_addr32[i] & - info->smsk.in6_u.u6_addr32[i]; - if (info->bitmask & EBT_IP6_SOURCE && - FWINV((ipv6_addr_cmp(&tmp_addr, &info->saddr) != 0), - EBT_IP6_SOURCE)) - return false; - for (i = 0; i < 4; i++) - tmp_addr.in6_u.u6_addr32[i] = ih6->daddr.in6_u.u6_addr32[i] & - info->dmsk.in6_u.u6_addr32[i]; - if (info->bitmask & EBT_IP6_DEST && - FWINV((ipv6_addr_cmp(&tmp_addr, &info->daddr) != 0), EBT_IP6_DEST)) + if (FWINV(ipv6_masked_addr_cmp(&ih6->saddr, &info->smsk, + &info->saddr), EBT_IP6_SOURCE) || + FWINV(ipv6_masked_addr_cmp(&ih6->daddr, &info->dmsk, + &info->daddr), EBT_IP6_DEST)) return false; if (info->bitmask & EBT_IP6_PROTO) { uint8_t nexthdr = ih6->nexthdr; -- cgit v1.2.2 From 6cce09f87a04797fae5b947ef2626c14a78f0b49 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 7 Mar 2010 23:21:57 +0000 Subject: tcp: Add SNMP counters for backlog and min_ttl drops Commit 6b03a53a (tcp: use limited socket backlog) added the possibility of dropping frames when backlog queue is full. Commit d218d111 (tcp: Generalized TTL Security Mechanism) added the possibility of dropping frames when TTL is under a given limit. 
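(Aside, not part of this patch: the per-socket TTL limit referred to above is the min_ttl value configured through the IP_MINTTL socket option introduced by the cited commit d218d111. A minimal user-space sketch of enabling it, error handling omitted:)

#include <netinet/in.h>
#include <sys/socket.h>

#ifndef IP_MINTTL
#define IP_MINTTL 21		/* older libc headers may not define it */
#endif

/* GTSM-style protection: require incoming TTL to be 255, i.e. accept
 * only directly connected peers; segments arriving with a lower TTL
 * are dropped (and, after this patch, counted as TCPMinTTLDrop). */
static void enable_gtsm(int sock_fd)
{
	int min_ttl = 255;

	setsockopt(sock_fd, IPPROTO_IP, IP_MINTTL, &min_ttl, sizeof(min_ttl));
}
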
This patch adds new SNMP MIB entries, named TCPBacklogDrop and TCPMinTTLDrop, published in /proc/net/netstat in TcpExt: line netstat -s | egrep "TCPBacklogDrop|TCPMinTTLDrop" TCPBacklogDrop: 0 TCPMinTTLDrop: 0 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/proc.c | 2 ++ net/ipv4/tcp_ipv4.c | 7 +++++-- net/ipv6/tcp_ipv6.c | 3 ++- 3 files changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 242ed2307370..4f1f337f4337 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -249,6 +249,8 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPSackShifted", LINUX_MIB_SACKSHIFTED), SNMP_MIB_ITEM("TCPSackMerged", LINUX_MIB_SACKMERGED), SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK), + SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP), + SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1915f7dc30e6..8d51d39ad1bb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1651,8 +1651,10 @@ int tcp_v4_rcv(struct sk_buff *skb) if (!sk) goto no_tcp_socket; - if (iph->ttl < inet_sk(sk)->min_ttl) + if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { + NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); goto discard_and_relse; + } process: if (sk->sk_state == TCP_TIME_WAIT) @@ -1682,8 +1684,9 @@ process: if (!tcp_prequeue(sk, skb)) ret = tcp_v4_do_rcv(sk, skb); } - } else if (sk_add_backlog(sk, skb)) { + } else if (unlikely(sk_add_backlog(sk, skb))) { bh_unlock_sock(sk); + NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); goto discard_and_relse; } bh_unlock_sock(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2c378b1bd5cf..9b6dbba80d31 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1740,8 +1740,9 @@ process: if (!tcp_prequeue(sk, skb)) ret = tcp_v6_do_rcv(sk, skb); } - } else if (sk_add_backlog(sk, skb)) { + } else if (unlikely(sk_add_backlog(sk, skb))) { bh_unlock_sock(sk); + NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); goto discard_and_relse; } bh_unlock_sock(sk); -- cgit v1.2.2 From 9837638727488922727b0cfd438039fa73364183 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 8 Mar 2010 03:20:00 +0000 Subject: net: fix route cache rebuilds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We added an automatic route cache rebuilding in commit 1080d709fb9d8cd43 but had to correct few bugs. One of the assumption of original patch, was that entries where kept sorted in a given way. This assumption is known to be wrong (commit 1ddbcb005c395518 gave an explanation of this and corrected a leak) and expensive to respect. Paweł Staszewski reported to me one of his machine got its routing cache disabled after few messages like : [ 2677.850065] Route hash chain too long! [ 2677.850080] Adjust your secret_interval! [82839.662993] Route hash chain too long! [82839.662996] Adjust your secret_interval! [155843.731650] Route hash chain too long! [155843.731664] Adjust your secret_interval! [155843.811881] Route hash chain too long! [155843.811891] Adjust your secret_interval! [155843.858209] vlan0811: 5 rebuilds is over limit, route caching disabled [155843.858212] Route hash chain too long! [155843.858213] Adjust your secret_interval! This is because rt_intern_hash() might be fooled when computing a chain length, because multiple entries with same keys can differ because of TOS (or mark/oif) bits. 
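(Toy illustration, not from the patch: the rule that the helper described next implements treats routes differing only in tos/mark/oif as aliases of one another, so a group of aliases adds one, not N, to the measured chain length. Plain user-space C with made-up types; the real code is in the has_noalias()/slow_chain_length() hunks in the diff below.)

struct toy_rt {
	unsigned int daddr, saddr;	/* the hash inputs */
	unsigned char tos;		/* ignored by the comparison */
	struct toy_rt *next;
};

static int toy_same_inputs(const struct toy_rt *a, const struct toy_rt *b)
{
	return a->daddr == b->daddr && a->saddr == b->saddr;
}

/* Count each alias group once, mirroring slow_chain_length(). */
static int toy_chain_length(const struct toy_rt *head)
{
	const struct toy_rt *rth, *aux;
	int len = 0;

	for (rth = head; rth; rth = rth->next) {
		for (aux = head; aux != rth; aux = aux->next)
			if (toy_same_inputs(aux, rth))
				break;	/* an earlier alias was already counted */
		if (aux == rth)
			len++;
	}
	return len;
}
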
In the rare case the fast algorithm see a too long chain, and before taking expensive path, we call a helper function in order to not count duplicates of same routes, that only differ with tos/mark/oif bits. This helper works with data already in cpu cache and is not be very expensive, despite its O(N^2) implementation. Paweł Staszewski sucessfully tested this patch on his loaded router. Reported-and-tested-by: Paweł Staszewski Signed-off-by: Eric Dumazet Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/ipv4/route.c | 50 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b2ba5581d2ae..d9b40248b97f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -146,7 +146,6 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); static int rt_garbage_collect(struct dst_ops *ops); -static void rt_emergency_hash_rebuild(struct net *net); static struct dst_ops ipv4_dst_ops = { @@ -780,11 +779,30 @@ static void rt_do_flush(int process_context) #define FRACT_BITS 3 #define ONE (1UL << FRACT_BITS) +/* + * Given a hash chain and an item in this hash chain, + * find if a previous entry has the same hash_inputs + * (but differs on tos, mark or oif) + * Returns 0 if an alias is found. + * Returns ONE if rth has no alias before itself. + */ +static int has_noalias(const struct rtable *head, const struct rtable *rth) +{ + const struct rtable *aux = head; + + while (aux != rth) { + if (compare_hash_inputs(&aux->fl, &rth->fl)) + return 0; + aux = aux->u.dst.rt_next; + } + return ONE; +} + static void rt_check_expire(void) { static unsigned int rover; unsigned int i = rover, goal; - struct rtable *rth, *aux, **rthp; + struct rtable *rth, **rthp; unsigned long samples = 0; unsigned long sum = 0, sum2 = 0; unsigned long delta; @@ -835,15 +853,7 @@ nofree: * attributes don't unfairly skew * the length computation */ - for (aux = rt_hash_table[i].chain;;) { - if (aux == rth) { - length += ONE; - break; - } - if (compare_hash_inputs(&aux->fl, &rth->fl)) - break; - aux = aux->u.dst.rt_next; - } + length += has_noalias(rt_hash_table[i].chain, rth); continue; } } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) @@ -1073,6 +1083,21 @@ work_done: out: return 0; } +/* + * Returns number of entries in a hash chain that have different hash_inputs + */ +static int slow_chain_length(const struct rtable *head) +{ + int length = 0; + const struct rtable *rth = head; + + while (rth) { + length += has_noalias(head, rth); + rth = rth->u.dst.rt_next; + } + return length >> FRACT_BITS; +} + static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp, struct sk_buff *skb) { @@ -1185,7 +1210,8 @@ restart: rt_free(cand); } } else { - if (chain_length > rt_chain_length_max) { + if (chain_length > rt_chain_length_max && + slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) { struct net *net = dev_net(rt->u.dst.dev); int num = ++net->ipv4.current_rt_cache_rebuild_count; if (!rt_caching(dev_net(rt->u.dst.dev))) { -- cgit v1.2.2 From 5fe46e9d733f19a880ef7e516002bd4c2b833e14 Mon Sep 17 00:00:00 2001 From: Bian Naimeng Date: Mon, 8 Mar 2010 14:49:01 +0800 Subject: rpc client can not deal with ENOSOCK, so translate it into ENOCONN If NFSv4 client send a request before connect, or the old connection was broken because a ETIMEOUT error catched 
by call_status, ->send_request will return ENOSOCK, but rpc layer can not deal with it, so make sure ->send_request can translate ENOSOCK into ENOCONN. Signed-off-by: Bian Naimeng Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 712412982cee..abb8a589555f 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -549,8 +549,6 @@ static int xs_udp_send_request(struct rpc_task *task) /* Still some bytes left; set up for a retry later. */ status = -EAGAIN; } - if (!transport->sock) - goto out; switch (status) { case -ENOTSOCK: @@ -570,7 +568,7 @@ static int xs_udp_send_request(struct rpc_task *task) * prompts ECONNREFUSED. */ clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); } -out: + return status; } @@ -652,8 +650,6 @@ static int xs_tcp_send_request(struct rpc_task *task) status = -EAGAIN; break; } - if (!transport->sock) - goto out; switch (status) { case -ENOTSOCK: @@ -673,7 +669,7 @@ static int xs_tcp_send_request(struct rpc_task *task) case -ENOTCONN: clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); } -out: + return status; } -- cgit v1.2.2 From 28b2774a0d5852236dab77a4147b8b88548110f1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 8 Mar 2010 11:32:01 -0800 Subject: tcp: Fix tcp_make_synack() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 4957faad (TCPCT part 1g: Responder Cookie => Initiator), part of TCP_COOKIE_TRANSACTION implementation, forgot to correctly size synack skb in case user data must be included. Many thanks to Mika Pentillä for spotting this error. Reported-by: Penttillä Mika Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4a1605d3f909..f181b78f2385 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2395,13 +2395,17 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct tcp_extend_values *xvp = tcp_xv(rvp); struct inet_request_sock *ireq = inet_rsk(req); struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_cookie_values *cvp = tp->cookie_values; struct tcphdr *th; struct sk_buff *skb; struct tcp_md5sig_key *md5; int tcp_header_size; int mss; + int s_data_desired = 0; - skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); + if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) + s_data_desired = cvp->s_data_desired; + skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15 + s_data_desired, 1, GFP_ATOMIC); if (skb == NULL) return NULL; @@ -2457,16 +2461,12 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, TCPCB_FLAG_SYN | TCPCB_FLAG_ACK); if (OPTION_COOKIE_EXTENSION & opts.options) { - const struct tcp_cookie_values *cvp = tp->cookie_values; - - if (cvp != NULL && - cvp->s_data_constant && - cvp->s_data_desired > 0) { - u8 *buf = skb_put(skb, cvp->s_data_desired); + if (s_data_desired) { + u8 *buf = skb_put(skb, s_data_desired); /* copy data directly from the listening socket. 
*/ - memcpy(buf, cvp->s_data_payload, cvp->s_data_desired); - TCP_SKB_CB(skb)->end_seq += cvp->s_data_desired; + memcpy(buf, cvp->s_data_payload, s_data_desired); + TCP_SKB_CB(skb)->end_seq += s_data_desired; } if (opts.hash_size > 0) { -- cgit v1.2.2 From fc0b579168cbe737c83c6b9bbfe265d3ae6baca6 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 8 Mar 2010 12:15:28 -0800 Subject: net/sunrpc: Remove uses of NIPQUAD, use %pI4 Originally submitted Jan 1, 2010 http://patchwork.kernel.org/patch/71221/ Convert NIPQUAD to the %pI4 format extension where possible Convert %02x%02x%02x%02x/NIPQUAD to %08x/ntohl Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/sunrpc/xprtrdma/transport.c | 3 +-- net/sunrpc/xprtsock.c | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 7018eef1dcdd..83d339fd66d5 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -165,8 +165,7 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt) xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; - (void)snprintf(buf, sizeof(buf), "%02x%02x%02x%02x", - NIPQUAD(sin->sin_addr.s_addr)); + (void)snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); (void)snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 3d739e5d15d8..86234bcf2e89 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -297,12 +297,11 @@ static void xs_format_common_peer_addresses(struct rpc_xprt *xprt) switch (sap->sa_family) { case AF_INET: sin = xs_addr_in(xprt); - (void)snprintf(buf, sizeof(buf), "%02x%02x%02x%02x", - NIPQUAD(sin->sin_addr.s_addr)); + snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); break; case AF_INET6: sin6 = xs_addr_in6(xprt); - (void)snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); + snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); break; default: BUG(); -- cgit v1.2.2 From 81160e66cca3d3a16b7d88e0e2dccfc5c76f36f9 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 8 Mar 2010 12:15:59 -0800 Subject: net/sunrpc: Convert (void)snprintf to snprintf (Applies on top of "Remove uses of NIPQUAD, use %pI4") Casts to void of snprintf are most uncommon in kernel source. 9 use casts, 1301 do not. Remove the remaining uses in net/sunrpc/ Signed-off-by: Joe Perches Signed-off-by: David S. 
Miller --- net/sunrpc/xprtrdma/transport.c | 6 +++--- net/sunrpc/xprtsock.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 83d339fd66d5..f96c2fe6137b 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -160,15 +160,15 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt) (void)rpc_ntop(sap, buf, sizeof(buf)); xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); - (void)snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); + snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; - (void)snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); + snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); - (void)snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); + snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); /* netid */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 86234bcf2e89..4f55ab7ec1b1 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -314,10 +314,10 @@ static void xs_format_common_peer_ports(struct rpc_xprt *xprt) struct sockaddr *sap = xs_addr(xprt); char buf[128]; - (void)snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); + snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); - (void)snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); + snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); } -- cgit v1.2.2 From f5c445ed4148434f142be0263a8ad7cb58503e8a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 8 Mar 2010 12:17:04 -0800 Subject: ethtool: Use noinline_for_stack Use self documenting noinline_for_stack instead of duplicated comments. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/ethtool.c | 40 ++++++++-------------------------------- 1 file changed, 8 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 33d2ded50f84..f4cb6b6299d9 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -200,10 +200,7 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr) return dev->ethtool_ops->set_settings(dev, &cmd); } -/* - * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) { struct ethtool_drvinfo info; const struct ethtool_ops *ops = dev->ethtool_ops; @@ -242,10 +239,7 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use return 0; } -/* - * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_get_sset_info(struct net_device *dev, +static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev, void __user *useraddr) { struct ethtool_sset_info info; @@ -305,10 +299,7 @@ out: return ret; } -/* - * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr) { struct ethtool_rxnfc cmd; @@ -321,10 +312,7 @@ static noinline int ethtool_set_rxnfc(struct net_device *dev, void __user *usera return dev->ethtool_ops->set_rxnfc(dev, &cmd); } -/* - * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr) { struct ethtool_rxnfc info; const struct ethtool_ops *ops = dev->ethtool_ops; @@ -396,10 +384,7 @@ static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, list->count++; } -/* - * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr) { struct ethtool_rx_ntuple cmd; const struct ethtool_ops *ops = dev->ethtool_ops; @@ -867,10 +852,7 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr) return ret; } -/* - * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr) { struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE }; @@ -884,10 +866,7 @@ static noinline int ethtool_get_coalesce(struct net_device *dev, void __user *us return 0; } -/* - * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr) { struct ethtool_coalesce coalesce; @@ -1297,10 +1276,7 @@ static int ethtool_set_value(struct net_device *dev, char __user *useraddr, return actor(dev, edata.data); } -/* - * noinline attribute so that gcc 
doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_flash_device(struct net_device *dev, char __user *useraddr) +static noinline_for_stack int ethtool_flash_device(struct net_device *dev, char __user *useraddr) { struct ethtool_flash efl; -- cgit v1.2.2 From d88dca79d3852a3623f606f781e013d61486828a Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 8 Mar 2010 12:20:58 -0800 Subject: tipc: fix endianness on tipc subscriber messages Remove htohl implementation from tipc I was working on forward porting the downstream commits for TIPC and ran accross this one: http://tipc.cslab.ericsson.net/cgi-bin/gitweb.cgi?p=people/allan/tipc.git;a=commitdiff;h=894279b9437b63cbb02405ad5b8e033b51e4e31e I was going to just take it, when I looked closer and noted what it was doing. This is basically a routine to byte swap fields of data in sent/received packets for tipc, dependent upon the receivers guessed endianness of the peer when a connection is established. Asside from just seeming silly to me, it appears to violate the latest RFC draft for tipc: http://tipc.sourceforge.net/doc/draft-spec-tipc-02.txt Which, according to section 4.2 and 4.3.3, requires that all fields of all commands be sent in network byte order. So instead of just taking this patch, instead I'm removing the htohl function and replacing the calls with calls to ntohl in the rx path and htonl in the send path. As part of this fix, I'm also changing the subscr_cancel function, which searches the list of subscribers, using a memcmp of the entire subscriber list, for the entry to tear down. unfortunately it memcmps the entire tipc_subscr structure which has several bits that are private to the local side, so nothing will ever match. section 5.2 of the draft spec indicates the tuple should uniquely identify a subscriber, so convert subscr_cancel to just match on those fields (properly endian swapped). I've tested this using the tipc test suite, and its passed without issue. Signed-off-by: Neil Horman Signed-off-by: David S. Miller --- net/tipc/subscr.c | 57 +++++++++++++++++++++---------------------------------- net/tipc/subscr.h | 2 -- 2 files changed, 22 insertions(+), 37 deletions(-) (limited to 'net') diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index ac91f0dfa144..ff123e56114a 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -75,19 +75,6 @@ struct top_srv { static struct top_srv topsrv = { 0 }; -/** - * htohl - convert value to endianness used by destination - * @in: value to convert - * @swap: non-zero if endianness must be reversed - * - * Returns converted value - */ - -static u32 htohl(u32 in, int swap) -{ - return swap ? 
swab32(in) : in; -} - /** * subscr_send_event - send a message containing a tipc_event to the subscriber * @@ -107,11 +94,11 @@ static void subscr_send_event(struct subscription *sub, msg_sect.iov_base = (void *)&sub->evt; msg_sect.iov_len = sizeof(struct tipc_event); - sub->evt.event = htohl(event, sub->swap); - sub->evt.found_lower = htohl(found_lower, sub->swap); - sub->evt.found_upper = htohl(found_upper, sub->swap); - sub->evt.port.ref = htohl(port_ref, sub->swap); - sub->evt.port.node = htohl(node, sub->swap); + sub->evt.event = htonl(event); + sub->evt.found_lower = htonl(found_lower); + sub->evt.found_upper = htonl(found_upper); + sub->evt.port.ref = htonl(port_ref); + sub->evt.port.node = htonl(node); tipc_send(sub->server_ref, 1, &msg_sect); } @@ -287,16 +274,23 @@ static void subscr_cancel(struct tipc_subscr *s, { struct subscription *sub; struct subscription *sub_temp; + __u32 type, lower, upper; int found = 0; /* Find first matching subscription, exit if not found */ + type = ntohl(s->seq.type); + lower = ntohl(s->seq.lower); + upper = ntohl(s->seq.upper); + list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list, subscription_list) { - if (!memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) { - found = 1; - break; - } + if ((type == sub->seq.type) && + (lower == sub->seq.lower) && + (upper == sub->seq.upper)) { + found = 1; + break; + } } if (!found) return; @@ -325,16 +319,10 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s, struct subscriber *subscriber) { struct subscription *sub; - int swap; - - /* Determine subscriber's endianness */ - - swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE)); /* Detect & process a subscription cancellation request */ - if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) { - s->filter &= ~htohl(TIPC_SUB_CANCEL, swap); + if (ntohl(s->filter) & TIPC_SUB_CANCEL) { subscr_cancel(s, subscriber); return NULL; } @@ -359,11 +347,11 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s, /* Initialize subscription object */ - sub->seq.type = htohl(s->seq.type, swap); - sub->seq.lower = htohl(s->seq.lower, swap); - sub->seq.upper = htohl(s->seq.upper, swap); - sub->timeout = htohl(s->timeout, swap); - sub->filter = htohl(s->filter, swap); + sub->seq.type = ntohl(s->seq.type); + sub->seq.lower = ntohl(s->seq.lower); + sub->seq.upper = ntohl(s->seq.upper); + sub->timeout = ntohl(s->timeout); + sub->filter = ntohl(s->filter); if ((!(sub->filter & TIPC_SUB_PORTS) == !(sub->filter & TIPC_SUB_SERVICE)) || (sub->seq.lower > sub->seq.upper)) { @@ -376,7 +364,6 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s, INIT_LIST_HEAD(&sub->nameseq_list); list_add(&sub->subscription_list, &subscriber->subscription_list); sub->server_ref = subscriber->port_ref; - sub->swap = swap; memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr)); atomic_inc(&topsrv.subscription_count); if (sub->timeout != TIPC_WAIT_FOREVER) { diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index 45d89bf4d202..c20f496d95b2 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -53,7 +53,6 @@ typedef void (*tipc_subscr_event) (struct subscription *sub, * @nameseq_list: adjacent subscriptions in name sequence's subscription list * @subscription_list: adjacent subscriptions in subscriber's subscription list * @server_ref: object reference of server port associated with subscription - * @swap: indicates if subscriber uses opposite endianness in its messages * @evt: template for events generated by subscription */ @@ -66,7 +65,6 
@@ struct subscription { struct list_head nameseq_list; struct list_head subscription_list; u32 server_ref; - int swap; struct tipc_event evt; }; -- cgit v1.2.2 From de5865714621e23d65c52955ca2125dbb074c242 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 8 Mar 2010 12:43:56 -0800 Subject: tipc: filter out messages not intended for this host Port commit 20deb48d16fdd07ce2fdc8d03ea317362217e085 from git://tipc.cslab.ericsson.net/pub/git/people/allan/tipc.git Part of the large effort I'm trying to help with getting all the downstreamed code from windriver forward ported to the upstream tree. Original commit message: Restore check to filter out inadvertently received messages. This patch reimplements a check that allows TIPC to discard messages that are not intended for it. This check was present in TIPC 1.5/1.6, but was removed by accident during the development of TIPC 1.7; it has now been updated to account for new features present in TIPC 1.7 and reinserted into TIPC. The main benefit of this check is to filter out messages arriving from orphaned link endpoints, which can arise when a node exits the network and then re-enters it with a different TIPC network address (i.e. value). Signed-off-by: Neil Horman Originally-authored-by: Allan Stephens Signed-off-by: David S. Miller --- net/tipc/link.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index 6f50f6423f63..1a7e4665af80 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1882,6 +1882,15 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr) (msg_destnode(msg) != tipc_own_addr))) goto cont; + /* Discard non-routeable messages destined for another node */ + + if (unlikely(!msg_isdata(msg) && + (msg_destnode(msg) != tipc_own_addr))) { + if ((msg_user(msg) != CONN_MANAGER) && + (msg_user(msg) != MSG_FRAGMENTER)) + goto cont; + } + /* Locate unicast link endpoint that should handle message */ n_ptr = tipc_node_find(msg_prevnode(msg)); -- cgit v1.2.2 From 62bb2ac5cb6c2f813e151617525ec518e2d1c649 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 17 Feb 2010 21:27:26 +0100 Subject: mac80211: deprecate RX status noise The noise value as is won't be used, isn't filled by most drivers and doesn't really make a whole lot of sense on a per packet basis -- proper cfg80211 survey support in mac80211 will need to be different. Mark the struct member as deprecated so it will be removed from drivers. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/debugfs_sta.c | 2 -- net/mac80211/rx.c | 9 --------- net/mac80211/sta_info.h | 2 -- 3 files changed, 13 deletions(-) (limited to 'net') diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index d92800bb2d2f..23e720034577 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -57,7 +57,6 @@ STA_FILE(tx_filtered, tx_filtered_count, LU); STA_FILE(tx_retry_failed, tx_retry_failed, LU); STA_FILE(tx_retry_count, tx_retry_count, LU); STA_FILE(last_signal, last_signal, D); -STA_FILE(last_noise, last_noise, D); STA_FILE(wep_weak_iv_count, wep_weak_iv_count, LU); static ssize_t sta_flags_read(struct file *file, char __user *userbuf, @@ -289,7 +288,6 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DEBUGFS_ADD(tx_retry_failed); DEBUGFS_ADD(tx_retry_count); DEBUGFS_ADD(last_signal); - DEBUGFS_ADD(last_noise); DEBUGFS_ADD(wep_weak_iv_count); DEBUGFS_ADD(ht_capa); } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index b5c48de81d8b..1da57c8e849a 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -178,14 +178,6 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, pos++; } - /* IEEE80211_RADIOTAP_DBM_ANTNOISE */ - if (local->hw.flags & IEEE80211_HW_NOISE_DBM) { - *pos = status->noise; - rthdr->it_present |= - cpu_to_le32(1 << IEEE80211_RADIOTAP_DBM_ANTNOISE); - pos++; - } - /* IEEE80211_RADIOTAP_LOCK_QUALITY is missing */ /* IEEE80211_RADIOTAP_ANTENNA */ @@ -1077,7 +1069,6 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) sta->rx_fragments++; sta->rx_bytes += rx->skb->len; sta->last_signal = status->signal; - sta->last_noise = status->noise; /* * Change STA power saving mode only at the end of a frame diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 822d84522937..2b635909de5c 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -200,7 +200,6 @@ struct sta_ampdu_mlme { * @rx_fragments: number of received MPDUs * @rx_dropped: number of dropped MPDUs from this STA * @last_signal: signal of last received frame from this STA - * @last_noise: noise of last received frame from this STA * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue) * @tx_filtered_count: number of frames the hardware filtered for this STA * @tx_retry_failed: number of frames that failed retry @@ -267,7 +266,6 @@ struct sta_info { unsigned long rx_fragments; unsigned long rx_dropped; int last_signal; - int last_noise; __le16 last_seq_ctrl[NUM_RX_DATA_QUEUES]; /* Updated from TX status path only, no locking requirements */ -- cgit v1.2.2 From b4d59a9317e41faec3d0b6a03f0454d1e8abb710 Mon Sep 17 00:00:00 2001 From: Bruno Randolf Date: Tue, 23 Feb 2010 18:51:13 +0900 Subject: mac80211: fix rates setup on IBSS merge when an IBSS merge happened, the supported rates for the newly added station were left empty, causing the rate control module to be initialized with only the basic rates. also the section of the ibss code which deals with updating supported rates for an already existing station fails to inform the rate control module about the new rates. as i don't know how to fix this (minstrel does not have an update function), i have just added a comment for now. Signed-off-by: Bruno Randolf Signed-off-by: John W. 
Linville --- net/mac80211/ibss.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index f3e942486749..b840d9072de9 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -276,6 +276,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, (unsigned long long) sta->sta.supp_rates[band]); #endif rcu_read_unlock(); + + /* FIXME: update rate control */ } else { rcu_read_unlock(); ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, @@ -370,6 +372,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, sdata->name, mgmt->bssid); #endif ieee80211_sta_join_ibss(sdata, bss); + supp_rates = ieee80211_sta_get_rates(local, elems, band); ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates, GFP_KERNEL); } -- cgit v1.2.2 From df13cce53a7b28a81460e6bfc4857e9df4956141 Mon Sep 17 00:00:00 2001 From: Helmut Schaa Date: Wed, 24 Feb 2010 14:19:21 +0100 Subject: mac80211: Improve software scan timing The current software scan implementation in mac80211 returns to the operating channel after each scanned channel. However, in some situations (e.g. no traffic) it would be nicer to scan a few channels in a row to speed up the scan itself. Hence, after scanning a channel, check if we have queued up any tx frames and return to the operating channel in that case. Unfortunately we don't know if the AP has buffered any frames for us. Hence, scan only as many channels in a row as the pm_qos latency and the negotiated listen interval allow us to. Signed-off-by: Helmut Schaa Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 1 + net/mac80211/scan.c | 71 ++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 66 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 241533e1bc03..b84126491ab1 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -745,6 +745,7 @@ struct ieee80211_local { int scan_channel_idx; int scan_ies_len; + unsigned long leave_oper_channel_time; enum mac80211_scan_state next_scan_state; struct delayed_work scan_work; struct ieee80211_sub_if_data *scan_sdata; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index b822dce97867..75a85978c3b3 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -14,6 +14,8 @@ #include #include +#include +#include #include #include "ieee80211_i.h" @@ -321,6 +323,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) ieee80211_offchannel_stop_beaconing(local); + local->leave_oper_channel_time = 0; local->next_scan_state = SCAN_DECISION; local->scan_channel_idx = 0; @@ -425,11 +428,28 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, return rc; } +static unsigned long +ieee80211_scan_get_channel_time(struct ieee80211_channel *chan) +{ + /* + * TODO: channel switching also consumes quite some time, + * add that delay as well to get a better estimation + */ + if (chan->flags & IEEE80211_CHAN_PASSIVE_SCAN) + return IEEE80211_PASSIVE_CHANNEL_TIME; + return IEEE80211_PROBE_DELAY + IEEE80211_CHANNEL_TIME; +} + static int ieee80211_scan_state_decision(struct ieee80211_local *local, unsigned long *next_delay) { bool associated = false; + bool tx_empty = true; + bool bad_latency; + bool listen_int_exceeded; + unsigned long min_beacon_int = 0; struct ieee80211_sub_if_data *sdata; + struct ieee80211_channel *next_chan; /* if no more bands/channels left, complete scan and advance to the idle state */
if (local->scan_channel_idx >= local->scan_req->n_channels) { @@ -437,7 +457,11 @@ static int ieee80211_scan_state_decision(struct ieee80211_local *local, return 1; } - /* check if at least one STA interface is associated */ + /* + * check if at least one STA interface is associated, + * check if at least one STA interface has pending tx frames + * and grab the lowest used beacon interval + */ mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) @@ -446,7 +470,16 @@ static int ieee80211_scan_state_decision(struct ieee80211_local *local, if (sdata->vif.type == NL80211_IFTYPE_STATION) { if (sdata->u.mgd.associated) { associated = true; - break; + + if (sdata->vif.bss_conf.beacon_int < + min_beacon_int || min_beacon_int == 0) + min_beacon_int = + sdata->vif.bss_conf.beacon_int; + + if (!qdisc_all_tx_empty(sdata->dev)) { + tx_empty = false; + break; + } } } } @@ -455,11 +488,34 @@ static int ieee80211_scan_state_decision(struct ieee80211_local *local, if (local->scan_channel) { /* * we're currently scanning a different channel, let's - * switch back to the operating channel now if at least - * one interface is associated. Otherwise just scan the - * next channel + * see if we can scan another channel without interfering + * with the current traffic situation. + * + * Since we don't know if the AP has pending frames for us + * we can only check for our tx queues and use the current + * pm_qos requirements for rx. Hence, if no tx traffic occurs + * at all we will scan as many channels in a row as the pm_qos + * latency allows us to. Additionally we also check for the + * currently negotiated listen interval to prevent losing + * frames unnecessarily. + * + * Otherwise switch back to the operating channel. */ - if (associated) + next_chan = local->scan_req->channels[local->scan_channel_idx]; + + bad_latency = time_after(jiffies + + ieee80211_scan_get_channel_time(next_chan), + local->leave_oper_channel_time + + usecs_to_jiffies(pm_qos_requirement(PM_QOS_NETWORK_LATENCY))); + + listen_int_exceeded = time_after(jiffies + + ieee80211_scan_get_channel_time(next_chan), + local->leave_oper_channel_time + + usecs_to_jiffies(min_beacon_int * 1024) * + local->hw.conf.listen_interval); + + if (associated && ( !tx_empty || bad_latency || + listen_int_exceeded)) local->next_scan_state = SCAN_ENTER_OPER_CHANNEL; else local->next_scan_state = SCAN_SET_CHANNEL; @@ -491,6 +547,9 @@ static void ieee80211_scan_state_leave_oper_channel(struct ieee80211_local *loca else *next_delay = HZ / 10; + /* remember when we left the operating channel */ + local->leave_oper_channel_time = jiffies; + /* advance to the next channel to be scanned */ local->next_scan_state = SCAN_SET_CHANNEL; } -- cgit v1.2.2 From fa9029f8c34576e121a4b6ddbbd645081fe50c74 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 25 Feb 2010 15:13:11 +0100 Subject: mac80211: use different MAC addresses for virtual interfaces Drivers can now advertise to cfg80211 that they have multiple MAC addresses reserved for a device, but we don't currently make use of that in mac80211. Change that and assign different addresses to new virtual interfaces (if addresses are available) in order to make it easier for users to use multiple virtual interfaces; they no longer need to always assign a new MAC address manually. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/iface.c | 116 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 114 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 0793d7a8d743..d5571b9420cd 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -815,6 +815,118 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, return 0; } +static void ieee80211_assign_perm_addr(struct ieee80211_local *local, + struct net_device *dev, + enum nl80211_iftype type) +{ + struct ieee80211_sub_if_data *sdata; + u64 mask, start, addr, val, inc; + u8 *m; + u8 tmp_addr[ETH_ALEN]; + int i; + + /* default ... something at least */ + memcpy(dev->perm_addr, local->hw.wiphy->perm_addr, ETH_ALEN); + + if (is_zero_ether_addr(local->hw.wiphy->addr_mask) && + local->hw.wiphy->n_addresses <= 1) + return; + + + mutex_lock(&local->iflist_mtx); + + switch (type) { + case NL80211_IFTYPE_MONITOR: + /* doesn't matter */ + break; + case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_AP_VLAN: + /* match up with an AP interface */ + list_for_each_entry(sdata, &local->interfaces, list) { + if (sdata->vif.type != NL80211_IFTYPE_AP) + continue; + memcpy(dev->perm_addr, sdata->vif.addr, ETH_ALEN); + break; + } + /* keep default if no AP interface present */ + break; + default: + /* assign a new address if possible -- try n_addresses first */ + for (i = 0; i < local->hw.wiphy->n_addresses; i++) { + bool used = false; + + list_for_each_entry(sdata, &local->interfaces, list) { + if (memcmp(local->hw.wiphy->addresses[i].addr, + sdata->vif.addr, ETH_ALEN) == 0) { + used = true; + break; + } + } + + if (!used) { + memcpy(dev->perm_addr, + local->hw.wiphy->addresses[i].addr, + ETH_ALEN); + break; + } + } + + /* try mask if available */ + if (is_zero_ether_addr(local->hw.wiphy->addr_mask)) + break; + + m = local->hw.wiphy->addr_mask; + mask = ((u64)m[0] << 5*8) | ((u64)m[1] << 4*8) | + ((u64)m[2] << 3*8) | ((u64)m[3] << 2*8) | + ((u64)m[4] << 1*8) | ((u64)m[5] << 0*8); + + if (__ffs64(mask) + hweight64(mask) != fls64(mask)) { + /* not a contiguous mask ... not handled now! 
*/ + printk(KERN_DEBUG "not contiguous\n"); + break; + } + + m = local->hw.wiphy->perm_addr; + start = ((u64)m[0] << 5*8) | ((u64)m[1] << 4*8) | + ((u64)m[2] << 3*8) | ((u64)m[3] << 2*8) | + ((u64)m[4] << 1*8) | ((u64)m[5] << 0*8); + + inc = 1ULL<<__ffs64(mask); + val = (start & mask); + addr = (start & ~mask) | (val & mask); + do { + bool used = false; + + tmp_addr[5] = addr >> 0*8; + tmp_addr[4] = addr >> 1*8; + tmp_addr[3] = addr >> 2*8; + tmp_addr[2] = addr >> 3*8; + tmp_addr[1] = addr >> 4*8; + tmp_addr[0] = addr >> 5*8; + + val += inc; + + list_for_each_entry(sdata, &local->interfaces, list) { + if (memcmp(tmp_addr, sdata->vif.addr, + ETH_ALEN) == 0) { + used = true; + break; + } + } + + if (!used) { + memcpy(dev->perm_addr, tmp_addr, ETH_ALEN); + break; + } + addr = (start & ~mask) | (val & mask); + } while (addr != start); + + break; + } + + mutex_unlock(&local->iflist_mtx); +} + int ieee80211_if_add(struct ieee80211_local *local, const char *name, struct net_device **new_dev, enum nl80211_iftype type, struct vif_params *params) @@ -844,8 +956,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, if (ret < 0) goto fail; - memcpy(ndev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN); - memcpy(ndev->perm_addr, ndev->dev_addr, ETH_ALEN); + ieee80211_assign_perm_addr(local, ndev, type); + memcpy(ndev->dev_addr, ndev->perm_addr, ETH_ALEN); SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy)); /* don't use IEEE80211_DEV_TO_SUB_IF because it checks too much */ -- cgit v1.2.2 From bb134d5d9580fc7b945e3bca3c4b263947022966 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 9 Mar 2010 05:55:56 +0000 Subject: tcp: Fix tcp_v4_rcv() Commit d218d111 (tcp: Generalized TTL Security Mechanism) added a bug for TIMEWAIT sockets. We should not test min_ttl for TW sockets. Reported-by: Tetsuo Handa Signed-off-by: Eric Dumazet Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8d51d39ad1bb..70df40980a87 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1651,15 +1651,15 @@ int tcp_v4_rcv(struct sk_buff *skb) if (!sk) goto no_tcp_socket; +process: + if (sk->sk_state == TCP_TIME_WAIT) + goto do_time_wait; + if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); goto discard_and_relse; } -process: - if (sk->sk_state == TCP_TIME_WAIT) - goto do_time_wait; - if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; nf_reset(skb); -- cgit v1.2.2 From 0a141509ede48ac33ef756ac1640f4d3f46fa2db Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 9 Mar 2010 19:40:54 +0000 Subject: net: Annotates neigh_invalidate() Annotates neigh_invalidate() with __releases() and __acquires() for sparse sake. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/neighbour.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index d102f6d9abdc..6cee6434da67 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -771,6 +771,8 @@ static __inline__ int neigh_max_probes(struct neighbour *n) } static void neigh_invalidate(struct neighbour *neigh) + __releases(neigh->lock) + __acquires(neigh->lock) { struct sk_buff *skb; -- cgit v1.2.2 From 3041f5170751e3522aa1bd6e8ca5d98e846720b0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 9 Mar 2010 19:09:08 +0000 Subject: net: Fix dev_mc_add() Commit 6e17d45a (net: add addr len check to dev_mc_add) added a bug in dev_mc_add(), since it can now exit with a lock imbalance. Signed-off-by: Eric Dumazet CC: Jiri Pirko Signed-off-by: David S. Miller --- net/core/dev_mcast.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index fd91569e2394..3dc295beb483 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -97,8 +97,9 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) netif_addr_lock_bh(dev); if (alen != dev->addr_len) - return -EINVAL; - err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl); + err = -EINVAL; + else + err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); -- cgit v1.2.2 From 38a679a52be13d5a0c766597ab823e06688d6e8e Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 6 Mar 2010 18:35:08 +0200 Subject: mac80211: Fix sta_mtx unlocking on insert STA failure path Commit 34e895075e21be3e21e71d6317440d1ee7969ad0 introduced sta_mtx locking into sta_info_insert() (now sta_info_insert_rcu), but forgot to unlock this mutex on one of the error paths. Fix this by adding the missing mutex_unlock() call for the case where STA insert fails due to an entry existing already. This may happen at least in AP mode when a STA roams between two BSSes (vifs). Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- net/mac80211/sta_info.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 211c475f73c6..56422d894351 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -434,6 +434,7 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU) /* check if STA exists already */ if (sta_info_get_bss(sdata, sta->sta.addr)) { spin_unlock_irqrestore(&local->sta_lock, flags); + mutex_unlock(&local->sta_mtx); rcu_read_lock(); err = -EEXIST; goto out_free; -- cgit v1.2.2 From 2a13052fe495948e572839e514e0e0cd236c50b0 Mon Sep 17 00:00:00 2001 From: Juuso Oikarinen Date: Tue, 9 Mar 2010 14:25:02 +0200 Subject: mac80211: Fix (dynamic) power save entry Currently hardware with !IEEE80211_HW_PS_NULLFUNC_STACK and IEEE80211_HW_REPORTS_TX_ACK_STATUS will never enter PSM due to the conditions in the power save entry functions. Fix those conditions. Signed-off-by: Juuso Oikarinen Cc: stable@kernel.org Signed-off-by: John W. 
Linville --- net/mac80211/mlme.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0ab284c32135..be5f723d643a 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -436,10 +436,12 @@ static void ieee80211_enable_ps(struct ieee80211_local *local, if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) ieee80211_send_nullfunc(local, sdata, 1); - if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) { - conf->flags |= IEEE80211_CONF_PS; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); - } + if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) && + (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) + return; + + conf->flags |= IEEE80211_CONF_PS; + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); } } @@ -558,7 +560,8 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) (!(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED))) ieee80211_send_nullfunc(local, sdata, 1); - if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) || + if (!((local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) && + (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)) || (ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)) { ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED; local->hw.conf.flags |= IEEE80211_CONF_PS; -- cgit v1.2.2 From 51f5f8ca446d4c59041b9b6995821e13208897ea Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 10 Mar 2010 17:13:36 +0100 Subject: mac80211: Fix memory leak in ieee80211_if_write() Fix memory leak and use kmalloc() instead of kzalloc() as we are going to overwrite the allocated buffer. Signed-off-by: Eric Dumazet Signed-off-by: John W. Linville --- net/mac80211/debugfs_netdev.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 9affe2cd185f..b4ddb2f83914 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -48,20 +48,24 @@ static ssize_t ieee80211_if_write( ssize_t (*write)(struct ieee80211_sub_if_data *, const char *, int)) { u8 *buf; - ssize_t ret = -ENODEV; + ssize_t ret; - buf = kzalloc(count, GFP_KERNEL); + buf = kmalloc(count, GFP_KERNEL); if (!buf) return -ENOMEM; + ret = -EFAULT; if (copy_from_user(buf, userbuf, count)) - return -EFAULT; + goto freebuf; + ret = -ENODEV; rtnl_lock(); if (sdata->dev->reg_state == NETREG_REGISTERED) ret = (*write)(sdata, buf, count); rtnl_unlock(); +freebuf: + kfree(buf); return ret; } -- cgit v1.2.2 From 6c26361e4be3cf0dad7083e38ca52001a987e3e6 Mon Sep 17 00:00:00 2001 From: "florian@mickler.org" Date: Fri, 26 Feb 2010 12:01:34 +0100 Subject: enhance sysfs rfkill interface This commit introduces two new sysfs knobs. /sys/class/rfkill/rfkill[0-9]+/blocked_hw: (ro) hardblock kill state /sys/class/rfkill/rfkill[0-9]+/blocked_sw: (rw) softblock kill state Signed-off-by: Florian Mickler Signed-off-by: John W. 
Linville --- net/rfkill/core.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) (limited to 'net') diff --git a/net/rfkill/core.c b/net/rfkill/core.c index c218e07e5caf..5f33151d70ae 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -628,6 +628,61 @@ static ssize_t rfkill_persistent_show(struct device *dev, return sprintf(buf, "%d\n", rfkill->persistent); } +static ssize_t rfkill_blocked_hw_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct rfkill *rfkill = to_rfkill(dev); + unsigned long flags; + u32 state; + + spin_lock_irqsave(&rfkill->lock, flags); + state = rfkill->state; + spin_unlock_irqrestore(&rfkill->lock, flags); + + return sprintf(buf, "%d\n", (state & RFKILL_BLOCK_HW) ? 1 : 0 ); +} + +static ssize_t rfkill_blocked_sw_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct rfkill *rfkill = to_rfkill(dev); + unsigned long flags; + u32 state; + + spin_lock_irqsave(&rfkill->lock, flags); + state = rfkill->state; + spin_unlock_irqrestore(&rfkill->lock, flags); + + return sprintf(buf, "%d\n", (state & RFKILL_BLOCK_SW) ? 1 : 0 ); +} + +static ssize_t rfkill_blocked_sw_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct rfkill *rfkill = to_rfkill(dev); + unsigned long state; + int err; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + err = strict_strtoul(buf, 0, &state); + if (err) + return err; + + if (state > 1 ) + return -EINVAL; + + mutex_lock(&rfkill_global_mutex); + rfkill_set_block(rfkill, state); + mutex_unlock(&rfkill_global_mutex); + + return err ?: count; +} + static u8 user_state_from_blocked(unsigned long state) { if (state & RFKILL_BLOCK_HW) @@ -700,6 +755,9 @@ static struct device_attribute rfkill_dev_attrs[] = { __ATTR(persistent, S_IRUGO, rfkill_persistent_show, NULL), __ATTR(state, S_IRUGO|S_IWUSR, rfkill_state_show, rfkill_state_store), __ATTR(claim, S_IRUGO|S_IWUSR, rfkill_claim_show, rfkill_claim_store), + __ATTR(sw, S_IRUGO|S_IWUSR, rfkill_blocked_sw_show, + rfkill_blocked_sw_store), + __ATTR(hw, S_IRUGO, rfkill_blocked_hw_show, NULL), __ATTR_NULL }; -- cgit v1.2.2 From c2ef355bf3ef0b8006b96128726684fba47ac928 Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Thu, 25 Feb 2010 19:18:47 -0500 Subject: mac80211: give warning if building w/out rate ctrl algorithm I discovered that if EMBEDDED=y, one can accidentally build a mac80211 stack and drivers w/ no rate control algorithm. For drivers like RTL8187 that don't supply their own RC algorithms, this will cause ieee80211_register_hw to fail (making the driver unusable). This will tell kconfig to provide a warning if no rate control algorithms have been selected. That'll at least warn the user; users that know that their drivers supply a rate control algorithm can safely ignore the warning, and those who don't know (or who expect to be using multiple drivers) can select a default RC algorithm. Signed-off-by: Andres Salomon Cc: stable@kernel.org Signed-off-by: John W. 
Linville --- net/mac80211/Kconfig | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index a952b7f8c648..334c359da5e8 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -15,8 +15,12 @@ comment "CFG80211 needs to be enabled for MAC80211" if MAC80211 != n +config MAC80211_HAS_RC + def_bool n + config MAC80211_RC_PID bool "PID controller based rate control algorithm" if EMBEDDED + select MAC80211_HAS_RC ---help--- This option enables a TX rate control algorithm for mac80211 that uses a PID controller to select the TX @@ -24,12 +28,14 @@ config MAC80211_RC_PID config MAC80211_RC_MINSTREL bool "Minstrel" if EMBEDDED + select MAC80211_HAS_RC default y ---help--- This option enables the 'minstrel' TX rate control algorithm choice prompt "Default rate control algorithm" + depends on MAC80211_HAS_RC default MAC80211_RC_DEFAULT_MINSTREL ---help--- This option selects the default rate control algorithm @@ -62,6 +68,9 @@ config MAC80211_RC_DEFAULT endif +comment "Some wireless drivers require a rate control algorithm" + depends on MAC80211_HAS_RC=n + config MAC80211_MESH bool "Enable mac80211 mesh networking (pre-802.11s) support" depends on MAC80211 && EXPERIMENTAL -- cgit v1.2.2 From 44ac91ea8450b0e7a27b4a1fd64aefd35a144728 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 1 Mar 2010 22:17:38 +0100 Subject: minstrel: simplify and fix debugfs code This patch cleans up the debugfs read function for the statistics by using simple_read_from_buffer instead of its own semi-broken hack. Also removes a useless member of the minstrel debugfs info struct. Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- net/mac80211/rc80211_minstrel.h | 5 +++++ net/mac80211/rc80211_minstrel_debugfs.c | 35 ++++++--------------------------- 2 files changed, 11 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h index 38bf4168fc3a..9372656f3f5e 100644 --- a/net/mac80211/rc80211_minstrel.h +++ b/net/mac80211/rc80211_minstrel.h @@ -80,6 +80,11 @@ struct minstrel_priv { unsigned int lookaround_rate_mrr; }; +struct minstrel_debugfs_info { + size_t len; + char buf[]; +}; + void minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir); void minstrel_remove_sta_debugfs(void *priv, void *priv_sta); diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c index a715d9454f64..3e83402ece17 100644 --- a/net/mac80211/rc80211_minstrel_debugfs.c +++ b/net/mac80211/rc80211_minstrel_debugfs.c @@ -52,21 +52,15 @@ #include #include "rc80211_minstrel.h" -struct minstrel_stats_info { - struct minstrel_sta_info *mi; - char buf[4096]; - size_t len; -}; - static int minstrel_stats_open(struct inode *inode, struct file *file) { struct minstrel_sta_info *mi = inode->i_private; - struct minstrel_stats_info *ms; + struct minstrel_debugfs_info *ms; unsigned int i, tp, prob, eprob; char *p; - ms = kmalloc(sizeof(*ms), GFP_KERNEL); + ms = kmalloc(sizeof(*ms) + 4096, GFP_KERNEL); if (!ms) return -ENOMEM; @@ -107,35 +101,18 @@ minstrel_stats_open(struct inode *inode, struct file *file) } static ssize_t -minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *o) +minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { - struct minstrel_stats_info *ms; - char *src; + struct minstrel_debugfs_info *ms; ms = file->private_data; - src = ms->buf; - - len = min(len, ms->len); - if (len 
<= *o) - return 0; - - src += *o; - len -= *o; - *o += len; - - if (copy_to_user(buf, src, len)) - return -EFAULT; - - return len; + return simple_read_from_buffer(buf, len, ppos, ms->buf, ms->len); } static int minstrel_stats_release(struct inode *inode, struct file *file) { - struct minstrel_stats_info *ms = file->private_data; - - kfree(ms); - + kfree(file->private_data); return 0; } -- cgit v1.2.2 From eae44756d60c4e938259358090dba5df675dced0 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 1 Mar 2010 22:21:40 +0100 Subject: minstrel: make the rate control ops reusable from another rc implementation This patch makes it possible to reuse the minstrel rate control ops from another rate control module. This is useful in preparing for the new 802.11n implementation of minstrel, which will reuse the old code for legacy stations. Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- net/mac80211/rc80211_minstrel.c | 2 +- net/mac80211/rc80211_minstrel.h | 6 ++++++ net/mac80211/rc80211_minstrel_debugfs.c | 6 +++--- 3 files changed, 10 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 6e5d68b4e427..4926d929fd9f 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -541,7 +541,7 @@ minstrel_free(void *priv) kfree(priv); } -static struct rate_control_ops mac80211_minstrel = { +struct rate_control_ops mac80211_minstrel = { .name = "minstrel", .tx_status = minstrel_tx_status, .get_rate = minstrel_get_rate, diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h index 9372656f3f5e..0f5a83370aa6 100644 --- a/net/mac80211/rc80211_minstrel.h +++ b/net/mac80211/rc80211_minstrel.h @@ -85,7 +85,13 @@ struct minstrel_debugfs_info { char buf[]; }; +extern struct rate_control_ops mac80211_minstrel; void minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir); void minstrel_remove_sta_debugfs(void *priv, void *priv_sta); +/* debugfs */ +int minstrel_stats_open(struct inode *inode, struct file *file); +ssize_t minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppos); +int minstrel_stats_release(struct inode *inode, struct file *file); + #endif diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c index 3e83402ece17..56d0f24957d9 100644 --- a/net/mac80211/rc80211_minstrel_debugfs.c +++ b/net/mac80211/rc80211_minstrel_debugfs.c @@ -52,7 +52,7 @@ #include #include "rc80211_minstrel.h" -static int +int minstrel_stats_open(struct inode *inode, struct file *file) { struct minstrel_sta_info *mi = inode->i_private; @@ -100,7 +100,7 @@ minstrel_stats_open(struct inode *inode, struct file *file) return 0; } -static ssize_t +ssize_t minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { struct minstrel_debugfs_info *ms; @@ -109,7 +109,7 @@ minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppo return simple_read_from_buffer(buf, len, ppos, ms->buf, ms->len); } -static int +int minstrel_stats_release(struct inode *inode, struct file *file) { kfree(file->private_data); -- cgit v1.2.2 From 964ad81cbd933e5fa310faeec1e923c14651284b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 12 Mar 2010 00:00:17 -0800 Subject: ipconfig: Handle devices which take some time to come up. Some network devices, particularly USB ones, take several seconds to fully init and appear in the device list. 
If the user turned ipconfig on, they are using it for NFS root or some other early booting purpose. So it makes no sense to just flat out fail immediately if the device isn't found. It also doesn't make sense to just jack up the initial wait to something crazy like 10 seconds. Instead, poll immediately, and then periodically once a second, waiting for a usable device to appear. Fail after 12 seconds. Signed-off-by: David S. Miller Tested-by: Christian Pellegrin --- net/ipv4/ipconfig.c | 57 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 10a6a604bf32..678909281648 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -187,6 +187,16 @@ struct ic_device { static struct ic_device *ic_first_dev __initdata = NULL;/* List of open device */ static struct net_device *ic_dev __initdata = NULL; /* Selected device */ +static bool __init ic_device_match(struct net_device *dev) +{ + if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) : + (!(dev->flags & IFF_LOOPBACK) && + (dev->flags & (IFF_POINTOPOINT|IFF_BROADCAST)) && + strncmp(dev->name, "dummy", 5))) + return true; + return false; +} + static int __init ic_open_devs(void) { struct ic_device *d, **last; @@ -207,10 +217,7 @@ static int __init ic_open_devs(void) for_each_netdev(&init_net, dev) { if (dev->flags & IFF_LOOPBACK) continue; - if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) : - (!(dev->flags & IFF_LOOPBACK) && - (dev->flags & (IFF_POINTOPOINT|IFF_BROADCAST)) && - strncmp(dev->name, "dummy", 5))) { + if (ic_device_match(dev)) { int able = 0; if (dev->mtu >= 364) able |= IC_BOOTP; @@ -228,7 +235,7 @@ static int __init ic_open_devs(void) } if (!(d = kmalloc(sizeof(struct ic_device), GFP_KERNEL))) { rtnl_unlock(); - return -1; + return -ENOMEM; } d->dev = dev; *last = d; @@ -253,7 +260,7 @@ static int __init ic_open_devs(void) printk(KERN_ERR "IP-Config: Device `%s' not found.\n", user_dev_name); else printk(KERN_ERR "IP-Config: No network devices available.\n"); - return -1; + return -ENODEV; } return 0; } @@ -1303,6 +1310,32 @@ __be32 __init root_nfs_parse_addr(char *name) return addr; } +#define DEVICE_WAIT_MAX 12 /* 12 seconds */ + +static int __init wait_for_devices(void) +{ + int i; + + msleep(CONF_PRE_OPEN); + for (i = 0; i < DEVICE_WAIT_MAX; i++) { + struct net_device *dev; + int found = 0; + + rtnl_lock(); + for_each_netdev(&init_net, dev) { + if (ic_device_match(dev)) { + found = 1; + break; + } + } + rtnl_unlock(); + if (found) + return 0; + ssleep(1); + } + return -ENODEV; +} + /* * IP Autoconfig dispatcher. 
*/ @@ -1313,6 +1346,7 @@ static int __init ip_auto_config(void) #ifdef IPCONFIG_DYNAMIC int retries = CONF_OPEN_RETRIES; #endif + int err; #ifdef CONFIG_PROC_FS proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops); @@ -1325,12 +1359,15 @@ static int __init ip_auto_config(void) #ifdef IPCONFIG_DYNAMIC try_try_again: #endif - /* Give hardware a chance to settle */ - msleep(CONF_PRE_OPEN); + /* Wait for devices to appear */ + err = wait_for_devices(); + if (err) + return err; /* Setup all network devices */ - if (ic_open_devs() < 0) - return -1; + err = ic_open_devs(); + if (err) + return err; /* Give drivers a chance to settle */ ssleep(CONF_POST_OPEN); -- cgit v1.2.2 From 97ee9b0257402f4731b55dfea42f24d26d793ddf Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sat, 6 Mar 2010 04:44:14 +0000 Subject: net/9p: Use the tag name in the config space for identifying mount point This patch uses the tag name in the config space to identify the mount device. The virtio device name depends on the enumeration order of the devices and may not remain the same across multiple boots. So we use the tag name, which is set via a qemu option, to uniquely identify the mount device. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Eric Van Hensbergen --- net/9p/trans_virtio.c | 44 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 0aaed4819379..026775ad391a 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -78,6 +78,12 @@ struct virtio_chan { /* Scatterlist: can be too big for stack. */ struct scatterlist sg[VIRTQUEUE_NUM]; + int tag_len; + /* + * tag name to identify a mount Non-null terminated + */ + char *tag; + struct list_head chan_list; }; @@ -224,6 +230,8 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req) static int p9_virtio_probe(struct virtio_device *vdev) { + __u16 tag_len; + char *tag; int err; struct virtio_chan *chan; @@ -248,6 +256,23 @@ static int p9_virtio_probe(struct virtio_device *vdev) sg_init_table(chan->sg, VIRTQUEUE_NUM); chan->inuse = false; + if (virtio_has_feature(vdev, VIRTIO_9P_MOUNT_TAG)) { + vdev->config->get(vdev, + offsetof(struct virtio_9p_config, tag_len), + &tag_len, sizeof(tag_len)); + } else { + err = -EINVAL; + goto out_free_vq; + } + tag = kmalloc(tag_len, GFP_KERNEL); + if (!tag) { + err = -ENOMEM; + goto out_free_vq; + } + vdev->config->get(vdev, offsetof(struct virtio_9p_config, tag), + tag, tag_len); + chan->tag = tag; + chan->tag_len = tag_len; mutex_lock(&virtio_9p_lock); list_add_tail(&chan->chan_list, &virtio_chan_list); mutex_unlock(&virtio_9p_lock); @@ -284,7 +309,7 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args) mutex_lock(&virtio_9p_lock); list_for_each_entry(chan, &virtio_chan_list, chan_list) { - if (!strcmp(devname, dev_name(&chan->vdev->dev))) { + if (!strncmp(devname, chan->tag, chan->tag_len)) { if (!chan->inuse) { chan->inuse = true; found = 1; @@ -323,6 +348,7 @@ static void p9_virtio_remove(struct virtio_device *vdev) mutex_lock(&virtio_9p_lock); list_del(&chan->chan_list); mutex_unlock(&virtio_9p_lock); + kfree(chan->tag); kfree(chan); } @@ -332,13 +358,19 @@ static struct virtio_device_id id_table[] = { { 0 }, }; +static unsigned int features[] = { + VIRTIO_9P_MOUNT_TAG, +}; + /* The standard "struct lguest_driver": */ static struct virtio_driver p9_virtio_drv = { - .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, - .id_table = id_table, -
.probe = p9_virtio_probe, - .remove = p9_virtio_remove, + .feature_table = features, + .feature_table_size = ARRAY_SIZE(features), + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = p9_virtio_probe, + .remove = p9_virtio_remove, }; static struct p9_trans_module p9_virtio_trans = { -- cgit v1.2.2 From 86c8437383acd85c05ec7c9a004f59fe7ac9821a Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sat, 6 Mar 2010 04:44:15 +0000 Subject: net/9p: Add sysfs mount_tag file for virtio 9P device This adds a new file for the virtio 9P device. The file contains the mount device name that should be used to mount the 9P file system. Ex: the /sys/devices/virtio-pci/virtio1/mount_tag file now contains the tag name to be used to mount the 9P file system. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Eric Van Hensbergen --- net/9p/trans_virtio.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'net') diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 026775ad391a..afde1a89fbb3 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -220,6 +220,20 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req) return 0; } +static ssize_t p9_mount_tag_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct virtio_chan *chan; + struct virtio_device *vdev; + + vdev = dev_to_virtio(dev); + chan = vdev->priv; + + return snprintf(buf, chan->tag_len + 1, "%s", chan->tag); +} + +static DEVICE_ATTR(mount_tag, 0444, p9_mount_tag_show, NULL); + /** * p9_virtio_probe - probe for existence of 9P virtio channels * @vdev: virtio device to probe @@ -273,6 +287,11 @@ static int p9_virtio_probe(struct virtio_device *vdev) tag, tag_len); chan->tag = tag; chan->tag_len = tag_len; + err = sysfs_create_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); + if (err) { + kfree(tag); + goto out_free_vq; + } mutex_lock(&virtio_9p_lock); list_add_tail(&chan->chan_list, &virtio_chan_list); mutex_unlock(&virtio_9p_lock); @@ -348,6 +367,7 @@ static void p9_virtio_remove(struct virtio_device *vdev) mutex_lock(&virtio_9p_lock); list_del(&chan->chan_list); mutex_unlock(&virtio_9p_lock); + sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); kfree(chan->tag); kfree(chan); -- cgit v1.2.2 From 45bc21edb52fa71dbb1324c6f573aa880e95519d Mon Sep 17 00:00:00 2001 From: Sripathi Kodi Date: Mon, 8 Mar 2010 17:33:04 +0000 Subject: 9p: Change the name of new protocol from 9p2010.L to 9p2000.L This patch changes the name of the new 9P protocol from 9p2010.L to 9p2000.L. This is because we learnt that the name 9p2010 is already being used by others.
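For illustration only, here is a minimal userspace sketch of how the renamed version string and the virtio mount tag introduced by the two preceding patches might be used together; the tag "kvm_share" and the mount point "/mnt/9p" are made-up values, not taken from these patches.

/*
 * Illustrative sketch, not part of the patch: mount a virtio 9p share
 * using the renamed protocol version string.  On a real system the tag
 * would be read from the device's mount_tag sysfs file added above.
 */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	const char *tag = "kvm_share";	/* assumed example tag */

	if (mount(tag, "/mnt/9p", "9p", 0,
		  "trans=virtio,version=9p2000.L") < 0) {
		perror("mount");
		return 1;
	}
	return 0;
}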
Signed-off-by: Sripathi Kodi Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index bde9f3d38c57..e3e5bf4469ce 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -60,7 +60,7 @@ static const match_table_t tokens = { inline int p9_is_proto_dotl(struct p9_client *clnt) { - return (clnt->proto_version == p9_proto_2010L); + return (clnt->proto_version == p9_proto_2000L); } EXPORT_SYMBOL(p9_is_proto_dotl); @@ -80,9 +80,9 @@ static unsigned char get_protocol_version(const substring_t *name) } else if (!strncmp("9p2000.u", name->from, name->to-name->from)) { version = p9_proto_2000u; P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2000.u\n"); - } else if (!strncmp("9p2010.L", name->from, name->to-name->from)) { - version = p9_proto_2010L; - P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2010.L\n"); + } else if (!strncmp("9p2000.L", name->from, name->to-name->from)) { + version = p9_proto_2000L; + P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2000.L\n"); } else { P9_DPRINTK(P9_DEBUG_ERROR, "Unknown protocol version %s. ", name->from); @@ -672,9 +672,9 @@ int p9_client_version(struct p9_client *c) c->msize, c->proto_version); switch (c->proto_version) { - case p9_proto_2010L: + case p9_proto_2000L: req = p9_client_rpc(c, P9_TVERSION, "ds", - c->msize, "9P2010.L"); + c->msize, "9P2000.L"); break; case p9_proto_2000u: req = p9_client_rpc(c, P9_TVERSION, "ds", @@ -700,8 +700,8 @@ int p9_client_version(struct p9_client *c) } P9_DPRINTK(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version); - if (!strncmp(version, "9P2010.L", 8)) - c->proto_version = p9_proto_2010L; + if (!strncmp(version, "9P2000.L", 8)) + c->proto_version = p9_proto_2000L; else if (!strncmp(version, "9P2000.u", 8)) c->proto_version = p9_proto_2000u; else if (!strncmp(version, "9P2000", 6)) -- cgit v1.2.2 From e2577a065832f2c6d108de2e027891bdb2d78924 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 13 Mar 2010 12:23:29 -0800 Subject: ipv6: Send netlink notification when DAD fails If we are managing IPv6 addresses using DHCP, it would be nice for user-space to be notified if an address configured through DHCP fails DAD. Otherwise user-space would have to poll to see whether DAD succeeds. This patch uses the existing notification mechanism and simply hooks it into the DAD failure code path. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6cf3ee14ace3..3381b4317c27 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1380,6 +1380,8 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) if (dad_failed) ifp->flags |= IFA_F_DADFAILED; spin_unlock_bh(&ifp->lock); + if (dad_failed) + ipv6_ifa_notify(0, ifp); in6_ifa_put(ifp); #ifdef CONFIG_IPV6_PRIVACY } else if (ifp->flags&IFA_F_TEMPORARY) { -- cgit v1.2.2 From bec68ff1637ca00bb1585a03a7be8a13380084de Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Sat, 13 Mar 2010 12:27:21 -0800 Subject: bridge: ensure to unlock in error path in br_multicast_query(). Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- net/bridge/br_multicast.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 12ce1eaa4f3e..fd96a8dc97f4 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -823,6 +823,7 @@ static int br_multicast_query(struct net_bridge *br, unsigned long max_delay; unsigned long now = jiffies; __be32 group; + int err = 0; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || @@ -841,12 +842,14 @@ static int br_multicast_query(struct net_bridge *br, group = 0; } } else { - if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) - return -EINVAL; + if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) { + err = -EINVAL; + goto out; + } ih3 = igmpv3_query_hdr(skb); if (ih3->nsrcs) - return 0; + goto out; max_delay = ih3->code ? 1 : IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE); @@ -876,7 +879,7 @@ static int br_multicast_query(struct net_bridge *br, out: spin_unlock(&br->multicast_lock); - return 0; + return err; } static void br_multicast_leave_group(struct net_bridge *br, -- cgit v1.2.2 From 3af4974eb2c7867d6e160977195dfde586d0e564 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 3 Feb 2010 17:31:31 +1100 Subject: sunrpc: don't keep expired entries in the auth caches. currently expired entries remain in the auth caches as long as there is a reference. This was needed long ago when the auth_domain cache used the same cache infrastructure. But since that (being a very different sort of cache) was separated, this test is no longer needed. So remove the test on refcnt and tidy up the surrounding code. This allows the cache_dequeue call (which needed to be there to drop a potentially awkward reference) to be moved outside of the spinlock, which is a better place for it. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/cache.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 39bddba53ba1..83592e012585 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -397,31 +397,28 @@ static int cache_clean(void) /* Ok, now to clean this strand */ cp = & current_detail->hash_table[current_index]; - ch = *cp; - for (; ch; cp= & ch->next, ch= *cp) { + for (ch = *cp ; ch ; cp = & ch->next, ch = *cp) { if (current_detail->nextcheck > ch->expiry_time) current_detail->nextcheck = ch->expiry_time+1; if (ch->expiry_time >= get_seconds() && ch->last_refresh >= current_detail->flush_time) continue; - if (test_and_clear_bit(CACHE_PENDING, &ch->flags)) - cache_dequeue(current_detail, ch); - if (atomic_read(&ch->ref.refcount) == 1) - break; - } - if (ch) { *cp = ch->next; ch->next = NULL; current_detail->entries--; rv = 1; + break; } + write_unlock(&current_detail->hash_lock); d = current_detail; if (!ch) current_index ++; spin_unlock(&cache_list_lock); if (ch) { + if (test_and_clear_bit(CACHE_PENDING, &ch->flags)) + cache_dequeue(current_detail, ch); cache_revisit_request(ch); cache_put(ch, d); } -- cgit v1.2.2 From 2f50d8b63dd6e5320a9d223298df19df3502da29 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 3 Feb 2010 17:31:31 +1100 Subject: sunrpc/cache: factor out cache_is_expired This removes a tiny bit of code duplication, but more importantly prepares for the following patch, which will perform the expiry check in cache_lookup and the rest of the validity check in cache_check. Signed-off-by: NeilBrown Signed-off-by: J.
Bruce Fields --- net/sunrpc/cache.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 83592e012585..9826c5ceb995 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -49,6 +49,12 @@ static void cache_init(struct cache_head *h) h->last_refresh = now; } +static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h) +{ + return (h->expiry_time < get_seconds()) || + (detail->flush_time > h->last_refresh); +} + struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, struct cache_head *key, int hash) { @@ -184,9 +190,7 @@ static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h) static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h) { if (!test_bit(CACHE_VALID, &h->flags) || - h->expiry_time < get_seconds()) - return -EAGAIN; - else if (detail->flush_time > h->last_refresh) + cache_is_expired(detail, h)) return -EAGAIN; else { /* entry is valid */ @@ -400,8 +404,7 @@ static int cache_clean(void) for (ch = *cp ; ch ; cp = & ch->next, ch = *cp) { if (current_detail->nextcheck > ch->expiry_time) current_detail->nextcheck = ch->expiry_time+1; - if (ch->expiry_time >= get_seconds() && - ch->last_refresh >= current_detail->flush_time) + if (!cache_is_expired(current_detail, ch)) continue; *cp = ch->next; -- cgit v1.2.2 From d202cce8963d9268ff355a386e20243e8332b308 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 3 Feb 2010 17:31:31 +1100 Subject: sunrpc: never return expired entries in sunrpc_cache_lookup If sunrpc_cache_lookup finds an expired entry, remove it from the cache and return a freshly created non-VALID entry instead. This ensures that we only ever get a usable entry, or an entry that will become usable once an update arrives, i.e. we will never need to repeat the lookup. This allows us to remove the 'is_expired' test from cache_check (i.e. from cache_is_valid). cache_check should never get an expired entry as 'lookup' will never return one. If it does happen - due to inconvenient timing - then just accept it as still valid, it won't be very much past its use-by date. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/cache.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 9826c5ceb995..3e1ef8bf4dc2 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -59,7 +59,7 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, struct cache_head *key, int hash) { struct cache_head **head, **hp; - struct cache_head *new = NULL; + struct cache_head *new = NULL, *freeme = NULL; head = &detail->hash_table[hash]; @@ -68,6 +68,9 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, for (hp=head; *hp != NULL ; hp = &(*hp)->next) { struct cache_head *tmp = *hp; if (detail->match(tmp, key)) { + if (cache_is_expired(detail, tmp)) + /* This entry is expired, we will discard it.
*/ + break; cache_get(tmp); read_unlock(&detail->hash_lock); return tmp; @@ -92,6 +95,13 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, for (hp=head; *hp != NULL ; hp = &(*hp)->next) { struct cache_head *tmp = *hp; if (detail->match(tmp, key)) { + if (cache_is_expired(detail, tmp)) { + *hp = tmp->next; + tmp->next = NULL; + detail->entries --; + freeme = tmp; + break; + } cache_get(tmp); write_unlock(&detail->hash_lock); cache_put(new, detail); @@ -104,6 +114,8 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, cache_get(new); write_unlock(&detail->hash_lock); + if (freeme) + cache_put(freeme, detail); return new; } EXPORT_SYMBOL_GPL(sunrpc_cache_lookup); @@ -189,8 +201,7 @@ static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h) static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h) { - if (!test_bit(CACHE_VALID, &h->flags) || - cache_is_expired(detail, h)) + if (!test_bit(CACHE_VALID, &h->flags)) return -EAGAIN; else { /* entry is valid */ -- cgit v1.2.2 From 1da05f50f6a766c7611102382f85183b4db96c2d Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 15 Mar 2010 18:03:05 +0100 Subject: netfilter: net/netfilter/ipvs/ip_vs_ftp.c: Remove use of NIPQUAD NIPQUAD has very few uses left. Remove this use and make the code have the identical form of the only other use of "%u,%u,%u,%u,%u,%u" in net/ipv4/netfilter/nf_nat_ftp.c Signed-off-by: Joe Perches Acked-by: Simon Horman Signed-off-by: Patrick McHardy --- net/netfilter/ipvs/ip_vs_ftp.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 73f38ea98f25..9f6328303844 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -208,8 +208,14 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, */ from.ip = n_cp->vaddr.ip; port = n_cp->vport; - sprintf(buf, "%u,%u,%u,%u,%u,%u", NIPQUAD(from.ip), - (ntohs(port)>>8)&255, ntohs(port)&255); + snprintf(buf, sizeof(buf), "%u,%u,%u,%u,%u,%u", + ((unsigned char *)&from.ip)[0], + ((unsigned char *)&from.ip)[1], + ((unsigned char *)&from.ip)[2], + ((unsigned char *)&from.ip)[3], + ntohs(port) >> 8, + ntohs(port) & 0xFF); + buf_len = strlen(buf); /* -- cgit v1.2.2 From eaf55530c94cb7adcd320c28ed6c7d463c9a3727 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 11 Mar 2010 16:28:24 +0100 Subject: mac80211: optimize tx status processing When a cooked monitor interface is active, ieee80211_tx_status() generates a radiotap header for every single frame, even if it wasn't injected and thus won't be sent to a monitor interface. This patch reduces cpu utilization by moving the cooked monitor check a bit earlier, before it generates the rtap header. Signed-off-by: Felix Fietkau Signed-off-by: John W. 
Linville --- net/mac80211/status.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 56d5b9a6ec5b..11805a3a626f 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -171,7 +171,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) struct net_device *prev_dev = NULL; struct sta_info *sta, *tmp; int retry_count = -1, i; - bool injected; + bool send_to_cooked; for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { /* the HW cannot have attempted that rate */ @@ -296,11 +296,15 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) /* this was a transmitted frame, but now we want to reuse it */ skb_orphan(skb); + /* Need to make a copy before skb->cb gets cleared */ + send_to_cooked = !!(info->flags & IEEE80211_TX_CTL_INJECTED) || + (type != IEEE80211_FTYPE_DATA); + /* * This is a bit racy but we can avoid a lot of work * with this test... */ - if (!local->monitors && !local->cooked_mntrs) { + if (!local->monitors && (!send_to_cooked || !local->cooked_mntrs)) { dev_kfree_skb(skb); return; } @@ -345,9 +349,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) /* for now report the total retry_count */ rthdr->data_retries = retry_count; - /* Need to make a copy before skb->cb gets cleared */ - injected = !!(info->flags & IEEE80211_TX_CTL_INJECTED); - /* XXX: is this sufficient for BPF? */ skb_set_mac_header(skb, 0); skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -362,8 +363,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) continue; if ((sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) && - !injected && - (type == IEEE80211_FTYPE_DATA)) + !send_to_cooked) continue; if (prev_dev) { -- cgit v1.2.2 From a1ca14ac54675d3bf48d442b5a7b9eba133f1888 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Thu, 11 Mar 2010 12:07:49 +0000 Subject: phonet: use for_each_set_bit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace open-coded loop with for_each_set_bit(). Signed-off-by: Akinobu Mita Cc: "David S. Miller" Signed-off-by: Andrew Morton Acked-by: Rémi Denis-Courmont Signed-off-by: David S. 
Miller --- net/phonet/pn_dev.c | 3 +-- net/phonet/pn_netlink.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index c597cc53a6fb..5c6ae0c701c0 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -107,8 +107,7 @@ static void phonet_device_destroy(struct net_device *dev) if (pnd) { u8 addr; - for (addr = find_first_bit(pnd->addrs, 64); addr < 64; - addr = find_next_bit(pnd->addrs, 64, 1+addr)) + for_each_set_bit(addr, pnd->addrs, 64) phonet_address_notify(RTM_DELADDR, dev, addr); kfree(pnd); } diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 2e6c7eb8e76a..fe2e7088ee07 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -141,8 +141,7 @@ static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) continue; addr_idx = 0; - for (addr = find_first_bit(pnd->addrs, 64); addr < 64; - addr = find_next_bit(pnd->addrs, 64, 1+addr)) { + for_each_set_bit(addr, pnd->addrs, 64) { if (addr_idx++ < addr_start_idx) continue; -- cgit v1.2.2 From d14a0ebda7d3daede1a99c01527affb9ceaa4c22 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 14 Mar 2010 20:13:19 +0000 Subject: net-2.6 [Bug-Fix][dccp]: fix oops caused after failed initialisation dccp: fix panic caused by failed initialisation This fixes a kernel panic reported thanks to Andre Noll: if DCCP is compiled into the kernel and any of the initialisation steps in net/dccp/proto.c:dccp_init() fails, a subsequent attempt to create a SOCK_DCCP socket will panic, since inet{,6}_create() are not prevented from creating DCCP sockets. This patch fixes the problem by propagating a failure in dccp_init() to dccp_v{4,6}_init_net(), and from there to dccp_v{4,6}_init(), so that the DCCP protocol is not made available if its initialisation fails. Signed-off-by: Gerrit Renker Signed-off-by: David S.
Miller --- net/dccp/ipv4.c | 8 ++++---- net/dccp/ipv6.c | 8 ++++---- net/dccp/proto.c | 16 +++++++++------- 3 files changed, 17 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index b195c4feaa0a..4071eaf2b361 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -998,11 +998,11 @@ static struct inet_protosw dccp_v4_protosw = { static int __net_init dccp_v4_init_net(struct net *net) { - int err; + if (dccp_hashinfo.bhash == NULL) + return -ESOCKTNOSUPPORT; - err = inet_ctl_sock_create(&net->dccp.v4_ctl_sk, PF_INET, - SOCK_DCCP, IPPROTO_DCCP, net); - return err; + return inet_ctl_sock_create(&net->dccp.v4_ctl_sk, PF_INET, + SOCK_DCCP, IPPROTO_DCCP, net); } static void __net_exit dccp_v4_exit_net(struct net *net) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 1aec6349e858..af3394df63b7 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1191,11 +1191,11 @@ static struct inet_protosw dccp_v6_protosw = { static int __net_init dccp_v6_init_net(struct net *net) { - int err; + if (dccp_hashinfo.bhash == NULL) + return -ESOCKTNOSUPPORT; - err = inet_ctl_sock_create(&net->dccp.v6_ctl_sk, PF_INET6, - SOCK_DCCP, IPPROTO_DCCP, net); - return err; + return inet_ctl_sock_create(&net->dccp.v6_ctl_sk, PF_INET6, + SOCK_DCCP, IPPROTO_DCCP, net); } static void __net_exit dccp_v6_exit_net(struct net *net) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 0ef7061920c0..aa4cef374fd0 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1036,7 +1036,7 @@ static int __init dccp_init(void) FIELD_SIZEOF(struct sk_buff, cb)); rc = percpu_counter_init(&dccp_orphan_count, 0); if (rc) - goto out; + goto out_fail; rc = -ENOBUFS; inet_hashinfo_init(&dccp_hashinfo); dccp_hashinfo.bind_bucket_cachep = @@ -1125,8 +1125,9 @@ static int __init dccp_init(void) goto out_sysctl_exit; dccp_timestamping_init(); -out: - return rc; + + return 0; + out_sysctl_exit: dccp_sysctl_exit(); out_ackvec_exit: @@ -1135,18 +1136,19 @@ out_free_dccp_mib: dccp_mib_exit(); out_free_dccp_bhash: free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); - dccp_hashinfo.bhash = NULL; out_free_dccp_locks: inet_ehash_locks_free(&dccp_hashinfo); out_free_dccp_ehash: free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order); - dccp_hashinfo.ehash = NULL; out_free_bind_bucket_cachep: kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); - dccp_hashinfo.bind_bucket_cachep = NULL; out_free_percpu: percpu_counter_destroy(&dccp_orphan_count); - goto out; +out_fail: + dccp_hashinfo.bhash = NULL; + dccp_hashinfo.ehash = NULL; + dccp_hashinfo.bind_bucket_cachep = NULL; + return rc; } static void __exit dccp_fini(void) -- cgit v1.2.2 From 0821ec55bb1382dd3b9476dc15d5dbbb19f0c3fd Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Mar 2010 20:38:25 -0700 Subject: bridge: Move NULL mdb check into br_mdb_ip_get Since all callers of br_mdb_ip_get need to check whether the hash table is NULL, this patch moves the check into the function. This fixes the two callers (query/leave handler) that didn't check it. Reported-by: Michael Braun Signed-off-by: Herbert Xu Signed-off-by: David S. 
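The br_mdb_ip_get() change described above (diff below) is the classic "move the precondition into the accessor" refactor: once the lookup helper tolerates a missing table, no caller can forget the check. A minimal illustration, not bridge code:

struct entry;

struct htable {
    struct entry **buckets;
    unsigned int nbuckets;
};

static struct entry *table_lookup(const struct htable *tbl, unsigned int hash)
{
    if (!tbl)                          /* table may not be allocated yet */
        return NULL;
    return tbl->buckets[hash % tbl->nbuckets];
}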
Miller --- net/bridge/br_multicast.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index fd96a8dc97f4..398221e81be5 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -49,22 +49,23 @@ static struct net_bridge_mdb_entry *__br_mdb_ip_get( static struct net_bridge_mdb_entry *br_mdb_ip_get( struct net_bridge_mdb_htable *mdb, __be32 dst) { + if (!mdb) + return NULL; + return __br_mdb_ip_get(mdb, dst, br_ip_hash(mdb, dst)); } struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, struct sk_buff *skb) { - struct net_bridge_mdb_htable *mdb = br->mdb; - - if (!mdb || br->multicast_disabled) + if (br->multicast_disabled) return NULL; switch (skb->protocol) { case htons(ETH_P_IP): if (BR_INPUT_SKB_CB(skb)->igmp) break; - return br_mdb_ip_get(mdb, ip_hdr(skb)->daddr); + return br_mdb_ip_get(br->mdb, ip_hdr(skb)->daddr); } return NULL; -- cgit v1.2.2 From 7f7708f0055e49e331f267700aa8b2ee879f004c Mon Sep 17 00:00:00 2001 From: Michael Braun Date: Tue, 16 Mar 2010 00:26:22 -0700 Subject: bridge: Fix br_forward crash in promiscuous mode From: Michael Braun bridge: Fix br_forward crash in promiscuous mode It's a linux-next kernel from 2010-03-12 on an x86 system and it OOPs in the bridge module in br_pass_frame_up (called by br_handle_frame_finish) because brdev cannot be dereferenced (it's set to a non-null value). Adding some BUG_ON statements revealed that BR_INPUT_SKB_CB(skb)->brdev == br->dev (as set in br_handle_frame_finish first) only holds until br_forward is called. The next call to br_pass_frame_up then fails. Digging deeper it seems that br_forward either frees the skb or passes it to NF_HOOK, which will in turn take care of freeing the skb. The same holds for br_pass_frame_up. So it seems as if two independent skb allocations are required. As far as I can see, commit b33084be192ee1e347d98bb5c9e38a53d98d35e2 ("bridge: Avoid unnecessary clone on forward path") removed skb duplication and so likely causes this crash. This crash does not happen on 2.6.33. I've therefore modified br_forward the same way br_flood has been modified, so that the skb is not freed if skb0 is going to be used, and I can confirm that the attached patch resolves the issue for me. Signed-off-by: Herbert Xu Signed-off-by: David S.
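The br_forward() fix described above (diff below) changes the ownership rule: when the caller still needs the original skb (skb0 is non-NULL), the forwarded copy must be a clone and the original must not be freed. A toy user-space model of that rule, with plain heap buffers standing in for skbs:

#include <stdlib.h>
#include <string.h>

struct buf { size_t len; unsigned char data[256]; };

static struct buf *clone_buf(const struct buf *b)
{
    struct buf *c = malloc(sizeof(*c));
    if (c)
        memcpy(c, b, sizeof(*b));
    return c;
}

static void deliver(struct buf *b)       /* consumes its argument */
{
    free(b);
}

/* keep_original plays the role of the new skb0 parameter */
static void forward(int should_deliver, struct buf *b, int keep_original)
{
    if (should_deliver) {
        struct buf *out = keep_original ? clone_buf(b) : b;
        if (out)
            deliver(out);
        return;
    }
    if (!keep_original)                  /* only free what we actually own */
        free(b);
}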
Miller --- net/bridge/br_forward.c | 14 +++++++++++--- net/bridge/br_input.c | 2 +- net/bridge/br_private.h | 2 +- 3 files changed, 13 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index d61e6f741125..8347916efe88 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -19,6 +19,10 @@ #include #include "br_private.h" +static int deliver_clone(struct net_bridge_port *prev, struct sk_buff *skb, + void (*__packet_hook)(const struct net_bridge_port *p, + struct sk_buff *skb)); + /* Don't forward packets to originating port or forwarding diasabled */ static inline int should_deliver(const struct net_bridge_port *p, const struct sk_buff *skb) @@ -94,14 +98,18 @@ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) } /* called with rcu_read_lock */ -void br_forward(const struct net_bridge_port *to, struct sk_buff *skb) +void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0) { if (should_deliver(to, skb)) { - __br_forward(to, skb); + if (skb0) + deliver_clone(to, skb, __br_forward); + else + __br_forward(to, skb); return; } - kfree_skb(skb); + if (!skb0) + kfree_skb(skb); } static int deliver_clone(struct net_bridge_port *prev, struct sk_buff *skb, diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 53b39851d87d..08a72e63fb8e 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -90,7 +90,7 @@ int br_handle_frame_finish(struct sk_buff *skb) if (skb) { if (dst) - br_forward(dst->dst, skb); + br_forward(dst->dst, skb, skb2); else br_flood_forward(br, skb, skb2); } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index fef0384e3c0b..bfb8feb310a6 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -252,7 +252,7 @@ extern void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb); extern int br_dev_queue_push_xmit(struct sk_buff *skb); extern void br_forward(const struct net_bridge_port *to, - struct sk_buff *skb); + struct sk_buff *skb, struct sk_buff *skb0); extern int br_forward_finish(struct sk_buff *skb); extern void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb); extern void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, -- cgit v1.2.2 From 88393161210493e317ae391696ee8ef463cb3c23 Mon Sep 17 00:00:00 2001 From: Thomas Weber Date: Tue, 16 Mar 2010 11:47:56 +0100 Subject: Fix typos in comments [Ss]ytem => [Ss]ystem udpate => update paramters => parameters orginal => original Signed-off-by: Thomas Weber Acked-by: Randy Dunlap Signed-off-by: Jiri Kosina --- net/wimax/op-rfkill.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wimax/op-rfkill.c b/net/wimax/op-rfkill.c index e978c7136c97..2609e445fe7d 100644 --- a/net/wimax/op-rfkill.c +++ b/net/wimax/op-rfkill.c @@ -43,7 +43,7 @@ * wimax_rfkill() Kernel calling wimax_rfkill() * __wimax_rf_toggle_radio() * - * wimax_rfkill_set_radio_block() RF-Kill subsytem calling + * wimax_rfkill_set_radio_block() RF-Kill subsystem calling * __wimax_rf_toggle_radio() * * __wimax_rf_toggle_radio() -- cgit v1.2.2 From 819386dfc67e770b4a0b59983f7948f8ddaa357e Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Tue, 16 Mar 2010 15:02:35 -0400 Subject: Revert "mac80211: fix rates setup on IBSS merge" I accidentally merged an incomplete version of the patch... This reverts commit b4d59a9317e41faec3d0b6a03f0454d1e8abb710. Signed-off-by: John W. 
Linville --- net/mac80211/ibss.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index b840d9072de9..f3e942486749 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -276,8 +276,6 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, (unsigned long long) sta->sta.supp_rates[band]); #endif rcu_read_unlock(); - - /* FIXME: update rate control */ } else { rcu_read_unlock(); ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, @@ -372,7 +370,6 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, sdata->name, mgmt->bssid); #endif ieee80211_sta_join_ibss(sdata, bss); - supp_rates = ieee80211_sta_get_rates(local, elems, band); ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates, GFP_KERNEL); } -- cgit v1.2.2 From 09a08cff3d13315c948e6aee5cf912f8f1db54e7 Mon Sep 17 00:00:00 2001 From: Bruno Randolf Date: Wed, 3 Mar 2010 18:45:42 +0900 Subject: mac80211: (really) fix rates setup on IBSS merge when an IBSS merge happened, the supported rates for the newly added station were left empty, causing the rate control module to be initialized with only the basic rates. the section of the ibss code which deals with updating supported rates for an already existing station failed to inform the rate control module about the new rates. as both minstrel and pid don't have an update function i just use the init function. also remove unnecessary (unsigned long long) casts and edit debug message. Signed-off-by: Bruno Randolf Signed-off-by: John W. Linville --- net/mac80211/ibss.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index f3e942486749..01974c2510a8 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -264,17 +264,16 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, sta->sta.supp_rates[band] = supp_rates | ieee80211_mandatory_rates(local, band); + if (sta->sta.supp_rates[band] != prev_rates) { #ifdef CONFIG_MAC80211_IBSS_DEBUG - if (sta->sta.supp_rates[band] != prev_rates) printk(KERN_DEBUG "%s: updated supp_rates set " - "for %pM based on beacon info (0x%llx | " - "0x%llx -> 0x%llx)\n", - sdata->name, - sta->sta.addr, - (unsigned long long) prev_rates, - (unsigned long long) supp_rates, - (unsigned long long) sta->sta.supp_rates[band]); + "for %pM based on beacon/probe_response " + "(0x%x -> 0x%x)\n", + sdata->name, sta->sta.addr, + prev_rates, sta->sta.supp_rates[band]); #endif + rate_control_rate_init(sta); + } rcu_read_unlock(); } else { rcu_read_unlock(); @@ -370,6 +369,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, sdata->name, mgmt->bssid); #endif ieee80211_sta_join_ibss(sdata, bss); + supp_rates = ieee80211_sta_get_rates(local, elems, band); ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates, GFP_KERNEL); } -- cgit v1.2.2 From a2f46ee1ba5ee249ce2ca1ee7a7a0ac46529fb4f Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 16 Mar 2010 08:14:33 +0000 Subject: tipc: fix lockdep warning on address assignment So in the forward porting of various tipc packages, I was constantly getting this lockdep warning everytime I used tipc-config to set a network address for the protocol: [ INFO: possible circular locking dependency detected ] 2.6.33 #1 tipc-config/1326 is trying to acquire lock: (ref_table_lock){+.-...}, at: [] tipc_ref_discard+0x53/0xd4 [tipc] but task is already holding lock: 
(&(&entry->lock)->rlock#2){+.-...}, at: [] tipc_ref_lock+0x43/0x63 [tipc] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&(&entry->lock)->rlock#2){+.-...}: [] __lock_acquire+0xb67/0xd0f [] lock_acquire+0xdc/0x102 [] _raw_spin_lock_bh+0x3b/0x6e [] tipc_ref_acquire+0xe8/0x11b [tipc] [] tipc_createport_raw+0x78/0x1b9 [tipc] [] tipc_createport+0x8b/0x125 [tipc] [] tipc_subscr_start+0xce/0x126 [tipc] [] process_signal_queue+0x47/0x7d [tipc] [] tasklet_action+0x8c/0xf4 [] __do_softirq+0xf8/0x1cd [] call_softirq+0x1c/0x30 [] _local_bh_enable_ip+0xb8/0xd7 [] local_bh_enable_ip+0xe/0x10 [] _raw_spin_unlock_bh+0x34/0x39 [] spin_unlock_bh.clone.0+0x15/0x17 [tipc] [] tipc_k_signal+0x8d/0xb1 [tipc] [] tipc_core_start+0x8a/0xad [tipc] [] 0xffffffffa01b1087 [] do_one_initcall+0x72/0x18a [] sys_init_module+0xd8/0x23a [] system_call_fastpath+0x16/0x1b -> #0 (ref_table_lock){+.-...}: [] __lock_acquire+0xa11/0xd0f [] lock_acquire+0xdc/0x102 [] _raw_write_lock_bh+0x3b/0x6e [] tipc_ref_discard+0x53/0xd4 [tipc] [] tipc_deleteport+0x40/0x119 [tipc] [] release+0xeb/0x137 [tipc] [] sock_release+0x1f/0x6f [] sock_close+0x27/0x2b [] __fput+0x12a/0x1df [] fput+0x1a/0x1c [] filp_close+0x68/0x72 [] sys_close+0xad/0xe7 [] system_call_fastpath+0x16/0x1b Finally decided I should fix this. Its a straightforward inversion, tipc_ref_acquire takes two locks in this order: ref_table_lock entry->lock while tipc_deleteport takes them in this order: entry->lock (via tipc_port_lock()) ref_table_lock (via tipc_ref_discard()) when the same entry is referenced, we get the above warning. The fix is equally straightforward. Theres no real relation between the entry->lock and the ref_table_lock (they just are needed at the same time), so move the entry->lock aquisition in tipc_ref_acquire down, after we unlock ref_table_lock (this is safe since the ref_table_lock guards changes to the reference table, and we've already claimed a slot there. I've tested the below fix and confirmed that it clears up the lockdep issue Signed-off-by: Neil Horman CC: Allan Stephens Signed-off-by: David S. 
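The inversion described above (tipc_ref_acquire takes ref_table_lock then entry->lock, tipc_deleteport takes them in the opposite order) and its fix are easy to model in user space with pthread mutexes: claim the slot under the table lock, drop the table lock, and only then take the per-entry lock to fill it in, so the acquire path never waits for an entry lock while holding the table lock. Names below are illustrative, not the tipc API, and the real fix additionally keeps the entry lock held for the caller.

#include <pthread.h>

struct entry {
    pthread_mutex_t lock;              /* stands in for entry->lock */
    unsigned int ref;
    void *object;
};

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int first_free;

static unsigned int acquire_ref(struct entry *table, void *object)
{
    struct entry *e;
    unsigned int ref;

    pthread_mutex_lock(&table_lock);   /* lock A: table bookkeeping only */
    ref = first_free++;                /* the slot is now ours */
    e = &table[ref];
    pthread_mutex_unlock(&table_lock); /* drop A before touching lock B */

    pthread_mutex_lock(&e->lock);      /* lock B is never nested inside A */
    e->ref = ref;
    e->object = object;
    pthread_mutex_unlock(&e->lock);
    return ref;
}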
Miller --- net/tipc/ref.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/tipc/ref.c b/net/tipc/ref.c index 414fc34b8bea..8dea66500cf5 100644 --- a/net/tipc/ref.c +++ b/net/tipc/ref.c @@ -153,11 +153,11 @@ void tipc_ref_table_stop(void) u32 tipc_ref_acquire(void *object, spinlock_t **lock) { - struct reference *entry; u32 index; u32 index_mask; u32 next_plus_upper; u32 ref; + struct reference *entry = NULL; if (!object) { err("Attempt to acquire reference to non-existent object\n"); @@ -175,30 +175,36 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock) index = tipc_ref_table.first_free; entry = &(tipc_ref_table.entries[index]); index_mask = tipc_ref_table.index_mask; - /* take lock in case a previous user of entry still holds it */ - spin_lock_bh(&entry->lock); next_plus_upper = entry->ref; tipc_ref_table.first_free = next_plus_upper & index_mask; ref = (next_plus_upper & ~index_mask) + index; - entry->ref = ref; - entry->object = object; - *lock = &entry->lock; } else if (tipc_ref_table.init_point < tipc_ref_table.capacity) { index = tipc_ref_table.init_point++; entry = &(tipc_ref_table.entries[index]); spin_lock_init(&entry->lock); - spin_lock_bh(&entry->lock); ref = tipc_ref_table.start_mask + index; - entry->ref = ref; - entry->object = object; - *lock = &entry->lock; } else { ref = 0; } write_unlock_bh(&ref_table_lock); + /* + * Grab the lock so no one else can modify this entry + * While we assign its ref value & object pointer + */ + if (entry) { + spin_lock_bh(&entry->lock); + entry->ref = ref; + entry->object = object; + *lock = &entry->lock; + /* + * keep it locked, the caller is responsible + * for unlocking this when they're done with it + */ + } + return ref; } -- cgit v1.2.2 From 21edbb223ed2af88b090e7945af7d91d672e3aa6 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 16 Mar 2010 05:29:54 +0000 Subject: NET: netpoll, fix potential NULL ptr dereference Stanse found that one error path in netpoll_setup dereferences npinfo even though it is NULL. Avoid that by adding new label and go to that instead. Signed-off-by: Jiri Slaby Cc: Daniel Borkmann Cc: David S. Miller Acked-by: chavey@google.com Acked-by: Matt Mackall Signed-off-by: David S. Miller --- net/core/netpoll.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 7aa697253765..d4ec38fa64e6 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -735,7 +735,7 @@ int netpoll_setup(struct netpoll *np) npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL); if (!npinfo) { err = -ENOMEM; - goto release; + goto put; } npinfo->rx_flags = 0; @@ -845,7 +845,7 @@ int netpoll_setup(struct netpoll *np) kfree(npinfo); } - +put: dev_put(ndev); return err; } -- cgit v1.2.2 From 0ba8c9ec25465cd0680b80c0f5836f558e3b972d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?YOSHIFUJI=20Hideaki=20/=20=E5=90=89=E8=97=A4=E8=8B=B1?= =?UTF-8?q?=E6=98=8E?= Date: Mon, 15 Mar 2010 19:27:00 +0000 Subject: bridge br_multicast: Fix handling of Max Response Code in IGMPv3 message. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
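The fix whose diff follows swaps the branches of the max_delay ternary so that a non-zero Max Response Code is actually decoded via IGMPV3_MRC(). For reference, RFC 3376 defines that field as linear below 128 and as a 4-bit mantissa / 3-bit exponent floating-point value above; the sketch below is a re-derivation of that decoding in tenths of a second, not the kernel macro copied verbatim.

#include <stdio.h>

/* IGMPv3 Max Response Code -> Max Response Time, in 1/10 s units */
static unsigned int mrc_to_tenths(unsigned char code)
{
    unsigned int mant, exp;

    if (code < 128)
        return code;                    /* linear range */

    mant = code & 0x0f;                 /* low 4 bits */
    exp  = (code >> 4) & 0x07;          /* next 3 bits (top bit is the flag) */
    return (mant | 0x10) << (exp + 3);
}

int main(void)
{
    printf("%u\n", mrc_to_tenths(100));   /* 100 -> 10.0 seconds */
    printf("%u\n", mrc_to_tenths(0x8f));  /* mant 15, exp 0 -> 248 tenths */
    return 0;
}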
Miller --- net/bridge/br_multicast.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 398221e81be5..19618f25b4c6 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -852,8 +852,8 @@ static int br_multicast_query(struct net_bridge *br, if (ih3->nsrcs) goto out; - max_delay = ih3->code ? 1 : - IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE); + max_delay = ih3->code ? + IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1; } if (!group) -- cgit v1.2.2 From 8440853bb70010f2f8edc26cb0f7af60da8b2c5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?YOSHIFUJI=20Hideaki=20/=20=E5=90=89=E8=97=A4=E8=8B=B1?= =?UTF-8?q?=E6=98=8E?= Date: Mon, 15 Mar 2010 19:26:56 +0000 Subject: bridge br_multicast: Fix skb leakage in error path. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 19618f25b4c6..6980625537ca 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -991,7 +991,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, err = pskb_trim_rcsum(skb2, len); if (err) - return err; + goto err_out; } len -= ip_hdrlen(skb2); @@ -1013,7 +1013,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, case CHECKSUM_NONE: skb2->csum = 0; if (skb_checksum_complete(skb2)) - return -EINVAL; + goto out; } err = 0; @@ -1040,6 +1040,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, out: __skb_push(skb2, offset); +err_out: if (skb2 != skb) kfree_skb(skb2); return err; -- cgit v1.2.2 From 858a18a6a2f74e8f0e5b2e9671d4b74694aba708 Mon Sep 17 00:00:00 2001 From: Vitaliy Gusev Date: Tue, 16 Mar 2010 01:07:51 +0000 Subject: route: Fix caught BUG_ON during rt_secret_rebuild_oneshot() route: Fix caught BUG_ON during rt_secret_rebuild_oneshot() Calling rt_secret_rebuild_oneshot() for the same net namespace is not synchronized, so it can trigger BUG_ON(timer_pending(&net->ipv4.rt_secret_timer)) in add_timer. This issue also affects rt_secret_reschedule(). Thus use mod_timer() instead. Signed-off-by: Vitaliy Gusev Acked-by: Neil Horman Signed-off-by: David S.
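Why mod_timer() rather than del_timer_sync() plus add_timer(): add_timer() asserts that the timer is not already pending, while mod_timer() updates the expiry and (re)activates the timer in one step, so two unsynchronized callers cannot trip the assertion. A deliberately simplified toy model of the two call patterns, not the kernel timer API:

struct toy_timer {
    int pending;
    unsigned long expires;
};

/* removed pattern: the caller must guarantee the timer is not pending;
 * a second caller arriving between del_timer_sync() and this point hits
 * the add_timer() BUG_ON(timer_pending(timer)) */
static void rearm_with_add(struct toy_timer *t, unsigned long when)
{
    t->expires = when;
    t->pending = 1;
}

/* pattern used by the fix: a single re-arm that is valid in either state */
static void rearm_with_mod(struct toy_timer *t, unsigned long when)
{
    t->expires = when;          /* mod_timer() sets the new expiry ... */
    t->pending = 1;             /* ... and activates it as one operation */
}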
Miller --- net/ipv4/route.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d9b40248b97f..a770df2493d2 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -932,10 +932,8 @@ static void rt_secret_rebuild_oneshot(struct net *net) { del_timer_sync(&net->ipv4.rt_secret_timer); rt_cache_invalidate(net); - if (ip_rt_secret_interval) { - net->ipv4.rt_secret_timer.expires += ip_rt_secret_interval; - add_timer(&net->ipv4.rt_secret_timer); - } + if (ip_rt_secret_interval) + mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval); } static void rt_emergency_hash_rebuild(struct net *net) @@ -3103,22 +3101,20 @@ static void rt_secret_reschedule(int old) rtnl_lock(); for_each_net(net) { int deleted = del_timer_sync(&net->ipv4.rt_secret_timer); + long time; if (!new) continue; if (deleted) { - long time = net->ipv4.rt_secret_timer.expires - jiffies; + time = net->ipv4.rt_secret_timer.expires - jiffies; if (time <= 0 || (time += diff) <= 0) time = 0; - - net->ipv4.rt_secret_timer.expires = time; } else - net->ipv4.rt_secret_timer.expires = new; + time = new; - net->ipv4.rt_secret_timer.expires += jiffies; - add_timer(&net->ipv4.rt_secret_timer); + mod_timer(&net->ipv4.rt_secret_timer, jiffies + time); } rtnl_unlock(); } -- cgit v1.2.2 From 32dec5dd0233ebffa9cae25ce7ba6daeb7df4467 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?YOSHIFUJI=20Hideaki=20/=20=E5=90=89=E8=97=A4=E8=8B=B1?= =?UTF-8?q?=E6=98=8E?= Date: Mon, 15 Mar 2010 21:51:18 +0000 Subject: bridge br_multicast: Don't refer to BR_INPUT_SKB_CB(skb)->mrouters_only without IGMP snooping. Without CONFIG_BRIDGE_IGMP_SNOOPING, BR_INPUT_SKB_CB(skb)->mrouters_only is not appropriately initialized, so we can see garbage. A clear option to fix this is to set it even without that config, but we cannot optimize out the branch. Let's introduce a macro that returns value of mrouters_only and let it return 0 without CONFIG_BRIDGE_IGMP_SNOOPING. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
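The macro trick described above (diff below) is a standard way to keep a feature-gated field access branch-free when the feature is compiled out: the accessor becomes the constant 0, so the surrounding if () is dead code and the field itself can be dropped from the control block. A generic sketch with an invented config symbol:

#ifdef CONFIG_FOO_SNOOPING
# define PKT_MROUTERS_ONLY(cb)  ((cb)->mrouters_only)
#else
# define PKT_MROUTERS_ONLY(cb)  (0)   /* branch is optimized away */
#endif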
Miller --- net/bridge/br_device.c | 2 +- net/bridge/br_input.c | 2 +- net/bridge/br_private.h | 8 ++++++++ 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index eb7062d2e9e5..90a9024e5c1e 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -40,7 +40,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) goto out; mdst = br_mdb_get(br, skb); - if (mdst || BR_INPUT_SKB_CB(skb)->mrouters_only) + if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) br_multicast_deliver(mdst, skb); else br_flood_deliver(br, skb); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 08a72e63fb8e..d74d570fc848 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -70,7 +70,7 @@ int br_handle_frame_finish(struct sk_buff *skb) if (is_multicast_ether_addr(dest)) { mdst = br_mdb_get(br, skb); - if (mdst || BR_INPUT_SKB_CB(skb)->mrouters_only) { + if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) { if ((mdst && !hlist_unhashed(&mdst->mglist)) || br_multicast_is_router(br)) skb2 = skb; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index bfb8feb310a6..846d7d1e2075 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -206,12 +206,20 @@ struct net_bridge struct br_input_skb_cb { struct net_device *brdev; +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING int igmp; int mrouters_only; +#endif }; #define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb) +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING +# define BR_INPUT_SKB_CB_MROUTERS_ONLY(__skb) (BR_INPUT_SKB_CB(__skb)->mrouters_only) +#else +# define BR_INPUT_SKB_CB_MROUTERS_ONLY(__skb) (0) +#endif + extern struct notifier_block br_device_notifier; extern const u8 br_group_address[ETH_ALEN]; -- cgit v1.2.2 From 87faf3ccf1c939938600ab57c6c9ed5bd2e5f4cc Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 16 Mar 2010 14:37:47 -0700 Subject: bridge: Make first arg to deliver_clone const. Otherwise we get a warning from the call in br_forward(). Signed-off-by: David S. Miller --- net/bridge/br_forward.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 8347916efe88..8dbec83e50ca 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -19,7 +19,8 @@ #include #include "br_private.h" -static int deliver_clone(struct net_bridge_port *prev, struct sk_buff *skb, +static int deliver_clone(const struct net_bridge_port *prev, + struct sk_buff *skb, void (*__packet_hook)(const struct net_bridge_port *p, struct sk_buff *skb)); @@ -112,7 +113,8 @@ void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk kfree_skb(skb); } -static int deliver_clone(struct net_bridge_port *prev, struct sk_buff *skb, +static int deliver_clone(const struct net_bridge_port *prev, + struct sk_buff *skb, void (*__packet_hook)(const struct net_bridge_port *p, struct sk_buff *skb)) { -- cgit v1.2.2 From 735f61e62611161588123930823af6e6a9fd5c2c Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Thu, 11 Mar 2010 13:49:55 +0000 Subject: RDS: Do not BUG() on error returned from ib_post_send BUGging on a runtime error code should be avoided. This patch also eliminates all other BUG()s that have no real reason to exist. Signed-off-by: Andy Grover Signed-off-by: David S. 
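The direction of the RDS change above (diff below) is worth stating as a pattern: a runtime error reported by a lower layer should tear down the affected connection and let the reconnect logic recover, not assert the whole kernel. A hedged sketch with invented names, not the RDS API:

#include <stdio.h>

struct conn { int id; };

/* stand-ins for the transport call and the recovery hook */
static int hw_post_send(struct conn *c) { (void)c; return -1; }

static void conn_error(struct conn *c, const char *why)
{
    fprintf(stderr, "conn %d: %s, scheduling reconnect\n", c->id, why);
}

static void send_one(struct conn *c)
{
    if (hw_post_send(c)) {
        /* previously a BUG(); a runtime error drops the connection
         * and lets the reconnect logic recover instead */
        conn_error(c, "post_send failed");
        return;
    }
}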
Miller --- net/rds/ib_recv.c | 4 ++-- net/rds/ib_send.c | 7 +++---- net/rds/iw_recv.c | 4 ++-- net/rds/iw_send.c | 3 +-- net/rds/rdma_transport.c | 3 +-- 5 files changed, 9 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 04dc0d3f3c95..c338881eca71 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -468,8 +468,8 @@ static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credi set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); rds_ib_stats_inc(s_ib_ack_send_failure); - /* Need to finesse this later. */ - BUG(); + + rds_ib_conn_error(ic->conn, "sending ack failed\n"); } else rds_ib_stats_inc(s_ib_ack_sent); } diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index a10fab6886d1..f380c3f01256 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -574,8 +574,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, rds_ib_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits); adv_credits += posted; BUG_ON(adv_credits > 255); - } else if (ic->i_rm != rm) - BUG(); + } send = &ic->i_sends[pos]; first = send; @@ -714,8 +713,8 @@ add_header: ic->i_rm = prev->s_rm; prev->s_rm = NULL; } - /* Finesse this later */ - BUG(); + + rds_ib_conn_error(ic->conn, "ib_post_send failed\n"); goto out; } diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c index 54af7d6b92da..337e4e5025e2 100644 --- a/net/rds/iw_recv.c +++ b/net/rds/iw_recv.c @@ -468,8 +468,8 @@ static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credi set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); rds_iw_stats_inc(s_iw_ack_send_failure); - /* Need to finesse this later. */ - BUG(); + + rds_iw_conn_error(ic->conn, "sending ack failed\n"); } else rds_iw_stats_inc(s_iw_ack_sent); } diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c index 1379e9d66a78..52182ff7519e 100644 --- a/net/rds/iw_send.c +++ b/net/rds/iw_send.c @@ -616,8 +616,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, rds_iw_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits); adv_credits += posted; BUG_ON(adv_credits > 255); - } else if (ic->i_rm != rm) - BUG(); + } send = &ic->i_sends[pos]; first = send; diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index 9ece910ea394..31f9c72b8d76 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -109,8 +109,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, default: /* things like device disconnect? */ - printk(KERN_ERR "unknown event %u\n", event->event); - BUG(); + printk(KERN_ERR "RDS: unknown event %u!\n", event->event); break; } -- cgit v1.2.2 From 1123fd734df6ad82373a5a27f0f2ed3115555b9d Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Thu, 11 Mar 2010 13:49:56 +0000 Subject: RDS: sendmsg() should check sndtimeo, not rcvtimeo Most likely cut n paste error - sendmsg() was checking sock_rcvtimeo. Signed-off-by: Andy Grover Signed-off-by: David S. 
Miller --- net/rds/send.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/rds/send.c b/net/rds/send.c index b2fccfc20769..ad2e46947c8a 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -815,7 +815,7 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, int ret = 0; int queued = 0, allocated_mr = 0; int nonblock = msg->msg_flags & MSG_DONTWAIT; - long timeo = sock_rcvtimeo(sk, nonblock); + long timeo = sock_sndtimeo(sk, nonblock); /* Mirror Linux UDP mirror of BSD error message compatibility */ /* XXX: Perhaps MSG_MORE someday */ -- cgit v1.2.2 From b075cfdb666d6fa90c55c8619186398a3c4fd865 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Thu, 11 Mar 2010 13:49:57 +0000 Subject: RDS: update copy_to_user state in tcp transport Other transports use rds_page_copy_user, which updates our s_copy_to_user counter. TCP doesn't, so it needs to explicity call rds_stats_add(). Reported-by: Richard Frank Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/tcp_recv.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index c00dafffbb5a..40bfcf887465 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -97,6 +97,7 @@ int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov, goto out; } + rds_stats_add(s_copy_to_user, to_copy); size -= to_copy; ret += to_copy; skb_off += to_copy; -- cgit v1.2.2 From 8e82376e5f72bb576504c8c6117685e56c1b97db Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Thu, 11 Mar 2010 13:49:58 +0000 Subject: RDS/TCP: Wait to wake thread when write space available Instead of waking the send thread whenever any send space is available, wait until it is at least half empty. This is modeled on how sock_def_write_space() does it, and may help to minimize context switches. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/tcp_send.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 34fdcc059e54..a28b895ff0d1 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -240,7 +240,9 @@ void rds_tcp_write_space(struct sock *sk) tc->t_last_seen_una = rds_tcp_snd_una(tc); rds_send_drop_acked(conn, rds_tcp_snd_una(tc), rds_tcp_is_acked); - queue_delayed_work(rds_wq, &conn->c_send_w, 0); + if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) + queue_delayed_work(rds_wq, &conn->c_send_w, 0); + out: read_unlock(&sk->sk_callback_lock); -- cgit v1.2.2 From 2e7b3b994529d4760231a45a6b88950187bda877 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Thu, 11 Mar 2010 13:49:59 +0000 Subject: RDS: Fix congestion issues for loopback We have two kinds of loopback: software (via loop transport) and hardware (via IB). sw is used for 127.0.0.1, and doesn't support rdma ops. hw is used for sends to local device IPs, and supports rdma. Both are used in different cases. For both of these, when there is a congestion map update, we want to call rds_cong_map_updated() but not actually send anything -- since loopback local and foreign congestion maps point to the same spot, they're already in sync. The old code never called sw loop's xmit_cong_map(),so rds_cong_map_updated() wasn't being called for it. sw loop ports would not work right with the congestion monitor. Fixing that meant that hw loopback now would send congestion maps to itself. This is also undesirable (racy), so we check for this case in the ib-specific xmit code. 
Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/cong.c | 2 -- net/rds/ib_send.c | 7 +++++++ net/rds/loop.c | 7 ------- 3 files changed, 7 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/rds/cong.c b/net/rds/cong.c index 6d06cac2649c..dd2711df640b 100644 --- a/net/rds/cong.c +++ b/net/rds/cong.c @@ -218,8 +218,6 @@ void rds_cong_queue_updates(struct rds_cong_map *map) spin_lock_irqsave(&rds_cong_lock, flags); list_for_each_entry(conn, &map->m_conn_list, c_map_item) { - if (conn->c_loopback) - continue; if (!test_and_set_bit(0, &conn->c_map_queued)) { rds_stats_inc(s_cong_update_queued); queue_delayed_work(rds_wq, &conn->c_send_w, 0); diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index f380c3f01256..c18228aec779 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -482,6 +482,13 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, BUG_ON(off % RDS_FRAG_SIZE); BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header)); + /* Do not send cong updates to IB loopback */ + if (conn->c_loopback + && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { + rds_cong_map_updated(conn->c_fcong, ~(u64) 0); + return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; + } + /* FIXME we may overallocate here */ if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) i = 1; diff --git a/net/rds/loop.c b/net/rds/loop.c index 4a61997f554d..93a45f1ce61f 100644 --- a/net/rds/loop.c +++ b/net/rds/loop.c @@ -80,16 +80,9 @@ static int rds_loop_xmit_cong_map(struct rds_connection *conn, struct rds_cong_map *map, unsigned long offset) { - unsigned long i; - BUG_ON(offset); BUG_ON(map != conn->c_lcong); - for (i = 0; i < RDS_CONG_MAP_PAGES; i++) { - memcpy((void *)conn->c_fcong->m_page_addrs[i], - (void *)map->m_page_addrs[i], PAGE_SIZE); - } - rds_cong_map_updated(conn->c_fcong, ~(u64) 0); return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; -- cgit v1.2.2 From 048c15e641289d902f7ef9f1241068d8045e210c Mon Sep 17 00:00:00 2001 From: Tina Yang Date: Thu, 11 Mar 2010 13:50:00 +0000 Subject: RDS: Fix send locking issue Fix a deadlock between rds_rdma_send_complete() and rds_send_remove_from_sock() when rds socket lock and rds message lock are acquired out-of-order. Signed-off-by: Tina Yang Signed-off-by: Andy Grover Signed-off-by: David S. 
Miller --- net/rds/send.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/rds/send.c b/net/rds/send.c index ad2e46947c8a..1a2ef24fbc94 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -533,14 +533,13 @@ void rds_send_remove_from_sock(struct list_head *messages, int status) if (rs != rm->m_rs) { if (rs) { - spin_unlock(&rs->rs_lock); rds_wake_sk_sleep(rs); sock_put(rds_rs_to_sk(rs)); } rs = rm->m_rs; - spin_lock(&rs->rs_lock); sock_hold(rds_rs_to_sk(rs)); } + spin_lock(&rs->rs_lock); if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) { struct rds_rdma_op *ro = rm->m_rdma_op; @@ -560,6 +559,7 @@ void rds_send_remove_from_sock(struct list_head *messages, int status) rds_message_put(rm); rm->m_rs = NULL; } + spin_unlock(&rs->rs_lock); unlock_and_drop: spin_unlock(&rm->m_rs_lock); @@ -567,7 +567,6 @@ unlock_and_drop: } if (rs) { - spin_unlock(&rs->rs_lock); rds_wake_sk_sleep(rs); sock_put(rds_rs_to_sk(rs)); } -- cgit v1.2.2 From 571c02fa81e43ebb4b793f626d6c7bf0fa18902b Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Thu, 11 Mar 2010 13:50:01 +0000 Subject: RDS: Workaround for in-use MRs on close causing crash if a machine is shut down without closing sockets properly, and freeing all MRs, then a BUG_ON will bring it down. This patch changes these to WARN_ONs -- leaking MRs is not fatal (although not ideal, and there is more work to do here for a proper fix.) Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/ib_rdma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 4b0da865a72c..65e668defe42 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -234,8 +234,8 @@ void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool) { flush_workqueue(rds_wq); rds_ib_flush_mr_pool(pool, 1); - BUG_ON(atomic_read(&pool->item_count)); - BUG_ON(atomic_read(&pool->free_pinned)); + WARN_ON(atomic_read(&pool->item_count)); + WARN_ON(atomic_read(&pool->free_pinned)); kfree(pool); } -- cgit v1.2.2 From 97069788d6784ac92d050557a02f6e7bf4d1f53d Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Thu, 11 Mar 2010 13:50:02 +0000 Subject: RDS: Turn down alarming reconnect messages RDS's error messages when a connection goes down are a little extreme. A connection may go down, and it will be re-established, and everything is fine. This patch links these messages through rdsdebug(), instead of to printk directly. Signed-off-by: Andy Grover Signed-off-by: David S. 
Miller --- net/rds/ib_cm.c | 3 ++- net/rds/iw_cm.c | 4 +++- net/rds/rdma_transport.c | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 647cb8ffc39b..e1f124bf03bb 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -203,9 +203,10 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data) rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST); break; default: - rds_ib_conn_error(conn, "RDS/IB: Fatal QP Event %u " + rdsdebug("Fatal QP Event %u " "- connection %pI4->%pI4, reconnecting\n", event->event, &conn->c_laddr, &conn->c_faddr); + rds_conn_drop(conn); break; } } diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index 394cf6b4d0aa..6bc638fd252c 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -156,9 +156,11 @@ static void rds_iw_qp_event_handler(struct ib_event *event, void *data) case IB_EVENT_QP_REQ_ERR: case IB_EVENT_QP_FATAL: default: - rds_iw_conn_error(conn, "RDS/IW: Fatal QP Event %u - connection %pI4->%pI4...reconnecting\n", + rdsdebug("Fatal QP Event %u " + "- connection %pI4->%pI4, reconnecting\n", event->event, &conn->c_laddr, &conn->c_faddr); + rds_conn_drop(conn); break; } } diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index 31f9c72b8d76..5ea82fc47c3e 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -101,7 +101,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, break; case RDMA_CM_EVENT_DISCONNECTED: - printk(KERN_WARNING "RDS/RDMA: DISCONNECT event - dropping connection " + rdsdebug("DISCONNECT event - dropping connection " "%pI4->%pI4\n", &conn->c_laddr, &conn->c_faddr); rds_conn_drop(conn); -- cgit v1.2.2 From 550a8002e4340eaf3bc333e33b59427e9c20272d Mon Sep 17 00:00:00 2001 From: Tina Yang Date: Thu, 11 Mar 2010 13:50:03 +0000 Subject: RDS: Fix locking in rds_send_drop_to() It seems rds_send_drop_to() called __rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED) with only rds_sock lock, but not rds_message lock. It raced with other threads that is attempting to modify the rds_message as well, such as from within rds_rdma_send_complete(). Signed-off-by: Tina Yang Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/send.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/rds/send.c b/net/rds/send.c index 1a2ef24fbc94..192a480076d0 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -632,9 +632,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) list_move(&rm->m_sock_item, &list); rds_send_sndbuf_remove(rs, rm); clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags); - - /* If this is a RDMA operation, notify the app. */ - __rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED); } /* order flag updates with the rs lock */ @@ -643,9 +640,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) spin_unlock_irqrestore(&rs->rs_lock, flags); - if (wake) - rds_wake_sk_sleep(rs); - conn = NULL; /* now remove the messages from the conn list as needed */ @@ -653,6 +647,10 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) /* We do this here rather than in the loop above, so that * we don't have to nest m_rs_lock under rs->rs_lock */ spin_lock_irqsave(&rm->m_rs_lock, flags2); + /* If this is a RDMA operation, notify the app. 
*/ + spin_lock(&rs->rs_lock); + __rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED); + spin_unlock(&rs->rs_lock); rm->m_rs = NULL; spin_unlock_irqrestore(&rm->m_rs_lock, flags2); @@ -681,6 +679,9 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) if (conn) spin_unlock_irqrestore(&conn->c_lock, flags); + if (wake) + rds_wake_sk_sleep(rs); + while (!list_empty(&list)) { rm = list_entry(list.next, struct rds_message, m_sock_item); list_del_init(&rm->m_sock_item); -- cgit v1.2.2 From b98ba52f96e7cdb4dbe2b06bced83d95d94c9d02 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Thu, 11 Mar 2010 13:50:04 +0000 Subject: RDS: only put sockets that have seen congestion on the poll_waitq rds_poll_waitq's listeners will be awoken if we receive a congestion notification. Bad performance may result because *all* polled sockets contend for this single lock. However, it should not be necessary to wake pollers when a congestion update arrives if they have never experienced congestion, and not putting these on the waitq will hopefully greatly reduce contention. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/af_rds.c | 7 ++++++- net/rds/rds.h | 2 ++ net/rds/send.c | 4 +++- 3 files changed, 11 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 853c52be781f..937ecda4abe7 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -159,7 +159,8 @@ static unsigned int rds_poll(struct file *file, struct socket *sock, poll_wait(file, sk->sk_sleep, wait); - poll_wait(file, &rds_poll_waitq, wait); + if (rs->rs_seen_congestion) + poll_wait(file, &rds_poll_waitq, wait); read_lock_irqsave(&rs->rs_recv_lock, flags); if (!rs->rs_cong_monitor) { @@ -181,6 +182,10 @@ static unsigned int rds_poll(struct file *file, struct socket *sock, mask |= (POLLOUT | POLLWRNORM); read_unlock_irqrestore(&rs->rs_recv_lock, flags); + /* clear state any time we wake a seen-congested socket */ + if (mask) + rs->rs_seen_congestion = 0; + return mask; } diff --git a/net/rds/rds.h b/net/rds/rds.h index 85d6f897ecc7..4bec6e2ed495 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -388,6 +388,8 @@ struct rds_sock { /* flag indicating we were congested or not */ int rs_congested; + /* seen congestion (ENOBUFS) when sending? */ + int rs_seen_congestion; /* rs_lock protects all these adjacent members before the newline */ spinlock_t rs_lock; diff --git a/net/rds/send.c b/net/rds/send.c index 192a480076d0..51e2def50b12 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -894,8 +894,10 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, queue_delayed_work(rds_wq, &conn->c_conn_w, 0); ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs); - if (ret) + if (ret) { + rs->rs_seen_congestion = 1; goto out; + } while (!rds_send_queue_rm(rs, conn, rm, rs->rs_bound_port, dport, &queued)) { -- cgit v1.2.2 From 450d06c0208ad195ccd74a7edd11321e316791ad Mon Sep 17 00:00:00 2001 From: Sherman Pun Date: Thu, 11 Mar 2010 13:50:05 +0000 Subject: RDS: Properly unmap when getting a remote access error If the RDMA op has aborted with a remote access error, in addition to what we already do (tell userspace it has completed with an error) also unmap it and put() the rm. Otherwise, hangs may occur on arches that track maps and will not exit without proper cleanup. Signed-off-by: Andy Grover Signed-off-by: David S. 
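The af_rds poll() change shown a little earlier above (rs_seen_congestion) reduces contention on the single global rds_poll_waitq by parking a socket there only after it has actually hit congestion, and by clearing the marker once the socket next reports readiness. A toy outline of that shape, not the RDS API:

#include <stdbool.h>

struct toy_sock {
    bool seen_congestion;        /* set when a send failed with ENOBUFS */
};

static unsigned int toy_poll(struct toy_sock *s,
                             void (*wait_private)(void),
                             void (*wait_shared_cong_queue)(void),
                             unsigned int ready_mask)
{
    wait_private();                      /* every socket: its own waitqueue */
    if (s->seen_congestion)
        wait_shared_cong_queue();        /* contended queue only when needed */

    if (ready_mask)
        s->seen_congestion = false;      /* reset once we report readiness */
    return ready_mask;
}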
Miller --- net/rds/ib_send.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index c18228aec779..17fa80803ab0 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -243,8 +243,12 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) struct rds_message *rm; rm = rds_send_get_message(conn, send->s_op); - if (rm) + if (rm) { + if (rm->m_rdma_op) + rds_ib_send_unmap_rdma(ic, rm->m_rdma_op); rds_ib_send_rdma_complete(rm, wc.status); + rds_message_put(rm); + } } oldest = (oldest + 1) % ic->i_send_ring.w_nr; -- cgit v1.2.2 From 561c7df63e259203515509a7ad075382a42bff0c Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Thu, 11 Mar 2010 13:50:06 +0000 Subject: RDS: Do not call set_page_dirty() with irqs off set_page_dirty() unconditionally re-enables interrupts, so if we call it with irqs off, they will be on after the call, and that's bad. This patch moves the call after we've re-enabled interrupts in send_drop_to(), so it's safe. Also, add BUG_ONs to let us know if we ever do call set_page_dirty with interrupts off. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/ib_rdma.c | 1 + net/rds/rdma.c | 4 +++- net/rds/send.c | 14 ++++++++------ 3 files changed, 12 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 65e668defe42..cfb1d904ed00 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -440,6 +440,7 @@ static void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr) /* FIXME we need a way to tell a r/w MR * from a r/o MR */ + BUG_ON(in_interrupt()); set_page_dirty(page); put_page(page); } diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 4c64daa1f5d5..61b359d9dffd 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -438,8 +438,10 @@ void rds_rdma_free_op(struct rds_rdma_op *ro) /* Mark page dirty if it was possibly modified, which * is the case for a RDMA_READ which copies from remote * to local memory */ - if (!ro->r_write) + if (!ro->r_write) { + BUG_ON(in_interrupt()); set_page_dirty(page); + } put_page(page); } diff --git a/net/rds/send.c b/net/rds/send.c index 51e2def50b12..4629a0b63bbd 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -507,12 +507,13 @@ EXPORT_SYMBOL_GPL(rds_send_get_message); */ void rds_send_remove_from_sock(struct list_head *messages, int status) { - unsigned long flags = 0; /* silence gcc :P */ + unsigned long flags; struct rds_sock *rs = NULL; struct rds_message *rm; - local_irq_save(flags); while (!list_empty(messages)) { + int was_on_sock = 0; + rm = list_entry(messages->next, struct rds_message, m_conn_item); list_del_init(&rm->m_conn_item); @@ -527,7 +528,7 @@ void rds_send_remove_from_sock(struct list_head *messages, int status) * while we're messing with it. It does not prevent the * message from being removed from the socket, though. 
*/ - spin_lock(&rm->m_rs_lock); + spin_lock_irqsave(&rm->m_rs_lock, flags); if (!test_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) goto unlock_and_drop; @@ -556,21 +557,22 @@ void rds_send_remove_from_sock(struct list_head *messages, int status) notifier->n_status = status; rm->m_rdma_op->r_notifier = NULL; } - rds_message_put(rm); + was_on_sock = 1; rm->m_rs = NULL; } spin_unlock(&rs->rs_lock); unlock_and_drop: - spin_unlock(&rm->m_rs_lock); + spin_unlock_irqrestore(&rm->m_rs_lock, flags); rds_message_put(rm); + if (was_on_sock) + rds_message_put(rm); } if (rs) { rds_wake_sk_sleep(rs); sock_put(rds_rs_to_sk(rs)); } - local_irq_restore(flags); } /* -- cgit v1.2.2 From 768bbedf9ca4cc4784eae2003f37abe0818fe0b0 Mon Sep 17 00:00:00 2001 From: Tina Yang Date: Thu, 11 Mar 2010 13:50:07 +0000 Subject: RDS: Enable per-cpu workqueue threads Create per-cpu workqueue threads instead of a single krdsd thread. This is a step towards better scalability. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/threads.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/rds/threads.c b/net/rds/threads.c index 00fa10e59af8..786c20eaaf5e 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -259,7 +259,7 @@ void rds_threads_exit(void) int __init rds_threads_init(void) { - rds_wq = create_singlethread_workqueue("krdsd"); + rds_wq = create_workqueue("krdsd"); if (rds_wq == NULL) return -ENOMEM; -- cgit v1.2.2 From 0a9627f2649a02bea165cfd529d7bcb625c2fcad Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 16 Mar 2010 08:03:29 +0000 Subject: rps: Receive Packet Steering This patch implements software receive side packet steering (RPS). RPS distributes the load of received packet processing across multiple CPUs. Problem statement: Protocol processing done in the NAPI context for received packets is serialized per device queue and becomes a bottleneck under high packet load. This substantially limits pps that can be achieved on a single queue NIC and provides no scaling with multiple cores. This solution queues packets early on in the receive path on the backlog queues of other CPUs. This allows protocol processing (e.g. IP and TCP) to be performed on packets in parallel. For each device (or each receive queue in a multi-queue device) a mask of CPUs is set to indicate the CPUs that can process packets. A CPU is selected on a per packet basis by hashing contents of the packet header (e.g. the TCP or UDP 4-tuple) and using the result to index into the CPU mask. The IPI mechanism is used to raise networking receive softirqs between CPUs. This effectively emulates in software what a multi-queue NIC can provide, but is generic requiring no device support. Many devices now provide a hash over the 4-tuple on a per packet basis (e.g. the Toeplitz hash). This patch allow drivers to set the HW reported hash in an skb field, and that value in turn is used to index into the RPS maps. Using the HW generated hash can avoid cache misses on the packet when steering it to a remote CPU. The CPU mask is set on a per device and per queue basis in the sysfs variable /sys/class/net//queues/rx-/rps_cpus. This is a set of canonical bit maps for receive queues in the device (numbered by ). If a device does not support multi-queue, a single variable is used for the device (rx-0). Generally, we have found this technique increases pps capabilities of a single queue device with good CPU utilization. Optimal settings for the CPU mask seem to depend on architectures and cache hierarcy. 
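The per-packet selection described above, hash the 4-tuple and use the hash to index the per-queue CPU list, fits in a few lines of user-space C. In the sketch below the toy hash stands in for the kernel's jhash, and the multiply-and-shift maps a 32-bit hash onto the list without a division (the same construct appears in get_rps_cpu() and skb_tx_hash() in the diff that follows).

#include <stdint.h>
#include <stdio.h>

/* toy flow hash, standing in for jhash_3words() */
static uint32_t toy_flow_hash(uint32_t saddr, uint32_t daddr, uint32_t ports)
{
    uint32_t h = saddr ^ (daddr * 2654435761u) ^ (ports * 0x9e3779b9u);
    h ^= h >> 16;
    return h ? h : 1;            /* 0 is reserved for "no hash computed" */
}

/* scale a 32-bit hash onto [0, len) with a 64-bit multiply, no modulo */
static uint16_t pick_cpu(uint32_t hash, const uint16_t *cpus, uint32_t len)
{
    return cpus[((uint64_t)hash * len) >> 32];
}

int main(void)
{
    uint16_t rps_map[] = { 0, 2, 4, 6 };     /* CPUs allowed for this queue */
    uint32_t h = toy_flow_hash(0x0a000001, 0x0a000002, (5000u << 16) | 80);

    printf("steer flow to cpu %u\n", pick_cpu(h, rps_map, 4));
    return 0;
}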
Below are some results running 500 instances of netperf TCP_RR test with 1 byte req. and resp. Results show cumulative transaction rate and system CPU utilization. e1000e on 8 core Intel Without RPS: 108K tps at 33% CPU With RPS: 311K tps at 64% CPU forcedeth on 16 core AMD Without RPS: 156K tps at 15% CPU With RPS: 404K tps at 49% CPU bnx2x on 16 core AMD Without RPS 567K tps at 61% CPU (4 HW RX queues) Without RPS 738K tps at 96% CPU (8 HW RX queues) With RPS: 854K tps at 76% CPU (4 HW RX queues) Caveats: - The benefits of this patch are dependent on architecture and cache hierarchy. Tuning the masks to get best performance is probably necessary. - This patch adds overhead in the path for processing a single packet. In a lightly loaded server this overhead may eliminate the advantages of increased parallelism, and possibly cause some relative performance degradation. We have found that masks that are cache aware (share same caches with the interrupting CPU) mitigate much of this. - The RPS masks can be changed dynamically, however whenever the mask is changed this introduces the possibility of generating out of order packets. It's probably best not change the masks too frequently. Signed-off-by: Tom Herbert include/linux/netdevice.h | 32 ++++- include/linux/skbuff.h | 3 + net/core/dev.c | 335 +++++++++++++++++++++++++++++++++++++-------- net/core/net-sysfs.c | 225 ++++++++++++++++++++++++++++++- net/core/skbuff.c | 2 + 5 files changed, 538 insertions(+), 59 deletions(-) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 335 ++++++++++++++++++++++++++++++++++++++++++--------- net/core/net-sysfs.c | 225 +++++++++++++++++++++++++++++++++- net/core/skbuff.c | 2 + 3 files changed, 505 insertions(+), 57 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index bcc490cc9452..17b168671501 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1931,7 +1931,7 @@ out_kfree_skb: return rc; } -static u32 skb_tx_hashrnd; +static u32 hashrnd __read_mostly; u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) { @@ -1949,7 +1949,7 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) else hash = skb->protocol; - hash = jhash_1word(hash, skb_tx_hashrnd); + hash = jhash_1word(hash, hashrnd); return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); } @@ -1959,10 +1959,9 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) { if (unlikely(queue_index >= dev->real_num_tx_queues)) { if (net_ratelimit()) { - WARN(1, "%s selects TX queue %d, but " + netdev_warn(dev, "selects TX queue %d, but " "real number of TX queues is %d\n", - dev->name, queue_index, - dev->real_num_tx_queues); + queue_index, dev->real_num_tx_queues); } return 0; } @@ -2175,6 +2174,172 @@ int weight_p __read_mostly = 64; /* old backlog weight */ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; +/* + * get_rps_cpu is called from netif_receive_skb and returns the target + * CPU from the RPS map of the receiving queue for a given skb. 
+ */ +static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb) +{ + struct ipv6hdr *ip6; + struct iphdr *ip; + struct netdev_rx_queue *rxqueue; + struct rps_map *map; + int cpu = -1; + u8 ip_proto; + u32 addr1, addr2, ports, ihl; + + rcu_read_lock(); + + if (skb_rx_queue_recorded(skb)) { + u16 index = skb_get_rx_queue(skb); + if (unlikely(index >= dev->num_rx_queues)) { + if (net_ratelimit()) { + netdev_warn(dev, "received packet on queue " + "%u, but number of RX queues is %u\n", + index, dev->num_rx_queues); + } + goto done; + } + rxqueue = dev->_rx + index; + } else + rxqueue = dev->_rx; + + if (!rxqueue->rps_map) + goto done; + + if (skb->rxhash) + goto got_hash; /* Skip hash computation on packet header */ + + switch (skb->protocol) { + case __constant_htons(ETH_P_IP): + if (!pskb_may_pull(skb, sizeof(*ip))) + goto done; + + ip = (struct iphdr *) skb->data; + ip_proto = ip->protocol; + addr1 = ip->saddr; + addr2 = ip->daddr; + ihl = ip->ihl; + break; + case __constant_htons(ETH_P_IPV6): + if (!pskb_may_pull(skb, sizeof(*ip6))) + goto done; + + ip6 = (struct ipv6hdr *) skb->data; + ip_proto = ip6->nexthdr; + addr1 = ip6->saddr.s6_addr32[3]; + addr2 = ip6->daddr.s6_addr32[3]; + ihl = (40 >> 2); + break; + default: + goto done; + } + ports = 0; + switch (ip_proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_DCCP: + case IPPROTO_ESP: + case IPPROTO_AH: + case IPPROTO_SCTP: + case IPPROTO_UDPLITE: + if (pskb_may_pull(skb, (ihl * 4) + 4)) + ports = *((u32 *) (skb->data + (ihl * 4))); + break; + + default: + break; + } + + skb->rxhash = jhash_3words(addr1, addr2, ports, hashrnd); + if (!skb->rxhash) + skb->rxhash = 1; + +got_hash: + map = rcu_dereference(rxqueue->rps_map); + if (map) { + u16 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; + + if (cpu_online(tcpu)) { + cpu = tcpu; + goto done; + } + } + +done: + rcu_read_unlock(); + return cpu; +} + +/* + * This structure holds the per-CPU mask of CPUs for which IPIs are scheduled + * to be sent to kick remote softirq processing. There are two masks since + * the sending of IPIs must be done with interrupts enabled. The select field + * indicates the current mask that enqueue_backlog uses to schedule IPIs. + * select is flipped before net_rps_action is called while still under lock, + * net_rps_action then uses the non-selected mask to send the IPIs and clears + * it without conflicting with enqueue_backlog operation. + */ +struct rps_remote_softirq_cpus { + cpumask_t mask[2]; + int select; +}; +static DEFINE_PER_CPU(struct rps_remote_softirq_cpus, rps_remote_softirq_cpus); + +/* Called from hardirq (IPI) context */ +static void trigger_softirq(void *data) +{ + struct softnet_data *queue = data; + __napi_schedule(&queue->backlog); + __get_cpu_var(netdev_rx_stat).received_rps++; +} + +/* + * enqueue_to_backlog is called to queue an skb to a per CPU backlog + * queue (may be a remote CPU queue). 
+ */ +static int enqueue_to_backlog(struct sk_buff *skb, int cpu) +{ + struct softnet_data *queue; + unsigned long flags; + + queue = &per_cpu(softnet_data, cpu); + + local_irq_save(flags); + __get_cpu_var(netdev_rx_stat).total++; + + spin_lock(&queue->input_pkt_queue.lock); + if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { + if (queue->input_pkt_queue.qlen) { +enqueue: + __skb_queue_tail(&queue->input_pkt_queue, skb); + spin_unlock_irqrestore(&queue->input_pkt_queue.lock, + flags); + return NET_RX_SUCCESS; + } + + /* Schedule NAPI for backlog device */ + if (napi_schedule_prep(&queue->backlog)) { + if (cpu != smp_processor_id()) { + struct rps_remote_softirq_cpus *rcpus = + &__get_cpu_var(rps_remote_softirq_cpus); + + cpu_set(cpu, rcpus->mask[rcpus->select]); + __raise_softirq_irqoff(NET_RX_SOFTIRQ); + } else + __napi_schedule(&queue->backlog); + } + goto enqueue; + } + + spin_unlock(&queue->input_pkt_queue.lock); + + __get_cpu_var(netdev_rx_stat).dropped++; + local_irq_restore(flags); + + kfree_skb(skb); + return NET_RX_DROP; +} /** * netif_rx - post buffer to the network code @@ -2193,8 +2358,7 @@ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; int netif_rx(struct sk_buff *skb) { - struct softnet_data *queue; - unsigned long flags; + int cpu; /* if netpoll wants it, pretend we never saw it */ if (netpoll_rx(skb)) @@ -2203,31 +2367,11 @@ int netif_rx(struct sk_buff *skb) if (!skb->tstamp.tv64) net_timestamp(skb); - /* - * The code is rearranged so that the path is the most - * short when CPU is congested, but is still operating. - */ - local_irq_save(flags); - queue = &__get_cpu_var(softnet_data); - - __get_cpu_var(netdev_rx_stat).total++; - if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { - if (queue->input_pkt_queue.qlen) { -enqueue: - __skb_queue_tail(&queue->input_pkt_queue, skb); - local_irq_restore(flags); - return NET_RX_SUCCESS; - } - - napi_schedule(&queue->backlog); - goto enqueue; - } - - __get_cpu_var(netdev_rx_stat).dropped++; - local_irq_restore(flags); + cpu = get_rps_cpu(skb->dev, skb); + if (cpu < 0) + cpu = smp_processor_id(); - kfree_skb(skb); - return NET_RX_DROP; + return enqueue_to_backlog(skb, cpu); } EXPORT_SYMBOL(netif_rx); @@ -2464,22 +2608,7 @@ void netif_nit_deliver(struct sk_buff *skb) rcu_read_unlock(); } -/** - * netif_receive_skb - process receive buffer from network - * @skb: buffer to process - * - * netif_receive_skb() is the main receive data processing function. - * It always succeeds. The buffer may be dropped during processing - * for congestion control or by the protocol layers. - * - * This function may only be called from softirq context and interrupts - * should be enabled. - * - * Return values (usually ignored): - * NET_RX_SUCCESS: no congestion - * NET_RX_DROP: packet was dropped - */ -int netif_receive_skb(struct sk_buff *skb) +int __netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; struct net_device *orig_dev; @@ -2588,6 +2717,33 @@ out: rcu_read_unlock(); return ret; } + +/** + * netif_receive_skb - process receive buffer from network + * @skb: buffer to process + * + * netif_receive_skb() is the main receive data processing function. + * It always succeeds. The buffer may be dropped during processing + * for congestion control or by the protocol layers. + * + * This function may only be called from softirq context and interrupts + * should be enabled. 
+ * + * Return values (usually ignored): + * NET_RX_SUCCESS: no congestion + * NET_RX_DROP: packet was dropped + */ +int netif_receive_skb(struct sk_buff *skb) +{ + int cpu; + + cpu = get_rps_cpu(skb->dev, skb); + + if (cpu < 0) + return __netif_receive_skb(skb); + else + return enqueue_to_backlog(skb, cpu); +} EXPORT_SYMBOL(netif_receive_skb); /* Network device is going away, flush any packets still pending */ @@ -2914,16 +3070,16 @@ static int process_backlog(struct napi_struct *napi, int quota) do { struct sk_buff *skb; - local_irq_disable(); + spin_lock_irq(&queue->input_pkt_queue.lock); skb = __skb_dequeue(&queue->input_pkt_queue); if (!skb) { __napi_complete(napi); - local_irq_enable(); + spin_unlock_irq(&queue->input_pkt_queue.lock); break; } - local_irq_enable(); + spin_unlock_irq(&queue->input_pkt_queue.lock); - netif_receive_skb(skb); + __netif_receive_skb(skb); } while (++work < quota && jiffies == start_time); return work; @@ -3012,6 +3168,22 @@ void netif_napi_del(struct napi_struct *napi) } EXPORT_SYMBOL(netif_napi_del); +/* + * net_rps_action sends any pending IPI's for rps. This is only called from + * softirq and interrupts must be enabled. + */ +static void net_rps_action(cpumask_t *mask) +{ + int cpu; + + /* Send pending IPI's to kick RPS processing on remote cpus. */ + for_each_cpu_mask_nr(cpu, *mask) { + struct softnet_data *queue = &per_cpu(softnet_data, cpu); + if (cpu_online(cpu)) + __smp_call_function_single(cpu, &queue->csd, 0); + } + cpus_clear(*mask); +} static void net_rx_action(struct softirq_action *h) { @@ -3019,6 +3191,8 @@ static void net_rx_action(struct softirq_action *h) unsigned long time_limit = jiffies + 2; int budget = netdev_budget; void *have; + int select; + struct rps_remote_softirq_cpus *rcpus; local_irq_disable(); @@ -3081,8 +3255,14 @@ static void net_rx_action(struct softirq_action *h) netpoll_poll_unlock(have); } out: + rcpus = &__get_cpu_var(rps_remote_softirq_cpus); + select = rcpus->select; + rcpus->select ^= 1; + local_irq_enable(); + net_rps_action(&rcpus->mask[select]); + #ifdef CONFIG_NET_DMA /* * There may not be any more sk_buffs coming right now, so push @@ -3327,10 +3507,10 @@ static int softnet_seq_show(struct seq_file *seq, void *v) { struct netif_rx_stats *s = v; - seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", + seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", s->total, s->dropped, s->time_squeeze, 0, 0, 0, 0, 0, /* was fastroute */ - s->cpu_collision); + s->cpu_collision, s->received_rps); return 0; } @@ -5067,6 +5247,23 @@ int register_netdevice(struct net_device *dev) dev->iflink = -1; + if (!dev->num_rx_queues) { + /* + * Allocate a single RX queue if driver never called + * alloc_netdev_mq + */ + + dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL); + if (!dev->_rx) { + ret = -ENOMEM; + goto out; + } + + dev->_rx->first = dev->_rx; + atomic_set(&dev->_rx->count, 1); + dev->num_rx_queues = 1; + } + /* Init, if this function is available */ if (dev->netdev_ops->ndo_init) { ret = dev->netdev_ops->ndo_init(dev); @@ -5424,9 +5621,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, void (*setup)(struct net_device *), unsigned int queue_count) { struct netdev_queue *tx; + struct netdev_rx_queue *rx; struct net_device *dev; size_t alloc_size; struct net_device *p; + int i; BUG_ON(strlen(name) >= sizeof(dev->name)); @@ -5452,11 +5651,27 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, goto free_p; } + rx = kcalloc(queue_count, 
sizeof(struct netdev_rx_queue), GFP_KERNEL); + if (!rx) { + printk(KERN_ERR "alloc_netdev: Unable to allocate " + "rx queues.\n"); + goto free_tx; + } + + atomic_set(&rx->count, queue_count); + + /* + * Set a pointer to first element in the array which holds the + * reference count. + */ + for (i = 0; i < queue_count; i++) + rx[i].first = rx; + dev = PTR_ALIGN(p, NETDEV_ALIGN); dev->padded = (char *)dev - (char *)p; if (dev_addr_init(dev)) - goto free_tx; + goto free_rx; dev_unicast_init(dev); @@ -5466,6 +5681,9 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, dev->num_tx_queues = queue_count; dev->real_num_tx_queues = queue_count; + dev->_rx = rx; + dev->num_rx_queues = queue_count; + dev->gso_max_size = GSO_MAX_SIZE; netdev_init_queues(dev); @@ -5480,9 +5698,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, strcpy(dev->name, name); return dev; +free_rx: + kfree(rx); free_tx: kfree(tx); - free_p: kfree(p); return NULL; @@ -5985,6 +6204,10 @@ static int __init net_dev_init(void) queue->completion_queue = NULL; INIT_LIST_HEAD(&queue->poll_list); + queue->csd.func = trigger_softirq; + queue->csd.info = queue; + queue->csd.flags = 0; + queue->backlog.poll = process_backlog; queue->backlog.weight = weight_p; queue->backlog.gro_list = NULL; @@ -6023,7 +6246,7 @@ subsys_initcall(net_dev_init); static int __init initialize_hashrnd(void) { - get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd)); + get_random_bytes(&hashrnd, sizeof(hashrnd)); return 0; } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 099c753c4213..7a46343d5ae3 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -466,6 +466,216 @@ static struct attribute_group wireless_group = { }; #endif +/* + * RX queue sysfs structures and functions. 
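
The store_rps_map() handler below takes the hexadecimal CPU mask written to a queue's rps_cpus file and packs the CPUs it names into a dense array. A rough user-space sketch of that parse, assuming a plain hex string with no comma-separated chunks and skipping the online-CPU filtering; the mask value is made up.

#include <stdio.h>
#include <stdlib.h>

/* "0000000d" names CPUs 0, 2 and 3 -- roughly what bitmap_parse() plus
 * the for_each_cpu_and() copy loop in store_rps_map() produce. */
int main(void)
{
	const char *mask = "0000000d";	/* e.g. written to .../queues/rx-0/rps_cpus */
	unsigned long bits = strtoul(mask, NULL, 16);
	unsigned int cpu, n = 0;

	for (cpu = 0; cpu < sizeof(bits) * 8; cpu++)
		if (bits & (1UL << cpu))
			printf("map->cpus[%u] = %u\n", n++, cpu);
	printf("map->len = %u\n", n);
	return 0;
}
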
+ */ +struct rx_queue_attribute { + struct attribute attr; + ssize_t (*show)(struct netdev_rx_queue *queue, + struct rx_queue_attribute *attr, char *buf); + ssize_t (*store)(struct netdev_rx_queue *queue, + struct rx_queue_attribute *attr, const char *buf, size_t len); +}; +#define to_rx_queue_attr(_attr) container_of(_attr, \ + struct rx_queue_attribute, attr) + +#define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj) + +static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct rx_queue_attribute *attribute = to_rx_queue_attr(attr); + struct netdev_rx_queue *queue = to_rx_queue(kobj); + + if (!attribute->show) + return -EIO; + + return attribute->show(queue, attribute, buf); +} + +static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct rx_queue_attribute *attribute = to_rx_queue_attr(attr); + struct netdev_rx_queue *queue = to_rx_queue(kobj); + + if (!attribute->store) + return -EIO; + + return attribute->store(queue, attribute, buf, count); +} + +static struct sysfs_ops rx_queue_sysfs_ops = { + .show = rx_queue_attr_show, + .store = rx_queue_attr_store, +}; + +static ssize_t show_rps_map(struct netdev_rx_queue *queue, + struct rx_queue_attribute *attribute, char *buf) +{ + struct rps_map *map; + cpumask_var_t mask; + size_t len = 0; + int i; + + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + rcu_read_lock(); + map = rcu_dereference(queue->rps_map); + if (map) + for (i = 0; i < map->len; i++) + cpumask_set_cpu(map->cpus[i], mask); + + len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask); + if (PAGE_SIZE - len < 3) { + rcu_read_unlock(); + free_cpumask_var(mask); + return -EINVAL; + } + rcu_read_unlock(); + + free_cpumask_var(mask); + len += sprintf(buf + len, "\n"); + return len; +} + +static void rps_map_release(struct rcu_head *rcu) +{ + struct rps_map *map = container_of(rcu, struct rps_map, rcu); + + kfree(map); +} + +ssize_t store_rps_map(struct netdev_rx_queue *queue, + struct rx_queue_attribute *attribute, + const char *buf, size_t len) +{ + struct rps_map *old_map, *map; + cpumask_var_t mask; + int err, cpu, i; + static DEFINE_SPINLOCK(rps_map_lock); + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits); + if (err) { + free_cpumask_var(mask); + return err; + } + + map = kzalloc(max_t(unsigned, + RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES), + GFP_KERNEL); + if (!map) { + free_cpumask_var(mask); + return -ENOMEM; + } + + i = 0; + for_each_cpu_and(cpu, mask, cpu_online_mask) + map->cpus[i++] = cpu; + + if (i) + map->len = i; + else { + kfree(map); + map = NULL; + } + + spin_lock(&rps_map_lock); + old_map = queue->rps_map; + rcu_assign_pointer(queue->rps_map, map); + spin_unlock(&rps_map_lock); + + if (old_map) + call_rcu(&old_map->rcu, rps_map_release); + + free_cpumask_var(mask); + return len; +} + +static struct rx_queue_attribute rps_cpus_attribute = + __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map); + +static struct attribute *rx_queue_default_attrs[] = { + &rps_cpus_attribute.attr, + NULL +}; + +static void rx_queue_release(struct kobject *kobj) +{ + struct netdev_rx_queue *queue = to_rx_queue(kobj); + struct rps_map *map = queue->rps_map; + struct netdev_rx_queue *first = queue->first; + + if (map) + call_rcu(&map->rcu, rps_map_release); + + if 
(atomic_dec_and_test(&first->count)) + kfree(first); +} + +static struct kobj_type rx_queue_ktype = { + .sysfs_ops = &rx_queue_sysfs_ops, + .release = rx_queue_release, + .default_attrs = rx_queue_default_attrs, +}; + +static int rx_queue_add_kobject(struct net_device *net, int index) +{ + struct netdev_rx_queue *queue = net->_rx + index; + struct kobject *kobj = &queue->kobj; + int error = 0; + + kobj->kset = net->queues_kset; + error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, + "rx-%u", index); + if (error) { + kobject_put(kobj); + return error; + } + + kobject_uevent(kobj, KOBJ_ADD); + + return error; +} + +static int rx_queue_register_kobjects(struct net_device *net) +{ + int i; + int error = 0; + + net->queues_kset = kset_create_and_add("queues", + NULL, &net->dev.kobj); + if (!net->queues_kset) + return -ENOMEM; + for (i = 0; i < net->num_rx_queues; i++) { + error = rx_queue_add_kobject(net, i); + if (error) + break; + } + + if (error) + while (--i >= 0) + kobject_put(&net->_rx[i].kobj); + + return error; +} + +static void rx_queue_remove_kobjects(struct net_device *net) +{ + int i; + + for (i = 0; i < net->num_rx_queues; i++) + kobject_put(&net->_rx[i].kobj); + kset_unregister(net->queues_kset); +} + #endif /* CONFIG_SYSFS */ #ifdef CONFIG_HOTPLUG @@ -529,6 +739,8 @@ void netdev_unregister_kobject(struct net_device * net) if (!net_eq(dev_net(net), &init_net)) return; + rx_queue_remove_kobjects(net); + device_del(dev); } @@ -537,6 +749,7 @@ int netdev_register_kobject(struct net_device *net) { struct device *dev = &(net->dev); const struct attribute_group **groups = net->sysfs_groups; + int error = 0; dev->class = &net_class; dev->platform_data = net; @@ -563,7 +776,17 @@ int netdev_register_kobject(struct net_device *net) if (!net_eq(dev_net(net), &init_net)) return 0; - return device_add(dev); + error = device_add(dev); + if (error) + return error; + + error = rx_queue_register_kobjects(net); + if (error) { + device_del(dev); + return error; + } + + return error; } int netdev_class_create_file(struct class_attribute *class_attr) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 93c4e060c91e..bdea0efdf8cb 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -534,6 +534,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->network_header = old->network_header; new->mac_header = old->mac_header; skb_dst_set(new, dst_clone(skb_dst(old))); + new->rxhash = old->rxhash; #ifdef CONFIG_XFRM new->sp = secpath_get(old->sp); #endif @@ -581,6 +582,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) C(len); C(data_len); C(mac_len); + C(rxhash); n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; n->cloned = 1; n->nohdr = 0; -- cgit v1.2.2 From 14bb4789833a2e2610f30e2d3e1451701ac96ec1 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 2 Mar 2010 13:32:09 +0000 Subject: bridge: per-cpu packet statistics (v3) The shared packet statistics are a potential source of slow down on bridged traffic. Convert to per-cpu array, but only keep those statistics which change per-packet. Signed-off-by: Stephen Hemminger Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/bridge/br_device.c | 43 +++++++++++++++++++++++++++++++++++++++---- net/bridge/br_if.c | 6 ++++++ net/bridge/br_input.c | 6 ++++-- net/bridge/br_private.h | 8 ++++++++ 4 files changed, 57 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 90a9024e5c1e..5b8a6e73b02f 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -26,11 +26,12 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) const unsigned char *dest = skb->data; struct net_bridge_fdb_entry *dst; struct net_bridge_mdb_entry *mdst; + struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats); - BR_INPUT_SKB_CB(skb)->brdev = dev; + brstats->tx_packets++; + brstats->tx_bytes += skb->len; - dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; + BR_INPUT_SKB_CB(skb)->brdev = dev; skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN); @@ -81,6 +82,31 @@ static int br_dev_stop(struct net_device *dev) return 0; } +static struct net_device_stats *br_get_stats(struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + struct net_device_stats *stats = &dev->stats; + struct br_cpu_netstats sum = { 0 }; + unsigned int cpu; + + for_each_possible_cpu(cpu) { + const struct br_cpu_netstats *bstats + = per_cpu_ptr(br->stats, cpu); + + sum.tx_bytes += bstats->tx_bytes; + sum.tx_packets += bstats->tx_packets; + sum.rx_bytes += bstats->rx_bytes; + sum.rx_packets += bstats->rx_packets; + } + + stats->tx_bytes = sum.tx_bytes; + stats->tx_packets = sum.tx_packets; + stats->rx_bytes = sum.rx_bytes; + stats->rx_packets = sum.rx_packets; + + return stats; +} + static int br_change_mtu(struct net_device *dev, int new_mtu) { struct net_bridge *br = netdev_priv(dev); @@ -180,19 +206,28 @@ static const struct net_device_ops br_netdev_ops = { .ndo_open = br_dev_open, .ndo_stop = br_dev_stop, .ndo_start_xmit = br_dev_xmit, + .ndo_get_stats = br_get_stats, .ndo_set_mac_address = br_set_mac_address, .ndo_set_multicast_list = br_dev_set_multicast_list, .ndo_change_mtu = br_change_mtu, .ndo_do_ioctl = br_dev_ioctl, }; +static void br_dev_free(struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + + free_percpu(br->stats); + free_netdev(dev); +} + void br_dev_setup(struct net_device *dev) { random_ether_addr(dev->dev_addr); ether_setup(dev); dev->netdev_ops = &br_netdev_ops; - dev->destructor = free_netdev; + dev->destructor = br_dev_free; SET_ETHTOOL_OPS(dev, &br_ethtool_ops); dev->tx_queue_len = 0; dev->priv_flags = IFF_EBRIDGE; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index b6a3872f5681..b7cdd2e98050 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -185,6 +185,12 @@ static struct net_device *new_bridge_dev(struct net *net, const char *name) br = netdev_priv(dev); br->dev = dev; + br->stats = alloc_percpu(struct br_cpu_netstats); + if (!br->stats) { + free_netdev(dev); + return NULL; + } + spin_lock_init(&br->lock); INIT_LIST_HEAD(&br->port_list); spin_lock_init(&br->hash_lock); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index d74d570fc848..333dfb7c5886 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -23,9 +23,11 @@ const u8 br_group_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; static int br_pass_frame_up(struct sk_buff *skb) { struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; + struct net_bridge *br = netdev_priv(brdev); + struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats); - brdev->stats.rx_packets++; - brdev->stats.rx_bytes += 
skb->len; + brstats->rx_packets++; + brstats->rx_bytes += skb->len; indev = skb->dev; skb->dev = brdev; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 846d7d1e2075..791d4ab0fd4d 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -135,6 +135,14 @@ struct net_bridge spinlock_t lock; struct list_head port_list; struct net_device *dev; + + struct br_cpu_netstats __percpu { + unsigned long rx_packets; + unsigned long rx_bytes; + unsigned long tx_packets; + unsigned long tx_bytes; + } *stats; + spinlock_t hash_lock; struct hlist_head hash[BR_HASH_SIZE]; unsigned long feature_mask; -- cgit v1.2.2 From 2fb3573dfbca0bd853ddc1e47617eb446fa3deae Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 9 Mar 2010 20:03:38 +0000 Subject: net: remove rcu locking from fib_rules_event() We hold RTNL at this point and dont use RCU variants of list traversals, we dont need rcu_read_lock()/rcu_read_unlock() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/fib_rules.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 9a24377146bf..2ff34894357a 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -108,7 +108,7 @@ fib_rules_register(struct fib_rules_ops *tmpl, struct net *net) struct fib_rules_ops *ops; int err; - ops = kmemdup(tmpl, sizeof (*ops), GFP_KERNEL); + ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL); if (ops == NULL) return ERR_PTR(-ENOMEM); @@ -123,7 +123,6 @@ fib_rules_register(struct fib_rules_ops *tmpl, struct net *net) return ops; } - EXPORT_SYMBOL_GPL(fib_rules_register); void fib_rules_cleanup_ops(struct fib_rules_ops *ops) @@ -157,7 +156,6 @@ void fib_rules_unregister(struct fib_rules_ops *ops) call_rcu(&ops->rcu, fib_rules_put_rcu); } - EXPORT_SYMBOL_GPL(fib_rules_unregister); static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, @@ -220,7 +218,6 @@ out: return err; } - EXPORT_SYMBOL_GPL(fib_rules_lookup); static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb, @@ -613,7 +610,7 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb) break; cb->args[1] = 0; - skip: +skip: idx++; } rcu_read_unlock(); @@ -685,7 +682,6 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event, struct fib_rules_ops *ops; ASSERT_RTNL(); - rcu_read_lock(); switch (event) { case NETDEV_REGISTER: @@ -699,8 +695,6 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event, break; } - rcu_read_unlock(); - return NOTIFY_DONE; } -- cgit v1.2.2 From dd2acaa7bcb150dadac0b17c8eb654b0712a62ab Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 11 Mar 2010 09:57:27 +0000 Subject: net: tcp: make hybla selectable as default congestion module Signed-off-by: Jan Engelhardt Signed-off-by: David S. 
Miller --- net/ipv4/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 0c94a1ac2946..731c47ba6ad2 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -587,6 +587,9 @@ choice config DEFAULT_HTCP bool "Htcp" if TCP_CONG_HTCP=y + config DEFAULT_HYBLA + bool "Hybla" if TCP_CONG_HYBLA=y + config DEFAULT_VEGAS bool "Vegas" if TCP_CONG_VEGAS=y @@ -610,6 +613,7 @@ config DEFAULT_TCP_CONG default "bic" if DEFAULT_BIC default "cubic" if DEFAULT_CUBIC default "htcp" if DEFAULT_HTCP + default "hybla" if DEFAULT_HYBLA default "vegas" if DEFAULT_VEGAS default "westwood" if DEFAULT_WESTWOOD default "reno" if DEFAULT_RENO -- cgit v1.2.2 From 6ce1a6df6efbbeaa262a225a1a439ebc30a75d2e Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 11 Mar 2010 09:57:28 +0000 Subject: net: tcp: make veno selectable as default congestion module Signed-off-by: Jan Engelhardt Signed-off-by: David S. Miller --- net/ipv4/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 731c47ba6ad2..c9a1c68767ff 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -593,6 +593,9 @@ choice config DEFAULT_VEGAS bool "Vegas" if TCP_CONG_VEGAS=y + config DEFAULT_VENO + bool "Veno" if TCP_CONG_VENO=y + config DEFAULT_WESTWOOD bool "Westwood" if TCP_CONG_WESTWOOD=y @@ -616,6 +619,7 @@ config DEFAULT_TCP_CONG default "hybla" if DEFAULT_HYBLA default "vegas" if DEFAULT_VEGAS default "westwood" if DEFAULT_WESTWOOD + default "veno" if DEFAULT_VENO default "reno" if DEFAULT_RENO default "cubic" -- cgit v1.2.2 From 10708f37ae729baba9b67bd134c3720709d4ae62 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 11 Mar 2010 09:57:29 +0000 Subject: net: core: add IFLA_STATS64 support `ip -s link` shows interface counters truncated to 32 bit. This is because interface statistics are transported only in 32-bit quantity to userspace. This commit adds a new IFLA_STATS64 attribute that exports them in full 64 bit. References: http://lkml.indiana.edu/hypermail/linux/kernel/0307.3/0215.html Signed-off-by: Jan Engelhardt Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4568120d8533..e1121f0bca6a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -600,7 +600,39 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, a->rx_compressed = b->rx_compressed; a->tx_compressed = b->tx_compressed; -}; +} + +static void copy_rtnl_link_stats64(struct rtnl_link_stats64 *a, + const struct net_device_stats *b) +{ + a->rx_packets = b->rx_packets; + a->tx_packets = b->tx_packets; + a->rx_bytes = b->rx_bytes; + a->tx_bytes = b->tx_bytes; + a->rx_errors = b->rx_errors; + a->tx_errors = b->tx_errors; + a->rx_dropped = b->rx_dropped; + a->tx_dropped = b->tx_dropped; + + a->multicast = b->multicast; + a->collisions = b->collisions; + + a->rx_length_errors = b->rx_length_errors; + a->rx_over_errors = b->rx_over_errors; + a->rx_crc_errors = b->rx_crc_errors; + a->rx_frame_errors = b->rx_frame_errors; + a->rx_fifo_errors = b->rx_fifo_errors; + a->rx_missed_errors = b->rx_missed_errors; + + a->tx_aborted_errors = b->tx_aborted_errors; + a->tx_carrier_errors = b->tx_carrier_errors; + a->tx_fifo_errors = b->tx_fifo_errors; + a->tx_heartbeat_errors = b->tx_heartbeat_errors; + a->tx_window_errors = b->tx_window_errors; + + a->rx_compressed = b->rx_compressed; + a->tx_compressed = b->tx_compressed; +} static inline int rtnl_vfinfo_size(const struct net_device *dev) { @@ -698,6 +730,14 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, stats = dev_get_stats(dev); copy_rtnl_link_stats(nla_data(attr), stats); + attr = nla_reserve(skb, IFLA_STATS64, + sizeof(struct rtnl_link_stats64)); + if (attr == NULL) + goto nla_put_failure; + + stats = dev_get_stats(dev); + copy_rtnl_link_stats64(nla_data(attr), stats); + if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) { int i; struct ifla_vf_info ivi; -- cgit v1.2.2 From 1a624832a06b465d0e5651901bcbc3680c78d374 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 15 Mar 2010 08:02:24 +0000 Subject: tipc: Increase frequency of load distribution over broadcast link Forward port commit 29eb572941501c40ac6e62dbc5043bf9ee76ee56 from git://tipc.cslab.ericsson.net/pub/git/people/allan/tipc.git Origional commit message: Increase frequency of load distribution over broadcast link This patch enhances the behavior of TIPC's broadcast link so that it alternates between redundant bearers (if available) after every message sent, rather than after every 10 messages. This change helps to speed up delivery of retransmitted messages by ensuring that they are not sent repeatedly over a bearer that is no longer working, but not yet recognized as failed. Tested by myself in the latest net-2.6 tree using the tipc sanity test suite Origionally-signed-off-by: Allan Stephens Signed-off-by: Neil Horman bcast.c | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) Signed-off-by: David S. 
Miller --- net/tipc/bcast.c | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index a3bfd4064912..90a051912c03 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -558,10 +558,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, struct tipc_media_addr *unused2) { - static int send_count = 0; - int bp_index; - int swap_time; /* Prepare buffer for broadcasting (if first time trying to send it) */ @@ -575,11 +572,6 @@ static int tipc_bcbearer_send(struct sk_buff *buf, msg_set_mc_netid(msg, tipc_net_id); } - /* Determine if bearer pairs should be swapped following this attempt */ - - if ((swap_time = (++send_count >= 10))) - send_count = 0; - /* Send buffer over bearers until all targets reached */ bcbearer->remains = tipc_cltr_bcast_nodes; @@ -595,21 +587,22 @@ static int tipc_bcbearer_send(struct sk_buff *buf, if (bcbearer->remains_new.count == bcbearer->remains.count) continue; /* bearer pair doesn't add anything */ - if (!p->publ.blocked && - !p->media->send_msg(buf, &p->publ, &p->media->bcast_addr)) { - if (swap_time && s && !s->publ.blocked) - goto swap; - else - goto update; + if (p->publ.blocked || + p->media->send_msg(buf, &p->publ, &p->media->bcast_addr)) { + /* unable to send on primary bearer */ + if (!s || s->publ.blocked || + s->media->send_msg(buf, &s->publ, + &s->media->bcast_addr)) { + /* unable to send on either bearer */ + continue; + } + } + + if (s) { + bcbearer->bpairs[bp_index].primary = s; + bcbearer->bpairs[bp_index].secondary = p; } - if (!s || s->publ.blocked || - s->media->send_msg(buf, &s->publ, &s->media->bcast_addr)) - continue; /* unable to send using bearer pair */ -swap: - bcbearer->bpairs[bp_index].primary = s; - bcbearer->bpairs[bp_index].secondary = p; -update: if (bcbearer->remains_new.count == 0) return 0; -- cgit v1.2.2 From ca50910185fcce694af0cc194de50d2374a7c7ef Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 15 Mar 2010 07:58:45 +0000 Subject: tipc: Allow retransmission of cloned buffers Forward port commit fc477e160af086f6e30c3d4fdf5f5c000d29beb5 from git://tipc.cslab.ericsson.net/pub/git/people/allan/tipc.git Origional commit message: Allow retransmission of cloned buffers This patch fixes an issue with TIPC's message retransmission logic that prevented retransmission of clone sk_buffs. Originally intended as a means of avoiding wasted work in retransmitting messages that were still on the driver's outbound queue, it also prevented TIPC from retransmitting messages through other means -- such as the secondary bearer of the broadcast link, or another interface in a set of bonded interfaces. This fix removes existing checks for cloned sk_buffs that prevented such retransmission. Origionally-Signed-off-by: Allan Stephens Signed-off-by: Neil Horman Signed-off-by: David S. 
Miller --- net/tipc/link.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index 1a7e4665af80..49f2be8622a9 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1553,7 +1553,7 @@ u32 tipc_link_push_packet(struct link *l_ptr) /* Continue retransmission now, if there is anything: */ - if (r_q_size && buf && !skb_cloned(buf)) { + if (r_q_size && buf) { msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in); if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) { @@ -1722,15 +1722,16 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf, dbg("Retransmitting %u in link %x\n", retransmits, l_ptr); if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr)) { - if (!skb_cloned(buf)) { + if (l_ptr->retransm_queue_size == 0) { msg_dbg(msg, ">NO_RETR->BCONG>"); dbg_print_link(l_ptr, " "); l_ptr->retransm_queue_head = msg_seqno(msg); l_ptr->retransm_queue_size = retransmits; - return; } else { - /* Don't retransmit if driver already has the buffer */ + err("Unexpected retransmit on link %s (qsize=%d)\n", + l_ptr->name, l_ptr->retransm_queue_size); } + return; } else { /* Detect repeated retransmit failures on uncongested bearer */ @@ -1745,7 +1746,7 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf, } } - while (retransmits && (buf != l_ptr->next_out) && buf && !skb_cloned(buf)) { + while (retransmits && (buf != l_ptr->next_out) && buf) { msg = buf_msg(buf); msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); -- cgit v1.2.2 From ff6e2163f28a1094fb5ca5950fe2b43c3cf6bc7a Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 1 Mar 2010 05:09:14 +0000 Subject: net: convert multiple drivers to use netdev_for_each_mc_addr, part7 In mlx4, using char * to store mc address in private structure instead. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/bluetooth/bnep/netdev.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index b6234b73c4cf..326ab453edb7 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -87,7 +87,7 @@ static void bnep_net_set_mc_list(struct net_device *dev) memcpy(__skb_put(skb, ETH_ALEN), dev->broadcast, ETH_ALEN); r->len = htons(ETH_ALEN * 2); } else { - struct dev_mc_list *dmi = dev->mc_list; + struct dev_mc_list *dmi; int i, len = skb->len; if (dev->flags & IFF_BROADCAST) { @@ -97,12 +97,12 @@ static void bnep_net_set_mc_list(struct net_device *dev) /* FIXME: We should group addresses here. 
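
The conversion below replaces the hand-rolled dev->mc_list walk with netdev_for_each_mc_addr() and moves the BNEP_MAX_MULTICAST_FILTERS cap into the loop body. A stand-alone sketch of that capped iteration over an invented three-entry list (the struct, names and addresses are made up; only the loop shape matches the new bnep code):

#include <stdio.h>

struct mc_entry {
	unsigned char addr[6];
	struct mc_entry *next;
};

#define MAX_FILTERS 2	/* stand-in for BNEP_MAX_MULTICAST_FILTERS */

int main(void)
{
	struct mc_entry c = { { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x03 }, NULL };
	struct mc_entry b = { { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x02 }, &c };
	struct mc_entry a = { { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 }, &b };
	struct mc_entry *dmi;
	int i = 0;

	/* Count inside the body and break at the cap, like the converted
	 * netdev_for_each_mc_addr() loop. */
	for (dmi = &a; dmi; dmi = dmi->next) {
		if (i == MAX_FILTERS)
			break;
		printf("filter %d: %02x:%02x:%02x:%02x:%02x:%02x\n", i,
		       dmi->addr[0], dmi->addr[1], dmi->addr[2],
		       dmi->addr[3], dmi->addr[4], dmi->addr[5]);
		i++;
	}
	return 0;
}
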
*/ - for (i = 0; - i < netdev_mc_count(dev) && i < BNEP_MAX_MULTICAST_FILTERS; - i++) { + i = 0; + netdev_for_each_mc_addr(dmi, dev) { + if (i == BNEP_MAX_MULTICAST_FILTERS) + break; memcpy(__skb_put(skb, ETH_ALEN), dmi->dmi_addr, ETH_ALEN); memcpy(__skb_put(skb, ETH_ALEN), dmi->dmi_addr, ETH_ALEN); - dmi = dmi->next; } r->len = htons(skb->len - len); } -- cgit v1.2.2 From 076f7839dd30b379f3cd468b13cf513cdcd5cee7 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 11 Mar 2010 00:38:44 +0100 Subject: netfilter: xt_CT: par->family is an nfproto Signed-off-by: Jan Engelhardt --- net/netfilter/xt_CT.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 61c50fa84703..fda603edb31a 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -37,13 +37,13 @@ static unsigned int xt_ct_target(struct sk_buff *skb, static u8 xt_ct_find_proto(const struct xt_tgchk_param *par) { - if (par->family == AF_INET) { + if (par->family == NFPROTO_IPV4) { const struct ipt_entry *e = par->entryinfo; if (e->ip.invflags & IPT_INV_PROTO) return 0; return e->ip.proto; - } else if (par->family == AF_INET6) { + } else if (par->family == NFPROTO_IPV6) { const struct ip6t_entry *e = par->entryinfo; if (e->ipv6.invflags & IP6T_INV_PROTO) -- cgit v1.2.2 From f76a47c83247b453f25629618056a6d2c1e39103 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 5 Jun 2009 15:22:15 +0200 Subject: netfilter: xt_NFQUEUE: consolidate v4/v6 targets into one Signed-off-by: Jan Engelhardt --- net/netfilter/xt_NFQUEUE.c | 40 ++++++++++++---------------------------- 1 file changed, 12 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 12dcd7007c3e..a37e2166858e 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -49,17 +49,6 @@ static u32 hash_v4(const struct sk_buff *skb) return jhash_2words((__force u32)ipaddr, iph->protocol, jhash_initval); } -static unsigned int -nfqueue_tg4_v1(struct sk_buff *skb, const struct xt_target_param *par) -{ - const struct xt_NFQ_info_v1 *info = par->targinfo; - u32 queue = info->queuenum; - - if (info->queues_total > 1) - queue = hash_v4(skb) % info->queues_total + queue; - return NF_QUEUE_NR(queue); -} - #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) static u32 hash_v6(const struct sk_buff *skb) { @@ -73,18 +62,24 @@ static u32 hash_v6(const struct sk_buff *skb) return jhash2((__force u32 *)addr, ARRAY_SIZE(addr), jhash_initval); } +#endif static unsigned int -nfqueue_tg6_v1(struct sk_buff *skb, const struct xt_target_param *par) +nfqueue_tg_v1(struct sk_buff *skb, const struct xt_target_param *par) { const struct xt_NFQ_info_v1 *info = par->targinfo; u32 queue = info->queuenum; - if (info->queues_total > 1) - queue = hash_v6(skb) % info->queues_total + queue; + if (info->queues_total > 1) { + if (par->target->family == NFPROTO_IPV4) + queue = hash_v4(skb) % info->queues_total + queue; +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) + else if (par->target->family == NFPROTO_IPV6) + queue = hash_v6(skb) % info->queues_total + queue; +#endif + } return NF_QUEUE_NR(queue); } -#endif static bool nfqueue_tg_v1_check(const struct xt_tgchk_param *par) { @@ -119,23 +114,12 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = { { .name = "NFQUEUE", .revision = 1, - .family = NFPROTO_IPV4, - .checkentry = nfqueue_tg_v1_check, - .target = nfqueue_tg4_v1, 
- .targetsize = sizeof(struct xt_NFQ_info_v1), - .me = THIS_MODULE, - }, -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) - { - .name = "NFQUEUE", - .revision = 1, - .family = NFPROTO_IPV6, + .family = NFPROTO_UNSPEC, .checkentry = nfqueue_tg_v1_check, - .target = nfqueue_tg6_v1, + .target = nfqueue_tg_v1, .targetsize = sizeof(struct xt_NFQ_info_v1), .me = THIS_MODULE, }, -#endif }; static int __init nfqueue_tg_init(void) -- cgit v1.2.2 From 44c5873199a90a45e9d64a86a1f54b1ccab21ec8 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 26 Feb 2010 14:14:22 +0100 Subject: netfilter: xtables: add comment markers to Xtables Kconfig Signed-off-by: Jan Engelhardt --- net/netfilter/Kconfig | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 18d77b5c351a..abf4ce6d1834 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -316,6 +316,8 @@ if NETFILTER_XTABLES # alphabetically ordered list of targets +comment "Xtables targets" + config NETFILTER_XT_TARGET_CLASSIFY tristate '"CLASSIFY" target support' depends on NETFILTER_ADVANCED @@ -552,6 +554,10 @@ config NETFILTER_XT_TARGET_TCPOPTSTRIP This option adds a "TCPOPTSTRIP" target, which allows you to strip TCP options from TCP packets. +# alphabetically ordered list of matches + +comment "Xtables matches" + config NETFILTER_XT_MATCH_CLUSTER tristate '"cluster" match support' depends on NF_CONNTRACK -- cgit v1.2.2 From 28b949885f80efb87d7cebdcf879c99db12c37bd Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 28 Feb 2009 03:23:57 +0100 Subject: netfilter: xtables: merge xt_MARK into xt_mark Two arguments for combining the two: - xt_mark is pretty useless without xt_MARK - the actual code is so small anyway that the kmod metadata and the module in its loaded state totally outweighs the combined actual code size. i586-before: -rw-r--r-- 1 jengelh users 3821 Feb 10 01:01 xt_MARK.ko -rw-r--r-- 1 jengelh users 2592 Feb 10 00:04 xt_MARK.o -rw-r--r-- 1 jengelh users 3274 Feb 10 01:01 xt_mark.ko -rw-r--r-- 1 jengelh users 2108 Feb 10 00:05 xt_mark.o text data bss dec hex filename 354 264 0 618 26a xt_MARK.o 223 176 0 399 18f xt_mark.o And the runtime size is like 14 KB. i586-after: -rw-r--r-- 1 jengelh users 3264 Feb 18 17:28 xt_mark.o Signed-off-by: Jan Engelhardt --- net/netfilter/Kconfig | 46 +++++++++++++++++++++++++--------------- net/netfilter/Makefile | 5 +++-- net/netfilter/xt_MARK.c | 56 ------------------------------------------------- net/netfilter/xt_mark.c | 35 +++++++++++++++++++++++++++++-- 4 files changed, 65 insertions(+), 77 deletions(-) delete mode 100644 net/netfilter/xt_MARK.c (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index abf4ce6d1834..236aa20ce5cc 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -314,6 +314,23 @@ config NETFILTER_XTABLES if NETFILTER_XTABLES +comment "Xtables combined modules" + +config NETFILTER_XT_MARK + tristate 'nfmark target and match support' + default m if NETFILTER_ADVANCED=n + ---help--- + This option adds the "MARK" target and "mark" match. + + Netfilter mark matching allows you to match packets based on the + "nfmark" value in the packet. + The target allows you to create rules in the "mangle" table which alter + the netfilter mark (nfmark) field associated with the packet. 
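
What the combined module does is two one-line bit operations, both visible in the xt_mark.c hunks further down: the target applies mark = (mark & ~mask) ^ value, the match tests ((mark & mask) == value) ^ invert. A stand-alone sketch with an invented mark, mask and value:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t mark = 0x00000005;	/* existing skb->mark (invented) */
	uint32_t value = 0x0000001a;	/* info->mark */
	uint32_t mask = 0x000000ff;	/* info->mask */
	int invert = 0;
	int hit;

	/* MARK target: clear the masked bits, then xor the new value in. */
	mark = (mark & ~mask) ^ value;
	printf("skb->mark after MARK: %#x\n", mark);

	/* mark match: compare only the masked bits. */
	hit = ((mark & mask) == value) ^ invert;
	printf("mark match: %s\n", hit ? "match" : "no match");
	return 0;
}
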
+ + Prior to routing, the nfmark can influence the routing method (see + "Use netfilter MARK value as routing key") and can also be used by + other subsystems to change their behavior. + # alphabetically ordered list of targets comment "Xtables targets" @@ -425,16 +442,12 @@ config NETFILTER_XT_TARGET_LED config NETFILTER_XT_TARGET_MARK tristate '"MARK" target support' - default m if NETFILTER_ADVANCED=n - help - This option adds a `MARK' target, which allows you to create rules - in the `mangle' table which alter the netfilter mark (nfmark) field - associated with the packet prior to routing. This can change - the routing method (see `Use netfilter MARK value as routing - key') and can also be used by other subsystems to change their - behavior. - - To compile it as a module, choose M here. If unsure, say N. + depends on NETFILTER_ADVANCED + select NETFILTER_XT_MARK + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_MARK (combined mark/MARK module). config NETFILTER_XT_TARGET_NFLOG tristate '"NFLOG" target support' @@ -739,13 +752,12 @@ config NETFILTER_XT_MATCH_MAC config NETFILTER_XT_MATCH_MARK tristate '"mark" match support' - default m if NETFILTER_ADVANCED=n - help - Netfilter mark matching allows you to match packets based on the - `nfmark' value in the packet. This can be set by the MARK target - (see below). - - To compile it as a module, choose M here. If unsure, say N. + depends on NETFILTER_ADVANCED + select NETFILTER_XT_MARK + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_MARK (combined mark/MARK module). config NETFILTER_XT_MATCH_MULTIPORT tristate '"multiport" Multiple port match support' diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index f873644f02f6..19775cc30fe5 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -40,6 +40,9 @@ obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o # generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o +# combos +obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o + # targets obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o @@ -48,7 +51,6 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o -obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o @@ -76,7 +78,6 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o -obj-$(CONFIG_NETFILTER_XT_MATCH_MARK) += xt_mark.o obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c deleted file mode 100644 index 225f8d11e173..000000000000 --- a/net/netfilter/xt_MARK.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * xt_MARK - Netfilter module to modify the NFMARK field of an skb - * - * (C) 1999-2001 Marc Boucher - * Copyright © CC Computer Consultants GmbH, 2007 - 2008 - * Jan 
Engelhardt - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include - -#include -#include - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Marc Boucher "); -MODULE_DESCRIPTION("Xtables: packet mark modification"); -MODULE_ALIAS("ipt_MARK"); -MODULE_ALIAS("ip6t_MARK"); - -static unsigned int -mark_tg(struct sk_buff *skb, const struct xt_target_param *par) -{ - const struct xt_mark_tginfo2 *info = par->targinfo; - - skb->mark = (skb->mark & ~info->mask) ^ info->mark; - return XT_CONTINUE; -} - -static struct xt_target mark_tg_reg __read_mostly = { - .name = "MARK", - .revision = 2, - .family = NFPROTO_UNSPEC, - .target = mark_tg, - .targetsize = sizeof(struct xt_mark_tginfo2), - .me = THIS_MODULE, -}; - -static int __init mark_tg_init(void) -{ - return xt_register_target(&mark_tg_reg); -} - -static void __exit mark_tg_exit(void) -{ - xt_unregister_target(&mark_tg_reg); -} - -module_init(mark_tg_init); -module_exit(mark_tg_exit); diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c index 1db07d8125f8..035c468a0040 100644 --- a/net/netfilter/xt_mark.c +++ b/net/netfilter/xt_mark.c @@ -18,9 +18,20 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marc Boucher "); -MODULE_DESCRIPTION("Xtables: packet mark match"); +MODULE_DESCRIPTION("Xtables: packet mark operations"); MODULE_ALIAS("ipt_mark"); MODULE_ALIAS("ip6t_mark"); +MODULE_ALIAS("ipt_MARK"); +MODULE_ALIAS("ip6t_MARK"); + +static unsigned int +mark_tg(struct sk_buff *skb, const struct xt_target_param *par) +{ + const struct xt_mark_tginfo2 *info = par->targinfo; + + skb->mark = (skb->mark & ~info->mask) ^ info->mark; + return XT_CONTINUE; +} static bool mark_mt(const struct sk_buff *skb, const struct xt_match_param *par) @@ -30,6 +41,15 @@ mark_mt(const struct sk_buff *skb, const struct xt_match_param *par) return ((skb->mark & info->mask) == info->mark) ^ info->invert; } +static struct xt_target mark_tg_reg __read_mostly = { + .name = "MARK", + .revision = 2, + .family = NFPROTO_UNSPEC, + .target = mark_tg, + .targetsize = sizeof(struct xt_mark_tginfo2), + .me = THIS_MODULE, +}; + static struct xt_match mark_mt_reg __read_mostly = { .name = "mark", .revision = 1, @@ -41,12 +61,23 @@ static struct xt_match mark_mt_reg __read_mostly = { static int __init mark_mt_init(void) { - return xt_register_match(&mark_mt_reg); + int ret; + + ret = xt_register_target(&mark_tg_reg); + if (ret < 0) + return ret; + ret = xt_register_match(&mark_mt_reg); + if (ret < 0) { + xt_unregister_target(&mark_tg_reg); + return ret; + } + return 0; } static void __exit mark_mt_exit(void) { xt_unregister_match(&mark_mt_reg); + xt_unregister_target(&mark_tg_reg); } module_init(mark_mt_init); -- cgit v1.2.2 From b8f00ba27e4acc4a2224360ce3aa8a22354434c5 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 26 Feb 2010 14:20:32 +0100 Subject: netfilter: xtables: merge xt_CONNMARK into xt_connmark Signed-off-by: Jan Engelhardt --- net/netfilter/Kconfig | 39 ++++++++------- net/netfilter/Makefile | 3 +- net/netfilter/xt_CONNMARK.c | 113 -------------------------------------------- net/netfilter/xt_connmark.c | 84 ++++++++++++++++++++++++++++++-- 4 files changed, 104 insertions(+), 135 deletions(-) delete mode 100644 net/netfilter/xt_CONNMARK.c (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 236aa20ce5cc..8550dfde7804 100644 --- a/net/netfilter/Kconfig +++ 
b/net/netfilter/Kconfig @@ -331,6 +331,18 @@ config NETFILTER_XT_MARK "Use netfilter MARK value as routing key") and can also be used by other subsystems to change their behavior. +config NETFILTER_XT_CONNMARK + tristate 'ctmark target and match support' + depends on NF_CONNTRACK + depends on NETFILTER_ADVANCED + select NF_CONNTRACK_MARK + ---help--- + This option adds the "CONNMARK" target and "connmark" match. + + Netfilter allows you to store a mark value per connection (a.k.a. + ctmark), similarly to the packet mark (nfmark). Using this + target and match, you can set and match on this mark. + # alphabetically ordered list of targets comment "Xtables targets" @@ -351,15 +363,11 @@ config NETFILTER_XT_TARGET_CONNMARK tristate '"CONNMARK" target support' depends on NF_CONNTRACK depends on NETFILTER_ADVANCED - select NF_CONNTRACK_MARK - help - This option adds a `CONNMARK' target, which allows one to manipulate - the connection mark value. Similar to the MARK target, but - affects the connection mark value rather than the packet mark value. - - If you want to compile it as a module, say M here and read - . The module will be called - ipt_CONNMARK. If unsure, say `N'. + select NETFILTER_XT_CONNMARK + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_CONNMARK (combined connmark/CONNMARK module). config NETFILTER_XT_TARGET_CONNSECMARK tristate '"CONNSECMARK" target support' @@ -621,14 +629,11 @@ config NETFILTER_XT_MATCH_CONNMARK tristate '"connmark" connection mark match support' depends on NF_CONNTRACK depends on NETFILTER_ADVANCED - select NF_CONNTRACK_MARK - help - This option adds a `connmark' match, which allows you to match the - connection mark value previously set for the session by `CONNMARK'. - - If you want to compile it as a module, say M here and read - . The module will be called - ipt_connmark. If unsure, say `N'. + select NETFILTER_XT_CONNMARK + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_CONNMARK (combined connmark/CONNMARK module). 
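
The three CONNMARK modes that move into the combined module are likewise plain bit arithmetic between the connection mark and the packet mark; see connmark_tg() in the xt_connmark.c hunks below. A stand-alone sketch with ct_mark and skb_mark standing in for ct->mark and skb->mark, and all values invented:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t ct_mark = 0x0, skb_mark = 0x42;
	uint32_t ctmark = 0x7, ctmask = 0xff, nfmask = 0xff;

	/* XT_CONNMARK_SET: write ctmark into the connection. */
	ct_mark = (ct_mark & ~ctmask) ^ ctmark;
	printf("after SET:     ct_mark=%#x\n", ct_mark);

	/* XT_CONNMARK_SAVE: copy the packet mark into the connection. */
	ct_mark = (ct_mark & ~ctmask) ^ (skb_mark & nfmask);
	printf("after SAVE:    ct_mark=%#x\n", ct_mark);

	/* XT_CONNMARK_RESTORE: copy the connection mark back to the packet. */
	skb_mark = (skb_mark & ~nfmask) ^ (ct_mark & ctmask);
	printf("after RESTORE: skb_mark=%#x\n", skb_mark);
	return 0;
}
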
config NETFILTER_XT_MATCH_CONNTRACK tristate '"conntrack" connection tracking match support' diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 19775cc30fe5..cd31afe0692a 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -42,10 +42,10 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o # combos obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o +obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o # targets obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o -obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o @@ -66,7 +66,6 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o -obj-$(CONFIG_NETFILTER_XT_MATCH_CONNMARK) += xt_connmark.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c deleted file mode 100644 index 593457068ae1..000000000000 --- a/net/netfilter/xt_CONNMARK.c +++ /dev/null @@ -1,113 +0,0 @@ -/* - * xt_CONNMARK - Netfilter module to modify the connection mark values - * - * Copyright (C) 2002,2004 MARA Systems AB - * by Henrik Nordstrom - * Copyright © CC Computer Consultants GmbH, 2007 - 2008 - * Jan Engelhardt - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#include -#include -#include -#include - -MODULE_AUTHOR("Henrik Nordstrom "); -MODULE_DESCRIPTION("Xtables: connection mark modification"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("ipt_CONNMARK"); -MODULE_ALIAS("ip6t_CONNMARK"); - -#include -#include -#include - -static unsigned int -connmark_tg(struct sk_buff *skb, const struct xt_target_param *par) -{ - const struct xt_connmark_tginfo1 *info = par->targinfo; - enum ip_conntrack_info ctinfo; - struct nf_conn *ct; - u_int32_t newmark; - - ct = nf_ct_get(skb, &ctinfo); - if (ct == NULL) - return XT_CONTINUE; - - switch (info->mode) { - case XT_CONNMARK_SET: - newmark = (ct->mark & ~info->ctmask) ^ info->ctmark; - if (ct->mark != newmark) { - ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, ct); - } - break; - case XT_CONNMARK_SAVE: - newmark = (ct->mark & ~info->ctmask) ^ - (skb->mark & info->nfmask); - if (ct->mark != newmark) { - ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, ct); - } - break; - case XT_CONNMARK_RESTORE: - newmark = (skb->mark & ~info->nfmask) ^ - (ct->mark & info->ctmask); - skb->mark = newmark; - break; - } - - return XT_CONTINUE; -} - -static bool connmark_tg_check(const struct xt_tgchk_param *par) -{ - if (nf_ct_l3proto_try_module_get(par->family) < 0) { - printk(KERN_WARNING "cannot load conntrack support for " - "proto=%u\n", par->family); - return false; - } - return true; -} - -static void connmark_tg_destroy(const struct xt_tgdtor_param *par) -{ - nf_ct_l3proto_module_put(par->family); -} - -static struct xt_target connmark_tg_reg __read_mostly = { - .name = "CONNMARK", - .revision = 1, - .family = NFPROTO_UNSPEC, - .checkentry = connmark_tg_check, - .target = connmark_tg, - .targetsize = sizeof(struct xt_connmark_tginfo1), - .destroy = connmark_tg_destroy, - .me = THIS_MODULE, -}; - -static int __init connmark_tg_init(void) -{ - return xt_register_target(&connmark_tg_reg); -} - -static void __exit connmark_tg_exit(void) -{ - xt_unregister_target(&connmark_tg_reg); -} - -module_init(connmark_tg_init); -module_exit(connmark_tg_exit); diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 122aa8b0147b..d184515604b6 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -1,5 +1,5 @@ /* - * xt_connmark - Netfilter module to match connection mark values + * xt_connmark - Netfilter module to operate on connection marks * * Copyright (C) 2002,2004 MARA Systems AB * by Henrik Nordstrom @@ -24,15 +24,71 @@ #include #include #include +#include #include #include MODULE_AUTHOR("Henrik Nordstrom "); -MODULE_DESCRIPTION("Xtables: connection mark match"); +MODULE_DESCRIPTION("Xtables: connection mark operations"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_CONNMARK"); +MODULE_ALIAS("ip6t_CONNMARK"); MODULE_ALIAS("ipt_connmark"); MODULE_ALIAS("ip6t_connmark"); +static unsigned int +connmark_tg(struct sk_buff *skb, const struct xt_target_param *par) +{ + const struct xt_connmark_tginfo1 *info = par->targinfo; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + u_int32_t newmark; + + ct = nf_ct_get(skb, &ctinfo); + if (ct == NULL) + return XT_CONTINUE; + + switch (info->mode) { + case XT_CONNMARK_SET: + newmark = (ct->mark & ~info->ctmask) ^ info->ctmark; + if (ct->mark != newmark) { + ct->mark = newmark; + nf_conntrack_event_cache(IPCT_MARK, ct); + 
} + break; + case XT_CONNMARK_SAVE: + newmark = (ct->mark & ~info->ctmask) ^ + (skb->mark & info->nfmask); + if (ct->mark != newmark) { + ct->mark = newmark; + nf_conntrack_event_cache(IPCT_MARK, ct); + } + break; + case XT_CONNMARK_RESTORE: + newmark = (skb->mark & ~info->nfmask) ^ + (ct->mark & info->ctmask); + skb->mark = newmark; + break; + } + + return XT_CONTINUE; +} + +static bool connmark_tg_check(const struct xt_tgchk_param *par) +{ + if (nf_ct_l3proto_try_module_get(par->family) < 0) { + printk(KERN_WARNING "cannot load conntrack support for " + "proto=%u\n", par->family); + return false; + } + return true; +} + +static void connmark_tg_destroy(const struct xt_tgdtor_param *par) +{ + nf_ct_l3proto_module_put(par->family); +} + static bool connmark_mt(const struct sk_buff *skb, const struct xt_match_param *par) { @@ -62,6 +118,17 @@ static void connmark_mt_destroy(const struct xt_mtdtor_param *par) nf_ct_l3proto_module_put(par->family); } +static struct xt_target connmark_tg_reg __read_mostly = { + .name = "CONNMARK", + .revision = 1, + .family = NFPROTO_UNSPEC, + .checkentry = connmark_tg_check, + .target = connmark_tg, + .targetsize = sizeof(struct xt_connmark_tginfo1), + .destroy = connmark_tg_destroy, + .me = THIS_MODULE, +}; + static struct xt_match connmark_mt_reg __read_mostly = { .name = "connmark", .revision = 1, @@ -75,12 +142,23 @@ static struct xt_match connmark_mt_reg __read_mostly = { static int __init connmark_mt_init(void) { - return xt_register_match(&connmark_mt_reg); + int ret; + + ret = xt_register_target(&connmark_tg_reg); + if (ret < 0) + return ret; + ret = xt_register_match(&connmark_mt_reg); + if (ret < 0) { + xt_unregister_target(&connmark_tg_reg); + return ret; + } + return 0; } static void __exit connmark_mt_exit(void) { xt_unregister_match(&connmark_mt_reg); + xt_unregister_target(&connmark_tg_reg); } module_init(connmark_mt_init); -- cgit v1.2.2 From 408ffaa4a11ddd6f730be520479fd5cd890c57d3 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sun, 28 Feb 2010 23:19:52 +0100 Subject: netfilter: update my email address Signed-off-by: Jan Engelhardt --- net/bridge/netfilter/ebt_ip6.c | 2 +- net/netfilter/xt_TCPOPTSTRIP.c | 3 +-- net/netfilter/xt_connlimit.c | 1 - net/netfilter/xt_connmark.c | 2 +- net/netfilter/xt_hashlimit.c | 2 +- net/netfilter/xt_recent.c | 2 +- net/netfilter/xt_time.c | 3 +-- net/netfilter/xt_u32.c | 3 +-- 8 files changed, 7 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c index bbf2534ef026..4cb60f1ae7ad 100644 --- a/net/bridge/netfilter/ebt_ip6.c +++ b/net/bridge/netfilter/ebt_ip6.c @@ -4,7 +4,7 @@ * Authors: * Manohar Castelino * Kuo-Lang Tseng - * Jan Engelhardt + * Jan Engelhardt * * Summary: * This is just a modification of the IPv4 code written by diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c index 9dd8c8ef63eb..e8b57609ddc0 100644 --- a/net/netfilter/xt_TCPOPTSTRIP.c +++ b/net/netfilter/xt_TCPOPTSTRIP.c @@ -3,7 +3,6 @@ * * Copyright (C) 2007 Sven Schnelle * Copyright © CC Computer Consultants GmbH, 2007 - * Contact: Jan Engelhardt * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -136,7 +135,7 @@ static void __exit tcpoptstrip_tg_exit(void) module_init(tcpoptstrip_tg_init); module_exit(tcpoptstrip_tg_exit); -MODULE_AUTHOR("Sven Schnelle , Jan Engelhardt "); +MODULE_AUTHOR("Sven Schnelle , Jan Engelhardt "); 
MODULE_DESCRIPTION("Xtables: TCP option stripping"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_TCPOPTSTRIP"); diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 26997ce90e48..9e624af40f80 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -5,7 +5,6 @@ * Nov 2002: Martin Bene : * only ignore TIME_WAIT or gone connections * (C) CC Computer Consultants GmbH, 2007 - * Contact: * * based on ... * diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index d184515604b6..97465a472344 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -4,7 +4,7 @@ * Copyright (C) 2002,2004 MARA Systems AB * by Henrik Nordstrom * Copyright © CC Computer Consultants GmbH, 2007 - 2008 - * Jan Engelhardt + * Jan Engelhardt * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 9e9c48963942..ba9601a767aa 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -36,7 +36,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte "); -MODULE_AUTHOR("Jan Engelhardt "); +MODULE_AUTHOR("Jan Engelhardt "); MODULE_DESCRIPTION("Xtables: per hash-bucket rate-limit match"); MODULE_ALIAS("ipt_hashlimit"); MODULE_ALIAS("ip6t_hashlimit"); diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 7073dbb8100c..1af74dd563d5 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -34,7 +34,7 @@ #include MODULE_AUTHOR("Patrick McHardy "); -MODULE_AUTHOR("Jan Engelhardt "); +MODULE_AUTHOR("Jan Engelhardt "); MODULE_DESCRIPTION("Xtables: \"recently-seen\" host matching for IPv4"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_recent"); diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index 93acaa59d108..9a9c9a3b0a5d 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -1,7 +1,6 @@ /* * xt_time * Copyright © CC Computer Consultants GmbH, 2007 - * Contact: * * based on ipt_time by Fabrice MARIE * This is a module which is used for time matching @@ -264,7 +263,7 @@ static void __exit time_mt_exit(void) module_init(time_mt_init); module_exit(time_mt_exit); -MODULE_AUTHOR("Jan Engelhardt "); +MODULE_AUTHOR("Jan Engelhardt "); MODULE_DESCRIPTION("Xtables: time-based matching"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_time"); diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c index 24a527624500..d7c05f03a7e7 100644 --- a/net/netfilter/xt_u32.c +++ b/net/netfilter/xt_u32.c @@ -3,7 +3,6 @@ * * Original author: Don Cohen * (C) CC Computer Consultants GmbH, 2007 - * Contact: */ #include @@ -117,7 +116,7 @@ static void __exit u32_mt_exit(void) module_init(u32_mt_init); module_exit(u32_mt_exit); -MODULE_AUTHOR("Jan Engelhardt "); +MODULE_AUTHOR("Jan Engelhardt "); MODULE_DESCRIPTION("Xtables: arbitrary byte matching"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_u32"); -- cgit v1.2.2 From 8244f4baf5e1793a75a1d4409efc3e2d9d71c9cb Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sun, 28 Feb 2010 23:22:04 +0100 Subject: netfilter: ebt_ip6: add principal maintainer in a MODULE_AUTHOR tag Cc: Kuo-Lang Tseng Signed-off-by: Jan Engelhardt --- net/bridge/netfilter/ebt_ip6.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c index 4cb60f1ae7ad..05d0d0c4ccb9 100644 --- a/net/bridge/netfilter/ebt_ip6.c +++ 
b/net/bridge/netfilter/ebt_ip6.c @@ -139,4 +139,5 @@ static void __exit ebt_ip6_fini(void) module_init(ebt_ip6_init); module_exit(ebt_ip6_fini); MODULE_DESCRIPTION("Ebtables: IPv6 protocol packet match"); +MODULE_AUTHOR("Kuo-Lang Tseng "); MODULE_LICENSE("GPL"); -- cgit v1.2.2 From 06bf514e3199150c715ab72e8e0e335ae8f99666 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sun, 28 Feb 2010 23:22:35 +0100 Subject: netfilter: xt_recent: update description It had IPv6 for quite a while already :-) Signed-off-by: Jan Engelhardt --- net/netfilter/xt_recent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 1af74dd563d5..bcabfbc1cd05 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -35,7 +35,7 @@ MODULE_AUTHOR("Patrick McHardy "); MODULE_AUTHOR("Jan Engelhardt "); -MODULE_DESCRIPTION("Xtables: \"recently-seen\" host matching for IPv4"); +MODULE_DESCRIPTION("Xtables: \"recently-seen\" host matching"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_recent"); MODULE_ALIAS("ip6t_recent"); -- cgit v1.2.2 From 5be4a4f589841ef06fd79b241de3d5353a6c5efa Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 1 Mar 2010 11:55:33 +0100 Subject: netfilter: xt_recent: remove old proc directory The compat option was introduced in October 2008. Signed-off-by: Jan Engelhardt --- net/netfilter/Kconfig | 7 ---- net/netfilter/xt_recent.c | 105 ---------------------------------------------- 2 files changed, 112 deletions(-) (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 8550dfde7804..6ac28ef9300c 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -859,13 +859,6 @@ config NETFILTER_XT_MATCH_RECENT Short options are available by using 'iptables -m recent -h' Official Website: -config NETFILTER_XT_MATCH_RECENT_PROC_COMPAT - bool 'Enable obsolete /proc/net/ipt_recent' - depends on NETFILTER_XT_MATCH_RECENT && PROC_FS - ---help--- - This option enables the old /proc/net/ipt_recent interface, - which has been obsoleted by /proc/net/xt_recent. 
- config NETFILTER_XT_MATCH_SCTP tristate '"sctp" protocol match support (EXPERIMENTAL)' depends on EXPERIMENTAL diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index bcabfbc1cd05..2ff8aae84a38 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -83,9 +83,6 @@ struct recent_net { struct list_head tables; #ifdef CONFIG_PROC_FS struct proc_dir_entry *xt_recent; -#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT - struct proc_dir_entry *ipt_recent; -#endif #endif }; @@ -342,17 +339,6 @@ static bool recent_mt_check(const struct xt_mtchk_param *par) } pde->uid = ip_list_uid; pde->gid = ip_list_gid; -#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT - pde = proc_create_data(t->name, ip_list_perms, recent_net->ipt_recent, - &recent_old_fops, t); - if (pde == NULL) { - remove_proc_entry(t->name, recent_net->xt_recent); - kfree(t); - goto out; - } - pde->uid = ip_list_uid; - pde->gid = ip_list_gid; -#endif #endif spin_lock_bh(&recent_lock); list_add_tail(&t->list, &recent_net->tables); @@ -376,9 +362,6 @@ static void recent_mt_destroy(const struct xt_mtdtor_param *par) list_del(&t->list); spin_unlock_bh(&recent_lock); #ifdef CONFIG_PROC_FS -#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT - remove_proc_entry(t->name, recent_net->ipt_recent); -#endif remove_proc_entry(t->name, recent_net->xt_recent); #endif recent_table_flush(t); @@ -470,84 +453,6 @@ static int recent_seq_open(struct inode *inode, struct file *file) return 0; } -#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT -static int recent_old_seq_open(struct inode *inode, struct file *filp) -{ - static bool warned_of_old; - - if (unlikely(!warned_of_old)) { - printk(KERN_INFO KBUILD_MODNAME ": Use of /proc/net/ipt_recent" - " is deprecated; use /proc/net/xt_recent.\n"); - warned_of_old = true; - } - return recent_seq_open(inode, filp); -} - -static ssize_t recent_old_proc_write(struct file *file, - const char __user *input, - size_t size, loff_t *loff) -{ - const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); - struct recent_table *t = pde->data; - struct recent_entry *e; - char buf[sizeof("+255.255.255.255")], *c = buf; - union nf_inet_addr addr = {}; - int add; - - if (size > sizeof(buf)) - size = sizeof(buf); - if (copy_from_user(buf, input, size)) - return -EFAULT; - - c = skip_spaces(c); - - if (size - (c - buf) < 5) - return c - buf; - if (!strncmp(c, "clear", 5)) { - c += 5; - spin_lock_bh(&recent_lock); - recent_table_flush(t); - spin_unlock_bh(&recent_lock); - return c - buf; - } - - switch (*c) { - case '-': - add = 0; - c++; - break; - case '+': - c++; - default: - add = 1; - break; - } - addr.ip = in_aton(c); - - spin_lock_bh(&recent_lock); - e = recent_entry_lookup(t, &addr, NFPROTO_IPV4, 0); - if (e == NULL) { - if (add) - recent_entry_init(t, &addr, NFPROTO_IPV4, 0); - } else { - if (add) - recent_entry_update(t, e); - else - recent_entry_remove(t, e); - } - spin_unlock_bh(&recent_lock); - return size; -} - -static const struct file_operations recent_old_fops = { - .open = recent_old_seq_open, - .read = seq_read, - .write = recent_old_proc_write, - .release = seq_release_private, - .owner = THIS_MODULE, -}; -#endif - static ssize_t recent_mt_proc_write(struct file *file, const char __user *input, size_t size, loff_t *loff) @@ -636,21 +541,11 @@ static int __net_init recent_proc_net_init(struct net *net) recent_net->xt_recent = proc_mkdir("xt_recent", net->proc_net); if (!recent_net->xt_recent) return -ENOMEM; -#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT 
- recent_net->ipt_recent = proc_mkdir("ipt_recent", net->proc_net); - if (!recent_net->ipt_recent) { - proc_net_remove(net, "xt_recent"); - return -ENOMEM; - } -#endif return 0; } static void __net_exit recent_proc_net_exit(struct net *net) { -#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT - proc_net_remove(net, "ipt_recent"); -#endif proc_net_remove(net, "xt_recent"); } #else -- cgit v1.2.2 From 0079c5aee34880bcee7feee9960f0502c73dc5fa Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Tue, 16 Mar 2010 19:53:13 +0100 Subject: netfilter: xt_recent: add an entry reaper One of the problems with the way xt_recent is implemented is that there is no efficient way to remove expired entries. Of course, one can write a rule '-m recent --remove', but you have to know beforehand which entry to delete. This commit adds reaper logic which checks the head of the LRU list when a rule is invoked that has a '--seconds' value and XT_RECENT_REAP set. If an entry ceases to accumulate time stamps, then it will eventually bubble to the top of the LRU list where it is then reaped. Signed-off-by: Tim Gardner Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/netfilter/xt_recent.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 2ff8aae84a38..b65eca9e13a3 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -143,6 +143,25 @@ static void recent_entry_remove(struct recent_table *t, struct recent_entry *e) t->entries--; } +/* + * Drop entries with timestamps older then 'time'. + */ +static void recent_entry_reap(struct recent_table *t, unsigned long time) +{ + struct recent_entry *e; + + /* + * The head of the LRU list is always the oldest entry. + */ + e = list_entry(t->lru_list.next, struct recent_entry, lru_list); + + /* + * The last time stamp is the most recent. 
+ */ + if (time_after(time, e->stamps[e->index-1])) + recent_entry_remove(t, e); +} + static struct recent_entry * recent_entry_init(struct recent_table *t, const union nf_inet_addr *addr, u_int16_t family, u_int8_t ttl) @@ -269,6 +288,10 @@ recent_mt(const struct sk_buff *skb, const struct xt_match_param *par) break; } } + + /* info->seconds must be non-zero */ + if (info->check_set & XT_RECENT_REAP) + recent_entry_reap(t, time); } if (info->check_set & XT_RECENT_SET || @@ -301,7 +324,10 @@ static bool recent_mt_check(const struct xt_mtchk_param *par) XT_RECENT_CHECK | XT_RECENT_UPDATE)) != 1) return false; if ((info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE)) && - (info->seconds || info->hit_count)) + (info->seconds || info->hit_count || + (info->check_set & XT_RECENT_MODIFIERS))) + return false; + if ((info->check_set & XT_RECENT_REAP) && !info->seconds) return false; if (info->hit_count > ip_pkt_list_tot) { pr_info(KBUILD_MODNAME ": hitcount (%u) is larger than " -- cgit v1.2.2 From 606a9a02633c02d0e09fc96706f041053dbc57ee Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Wed, 17 Mar 2010 16:18:56 +0100 Subject: netfilter: xt_recent: check for unsupported user space flags Signed-off-by: Tim Gardner Signed-off-by: Patrick McHardy --- net/netfilter/xt_recent.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index b65eca9e13a3..d2e7c80cd3c3 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -319,6 +319,11 @@ static bool recent_mt_check(const struct xt_mtchk_param *par) get_random_bytes(&hash_rnd, sizeof(hash_rnd)); hash_rnd_inited = true; } + if (info->check_set & ~XT_RECENT_VALID_FLAGS) { + pr_info(KBUILD_MODNAME ": Unsupported user space flags " + "(%08x)\n", info->check_set); + return false; + } if (hweight8(info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE | XT_RECENT_CHECK | XT_RECENT_UPDATE)) != 1) -- cgit v1.2.2 From c01ae818a3e3578747861d9a0c7fabe9641786c1 Mon Sep 17 00:00:00 2001 From: Zhitong Wang Date: Wed, 17 Mar 2010 16:28:25 +0100 Subject: netfilter: remove unused headers in net/netfilter/nfnetlink.c Remove unused headers in net/netfilter/nfnetlink.c Signed-off-by: Zhitong Wang Signed-off-by: Patrick McHardy --- net/netfilter/nfnetlink.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 8eb0cc23ada3..0794f9a106ee 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -18,12 +18,9 @@ #include #include #include -#include -#include #include #include #include -#include #include #include #include -- cgit v1.2.2 From 7d5f7ed802507999c22949b84686d03987fe57ec Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 9 Mar 2010 23:27:24 +0100 Subject: netfilter: xtables: do without explicit XT_ALIGN XT_ALIGN is already applied on matchsize/targetsize in x_tables.c, so it is not strictly needed in the extensions. 
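As a minimal sketch of the effect (hypothetical names, not lifted from the hunks below), an extension registration can now pass the raw structure size and rely on x_tables.c to do the alignment:

/*
 * Illustrative only: "example" is a made-up target; the point is that
 * .targetsize no longer needs an explicit XT_ALIGN() wrapper because
 * x_tables.c already aligns match/target sizes centrally.
 */
static struct xt_target example_tg_reg __read_mostly = {
	.name       = "EXAMPLE",
	.revision   = 0,
	.family     = NFPROTO_UNSPEC,
	.target     = example_tg,
	.targetsize = sizeof(struct xt_example_info),
	.me         = THIS_MODULE,
};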
Signed-off-by: Jan Engelhardt --- net/netfilter/xt_CT.c | 2 +- net/netfilter/xt_LED.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index fda603edb31a..6509e03f1e62 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -137,7 +137,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par) static struct xt_target xt_ct_tg __read_mostly = { .name = "CT", .family = NFPROTO_UNSPEC, - .targetsize = XT_ALIGN(sizeof(struct xt_ct_target_info)), + .targetsize = sizeof(struct xt_ct_target_info), .checkentry = xt_ct_tg_check, .destroy = xt_ct_tg_destroy, .target = xt_ct_target, diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c index 8ff7843bb921..f86dc52367b1 100644 --- a/net/netfilter/xt_LED.c +++ b/net/netfilter/xt_LED.c @@ -141,7 +141,7 @@ static struct xt_target led_tg_reg __read_mostly = { .revision = 0, .family = NFPROTO_UNSPEC, .target = led_tg, - .targetsize = XT_ALIGN(sizeof(struct xt_led_info)), + .targetsize = sizeof(struct xt_led_info), .checkentry = led_tg_check, .destroy = led_tg_destroy, .me = THIS_MODULE, -- cgit v1.2.2 From 1d1c397db95f1c0cd95f6fa633c1e68acfaacec3 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 16 Mar 2010 21:09:04 +0100 Subject: netfilter: xtables: clean up xt_mac match routine Signed-off-by: Jan Engelhardt --- net/netfilter/xt_mac.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c index c2007116ce5b..2039d0741b37 100644 --- a/net/netfilter/xt_mac.c +++ b/net/netfilter/xt_mac.c @@ -26,14 +26,16 @@ MODULE_ALIAS("ip6t_mac"); static bool mac_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_mac_info *info = par->matchinfo; - - /* Is mac pointer valid? */ - return skb_mac_header(skb) >= skb->head && - skb_mac_header(skb) + ETH_HLEN <= skb->data - /* If so, compare... */ - && ((!compare_ether_addr(eth_hdr(skb)->h_source, info->srcaddr)) - ^ info->invert); + const struct xt_mac_info *info = par->matchinfo; + bool ret; + + if (skb_mac_header(skb) < skb->head) + return false; + if (skb_mac_header(skb) + ETH_HLEN > skb->data) + return false; + ret = compare_ether_addr(eth_hdr(skb)->h_source, info->srcaddr) == 0; + ret ^= info->invert; + return ret; } static struct xt_match mac_mt_reg __read_mostly = { -- cgit v1.2.2 From e5042a290097b7c2fc9b6a247585b6ed07b13026 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 16 Mar 2010 21:44:44 +0100 Subject: netfilter: xtables: limit xt_mac to ethernet devices I do not see a point of allowing the MAC module to work with devices that don't possibly have one, e.g. various tunnel interfaces such as tun and sit. 
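As a rough sketch of what such a restriction looks like inside a match routine (hypothetical function name, ARPHRD_ETHER comes from linux/if_arp.h; this is not the hunk itself):

static bool example_mt(const struct sk_buff *skb,
		       const struct xt_match_param *par)
{
	/* Refuse devices that cannot carry an Ethernet MAC header. */
	if (skb->dev == NULL || skb->dev->type != ARPHRD_ETHER)
		return false;
	/* ... the usual skb_mac_header() bounds checks would follow ... */
	return true;
}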
Signed-off-by: Jan Engelhardt --- net/netfilter/xt_mac.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c index 2039d0741b37..b971ce93773e 100644 --- a/net/netfilter/xt_mac.c +++ b/net/netfilter/xt_mac.c @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -29,6 +30,8 @@ static bool mac_mt(const struct sk_buff *skb, const struct xt_match_param *par) const struct xt_mac_info *info = par->matchinfo; bool ret; + if (skb->dev == NULL || skb->dev->type != ARPHRD_ETHER) + return false; if (skb_mac_header(skb) < skb->head) return false; if (skb_mac_header(skb) + ETH_HLEN > skb->data) -- cgit v1.2.2 From 115bc8f2874078e6ac78d88652a91b58447d5f4d Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 16 Mar 2010 20:06:55 +0100 Subject: netfilter: xtables: resort osf kconfig text Restore alphabetical ordering of the list and put the xt_osf option into its 'right' place again. Signed-off-by: Jan Engelhardt --- net/netfilter/Kconfig | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 6ac28ef9300c..8055786b7702 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -774,6 +774,19 @@ config NETFILTER_XT_MATCH_MULTIPORT To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_MATCH_OSF + tristate '"osf" Passive OS fingerprint match' + depends on NETFILTER_ADVANCED && NETFILTER_NETLINK + help + This option selects the Passive OS Fingerprinting match module + that allows to passively match the remote operating system by + analyzing incoming TCP SYN packets. + + Rules and loading software can be downloaded from + http://www.ioremap.net/projects/osf + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_OWNER tristate '"owner" match support' depends on NETFILTER_ADVANCED @@ -958,19 +971,6 @@ config NETFILTER_XT_MATCH_U32 Details and examples are in the kernel module source. -config NETFILTER_XT_MATCH_OSF - tristate '"osf" Passive OS fingerprint match' - depends on NETFILTER_ADVANCED && NETFILTER_NETLINK - help - This option selects the Passive OS Fingerprinting match module - that allows to passively match the remote operating system by - analyzing incoming TCP SYN packets. - - Rules and loading software can be downloaded from - http://www.ioremap.net/projects/osf - - To compile it as a module, choose M here. If unsure, say N. - endif # NETFILTER_XTABLES endmenu -- cgit v1.2.2 From aa5fa3185791aac71c9172d4fda3e8729164b5d1 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 18 Mar 2010 00:44:52 +0100 Subject: netfilter: xtables: make use of caller family rather than match family The matches can have .family = NFPROTO_UNSPEC, and though that is not the case for the touched modules, it seems better to just use the nfproto from the caller. 
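To make the distinction concrete, a hedged sketch of a check routine (hypothetical name, not one of the hunks below): par->family is the family of the table the rule is being loaded into, so it stays meaningful even when the match itself was registered with .family = NFPROTO_UNSPEC.

static bool example_mt_check(const struct xt_mtchk_param *par)
{
	/* was par->match->family; par->family is the caller's family */
	if (nf_ct_l3proto_try_module_get(par->family) < 0)
		return false;
	return true;
}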
Signed-off-by: Jan Engelhardt --- net/netfilter/xt_dscp.c | 2 +- net/netfilter/xt_hashlimit.c | 10 +++++----- net/netfilter/xt_policy.c | 4 ++-- net/netfilter/xt_recent.c | 6 +++--- net/netfilter/xt_state.c | 6 +++--- 5 files changed, 14 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c index 0280d3a8c161..fe58cd01ef99 100644 --- a/net/netfilter/xt_dscp.c +++ b/net/netfilter/xt_dscp.c @@ -58,7 +58,7 @@ static bool tos_mt(const struct sk_buff *skb, const struct xt_match_param *par) { const struct xt_tos_match_info *info = par->matchinfo; - if (par->match->family == NFPROTO_IPV4) + if (par->family == NFPROTO_IPV4) return ((ip_hdr(skb)->tos & info->tos_mask) == info->tos_value) ^ !!info->invert; else diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index ba9601a767aa..1fdb50a90f1c 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -703,8 +703,8 @@ static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par) return false; mutex_lock(&hashlimit_mutex); - r->hinfo = htable_find_get(net, r->name, par->match->family); - if (!r->hinfo && htable_create_v0(net, r, par->match->family) != 0) { + r->hinfo = htable_find_get(net, r->name, par->family); + if (!r->hinfo && htable_create_v0(net, r, par->family) != 0) { mutex_unlock(&hashlimit_mutex); return false; } @@ -730,7 +730,7 @@ static bool hashlimit_mt_check(const struct xt_mtchk_param *par) return false; if (info->name[sizeof(info->name)-1] != '\0') return false; - if (par->match->family == NFPROTO_IPV4) { + if (par->family == NFPROTO_IPV4) { if (info->cfg.srcmask > 32 || info->cfg.dstmask > 32) return false; } else { @@ -739,8 +739,8 @@ static bool hashlimit_mt_check(const struct xt_mtchk_param *par) } mutex_lock(&hashlimit_mutex); - info->hinfo = htable_find_get(net, info->name, par->match->family); - if (!info->hinfo && htable_create(net, info, par->match->family) != 0) { + info->hinfo = htable_find_get(net, info->name, par->family); + if (!info->hinfo && htable_create(net, info, par->family) != 0) { mutex_unlock(&hashlimit_mutex); return false; } diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index 4cbfebda8fa1..cc033d2c35ea 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -116,9 +116,9 @@ policy_mt(const struct sk_buff *skb, const struct xt_match_param *par) int ret; if (info->flags & XT_POLICY_MATCH_IN) - ret = match_policy_in(skb, info, par->match->family); + ret = match_policy_in(skb, info, par->family); else - ret = match_policy_out(skb, info, par->match->family); + ret = match_policy_out(skb, info, par->family); if (ret < 0) ret = info->flags & XT_POLICY_MATCH_NONE ? true : false; diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index d2e7c80cd3c3..a0ea1a21c470 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -233,7 +233,7 @@ recent_mt(const struct sk_buff *skb, const struct xt_match_param *par) u_int8_t ttl; bool ret = info->invert; - if (par->match->family == NFPROTO_IPV4) { + if (par->family == NFPROTO_IPV4) { const struct iphdr *iph = ip_hdr(skb); if (info->side == XT_RECENT_DEST) @@ -259,12 +259,12 @@ recent_mt(const struct sk_buff *skb, const struct xt_match_param *par) spin_lock_bh(&recent_lock); t = recent_table_lookup(recent_net, info->name); - e = recent_entry_lookup(t, &addr, par->match->family, + e = recent_entry_lookup(t, &addr, par->family, (info->check_set & XT_RECENT_TTL) ? 
ttl : 0); if (e == NULL) { if (!(info->check_set & XT_RECENT_SET)) goto out; - e = recent_entry_init(t, &addr, par->match->family, ttl); + e = recent_entry_init(t, &addr, par->family, ttl); if (e == NULL) *par->hotdrop = true; ret = !ret; diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index 4c946cbd731f..ef09b2e2970f 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -39,9 +39,9 @@ state_mt(const struct sk_buff *skb, const struct xt_match_param *par) static bool state_mt_check(const struct xt_mtchk_param *par) { - if (nf_ct_l3proto_try_module_get(par->match->family) < 0) { + if (nf_ct_l3proto_try_module_get(par->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", par->match->family); + "proto=%u\n", par->family); return false; } return true; @@ -49,7 +49,7 @@ static bool state_mt_check(const struct xt_mtchk_param *par) static void state_mt_destroy(const struct xt_mtdtor_param *par) { - nf_ct_l3proto_module_put(par->match->family); + nf_ct_l3proto_module_put(par->family); } static struct xt_match state_mt_reg[] __read_mostly = { -- cgit v1.2.2 From 4f948db1915ff05e4ce0fd98e6323db6a3ec0fc0 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 18 Mar 2010 11:03:51 +0100 Subject: netfilter: xtables: remove almost-unused xt_match_param.data member This member is taking up a "long" per match, yet is only used by one module out of the roughly 90 modules, ip6t_hbh. ip6t_hbh can be restructured a little to accomodate for the lack of the .data member. This variant uses checking the par->match address, which should avoid having to add two extra functions, including calls, i.e. (hbh_mt6: call hbhdst_mt6(skb, par, NEXTHDR_OPT), dst_mt6: call hbhdst_mt6(skb, par, NEXTHDR_DEST)) Signed-off-by: Jan Engelhardt --- net/ipv6/netfilter/ip6t_hbh.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index cbe8dec9744b..82593c8bdc3e 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -41,6 +41,8 @@ MODULE_ALIAS("ip6t_dst"); * 5 -> RTALERT 2 x x */ +static struct xt_match hbh_mt6_reg[] __read_mostly; + static bool hbh_mt6(const struct sk_buff *skb, const struct xt_match_param *par) { @@ -58,7 +60,9 @@ hbh_mt6(const struct sk_buff *skb, const struct xt_match_param *par) unsigned int optlen; int err; - err = ipv6_find_hdr(skb, &ptr, par->match->data, NULL); + err = ipv6_find_hdr(skb, &ptr, + (par->match == &hbh_mt6_reg[0]) ? + NEXTHDR_HOP : NEXTHDR_DEST, NULL); if (err < 0) { if (err != -ENOENT) *par->hotdrop = true; @@ -179,13 +183,13 @@ static bool hbh_mt6_check(const struct xt_mtchk_param *par) static struct xt_match hbh_mt6_reg[] __read_mostly = { { + /* Note, hbh_mt6 relies on the order of hbh_mt6_reg */ .name = "hbh", .family = NFPROTO_IPV6, .match = hbh_mt6, .matchsize = sizeof(struct ip6t_opts), .checkentry = hbh_mt6_check, .me = THIS_MODULE, - .data = NEXTHDR_HOP, }, { .name = "dst", @@ -194,7 +198,6 @@ static struct xt_match hbh_mt6_reg[] __read_mostly = { .matchsize = sizeof(struct ip6t_opts), .checkentry = hbh_mt6_check, .me = THIS_MODULE, - .data = NEXTHDR_DEST, }, }; -- cgit v1.2.2 From 85bc3f38147c5d3fb1eb9ca2236536389b592cae Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 18 Mar 2010 00:27:03 +0100 Subject: netfilter: xtables: do not print any messages on ENOMEM ENOMEM is a very obvious error code (cf. EINVAL), so I think we do not really need a warning message. 
Not to mention that if the allocation fails, the user is most likely going to get a stack trace from slab already. Signed-off-by: Jan Engelhardt --- net/bridge/netfilter/ebt_ulog.c | 7 ++----- net/netfilter/xt_LED.c | 4 +--- net/netfilter/xt_hashlimit.c | 8 ++------ net/netfilter/xt_statistic.c | 4 +--- 4 files changed, 6 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index c6ac657074a6..84340ab30ed3 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -305,13 +305,10 @@ static int __init ebt_ulog_init(void) ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, EBT_ULOG_MAXNLGROUPS, NULL, NULL, THIS_MODULE); - if (!ebtulognl) { - printk(KERN_WARNING KBUILD_MODNAME ": out of memory trying to " - "call netlink_kernel_create\n"); + if (!ebtulognl) ret = -ENOMEM; - } else if ((ret = xt_register_target(&ebt_ulog_tg_reg)) != 0) { + else if ((ret = xt_register_target(&ebt_ulog_tg_reg)) != 0) netlink_kernel_release(ebtulognl); - } if (ret == 0) nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger); diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c index f86dc52367b1..0d6c2885ebd6 100644 --- a/net/netfilter/xt_LED.c +++ b/net/netfilter/xt_LED.c @@ -92,10 +92,8 @@ static bool led_tg_check(const struct xt_tgchk_param *par) } ledinternal = kzalloc(sizeof(struct xt_led_info_internal), GFP_KERNEL); - if (!ledinternal) { - printk(KERN_CRIT KBUILD_MODNAME ": out of memory\n"); + if (!ledinternal) return false; - } ledinternal->netfilter_led_trigger.name = ledinfo->id; diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 1fdb50a90f1c..52327c5c1f1d 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -215,10 +215,8 @@ static int htable_create_v0(struct net *net, struct xt_hashlimit_info *minfo, u_ /* FIXME: don't use vmalloc() here or anywhere else -HW */ hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) + sizeof(struct list_head) * size); - if (!hinfo) { - printk(KERN_ERR "xt_hashlimit: unable to create hashtable\n"); + if (!hinfo) return -1; - } minfo->hinfo = hinfo; /* copy match config into hashtable config */ @@ -288,10 +286,8 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, /* FIXME: don't use vmalloc() here or anywhere else -HW */ hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) + sizeof(struct list_head) * size); - if (hinfo == NULL) { - printk(KERN_ERR "xt_hashlimit: unable to create hashtable\n"); + if (hinfo == NULL) return -1; - } minfo->hinfo = hinfo; /* copy match config into hashtable config */ diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index d8c0f8f1a78e..51ac1bbb4f52 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c @@ -61,10 +61,8 @@ static bool statistic_mt_check(const struct xt_mtchk_param *par) return false; info->master = kzalloc(sizeof(*info->master), GFP_KERNEL); - if (info->master == NULL) { - printk(KERN_ERR KBUILD_MODNAME ": Out of memory\n"); + if (info->master == NULL) return false; - } info->master->count = info->u.nth.count; return true; -- cgit v1.2.2 From be91fd5e323b46450ca82f6828e933e3791fb2f2 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 18 Mar 2010 02:22:32 +0100 Subject: netfilter: xtables: replace custom duprintf with pr_debug Signed-off-by: Jan Engelhardt --- net/ipv4/netfilter/ipt_ah.c | 18 ++++++------------ net/ipv6/netfilter/ip6t_mh.c | 11 +++-------- net/netfilter/x_tables.c | 12 
+++--------- net/netfilter/xt_esp.c | 18 ++++++------------ net/netfilter/xt_multiport.c | 16 +++++----------- net/netfilter/xt_sctp.c | 29 +++++++++++++---------------- net/netfilter/xt_tcpudp.c | 16 +++++----------- 7 files changed, 41 insertions(+), 79 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index 0104c0b399de..4f27e170c630 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c @@ -5,7 +5,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -18,21 +18,15 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Yon Uriarte "); MODULE_DESCRIPTION("Xtables: IPv4 IPsec-AH SPI match"); -#ifdef DEBUG_CONNTRACK -#define duprintf(format, args...) printk(format , ## args) -#else -#define duprintf(format, args...) -#endif - /* Returns 1 if the spi is matched by the range, 0 otherwise */ static inline bool spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) { bool r; - duprintf("ah spi_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ', - min,spi,max); + pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n", + invert ? '!' : ' ', min, spi, max); r=(spi >= min && spi <= max) ^ invert; - duprintf(" result %s\n",r? "PASS" : "FAILED"); + pr_debug(" result %s\n", r ? "PASS" : "FAILED"); return r; } @@ -51,7 +45,7 @@ static bool ah_mt(const struct sk_buff *skb, const struct xt_match_param *par) /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ - duprintf("Dropping evil AH tinygram.\n"); + pr_debug("Dropping evil AH tinygram.\n"); *par->hotdrop = true; return 0; } @@ -67,7 +61,7 @@ static bool ah_mt_check(const struct xt_mtchk_param *par) /* Must specify no unknown invflags */ if (ahinfo->invflags & ~IPT_AH_INV_MASK) { - duprintf("ipt_ah: unknown flags %X\n", ahinfo->invflags); + pr_debug("unknown flags %X\n", ahinfo->invflags); return false; } return true; diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c index aafe4e66577b..0181eb81d24b 100644 --- a/net/ipv6/netfilter/ip6t_mh.c +++ b/net/ipv6/netfilter/ip6t_mh.c @@ -11,6 +11,7 @@ * Based on net/netfilter/xt_tcpudp.c * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -24,12 +25,6 @@ MODULE_DESCRIPTION("Xtables: IPv6 Mobility Header match"); MODULE_LICENSE("GPL"); -#ifdef DEBUG_IP_FIREWALL_USER -#define duprintf(format, args...) printk(format , ## args) -#else -#define duprintf(format, args...) -#endif - /* Returns 1 if the type is matched by the range, 0 otherwise */ static inline bool type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert) @@ -51,13 +46,13 @@ static bool mh_mt6(const struct sk_buff *skb, const struct xt_match_param *par) if (mh == NULL) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ - duprintf("Dropping evil MH tinygram.\n"); + pr_debug("Dropping evil MH tinygram.\n"); *par->hotdrop = true; return false; } if (mh->ip6mh_proto != IPPROTO_NONE) { - duprintf("Dropping invalid MH Payload Proto: %u\n", + pr_debug("Dropping invalid MH Payload Proto: %u\n", mh->ip6mh_proto); *par->hotdrop = true; return false; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 0a12cedfe9e3..9a248d4a877f 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -12,7 +12,7 @@ * published by the Free Software Foundation. 
* */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -54,12 +54,6 @@ struct xt_af { static struct xt_af *xt; -#ifdef DEBUG_IP_FIREWALL_USER -#define duprintf(format, args...) printk(format , ## args) -#else -#define duprintf(format, args...) -#endif - static const char *const xt_prefix[NFPROTO_NUMPROTO] = { [NFPROTO_UNSPEC] = "x", [NFPROTO_IPV4] = "ip", @@ -720,7 +714,7 @@ xt_replace_table(struct xt_table *table, /* Check inside lock: is the old number correct? */ if (num_counters != private->number) { - duprintf("num_counters != table->private->number (%u/%u)\n", + pr_debug("num_counters != table->private->number (%u/%u)\n", num_counters, private->number); local_bh_enable(); *error = -EAGAIN; @@ -777,7 +771,7 @@ struct xt_table *xt_register_table(struct net *net, goto unlock; private = table->private; - duprintf("table->private->number = %u\n", private->number); + pr_debug("table->private->number = %u\n", private->number); /* save number of initial entries */ private->initial_entries = private->number; diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c index 609439967c2c..f9deecbef875 100644 --- a/net/netfilter/xt_esp.c +++ b/net/netfilter/xt_esp.c @@ -6,7 +6,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -24,21 +24,15 @@ MODULE_DESCRIPTION("Xtables: IPsec-ESP packet match"); MODULE_ALIAS("ipt_esp"); MODULE_ALIAS("ip6t_esp"); -#if 0 -#define duprintf(format, args...) printk(format , ## args) -#else -#define duprintf(format, args...) -#endif - /* Returns 1 if the spi is matched by the range, 0 otherwise */ static inline bool spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) { bool r; - duprintf("esp spi_match:%c 0x%x <= 0x%x <= 0x%x", invert ? '!' : ' ', - min, spi, max); + pr_debug("esp spi_match:%c 0x%x <= 0x%x <= 0x%x\n", + invert ? '!' : ' ', min, spi, max); r = (spi >= min && spi <= max) ^ invert; - duprintf(" result %s\n", r ? "PASS" : "FAILED"); + pr_debug(" result %s\n", r ? "PASS" : "FAILED"); return r; } @@ -57,7 +51,7 @@ static bool esp_mt(const struct sk_buff *skb, const struct xt_match_param *par) /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ - duprintf("Dropping evil ESP tinygram.\n"); + pr_debug("Dropping evil ESP tinygram.\n"); *par->hotdrop = true; return false; } @@ -71,7 +65,7 @@ static bool esp_mt_check(const struct xt_mtchk_param *par) const struct xt_esp *espinfo = par->matchinfo; if (espinfo->invflags & ~XT_ESP_INV_MASK) { - duprintf("xt_esp: unknown flags %X\n", espinfo->invflags); + pr_debug("unknown flags %X\n", espinfo->invflags); return false; } diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c index d06bb2dd3900..4fa90c86fdb5 100644 --- a/net/netfilter/xt_multiport.c +++ b/net/netfilter/xt_multiport.c @@ -8,7 +8,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -26,12 +26,6 @@ MODULE_DESCRIPTION("Xtables: multiple port matching for TCP, UDP, UDP-Lite, SCTP MODULE_ALIAS("ipt_multiport"); MODULE_ALIAS("ip6t_multiport"); -#if 0 -#define duprintf(format, args...) printk(format , ## args) -#else -#define duprintf(format, args...) -#endif - /* Returns 1 if the port is matched by the test, 0 otherwise. 
*/ static inline bool ports_match_v0(const u_int16_t *portlist, enum xt_multiport_flags flags, @@ -63,7 +57,7 @@ ports_match_v1(const struct xt_multiport_v1 *minfo, if (minfo->pflags[i]) { /* range port matching */ e = minfo->ports[++i]; - duprintf("src or dst matches with %d-%d?\n", s, e); + pr_debug("src or dst matches with %d-%d?\n", s, e); if (minfo->flags == XT_MULTIPORT_SOURCE && src >= s && src <= e) @@ -77,7 +71,7 @@ ports_match_v1(const struct xt_multiport_v1 *minfo, return true ^ minfo->invert; } else { /* exact port matching */ - duprintf("src or dst matches with %d?\n", s); + pr_debug("src or dst matches with %d?\n", s); if (minfo->flags == XT_MULTIPORT_SOURCE && src == s) @@ -109,7 +103,7 @@ multiport_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ - duprintf("xt_multiport: Dropping evil offset=0 tinygram.\n"); + pr_debug("Dropping evil offset=0 tinygram.\n"); *par->hotdrop = true; return false; } @@ -133,7 +127,7 @@ multiport_mt(const struct sk_buff *skb, const struct xt_match_param *par) /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ - duprintf("xt_multiport: Dropping evil offset=0 tinygram.\n"); + pr_debug("Dropping evil offset=0 tinygram.\n"); *par->hotdrop = true; return false; } diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index a189ada9128f..43c7e1de532c 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -1,3 +1,4 @@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -15,12 +16,6 @@ MODULE_DESCRIPTION("Xtables: SCTP protocol packet match"); MODULE_ALIAS("ipt_sctp"); MODULE_ALIAS("ip6t_sctp"); -#ifdef DEBUG_SCTP -#define duprintf(format, args...) printk(format , ## args) -#else -#define duprintf(format, args...) -#endif - #define SCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \ || (!!((invflag) & (option)) ^ (cond))) @@ -52,7 +47,7 @@ match_packet(const struct sk_buff *skb, const struct xt_sctp_flag_info *flag_info = info->flag_info; int flag_count = info->flag_count; -#ifdef DEBUG_SCTP +#ifdef DEBUG int i = 0; #endif @@ -62,17 +57,19 @@ match_packet(const struct sk_buff *skb, do { sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch); if (sch == NULL || sch->length == 0) { - duprintf("Dropping invalid SCTP packet.\n"); + pr_debug("Dropping invalid SCTP packet.\n"); *hotdrop = true; return false; } - - duprintf("Chunk num: %d\toffset: %d\ttype: %d\tlength: %d\tflags: %x\n", - ++i, offset, sch->type, htons(sch->length), sch->flags); - +#ifdef DEBUG + pr_debug("Chunk num: %d\toffset: %d\ttype: %d\tlength: %d" + "\tflags: %x\n", + ++i, offset, sch->type, htons(sch->length), + sch->flags); +#endif offset += (ntohs(sch->length) + 3) & ~3; - duprintf("skb->len: %d\toffset: %d\n", skb->len, offset); + pr_debug("skb->len: %d\toffset: %d\n", skb->len, offset); if (SCTP_CHUNKMAP_IS_SET(info->chunkmap, sch->type)) { switch (chunk_match_type) { @@ -124,17 +121,17 @@ sctp_mt(const struct sk_buff *skb, const struct xt_match_param *par) sctp_sctphdr_t _sh; if (par->fragoff != 0) { - duprintf("Dropping non-first fragment.. FIXME\n"); + pr_debug("Dropping non-first fragment.. 
FIXME\n"); return false; } sh = skb_header_pointer(skb, par->thoff, sizeof(_sh), &_sh); if (sh == NULL) { - duprintf("Dropping evil TCP offset=0 tinygram.\n"); + pr_debug("Dropping evil TCP offset=0 tinygram.\n"); *par->hotdrop = true; return false; } - duprintf("spt: %d\tdpt: %d\n", ntohs(sh->source), ntohs(sh->dest)); + pr_debug("spt: %d\tdpt: %d\n", ntohs(sh->source), ntohs(sh->dest)); return SCCHECK(ntohs(sh->source) >= info->spts[0] && ntohs(sh->source) <= info->spts[1], diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index 1ebdc4934eed..b53887f83c44 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c @@ -1,3 +1,4 @@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -19,13 +20,6 @@ MODULE_ALIAS("ipt_tcp"); MODULE_ALIAS("ip6t_udp"); MODULE_ALIAS("ip6t_tcp"); -#ifdef DEBUG_IP_FIREWALL_USER -#define duprintf(format, args...) printk(format , ## args) -#else -#define duprintf(format, args...) -#endif - - /* Returns 1 if the port is matched by the range, 0 otherwise */ static inline bool port_match(u_int16_t min, u_int16_t max, u_int16_t port, bool invert) @@ -46,7 +40,7 @@ tcp_find_option(u_int8_t option, u_int8_t _opt[60 - sizeof(struct tcphdr)]; unsigned int i; - duprintf("tcp_match: finding option\n"); + pr_debug("finding option\n"); if (!optlen) return invert; @@ -82,7 +76,7 @@ static bool tcp_mt(const struct sk_buff *skb, const struct xt_match_param *par) flag overwrite to pass the direction checks. */ if (par->fragoff == 1) { - duprintf("Dropping evil TCP offset=1 frag.\n"); + pr_debug("Dropping evil TCP offset=1 frag.\n"); *par->hotdrop = true; } /* Must not be a fragment. */ @@ -95,7 +89,7 @@ static bool tcp_mt(const struct sk_buff *skb, const struct xt_match_param *par) if (th == NULL) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ - duprintf("Dropping evil TCP offset=0 tinygram.\n"); + pr_debug("Dropping evil TCP offset=0 tinygram.\n"); *par->hotdrop = true; return false; } @@ -148,7 +142,7 @@ static bool udp_mt(const struct sk_buff *skb, const struct xt_match_param *par) if (uh == NULL) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. 
*/ - duprintf("Dropping evil UDP tinygram.\n"); + pr_debug("Dropping evil UDP tinygram.\n"); *par->hotdrop = true; return false; } -- cgit v1.2.2 From 8bee4bad03c5b601bd6cea123c31025680587ccc Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 17 Mar 2010 16:04:40 +0100 Subject: netfilter: xt extensions: use pr_ Signed-off-by: Jan Engelhardt --- net/netfilter/xt_CONNSECMARK.c | 13 ++++++------- net/netfilter/xt_DSCP.c | 4 ++-- net/netfilter/xt_HL.c | 10 ++++------ net/netfilter/xt_LED.c | 10 ++++------ net/netfilter/xt_SECMARK.c | 20 ++++++++++---------- net/netfilter/xt_TCPMSS.c | 18 +++++++++--------- net/netfilter/xt_cluster.c | 11 ++++++----- net/netfilter/xt_connbytes.c | 5 +++-- net/netfilter/xt_connlimit.c | 5 +++-- net/netfilter/xt_connmark.c | 8 ++++---- net/netfilter/xt_conntrack.c | 6 +++--- net/netfilter/xt_dscp.c | 4 ++-- net/netfilter/xt_hashlimit.c | 18 ++++++++---------- net/netfilter/xt_helper.c | 6 +++--- net/netfilter/xt_hl.c | 12 ------------ net/netfilter/xt_limit.c | 6 +++--- net/netfilter/xt_osf.c | 8 ++++---- net/netfilter/xt_physdev.c | 8 ++++---- net/netfilter/xt_policy.c | 13 +++++-------- net/netfilter/xt_recent.c | 6 +++--- net/netfilter/xt_state.c | 4 ++-- 21 files changed, 88 insertions(+), 107 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index b54c3756fdc3..6812865488d6 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -15,6 +15,7 @@ * published by the Free Software Foundation. * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -22,8 +23,6 @@ #include #include -#define PFX "CONNSECMARK: " - MODULE_LICENSE("GPL"); MODULE_AUTHOR("James Morris "); MODULE_DESCRIPTION("Xtables: target for copying between connection and security mark"); @@ -91,8 +90,8 @@ static bool connsecmark_tg_check(const struct xt_tgchk_param *par) if (strcmp(par->table, "mangle") != 0 && strcmp(par->table, "security") != 0) { - printk(KERN_INFO PFX "target only valid in the \'mangle\' " - "or \'security\' tables, not \'%s\'.\n", par->table); + pr_info("target only valid in the \'mangle\' " + "or \'security\' tables, not \'%s\'.\n", par->table); return false; } @@ -102,13 +101,13 @@ static bool connsecmark_tg_check(const struct xt_tgchk_param *par) break; default: - printk(KERN_INFO PFX "invalid mode: %hu\n", info->mode); + pr_info("invalid mode: %hu\n", info->mode); return false; } if (nf_ct_l3proto_try_module_get(par->family) < 0) { - printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", par->family); + pr_info("cannot load conntrack support for proto=%u\n", + par->family); return false; } return true; diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index 74ce89260056..bbf08a91c600 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -9,7 +9,7 @@ * * See RFC2474 for a description of the DSCP field within the IP Header. 
*/ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -65,7 +65,7 @@ static bool dscp_tg_check(const struct xt_tgchk_param *par) const struct xt_DSCP_info *info = par->targinfo; if (info->dscp > XT_DSCP_MAX) { - printk(KERN_WARNING "DSCP: dscp %x out of range\n", info->dscp); + pr_info("dscp %x out of range\n", info->dscp); return false; } return true; diff --git a/net/netfilter/xt_HL.c b/net/netfilter/xt_HL.c index 10e789e2d12a..7004ed2ffa44 100644 --- a/net/netfilter/xt_HL.c +++ b/net/netfilter/xt_HL.c @@ -9,7 +9,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -106,8 +106,7 @@ static bool ttl_tg_check(const struct xt_tgchk_param *par) const struct ipt_TTL_info *info = par->targinfo; if (info->mode > IPT_TTL_MAXMODE) { - printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n", - info->mode); + pr_info("TTL: invalid or unknown mode %u\n", info->mode); return false; } if (info->mode != IPT_TTL_SET && info->ttl == 0) @@ -120,12 +119,11 @@ static bool hl_tg6_check(const struct xt_tgchk_param *par) const struct ip6t_HL_info *info = par->targinfo; if (info->mode > IP6T_HL_MAXMODE) { - printk(KERN_WARNING "ip6t_HL: invalid or unknown Mode %u\n", - info->mode); + pr_info("invalid or unknown mode %u\n", info->mode); return false; } if (info->mode != IP6T_HL_SET && info->hop_limit == 0) { - printk(KERN_WARNING "ip6t_HL: increment/decrement doesn't " + pr_info("increment/decrement does not " "make sense with value 0\n"); return false; } diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c index 0d6c2885ebd6..f511bea9464a 100644 --- a/net/netfilter/xt_LED.c +++ b/net/netfilter/xt_LED.c @@ -18,7 +18,7 @@ * 02110-1301 USA. * */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -87,7 +87,7 @@ static bool led_tg_check(const struct xt_tgchk_param *par) int err; if (ledinfo->id[0] == '\0') { - printk(KERN_ERR KBUILD_MODNAME ": No 'id' parameter given.\n"); + pr_info("No 'id' parameter given.\n"); return false; } @@ -99,11 +99,9 @@ static bool led_tg_check(const struct xt_tgchk_param *par) err = led_trigger_register(&ledinternal->netfilter_led_trigger); if (err) { - printk(KERN_CRIT KBUILD_MODNAME - ": led_trigger_register() failed\n"); + pr_warning("led_trigger_register() failed\n"); if (err == -EEXIST) - printk(KERN_ERR KBUILD_MODNAME - ": Trigger name is already in use.\n"); + pr_warning("Trigger name is already in use.\n"); goto exit_alloc; } diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index 7a6f9e6f5dfa..4855fd9d7c6f 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -12,6 +12,7 @@ * published by the Free Software Foundation. 
* */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -59,20 +60,19 @@ static bool checkentry_selinux(struct xt_secmark_target_info *info) err = selinux_string_to_sid(sel->selctx, &sel->selsid); if (err) { if (err == -EINVAL) - printk(KERN_INFO PFX "invalid SELinux context \'%s\'\n", - sel->selctx); + pr_info("invalid SELinux context \'%s\'\n", + sel->selctx); return false; } if (!sel->selsid) { - printk(KERN_INFO PFX "unable to map SELinux context \'%s\'\n", - sel->selctx); + pr_info("unable to map SELinux context \'%s\'\n", sel->selctx); return false; } err = selinux_secmark_relabel_packet_permission(sel->selsid); if (err) { - printk(KERN_INFO PFX "unable to obtain relabeling permission\n"); + pr_info("unable to obtain relabeling permission\n"); return false; } @@ -86,14 +86,14 @@ static bool secmark_tg_check(const struct xt_tgchk_param *par) if (strcmp(par->table, "mangle") != 0 && strcmp(par->table, "security") != 0) { - printk(KERN_INFO PFX "target only valid in the \'mangle\' " - "or \'security\' tables, not \'%s\'.\n", par->table); + pr_info("target only valid in the \'mangle\' " + "or \'security\' tables, not \'%s\'.\n", par->table); return false; } if (mode && mode != info->mode) { - printk(KERN_INFO PFX "mode already set to %hu cannot mix with " - "rules for mode %hu\n", mode, info->mode); + pr_info("mode already set to %hu cannot mix with " + "rules for mode %hu\n", mode, info->mode); return false; } @@ -104,7 +104,7 @@ static bool secmark_tg_check(const struct xt_tgchk_param *par) break; default: - printk(KERN_INFO PFX "invalid mode: %hu\n", info->mode); + pr_info("invalid mode: %hu\n", info->mode); return false; } diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 0e357ac9a2a8..2077da31c973 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -7,7 +7,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. 
*/ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -67,14 +67,14 @@ tcpmss_mangle_packet(struct sk_buff *skb, if (info->mss == XT_TCPMSS_CLAMP_PMTU) { if (dst_mtu(skb_dst(skb)) <= minlen) { if (net_ratelimit()) - printk(KERN_ERR "xt_TCPMSS: " + pr_err("xt_TCPMSS: " "unknown or invalid path-MTU (%u)\n", dst_mtu(skb_dst(skb))); return -1; } if (in_mtu <= minlen) { if (net_ratelimit()) - printk(KERN_ERR "xt_TCPMSS: unknown or " + pr_err("xt_TCPMSS: unknown or " "invalid path-MTU (%u)\n", in_mtu); return -1; } @@ -245,14 +245,14 @@ static bool tcpmss_tg4_check(const struct xt_tgchk_param *par) (par->hook_mask & ~((1 << NF_INET_FORWARD) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_POST_ROUTING))) != 0) { - printk("xt_TCPMSS: path-MTU clamping only supported in " - "FORWARD, OUTPUT and POSTROUTING hooks\n"); + pr_info("path-MTU clamping only supported in " + "FORWARD, OUTPUT and POSTROUTING hooks\n"); return false; } xt_ematch_foreach(ematch, e) if (find_syn_match(ematch)) return true; - printk("xt_TCPMSS: Only works on TCP SYN packets\n"); + pr_info("Only works on TCP SYN packets\n"); return false; } @@ -267,14 +267,14 @@ static bool tcpmss_tg6_check(const struct xt_tgchk_param *par) (par->hook_mask & ~((1 << NF_INET_FORWARD) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_POST_ROUTING))) != 0) { - printk("xt_TCPMSS: path-MTU clamping only supported in " - "FORWARD, OUTPUT and POSTROUTING hooks\n"); + pr_info("path-MTU clamping only supported in " + "FORWARD, OUTPUT and POSTROUTING hooks\n"); return false; } xt_ematch_foreach(ematch, e) if (find_syn_match(ematch)) return true; - printk("xt_TCPMSS: Only works on TCP SYN packets\n"); + pr_info("Only works on TCP SYN packets\n"); return false; } #endif diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index 225ee3ecd69d..4c273e871301 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -5,6 +5,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -136,14 +137,14 @@ static bool xt_cluster_mt_checkentry(const struct xt_mtchk_param *par) struct xt_cluster_match_info *info = par->matchinfo; if (info->total_nodes > XT_CLUSTER_NODES_MAX) { - printk(KERN_ERR "xt_cluster: you have exceeded the maximum " - "number of cluster nodes (%u > %u)\n", - info->total_nodes, XT_CLUSTER_NODES_MAX); + pr_info("you have exceeded the maximum " + "number of cluster nodes (%u > %u)\n", + info->total_nodes, XT_CLUSTER_NODES_MAX); return false; } if (info->node_mask >= (1ULL << info->total_nodes)) { - printk(KERN_ERR "xt_cluster: this node mask cannot be " - "higher than the total number of nodes\n"); + pr_info("this node mask cannot be " + "higher than the total number of nodes\n"); return false; } return true; diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index 955e6598a7f0..edb7bbd9ae54 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -1,6 +1,7 @@ /* Kernel module to match connection tracking byte counter. * GPL (C) 2002 Martin Devera (devik@cdi.cz). 
*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -107,8 +108,8 @@ static bool connbytes_mt_check(const struct xt_mtchk_param *par) return false; if (nf_ct_l3proto_try_module_get(par->family) < 0) { - printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", par->family); + pr_info("cannot load conntrack support for proto=%u\n", + par->family); return false; } diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 9e624af40f80..d5b26dab9e26 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -11,6 +11,7 @@ * Kernel module to match connection tracking information. * GPL (C) 1999 Rusty Russell (rusty@rustcorp.com.au). */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -225,8 +226,8 @@ static bool connlimit_mt_check(const struct xt_mtchk_param *par) connlimit_rnd_inited = true; } if (nf_ct_l3proto_try_module_get(par->family) < 0) { - printk(KERN_WARNING "cannot load conntrack support for " - "address family %u\n", par->family); + pr_info("cannot load conntrack support for " + "address family %u\n", par->family); return false; } diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 97465a472344..7a51ba63f545 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -77,8 +77,8 @@ connmark_tg(struct sk_buff *skb, const struct xt_target_param *par) static bool connmark_tg_check(const struct xt_tgchk_param *par) { if (nf_ct_l3proto_try_module_get(par->family) < 0) { - printk(KERN_WARNING "cannot load conntrack support for " - "proto=%u\n", par->family); + pr_info("cannot load conntrack support for proto=%u\n", + par->family); return false; } return true; @@ -106,8 +106,8 @@ connmark_mt(const struct sk_buff *skb, const struct xt_match_param *par) static bool connmark_mt_check(const struct xt_mtchk_param *par) { if (nf_ct_l3proto_try_module_get(par->family) < 0) { - printk(KERN_WARNING "cannot load conntrack support for " - "proto=%u\n", par->family); + pr_info("cannot load conntrack support for proto=%u\n", + par->family); return false; } return true; diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index ae66305f0fe5..387172b6b0d8 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -9,7 +9,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -209,8 +209,8 @@ conntrack_mt_v2(const struct sk_buff *skb, const struct xt_match_param *par) static bool conntrack_mt_check(const struct xt_mtchk_param *par) { if (nf_ct_l3proto_try_module_get(par->family) < 0) { - printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", par->family); + pr_info("cannot load conntrack support for proto=%u\n", + par->family); return false; } return true; diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c index fe58cd01ef99..6ecedc13db0c 100644 --- a/net/netfilter/xt_dscp.c +++ b/net/netfilter/xt_dscp.c @@ -6,7 +6,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. 
*/ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -47,7 +47,7 @@ static bool dscp_mt_check(const struct xt_mtchk_param *par) const struct xt_dscp_info *info = par->matchinfo; if (info->dscp > XT_DSCP_MAX) { - printk(KERN_ERR "xt_dscp: dscp %x out of range\n", info->dscp); + pr_info("dscp %x out of range\n", info->dscp); return false; } diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 52327c5c1f1d..8f3e0c02ca54 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -7,6 +7,7 @@ * * Development of this code was funded by Astaro AG, http://www.astaro.com/ */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -166,17 +167,14 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht, if (ht->cfg.max && ht->count >= ht->cfg.max) { /* FIXME: do something. question is what.. */ if (net_ratelimit()) - printk(KERN_WARNING - "xt_hashlimit: max count of %u reached\n", - ht->cfg.max); + pr_err("max count of %u reached\n", ht->cfg.max); return NULL; } ent = kmem_cache_alloc(hashlimit_cachep, GFP_ATOMIC); if (!ent) { if (net_ratelimit()) - printk(KERN_ERR - "xt_hashlimit: can't allocate dsthash_ent\n"); + pr_err("cannot allocate dsthash_ent\n"); return NULL; } memcpy(&ent->dst, dst, sizeof(ent->dst)); @@ -681,8 +679,8 @@ static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par) /* Check for overflow. */ if (r->cfg.burst == 0 || user2credits(r->cfg.avg * r->cfg.burst) < user2credits(r->cfg.avg)) { - printk(KERN_ERR "xt_hashlimit: overflow, try lower: %u/%u\n", - r->cfg.avg, r->cfg.burst); + pr_info("overflow, try lower: %u/%u\n", + r->cfg.avg, r->cfg.burst); return false; } if (r->cfg.mode == 0 || @@ -718,8 +716,8 @@ static bool hashlimit_mt_check(const struct xt_mtchk_param *par) if (info->cfg.burst == 0 || user2credits(info->cfg.avg * info->cfg.burst) < user2credits(info->cfg.avg)) { - printk(KERN_ERR "xt_hashlimit: overflow, try lower: %u/%u\n", - info->cfg.avg, info->cfg.burst); + pr_info("overflow, try lower: %u/%u\n", + info->cfg.avg, info->cfg.burst); return false; } if (info->cfg.gc_interval == 0 || info->cfg.expire == 0) @@ -1018,7 +1016,7 @@ static int __init hashlimit_mt_init(void) sizeof(struct dsthash_ent), 0, 0, NULL); if (!hashlimit_cachep) { - printk(KERN_ERR "xt_hashlimit: unable to create slab cache\n"); + pr_warning("unable to create slab cache\n"); goto err2; } return 0; diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index 64fc7f277221..482aff2ccf7c 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -6,7 +6,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. 
*/ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -59,8 +59,8 @@ static bool helper_mt_check(const struct xt_mtchk_param *par) struct xt_helper_info *info = par->matchinfo; if (nf_ct_l3proto_try_module_get(par->family) < 0) { - printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", par->family); + pr_info("cannot load conntrack support for proto=%u\n", + par->family); return false; } info->name[29] = '\0'; diff --git a/net/netfilter/xt_hl.c b/net/netfilter/xt_hl.c index 7726154c87b2..be53f7299623 100644 --- a/net/netfilter/xt_hl.c +++ b/net/netfilter/xt_hl.c @@ -39,10 +39,6 @@ static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par) return ttl < info->ttl; case IPT_TTL_GT: return ttl > info->ttl; - default: - printk(KERN_WARNING "ipt_ttl: unknown mode %d\n", - info->mode); - return false; } return false; @@ -56,20 +52,12 @@ static bool hl_mt6(const struct sk_buff *skb, const struct xt_match_param *par) switch (info->mode) { case IP6T_HL_EQ: return ip6h->hop_limit == info->hop_limit; - break; case IP6T_HL_NE: return ip6h->hop_limit != info->hop_limit; - break; case IP6T_HL_LT: return ip6h->hop_limit < info->hop_limit; - break; case IP6T_HL_GT: return ip6h->hop_limit > info->hop_limit; - break; - default: - printk(KERN_WARNING "ip6t_hl: unknown mode %d\n", - info->mode); - return false; } return false; diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index a0ca5339af41..b3dfca63fa52 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -5,7 +5,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -105,8 +105,8 @@ static bool limit_mt_check(const struct xt_mtchk_param *par) /* Check for overflow. */ if (r->burst == 0 || user2credits(r->avg * r->burst) < user2credits(r->avg)) { - printk("Overflow in xt_limit, try lower: %u/%u\n", - r->avg, r->burst); + pr_info("Overflow, try lower: %u/%u\n", + r->avg, r->burst); return false; } diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 4169e200588d..8dcde13a0781 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -16,7 +16,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include @@ -382,14 +382,14 @@ static int __init xt_osf_init(void) err = nfnetlink_subsys_register(&xt_osf_nfnetlink); if (err < 0) { - printk(KERN_ERR "Failed (%d) to register OSF nsfnetlink helper.\n", err); + pr_err("Failed to register OSF nsfnetlink helper (%d)\n", err); goto err_out_exit; } err = xt_register_match(&xt_osf_match); if (err) { - printk(KERN_ERR "Failed (%d) to register OS fingerprint " - "matching module.\n", err); + pr_err("Failed to register OS fingerprint " + "matching module (%d)\n", err); goto err_out_remove; } diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 8d28ca5848bc..3d42a278408f 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -7,7 +7,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. 
*/ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -95,9 +95,9 @@ static bool physdev_mt_check(const struct xt_mtchk_param *par) info->invert & XT_PHYSDEV_OP_BRIDGED) && par->hook_mask & ((1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) { - printk(KERN_WARNING "physdev match: using --physdev-out in the " - "OUTPUT, FORWARD and POSTROUTING chains for non-bridged " - "traffic is not supported anymore.\n"); + pr_info("using --physdev-out in the OUTPUT, FORWARD and " + "POSTROUTING chains for non-bridged traffic is not " + "supported anymore.\n"); if (par->hook_mask & (1 << NF_INET_LOCAL_OUT)) return false; } diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index cc033d2c35ea..de3aded6afb8 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -6,7 +6,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -133,24 +133,21 @@ static bool policy_mt_check(const struct xt_mtchk_param *par) const struct xt_policy_info *info = par->matchinfo; if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) { - printk(KERN_ERR "xt_policy: neither incoming nor " - "outgoing policy selected\n"); + pr_info("neither incoming nor outgoing policy selected\n"); return false; } if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN)) && info->flags & XT_POLICY_MATCH_OUT) { - printk(KERN_ERR "xt_policy: output policy not valid in " - "PRE_ROUTING and INPUT\n"); + pr_info("output policy not valid in PREROUTING and INPUT\n"); return false; } if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_OUT)) && info->flags & XT_POLICY_MATCH_IN) { - printk(KERN_ERR "xt_policy: input policy not valid in " - "POST_ROUTING and OUTPUT\n"); + pr_info("input policy not valid in POSTROUTING and OUTPUT\n"); return false; } if (info->len > XT_POLICY_MAX_ELEM) { - printk(KERN_ERR "xt_policy: too many policy elements\n"); + pr_info("too many policy elements\n"); return false; } return true; diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index a0ea1a21c470..aa9817e91338 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -12,6 +12,7 @@ * Author: Stephen Frost * Copyright 2002-2003, Stephen Frost, 2.5.x port by laforge@netfilter.org */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -520,7 +521,7 @@ recent_mt_proc_write(struct file *file, const char __user *input, add = true; break; default: - printk(KERN_INFO KBUILD_MODNAME ": Need +ip, -ip or /\n"); + pr_info("Need \"+ip\", \"-ip\" or \"/\"\n"); return -EINVAL; } @@ -535,8 +536,7 @@ recent_mt_proc_write(struct file *file, const char __user *input, } if (!succ) { - printk(KERN_INFO KBUILD_MODNAME ": illegal address written " - "to procfs\n"); + pr_info("illegal address written to procfs\n"); return -EINVAL; } diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index ef09b2e2970f..94893be80276 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -40,8 +40,8 @@ state_mt(const struct sk_buff *skb, const struct xt_match_param *par) static bool state_mt_check(const struct xt_mtchk_param *par) { if (nf_ct_l3proto_try_module_get(par->family) < 0) { - printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", par->family); + pr_info("cannot load conntrack support for proto=%u\n", + par->family); return 
false; } return true; -- cgit v1.2.2 From 1e94d72feab025b8f7c55d07020602f82f3a97dd Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 18 Mar 2010 17:45:44 -0700 Subject: rps: Fixed build with CONFIG_SMP not enabled. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/core/dev.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 17b168671501..1a7e1d1d5ad9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2174,6 +2174,7 @@ int weight_p __read_mostly = 64; /* old backlog weight */ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; +#ifdef CONFIG_SMP /* * get_rps_cpu is called from netif_receive_skb and returns the target * CPU from the RPS map of the receiving queue for a given skb. @@ -2293,6 +2294,7 @@ static void trigger_softirq(void *data) __napi_schedule(&queue->backlog); __get_cpu_var(netdev_rx_stat).received_rps++; } +#endif /* CONFIG_SMP */ /* * enqueue_to_backlog is called to queue an skb to a per CPU backlog @@ -2320,6 +2322,7 @@ enqueue: /* Schedule NAPI for backlog device */ if (napi_schedule_prep(&queue->backlog)) { +#ifdef CONFIG_SMP if (cpu != smp_processor_id()) { struct rps_remote_softirq_cpus *rcpus = &__get_cpu_var(rps_remote_softirq_cpus); @@ -2328,6 +2331,9 @@ enqueue: __raise_softirq_irqoff(NET_RX_SOFTIRQ); } else __napi_schedule(&queue->backlog); +#else + __napi_schedule(&queue->backlog); +#endif } goto enqueue; } @@ -2367,9 +2373,13 @@ int netif_rx(struct sk_buff *skb) if (!skb->tstamp.tv64) net_timestamp(skb); +#ifdef CONFIG_SMP cpu = get_rps_cpu(skb->dev, skb); if (cpu < 0) cpu = smp_processor_id(); +#else + cpu = smp_processor_id(); +#endif return enqueue_to_backlog(skb, cpu); } @@ -2735,6 +2745,7 @@ out: */ int netif_receive_skb(struct sk_buff *skb) { +#ifdef CONFIG_SMP int cpu; cpu = get_rps_cpu(skb->dev, skb); @@ -2743,6 +2754,9 @@ int netif_receive_skb(struct sk_buff *skb) return __netif_receive_skb(skb); else return enqueue_to_backlog(skb, cpu); +#else + return __netif_receive_skb(skb); +#endif } EXPORT_SYMBOL(netif_receive_skb); @@ -3168,6 +3182,7 @@ void netif_napi_del(struct napi_struct *napi) } EXPORT_SYMBOL(netif_napi_del); +#ifdef CONFIG_SMP /* * net_rps_action sends any pending IPI's for rps. This is only called from * softirq and interrupts must be enabled. 
@@ -3184,6 +3199,7 @@ static void net_rps_action(cpumask_t *mask) } cpus_clear(*mask); } +#endif static void net_rx_action(struct softirq_action *h) { @@ -3191,8 +3207,10 @@ static void net_rx_action(struct softirq_action *h) unsigned long time_limit = jiffies + 2; int budget = netdev_budget; void *have; +#ifdef CONFIG_SMP int select; struct rps_remote_softirq_cpus *rcpus; +#endif local_irq_disable(); @@ -3255,6 +3273,7 @@ static void net_rx_action(struct softirq_action *h) netpoll_poll_unlock(have); } out: +#ifdef CONFIG_SMP rcpus = &__get_cpu_var(rps_remote_softirq_cpus); select = rcpus->select; rcpus->select ^= 1; @@ -3262,6 +3281,9 @@ out: local_irq_enable(); net_rps_action(&rcpus->mask[select]); +#else + local_irq_enable(); +#endif #ifdef CONFIG_NET_DMA /* @@ -6204,9 +6226,11 @@ static int __init net_dev_init(void) queue->completion_queue = NULL; INIT_LIST_HEAD(&queue->poll_list); +#ifdef CONFIG_SMP queue->csd.func = trigger_softirq; queue->csd.info = queue; queue->csd.flags = 0; +#endif queue->backlog.poll = process_backlog; queue->backlog.weight = weight_p; -- cgit v1.2.2 From 93d9b7d7a85cfb4e1711d5226eba73586dd4919f Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 10 Mar 2010 10:28:56 +0000 Subject: net: rename notifier defines for netdev type change Since generally there could be more netdevices changing type other than bonding, making this event type name "bonding-unrelated" Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 4 ++-- net/ipv6/addrconf.c | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 51ca946e3392..c75320ef95c2 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1095,10 +1095,10 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, case NETDEV_DOWN: ip_mc_down(in_dev); break; - case NETDEV_BONDING_OLDTYPE: + case NETDEV_PRE_TYPE_CHANGE: ip_mc_unmap(in_dev); break; - case NETDEV_BONDING_NEWTYPE: + case NETDEV_POST_TYPE_CHANGE: ip_mc_remap(in_dev); break; case NETDEV_CHANGEMTU: diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3381b4317c27..8d41abc40db5 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -137,8 +137,8 @@ static DEFINE_SPINLOCK(addrconf_verify_lock); static void addrconf_join_anycast(struct inet6_ifaddr *ifp); static void addrconf_leave_anycast(struct inet6_ifaddr *ifp); -static void addrconf_bonding_change(struct net_device *dev, - unsigned long event); +static void addrconf_type_change(struct net_device *dev, + unsigned long event); static int addrconf_ifdown(struct net_device *dev, int how); static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags); @@ -2584,9 +2584,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, return notifier_from_errno(err); } break; - case NETDEV_BONDING_OLDTYPE: - case NETDEV_BONDING_NEWTYPE: - addrconf_bonding_change(dev, event); + case NETDEV_PRE_TYPE_CHANGE: + case NETDEV_POST_TYPE_CHANGE: + addrconf_type_change(dev, event); break; } @@ -2601,16 +2601,16 @@ static struct notifier_block ipv6_dev_notf = { .priority = 0 }; -static void addrconf_bonding_change(struct net_device *dev, unsigned long event) +static void addrconf_type_change(struct net_device *dev, unsigned long event) { struct inet6_dev *idev; ASSERT_RTNL(); idev = __in6_dev_get(dev); - if (event == NETDEV_BONDING_NEWTYPE) + if (event == NETDEV_POST_TYPE_CHANGE) ipv6_mc_remap(idev); - else if (event == NETDEV_BONDING_OLDTYPE) + else 
if (event == NETDEV_PRE_TYPE_CHANGE) ipv6_mc_unmap(idev); } -- cgit v1.2.2 From 3ca5b4042ecae5e73c59de62e4ac0db31c10e0f8 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 10 Mar 2010 10:29:35 +0000 Subject: bonding: check return value of notifier when changing type This patch adds the possibility to refuse the bonding type change for other subsystems (such as for example bridge, vlan, etc.) Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 1a7e1d1d5ad9..d1f027c41e73 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1084,9 +1084,9 @@ void netdev_state_change(struct net_device *dev) } EXPORT_SYMBOL(netdev_state_change); -void netdev_bonding_change(struct net_device *dev, unsigned long event) +int netdev_bonding_change(struct net_device *dev, unsigned long event) { - call_netdevice_notifiers(event, dev); + return call_netdevice_notifiers(event, dev); } EXPORT_SYMBOL(netdev_bonding_change); -- cgit v1.2.2 From 1c01fe14a87332cc88266fbd6e598319322eb96f Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 10 Mar 2010 10:30:19 +0000 Subject: net: forbid underlying devices to change their type It is not desirable for underlying devices to change their type. At the moment it is possible, for example, to have a bond whose type has changed from Ethernet to InfiniBand as a port of a bridge. This patch fixes this. Signed-off-by: Jiri Pirko Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller --- net/8021q/vlan.c | 4 ++++ net/bridge/br_notify.c | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'net') diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 453512266ea1..c39a5f41169c 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -530,6 +530,10 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, } unregister_netdevice_many(&list); break; + + case NETDEV_PRE_TYPE_CHANGE: + /* Forbid underlaying device to change its type. */ + return NOTIFY_BAD; } out: diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index 763a3ec292e5..1413b72acc7f 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -82,6 +82,10 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v case NETDEV_UNREGISTER: br_del_if(br, dev); break; + + case NETDEV_PRE_TYPE_CHANGE: + /* Forbid underlaying device to change its type. */ + return NOTIFY_BAD; } /* Events that may cause spanning tree to refresh */ -- cgit v1.2.2 From b634f87522dff87712df8bda2a6c9061954d552a Mon Sep 17 00:00:00 2001 From: Alexandra Kossovsky Date: Thu, 18 Mar 2010 20:29:24 -0700 Subject: tcp: Fix OOB POLLIN avoidance. From: Alexandra.Kossovsky@oktetlabs.ru Fixes kernel bugzilla #15541 Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5901010fad55..ae16f809e716 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -429,7 +429,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (tp->urg_seq == tp->copied_seq && !sock_flag(sk, SOCK_URGINLINE) && tp->urg_data) - target--; + target++; /* Potential race condition. 
If read of tp below will * escape above sk->sk_state, we can be illegally awaken -- cgit v1.2.2 From 0641e4fbf2f824faee00ea74c459a088d94905fd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Mar 2010 21:16:45 -0700 Subject: net: Potential null skb->dev dereference When doing "ifenslave -d bond0 eth0", there is chance to get NULL dereference in netif_receive_skb(), because dev->master suddenly becomes NULL after we tested it. We should use ACCESS_ONCE() to avoid this (or rcu_dereference()) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/8021q/vlan_core.c | 4 ++-- net/core/dev.c | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index c0316e0ca6e8..c584a0af77d3 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -11,7 +11,7 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, if (netpoll_rx(skb)) return NET_RX_DROP; - if (skb_bond_should_drop(skb)) + if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master))) goto drop; skb->skb_iif = skb->dev->ifindex; @@ -83,7 +83,7 @@ vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp, { struct sk_buff *p; - if (skb_bond_should_drop(skb)) + if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master))) goto drop; skb->skb_iif = skb->dev->ifindex; diff --git a/net/core/dev.c b/net/core/dev.c index bcc490cc9452..59d4394d2ce8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2483,6 +2483,7 @@ int netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; struct net_device *orig_dev; + struct net_device *master; struct net_device *null_or_orig; struct net_device *null_or_bond; int ret = NET_RX_DROP; @@ -2503,11 +2504,12 @@ int netif_receive_skb(struct sk_buff *skb) null_or_orig = NULL; orig_dev = skb->dev; - if (orig_dev->master) { - if (skb_bond_should_drop(skb)) + master = ACCESS_ONCE(orig_dev->master); + if (master) { + if (skb_bond_should_drop(skb, master)) null_or_orig = orig_dev; /* deliver only exact match */ else - skb->dev = orig_dev->master; + skb->dev = master; } __get_cpu_var(netdev_rx_stat).total++; -- cgit v1.2.2 From 1159683ef48469de71dc26f0ee1a9c30d131cf89 Mon Sep 17 00:00:00 2001 From: Zhitong Wang Date: Fri, 19 Mar 2010 16:01:54 +0100 Subject: netfilter: remove unused headers in net/ipv6/netfilter/ip6t_LOG.c Remove unused headers in net/ipv6/netfilter/ip6t_LOG.c Signed-off-by: Zhitong Wang Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/ip6t_LOG.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index b285fdf19050..e16c0c7d086d 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -11,7 +11,6 @@ */ #include -#include #include #include #include -- cgit v1.2.2 From 2c46cd8163b25bf6f38e612e9f1d162f0357c8f1 Mon Sep 17 00:00:00 2001 From: Zhitong Wang Date: Fri, 19 Mar 2010 16:04:10 +0100 Subject: netfilter: remove unused headers in net/ipv4/netfilter/nf_nat_h323.c Remove unused headers in net/ipv4/netfilter/nf_nat_h323.c Signed-off-by: Zhitong Wang Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_nat_h323.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index 7e8e6fc75413..d4c061874f8f 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -10,7 +10,6 @@ */ #include -#include #include #include -- cgit v1.2.2 From 
819bfecc4fc6b6e5a793f719a45b7146ce423b79 Mon Sep 17 00:00:00 2001 From: "florian@mickler.org" Date: Sat, 13 Mar 2010 13:31:05 +0100 Subject: rename new rfkill sysfs knobs This patch renames the (never officially released) sysfs-knobs "blocked_hw" and "blocked_sw" to "hard" and "soft", as the hardware vs software conotation is misleading. It also gets rid of not needed locks around u32-read-access. Signed-off-by: Florian Mickler Signed-off-by: John W. Linville --- net/rfkill/core.c | 35 ++++++++--------------------------- 1 file changed, 8 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 5f33151d70ae..7ae58b5b5a08 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -628,37 +628,25 @@ static ssize_t rfkill_persistent_show(struct device *dev, return sprintf(buf, "%d\n", rfkill->persistent); } -static ssize_t rfkill_blocked_hw_show(struct device *dev, +static ssize_t rfkill_hard_show(struct device *dev, struct device_attribute *attr, char *buf) { struct rfkill *rfkill = to_rfkill(dev); - unsigned long flags; - u32 state; - spin_lock_irqsave(&rfkill->lock, flags); - state = rfkill->state; - spin_unlock_irqrestore(&rfkill->lock, flags); - - return sprintf(buf, "%d\n", (state & RFKILL_BLOCK_HW) ? 1 : 0 ); + return sprintf(buf, "%d\n", (rfkill->state & RFKILL_BLOCK_HW) ? 1 : 0 ); } -static ssize_t rfkill_blocked_sw_show(struct device *dev, +static ssize_t rfkill_soft_show(struct device *dev, struct device_attribute *attr, char *buf) { struct rfkill *rfkill = to_rfkill(dev); - unsigned long flags; - u32 state; - - spin_lock_irqsave(&rfkill->lock, flags); - state = rfkill->state; - spin_unlock_irqrestore(&rfkill->lock, flags); - return sprintf(buf, "%d\n", (state & RFKILL_BLOCK_SW) ? 1 : 0 ); + return sprintf(buf, "%d\n", (rfkill->state & RFKILL_BLOCK_SW) ? 1 : 0 ); } -static ssize_t rfkill_blocked_sw_store(struct device *dev, +static ssize_t rfkill_soft_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -698,14 +686,8 @@ static ssize_t rfkill_state_show(struct device *dev, char *buf) { struct rfkill *rfkill = to_rfkill(dev); - unsigned long flags; - u32 state; - - spin_lock_irqsave(&rfkill->lock, flags); - state = rfkill->state; - spin_unlock_irqrestore(&rfkill->lock, flags); - return sprintf(buf, "%d\n", user_state_from_blocked(state)); + return sprintf(buf, "%d\n", user_state_from_blocked(rfkill->state)); } static ssize_t rfkill_state_store(struct device *dev, @@ -755,9 +737,8 @@ static struct device_attribute rfkill_dev_attrs[] = { __ATTR(persistent, S_IRUGO, rfkill_persistent_show, NULL), __ATTR(state, S_IRUGO|S_IWUSR, rfkill_state_show, rfkill_state_store), __ATTR(claim, S_IRUGO|S_IWUSR, rfkill_claim_show, rfkill_claim_store), - __ATTR(sw, S_IRUGO|S_IWUSR, rfkill_blocked_sw_show, - rfkill_blocked_sw_store), - __ATTR(hw, S_IRUGO, rfkill_blocked_hw_show, NULL), + __ATTR(soft, S_IRUGO|S_IWUSR, rfkill_soft_show, rfkill_soft_store), + __ATTR(hard, S_IRUGO, rfkill_hard_show, NULL), __ATTR_NULL }; -- cgit v1.2.2 From d11a4dc18bf41719c9f0d7ed494d295dd2973b92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Thu, 18 Mar 2010 23:20:20 +0000 Subject: ipv4: check rt_genid in dst_check Xfrm_dst keeps a reference to ipv4 rtable entries on each cached bundle. The only way to renew xfrm_dst when the underlying route has changed, is to implement dst_check for this. This is what ipv6 side does too. 
The problems started after 87c1e12b5eeb7b30b4b41291bef8e0b41fc3dde9 ("ipsec: Fix bogus bundle flowi") which fixed a bug causing xfrm_dst to not get reused, until that all lookups always generated new xfrm_dst with new route reference and path mtu worked. But after the fix, the old routes started to get reused even after they were expired causing pmtu to break (well it would occationally work if the rtable gc had run recently and marked the route obsolete causing dst_check to get called). Signed-off-by: Timo Teras Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/route.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a770df2493d2..32d396196df8 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1441,7 +1441,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, dev_hold(rt->u.dst.dev); if (rt->idev) in_dev_hold(rt->idev); - rt->u.dst.obsolete = 0; + rt->u.dst.obsolete = -1; rt->u.dst.lastuse = jiffies; rt->u.dst.path = &rt->u.dst; rt->u.dst.neighbour = NULL; @@ -1506,7 +1506,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) struct dst_entry *ret = dst; if (rt) { - if (dst->obsolete) { + if (dst->obsolete > 0) { ip_rt_put(rt); ret = NULL; } else if ((rt->rt_flags & RTCF_REDIRECTED) || @@ -1726,7 +1726,9 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) { - return NULL; + if (rt_is_expired((struct rtable *)dst)) + return NULL; + return dst; } static void ipv4_dst_destroy(struct dst_entry *dst) @@ -1888,7 +1890,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (!rth) goto e_nobufs; - rth->u.dst.output= ip_rt_bug; + rth->u.dst.output = ip_rt_bug; + rth->u.dst.obsolete = -1; atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; @@ -2054,6 +2057,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->fl.oif = 0; rth->rt_spec_dst= spec_dst; + rth->u.dst.obsolete = -1; rth->u.dst.input = ip_forward; rth->u.dst.output = ip_output; rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev)); @@ -2218,6 +2222,7 @@ local_input: goto e_nobufs; rth->u.dst.output= ip_rt_bug; + rth->u.dst.obsolete = -1; rth->rt_genid = rt_genid(net); atomic_set(&rth->u.dst.__refcnt, 1); @@ -2444,6 +2449,7 @@ static int __mkroute_output(struct rtable **result, rth->rt_spec_dst= fl->fl4_src; rth->u.dst.output=ip_output; + rth->u.dst.obsolete = -1; rth->rt_genid = rt_genid(dev_net(dev_out)); RT_CACHE_STAT_INC(out_slow_tot); -- cgit v1.2.2 From 10414444cb8a8ee8893e00390b7cf40502e28352 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 18 Mar 2010 23:00:22 +0000 Subject: ipv6: Remove redundant dst NULL check in ip6_dst_check As the only path leading to ip6_dst_check makes an indirect call through dst->ops, dst cannot be NULL in ip6_dst_check. This patch removes this check in case it misleads people who come across this code. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 52cd3eff31dc..7fcb0e5d1213 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -879,7 +879,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) rt = (struct rt6_info *) dst; - if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) + if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) return dst; return NULL; -- cgit v1.2.2 From 97e3ecd112ba45eb217cddab59f48659bc15d9d0 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 18 Mar 2010 11:27:32 +0000 Subject: TCP: check min TTL on received ICMP packets This adds RFC5082 checks for TTL on received ICMP packets. It adds some security against spoofed ICMP packets disrupting GTSM protected sessions. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 70df40980a87..f4df5f931f36 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -370,6 +370,11 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) if (sk->sk_state == TCP_CLOSE) goto out; + if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { + NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); + goto out; + } + icsk = inet_csk(sk); tp = tcp_sk(sk); seq = ntohl(th->seq); -- cgit v1.2.2 From a50436f2cd6e85794f7e1aad795ca8302177b896 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 17 Mar 2010 06:04:14 +0000 Subject: net: ipmr/ip6mr: fix potential out-of-bounds vif_table access mfc_parent of cache entries is used to index into the vif_table and is initialised from mfcctl->mfcc_parent. This can take values of up to 2^16-1, while the vif_table has only MAXVIFS (32) entries. The same problem affects ip6mr. Refuse invalid values to fix a potential out-of-bounds access. Unlike the other validity checks, this is checked in ipmr_mfc_add() instead of the setsockopt handler since it's unused in the delete path and might be uninitialized. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 3 +++ net/ipv6/ip6mr.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8582e12e4a62..0b9d03c54dc3 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -802,6 +802,9 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) int line; struct mfc_cache *uc, *c, **cp; + if (mfc->mfcc_parent >= MAXVIFS) + return -ENFILE; + line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); for (cp = &net->ipv4.mfc_cache_array[line]; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 52e0f74fdfe0..23e4ac0cc30e 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1113,6 +1113,9 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock) unsigned char ttls[MAXMIFS]; int i; + if (mfc->mf6cc_parent >= MAXMIFS) + return -ENFILE; + memset(ttls, 255, MAXMIFS); for (i = 0; i < MAXMIFS; i++) { if (IF_ISSET(i, &mfc->mf6cc_ifset)) -- cgit v1.2.2 From 6830c25b7d08fbbd922959425193791bc42079f2 Mon Sep 17 00:00:00 2001 From: Lennart Schulte Date: Wed, 17 Mar 2010 02:16:29 +0000 Subject: tcp: Fix tcp_mark_head_lost() with packets == 0 A packet is marked as lost when packets == 0, although nothing should be done. This results in a packet being retransmitted too early during recovery in some cases. 
This small patch fixes this issue by returning immediately. Signed-off-by: Lennart Schulte Signed-off-by: Arnd Hannemann Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 788851ca8c5d..c096a4218b8f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2511,6 +2511,9 @@ static void tcp_mark_head_lost(struct sock *sk, int packets) int err; unsigned int mss; + if (packets == 0) + return; + WARN_ON(packets > tp->packets_out); if (tp->lost_skb_hint) { skb = tp->lost_skb_hint; -- cgit v1.2.2 From 73852e8151b7d7a529fbe019ab6d2d0c02d8f3f2 Mon Sep 17 00:00:00 2001 From: "Steven J. Magnani" Date: Tue, 16 Mar 2010 05:22:44 +0000 Subject: NET_DMA: free skbs periodically Under NET_DMA, data transfer can grind to a halt when userland issues a large read on a socket with a high RCVLOWAT (i.e., 512 KB for both). This appears to be because the NET_DMA design queues up lots of memcpy operations, but doesn't issue or wait for them (and thus free the associated skbs) until it is time for tcp_recvmesg() to return. The socket hangs when its TCP window goes to zero before enough data is available to satisfy the read. Periodically issue asynchronous memcpy operations, and free skbs for ones that have completed, to prevent sockets from going into zero-window mode. Signed-off-by: Steven J. Magnani Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 63 +++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 43 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ae16f809e716..6afb6d8662b2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1254,6 +1254,39 @@ static void tcp_prequeue_process(struct sock *sk) tp->ucopy.memory = 0; } +#ifdef CONFIG_NET_DMA +static void tcp_service_net_dma(struct sock *sk, bool wait) +{ + dma_cookie_t done, used; + dma_cookie_t last_issued; + struct tcp_sock *tp = tcp_sk(sk); + + if (!tp->ucopy.dma_chan) + return; + + last_issued = tp->ucopy.dma_cookie; + dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + + do { + if (dma_async_memcpy_complete(tp->ucopy.dma_chan, + last_issued, &done, + &used) == DMA_SUCCESS) { + /* Safe to free early-copied skbs now */ + __skb_queue_purge(&sk->sk_async_wait_queue); + break; + } else { + struct sk_buff *skb; + while ((skb = skb_peek(&sk->sk_async_wait_queue)) && + (dma_async_is_complete(skb->dma_cookie, done, + used) == DMA_SUCCESS)) { + __skb_dequeue(&sk->sk_async_wait_queue); + kfree_skb(skb); + } + } + } while (wait); +} +#endif + static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) { struct sk_buff *skb; @@ -1546,6 +1579,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, /* __ Set realtime policy in scheduler __ */ } +#ifdef CONFIG_NET_DMA + if (tp->ucopy.dma_chan) + dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); +#endif if (copied >= target) { /* Do not sleep, just process backlog. 
*/ release_sock(sk); @@ -1554,6 +1591,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sk_wait_data(sk, &timeo); #ifdef CONFIG_NET_DMA + tcp_service_net_dma(sk, false); /* Don't block */ tp->ucopy.wakeup = 0; #endif @@ -1633,6 +1671,9 @@ do_prequeue: copied = -EFAULT; break; } + + dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + if ((offset + used) == skb->len) copied_early = 1; @@ -1702,27 +1743,9 @@ skip_copy: } #ifdef CONFIG_NET_DMA - if (tp->ucopy.dma_chan) { - dma_cookie_t done, used; - - dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); - - while (dma_async_memcpy_complete(tp->ucopy.dma_chan, - tp->ucopy.dma_cookie, &done, - &used) == DMA_IN_PROGRESS) { - /* do partial cleanup of sk_async_wait_queue */ - while ((skb = skb_peek(&sk->sk_async_wait_queue)) && - (dma_async_is_complete(skb->dma_cookie, done, - used) == DMA_SUCCESS)) { - __skb_dequeue(&sk->sk_async_wait_queue); - kfree_skb(skb); - } - } + tcp_service_net_dma(sk, true); /* Wait for queue to drain */ + tp->ucopy.dma_chan = NULL; - /* Safe to free early-copied skbs now */ - __skb_queue_purge(&sk->sk_async_wait_queue); - tp->ucopy.dma_chan = NULL; - } if (tp->ucopy.pinned_list) { dma_unpin_iovec_pages(tp->ucopy.pinned_list); tp->ucopy.pinned_list = NULL; -- cgit v1.2.2 From 1a50307ba1826e4da0024e64b245ce4eadf7688a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 18 Mar 2010 14:24:42 +0000 Subject: netlink: fix NETLINK_RECV_NO_ENOBUFS in netlink_set_err() Currently, ENOBUFS errors are reported to the socket via netlink_set_err() even if NETLINK_RECV_NO_ENOBUFS is set. However, that should not happen. This fixes this problem and it changes the prototype of netlink_set_err() to return the number of sockets that have set the NETLINK_RECV_NO_ENOBUFS socket option. This return value is used in the next patch in these bugfix series. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 320d0423a240..acbbae1e89b5 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1093,6 +1093,7 @@ static inline int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p) { struct netlink_sock *nlk = nlk_sk(sk); + int ret = 0; if (sk == p->exclude_sk) goto out; @@ -1104,10 +1105,15 @@ static inline int do_one_set_err(struct sock *sk, !test_bit(p->group - 1, nlk->groups)) goto out; + if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) { + ret = 1; + goto out; + } + sk->sk_err = p->code; sk->sk_error_report(sk); out: - return 0; + return ret; } /** @@ -1116,12 +1122,16 @@ out: * @pid: the PID of a process that we want to skip (if any) * @groups: the broadcast group that will notice the error * @code: error code, must be negative (as usual in kernelspace) + * + * This function returns the number of broadcast listeners that have set the + * NETLINK_RECV_NO_ENOBUFS socket option. 
*/ -void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) +int netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) { struct netlink_set_err_data info; struct hlist_node *node; struct sock *sk; + int ret = 0; info.exclude_sk = ssk; info.pid = pid; @@ -1132,9 +1142,10 @@ void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) read_lock(&nl_table_lock); sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list) - do_one_set_err(sk, &info); + ret += do_one_set_err(sk, &info); read_unlock(&nl_table_lock); + return ret; } EXPORT_SYMBOL(netlink_set_err); -- cgit v1.2.2 From 37b7ef7203240b3aba577bb1ff6765fe15225976 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 16 Mar 2010 13:30:21 +0000 Subject: netfilter: ctnetlink: fix reliable event delivery if message building fails This patch fixes a bug that allows to lose events when reliable event delivery mode is used, ie. if NETLINK_BROADCAST_SEND_ERROR and NETLINK_RECV_NO_ENOBUFS socket options are set. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_netlink.c | 4 +++- net/netfilter/nfnetlink.c | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 2b2af631d2b8..569410a85953 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -582,7 +582,9 @@ nla_put_failure: nlmsg_failure: kfree_skb(skb); errout: - nfnetlink_set_err(net, 0, group, -ENOBUFS); + if (nfnetlink_set_err(net, 0, group, -ENOBUFS) > 0) + return -ENOBUFS; + return 0; } #endif /* CONFIG_NF_CONNTRACK_EVENTS */ diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 8eb0cc23ada3..6afa3d52ea5f 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -113,9 +113,9 @@ int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, } EXPORT_SYMBOL_GPL(nfnetlink_send); -void nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error) +int nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error) { - netlink_set_err(net->nfnl, pid, group, error); + return netlink_set_err(net->nfnl, pid, group, error); } EXPORT_SYMBOL_GPL(nfnetlink_set_err); -- cgit v1.2.2 From 372e6c8f1f7b2bb68f9992d2e664925c73552a1d Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 17 Mar 2010 20:31:09 +0000 Subject: ipv6: convert temporary address list to list macros Use list macros instead of open coded linked list. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 8d41abc40db5..f372f895cd41 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -401,6 +401,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) #endif #ifdef CONFIG_IPV6_PRIVACY + INIT_LIST_HEAD(&ndev->tempaddr_list); setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev); if ((dev->flags&IFF_LOOPBACK) || dev->type == ARPHRD_TUNNEL || @@ -679,8 +680,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, #ifdef CONFIG_IPV6_PRIVACY if (ifa->flags&IFA_F_TEMPORARY) { - ifa->tmp_next = idev->tempaddr_list; - idev->tempaddr_list = ifa; + list_add(&ifa->tmp_list, &idev->tempaddr_list); in6_ifa_hold(ifa); } #endif @@ -732,19 +732,12 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) write_lock_bh(&idev->lock); #ifdef CONFIG_IPV6_PRIVACY if (ifp->flags&IFA_F_TEMPORARY) { - for (ifap = &idev->tempaddr_list; (ifa=*ifap) != NULL; - ifap = &ifa->tmp_next) { - if (ifa == ifp) { - *ifap = ifa->tmp_next; - if (ifp->ifpub) { - in6_ifa_put(ifp->ifpub); - ifp->ifpub = NULL; - } - __in6_ifa_put(ifp); - ifa->tmp_next = NULL; - break; - } + list_del(&ifp->tmp_list); + if (ifp->ifpub) { + in6_ifa_put(ifp->ifpub); + ifp->ifpub = NULL; } + __in6_ifa_put(ifp); } #endif @@ -1970,7 +1963,7 @@ ok: #ifdef CONFIG_IPV6_PRIVACY read_lock_bh(&in6_dev->lock); /* update all temporary addresses in the list */ - for (ift=in6_dev->tempaddr_list; ift; ift=ift->tmp_next) { + list_for_each_entry(ift, &in6_dev->tempaddr_list, tmp_list) { /* * When adjusting the lifetimes of an existing * temporary address, only lower the lifetimes. @@ -2675,9 +2668,10 @@ static int addrconf_ifdown(struct net_device *dev, int how) in6_dev_put(idev); /* clear tempaddr list */ - while ((ifa = idev->tempaddr_list) != NULL) { - idev->tempaddr_list = ifa->tmp_next; - ifa->tmp_next = NULL; + while (!list_empty(&idev->tempaddr_list)) { + ifa = list_first_entry(&idev->tempaddr_list, + struct inet6_ifaddr, tmp_list); + list_del(&ifa->tmp_list); ifa->dead = 1; write_unlock_bh(&idev->lock); spin_lock_bh(&ifa->lock); -- cgit v1.2.2 From c2e21293c054817c42eb5fa9c613d2ad51954136 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 17 Mar 2010 20:31:10 +0000 Subject: ipv6: convert addrconf list to hlist Using hash list macros, simplifies code and helps later RCU. This patch includes some initialization that is not strictly necessary, since an empty hlist node/list is all zero; and list is in BSS and node is allocated with kzalloc. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 128 ++++++++++++++++++++++------------------------------ 1 file changed, 53 insertions(+), 75 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f372f895cd41..0488b9f8071d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -126,7 +126,7 @@ static int ipv6_count_addresses(struct inet6_dev *idev); /* * Configured unicast address hash table */ -static struct inet6_ifaddr *inet6_addr_lst[IN6_ADDR_HSIZE]; +static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE]; static DEFINE_RWLOCK(addrconf_hash_lock); static void addrconf_verify(unsigned long); @@ -528,7 +528,7 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) { WARN_ON(ifp->if_next != NULL); - WARN_ON(ifp->lst_next != NULL); + WARN_ON(!hlist_unhashed(&ifp->addr_lst)); #ifdef NET_REFCNT_DEBUG printk(KERN_DEBUG "inet6_ifa_finish_destroy\n"); @@ -643,6 +643,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, spin_lock_init(&ifa->lock); init_timer(&ifa->timer); + INIT_HLIST_NODE(&ifa->addr_lst); ifa->timer.data = (unsigned long) ifa; ifa->scope = scope; ifa->prefix_len = pfxlen; @@ -669,8 +670,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, /* Add to big hash table */ hash = ipv6_addr_hash(addr); - ifa->lst_next = inet6_addr_lst[hash]; - inet6_addr_lst[hash] = ifa; + hlist_add_head(&ifa->addr_lst, &inet6_addr_lst[hash]); in6_ifa_hold(ifa); write_unlock(&addrconf_hash_lock); @@ -718,15 +718,8 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) ifp->dead = 1; write_lock_bh(&addrconf_hash_lock); - for (ifap = &inet6_addr_lst[hash]; (ifa=*ifap) != NULL; - ifap = &ifa->lst_next) { - if (ifa == ifp) { - *ifap = ifa->lst_next; - __in6_ifa_put(ifp); - ifa->lst_next = NULL; - break; - } - } + hlist_del_init(&ifp->addr_lst); + __in6_ifa_put(ifp); write_unlock_bh(&addrconf_hash_lock); write_lock_bh(&idev->lock); @@ -1277,11 +1270,12 @@ static int ipv6_count_addresses(struct inet6_dev *idev) int ipv6_chk_addr(struct net *net, struct in6_addr *addr, struct net_device *dev, int strict) { - struct inet6_ifaddr * ifp; + struct inet6_ifaddr *ifp = NULL; + struct hlist_node *node; u8 hash = ipv6_addr_hash(addr); read_lock_bh(&addrconf_hash_lock); - for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { + hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr) && @@ -1300,10 +1294,11 @@ static int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, struct net_device *dev) { - struct inet6_ifaddr * ifp; + struct inet6_ifaddr *ifp; + struct hlist_node *node; u8 hash = ipv6_addr_hash(addr); - for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { + hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr)) { @@ -1342,11 +1337,12 @@ EXPORT_SYMBOL(ipv6_chk_prefix); struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr, struct net_device *dev, int strict) { - struct inet6_ifaddr * ifp; + struct inet6_ifaddr *ifp = NULL; + struct hlist_node *node; u8 hash = ipv6_addr_hash(addr); read_lock_bh(&addrconf_hash_lock); - for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { + hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) 
continue; if (ipv6_addr_equal(&ifp->addr, addr)) { @@ -2612,7 +2608,6 @@ static int addrconf_ifdown(struct net_device *dev, int how) struct inet6_dev *idev; struct inet6_ifaddr *ifa, *keep_list, **bifa; struct net *net = dev_net(dev); - int i; ASSERT_RTNL(); @@ -2637,25 +2632,6 @@ static int addrconf_ifdown(struct net_device *dev, int how) } - /* Step 2: clear hash table */ - for (i=0; iidev == idev && - (how || !(ifa->flags&IFA_F_PERMANENT) || - ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) { - *bifa = ifa->lst_next; - ifa->lst_next = NULL; - __in6_ifa_put(ifa); - continue; - } - bifa = &ifa->lst_next; - } - write_unlock_bh(&addrconf_hash_lock); - } - write_lock_bh(&idev->lock); /* Step 3: clear flags for stateless addrconf */ @@ -2721,6 +2697,12 @@ static int addrconf_ifdown(struct net_device *dev, int how) } write_unlock_bh(&idev->lock); + /* clear hash table */ + write_lock_bh(&addrconf_hash_lock); + hlist_del_init(&ifa->addr_lst); + __in6_ifa_put(ifa); + write_unlock_bh(&addrconf_hash_lock); + __ipv6_ifa_notify(RTM_DELADDR, ifa); atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); in6_ifa_put(ifa); @@ -2963,36 +2945,37 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq) struct net *net = seq_file_net(seq); for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { - ifa = inet6_addr_lst[state->bucket]; - - while (ifa && !net_eq(dev_net(ifa->idev->dev), net)) - ifa = ifa->lst_next; - if (ifa) - break; + struct hlist_node *n; + hlist_for_each_entry(ifa, n, + &inet6_addr_lst[state->bucket], addr_lst) { + if (net_eq(dev_net(ifa->idev->dev), net)) + return ifa; + } } - return ifa; + return NULL; } -static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct inet6_ifaddr *ifa) +static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, + struct inet6_ifaddr *ifa) { struct if6_iter_state *state = seq->private; struct net *net = seq_file_net(seq); + struct hlist_node *n = &ifa->addr_lst; - ifa = ifa->lst_next; -try_again: - if (ifa) { - if (!net_eq(dev_net(ifa->idev->dev), net)) { - ifa = ifa->lst_next; - goto try_again; - } + hlist_for_each_entry_continue(ifa, n, addr_lst) { + if (net_eq(dev_net(ifa->idev->dev), net)) + return ifa; } - if (!ifa && ++state->bucket < IN6_ADDR_HSIZE) { - ifa = inet6_addr_lst[state->bucket]; - goto try_again; + while (++state->bucket < IN6_ADDR_HSIZE) { + hlist_for_each_entry(ifa, n, + &inet6_addr_lst[state->bucket], addr_lst) { + if (net_eq(dev_net(ifa->idev->dev), net)) + return ifa; + } } - return ifa; + return NULL; } static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos) @@ -3094,10 +3077,12 @@ void if6_proc_exit(void) int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr) { int ret = 0; - struct inet6_ifaddr * ifp; + struct inet6_ifaddr *ifp = NULL; + struct hlist_node *n; u8 hash = ipv6_addr_hash(addr); + read_lock_bh(&addrconf_hash_lock); - for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) { + hlist_for_each_entry(ifp, n, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr) && @@ -3118,6 +3103,7 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr) static void addrconf_verify(unsigned long foo) { struct inet6_ifaddr *ifp; + struct hlist_node *node; unsigned long now, next; int i; @@ -3131,7 +3117,7 @@ static void addrconf_verify(unsigned long foo) restart: read_lock(&addrconf_hash_lock); - for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next) { + hlist_for_each_entry(ifp, 
node, &inet6_addr_lst[i], addr_lst) { unsigned long age; #ifdef CONFIG_IPV6_PRIVACY unsigned long regen_advance; @@ -4550,7 +4536,7 @@ EXPORT_SYMBOL(unregister_inet6addr_notifier); int __init addrconf_init(void) { - int err; + int i, err; if ((err = ipv6_addr_label_init()) < 0) { printk(KERN_CRIT "IPv6 Addrconf: cannot initialize default policy table: %d.\n", @@ -4585,6 +4571,9 @@ int __init addrconf_init(void) if (err) goto errlo; + for (i = 0; i < IN6_ADDR_HSIZE; i++) + INIT_HLIST_HEAD(&inet6_addr_lst[i]); + register_netdevice_notifier(&ipv6_dev_notf); addrconf_verify(0); @@ -4613,7 +4602,6 @@ errlo: void addrconf_cleanup(void) { - struct inet6_ifaddr *ifa; struct net_device *dev; int i; @@ -4634,18 +4622,8 @@ void addrconf_cleanup(void) * Check hash table. */ write_lock_bh(&addrconf_hash_lock); - for (i=0; i < IN6_ADDR_HSIZE; i++) { - for (ifa=inet6_addr_lst[i]; ifa; ) { - struct inet6_ifaddr *bifa; - - bifa = ifa; - ifa = ifa->lst_next; - printk(KERN_DEBUG "bug: IPv6 address leakage detected: ifa=%p\n", bifa); - /* Do not free it; something is wrong. - Now we can investigate it with debugger. - */ - } - } + for (i = 0; i < IN6_ADDR_HSIZE; i++) + WARN_ON(!hlist_empty(&inet6_addr_lst[i])); write_unlock_bh(&addrconf_hash_lock); del_timer(&addr_chk_timer); -- cgit v1.2.2 From 5c578aedcb21d79eeb4e9cf04ca5b276ac82614c Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 17 Mar 2010 20:31:11 +0000 Subject: IPv6: convert addrconf hash list to RCU Convert from reader/writer lock to RCU and spinlock for addrconf hash list. Adds an additional helper macro for hlist_for_each_entry_continue_rcu to handle the continue case. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 85 +++++++++++++++++++++++++++-------------------------- 1 file changed, 44 insertions(+), 41 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0488b9f8071d..7ffd5eeab967 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -127,7 +127,7 @@ static int ipv6_count_addresses(struct inet6_dev *idev); * Configured unicast address hash table */ static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE]; -static DEFINE_RWLOCK(addrconf_hash_lock); +static DEFINE_SPINLOCK(addrconf_hash_lock); static void addrconf_verify(unsigned long); @@ -523,8 +523,13 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) } #endif -/* Nobody refers to this ifaddr, destroy it */ +static void inet6_ifa_finish_destroy_rcu(struct rcu_head *head) +{ + struct inet6_ifaddr *ifp = container_of(head, struct inet6_ifaddr, rcu); + kfree(ifp); +} +/* Nobody refers to this ifaddr, destroy it */ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) { WARN_ON(ifp->if_next != NULL); @@ -545,7 +550,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) } dst_release(&ifp->rt->u.dst); - kfree(ifp); + call_rcu(&ifp->rcu, inet6_ifa_finish_destroy_rcu); } static void @@ -616,7 +621,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, goto out2; } - write_lock(&addrconf_hash_lock); + spin_lock(&addrconf_hash_lock); /* Ignore adding duplicate addresses on an interface */ if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) { @@ -670,9 +675,9 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, /* Add to big hash table */ hash = ipv6_addr_hash(addr); - hlist_add_head(&ifa->addr_lst, &inet6_addr_lst[hash]); + hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]); 
in6_ifa_hold(ifa); - write_unlock(&addrconf_hash_lock); + spin_unlock(&addrconf_hash_lock); write_lock(&idev->lock); /* Add to inet6_dev unicast addr list. */ @@ -699,7 +704,7 @@ out2: return ifa; out: - write_unlock(&addrconf_hash_lock); + spin_unlock(&addrconf_hash_lock); goto out2; } @@ -717,10 +722,10 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) ifp->dead = 1; - write_lock_bh(&addrconf_hash_lock); - hlist_del_init(&ifp->addr_lst); + spin_lock_bh(&addrconf_hash_lock); + hlist_del_init_rcu(&ifp->addr_lst); __in6_ifa_put(ifp); - write_unlock_bh(&addrconf_hash_lock); + spin_unlock_bh(&addrconf_hash_lock); write_lock_bh(&idev->lock); #ifdef CONFIG_IPV6_PRIVACY @@ -1274,8 +1279,8 @@ int ipv6_chk_addr(struct net *net, struct in6_addr *addr, struct hlist_node *node; u8 hash = ipv6_addr_hash(addr); - read_lock_bh(&addrconf_hash_lock); - hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) { + rcu_read_lock_bh(); + hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr) && @@ -1285,7 +1290,8 @@ int ipv6_chk_addr(struct net *net, struct in6_addr *addr, break; } } - read_unlock_bh(&addrconf_hash_lock); + rcu_read_unlock_bh(); + return ifp != NULL; } EXPORT_SYMBOL(ipv6_chk_addr); @@ -1341,8 +1347,8 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add struct hlist_node *node; u8 hash = ipv6_addr_hash(addr); - read_lock_bh(&addrconf_hash_lock); - hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) { + rcu_read_lock_bh(); + hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr)) { @@ -1353,7 +1359,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add } } } - read_unlock_bh(&addrconf_hash_lock); + rcu_read_unlock_bh(); return ifp; } @@ -2698,10 +2704,10 @@ static int addrconf_ifdown(struct net_device *dev, int how) write_unlock_bh(&idev->lock); /* clear hash table */ - write_lock_bh(&addrconf_hash_lock); - hlist_del_init(&ifa->addr_lst); + spin_lock_bh(&addrconf_hash_lock); + hlist_del_init_rcu(&ifa->addr_lst); __in6_ifa_put(ifa); - write_unlock_bh(&addrconf_hash_lock); + spin_unlock_bh(&addrconf_hash_lock); __ipv6_ifa_notify(RTM_DELADDR, ifa); atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); @@ -2946,11 +2952,10 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq) for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { struct hlist_node *n; - hlist_for_each_entry(ifa, n, - &inet6_addr_lst[state->bucket], addr_lst) { + hlist_for_each_entry_rcu(ifa, n, &inet6_addr_lst[state->bucket], + addr_lst) if (net_eq(dev_net(ifa->idev->dev), net)) return ifa; - } } return NULL; } @@ -2962,10 +2967,9 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct net *net = seq_file_net(seq); struct hlist_node *n = &ifa->addr_lst; - hlist_for_each_entry_continue(ifa, n, addr_lst) { + hlist_for_each_entry_continue_rcu(ifa, n, addr_lst) if (net_eq(dev_net(ifa->idev->dev), net)) return ifa; - } while (++state->bucket < IN6_ADDR_HSIZE) { hlist_for_each_entry(ifa, n, @@ -2989,9 +2993,9 @@ static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos) } static void *if6_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(addrconf_hash_lock) + __acquires(rcu) { - read_lock_bh(&addrconf_hash_lock); + rcu_read_lock_bh(); return if6_get_idx(seq, 
*pos); } @@ -3005,9 +3009,9 @@ static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void if6_seq_stop(struct seq_file *seq, void *v) - __releases(addrconf_hash_lock) + __releases(rcu) { - read_unlock_bh(&addrconf_hash_lock); + rcu_read_unlock_bh(); } static int if6_seq_show(struct seq_file *seq, void *v) @@ -3081,8 +3085,8 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr) struct hlist_node *n; u8 hash = ipv6_addr_hash(addr); - read_lock_bh(&addrconf_hash_lock); - hlist_for_each_entry(ifp, n, &inet6_addr_lst[hash], addr_lst) { + rcu_read_lock_bh(); + hlist_for_each_entry_rcu(ifp, n, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr) && @@ -3091,7 +3095,7 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr) break; } } - read_unlock_bh(&addrconf_hash_lock); + rcu_read_unlock_bh(); return ret; } #endif @@ -3107,7 +3111,8 @@ static void addrconf_verify(unsigned long foo) unsigned long now, next; int i; - spin_lock_bh(&addrconf_verify_lock); + rcu_read_lock_bh(); + spin_lock(&addrconf_verify_lock); now = jiffies; next = now + ADDR_CHECK_FREQUENCY; @@ -3116,8 +3121,8 @@ static void addrconf_verify(unsigned long foo) for (i=0; i < IN6_ADDR_HSIZE; i++) { restart: - read_lock(&addrconf_hash_lock); - hlist_for_each_entry(ifp, node, &inet6_addr_lst[i], addr_lst) { + hlist_for_each_entry_rcu(ifp, node, + &inet6_addr_lst[i], addr_lst) { unsigned long age; #ifdef CONFIG_IPV6_PRIVACY unsigned long regen_advance; @@ -3139,7 +3144,6 @@ restart: age >= ifp->valid_lft) { spin_unlock(&ifp->lock); in6_ifa_hold(ifp); - read_unlock(&addrconf_hash_lock); ipv6_del_addr(ifp); goto restart; } else if (ifp->prefered_lft == INFINITY_LIFE_TIME) { @@ -3161,7 +3165,6 @@ restart: if (deprecate) { in6_ifa_hold(ifp); - read_unlock(&addrconf_hash_lock); ipv6_ifa_notify(0, ifp); in6_ifa_put(ifp); @@ -3179,7 +3182,7 @@ restart: in6_ifa_hold(ifp); in6_ifa_hold(ifpub); spin_unlock(&ifp->lock); - read_unlock(&addrconf_hash_lock); + spin_lock(&ifpub->lock); ifpub->regen_count = 0; spin_unlock(&ifpub->lock); @@ -3199,12 +3202,12 @@ restart: spin_unlock(&ifp->lock); } } - read_unlock(&addrconf_hash_lock); } addr_chk_timer.expires = time_before(next, jiffies + HZ) ? jiffies + HZ : next; add_timer(&addr_chk_timer); - spin_unlock_bh(&addrconf_verify_lock); + spin_unlock(&addrconf_verify_lock); + rcu_read_unlock_bh(); } static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local) @@ -4621,10 +4624,10 @@ void addrconf_cleanup(void) /* * Check hash table. */ - write_lock_bh(&addrconf_hash_lock); + spin_lock_bh(&addrconf_hash_lock); for (i = 0; i < IN6_ADDR_HSIZE; i++) WARN_ON(!hlist_empty(&inet6_addr_lst[i])); - write_unlock_bh(&addrconf_hash_lock); + spin_unlock_bh(&addrconf_hash_lock); del_timer(&addr_chk_timer); rtnl_unlock(); -- cgit v1.2.2 From 3a88a81d89c20be312b3b219b185bbdde24b8fb8 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 17 Mar 2010 20:31:12 +0000 Subject: ipv6: user better hash for addrconf The existing hash function has a couple of issues: * it is hardwired to 16 for IN6_ADDR_HSIZE * limited to 256 and callers using int * use jhash2 rather than some old BSD algorithm No need for random seed since this is local only (based on assigned addresses) table. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 7ffd5eeab967..1e5e41fe92bc 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -573,23 +573,14 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) *ifap = ifp; } -/* - * Hash function taken from net_alias.c - */ -static u8 ipv6_addr_hash(const struct in6_addr *addr) +static u32 ipv6_addr_hash(const struct in6_addr *addr) { - __u32 word; - /* * We perform the hash function over the last 64 bits of the address * This will include the IEEE address token on links that support it. */ - - word = (__force u32)(addr->s6_addr32[2] ^ addr->s6_addr32[3]); - word ^= (word >> 16); - word ^= (word >> 8); - - return ((word ^ (word >> 4)) & 0x0f); + return jhash_2words(addr->s6_addr32[2], addr->s6_addr32[3], 0) + & (IN6_ADDR_HSIZE - 1); } /* On success it returns ifp with increased reference count */ @@ -600,7 +591,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, { struct inet6_ifaddr *ifa = NULL; struct rt6_info *rt; - int hash; + unsigned int hash; int err = 0; int addr_type = ipv6_addr_type(addr); @@ -1277,7 +1268,7 @@ int ipv6_chk_addr(struct net *net, struct in6_addr *addr, { struct inet6_ifaddr *ifp = NULL; struct hlist_node *node; - u8 hash = ipv6_addr_hash(addr); + unsigned int hash = ipv6_addr_hash(addr); rcu_read_lock_bh(); hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) { @@ -1302,7 +1293,7 @@ int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, { struct inet6_ifaddr *ifp; struct hlist_node *node; - u8 hash = ipv6_addr_hash(addr); + unsigned int hash = ipv6_addr_hash(addr); hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) @@ -1345,7 +1336,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add { struct inet6_ifaddr *ifp = NULL; struct hlist_node *node; - u8 hash = ipv6_addr_hash(addr); + unsigned int hash = ipv6_addr_hash(addr); rcu_read_lock_bh(); hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) { @@ -3083,7 +3074,7 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr) int ret = 0; struct inet6_ifaddr *ifp = NULL; struct hlist_node *n; - u8 hash = ipv6_addr_hash(addr); + unsigned int hash = ipv6_addr_hash(addr); rcu_read_lock_bh(); hlist_for_each_entry_rcu(ifp, n, &inet6_addr_lst[hash], addr_lst) { -- cgit v1.2.2 From 502a2ffd7376ae27cfde6172257db0ff9d8cfec2 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 17 Mar 2010 20:31:13 +0000 Subject: ipv6: convert idev_list to list macros Convert to list macro's for the list of addresses per interface in IPv6. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 78 ++++++++++++++++++++++++++--------------------------- net/sctp/ipv6.c | 2 +- 2 files changed, 40 insertions(+), 40 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1e5e41fe92bc..6dbf0f79b762 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -317,7 +317,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev) { struct net_device *dev = idev->dev; - WARN_ON(idev->addr_list != NULL); + WARN_ON(!list_empty(&idev->addr_list)); WARN_ON(idev->mc_list != NULL); #ifdef NET_REFCNT_DEBUG @@ -350,6 +350,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) rwlock_init(&ndev->lock); ndev->dev = dev; + INIT_LIST_HEAD(&ndev->addr_list); + memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf)); ndev->cnf.mtu6 = dev->mtu; ndev->cnf.sysctl = NULL; @@ -466,7 +468,8 @@ static void dev_forward_change(struct inet6_dev *idev) else ipv6_dev_mc_dec(dev, &in6addr_linklocal_allrouters); } - for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) { + + list_for_each_entry(ifa, &idev->addr_list, if_list) { if (ifa->flags&IFA_F_TENTATIVE) continue; if (idev->cnf.forwarding) @@ -532,7 +535,6 @@ static void inet6_ifa_finish_destroy_rcu(struct rcu_head *head) /* Nobody refers to this ifaddr, destroy it */ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) { - WARN_ON(ifp->if_next != NULL); WARN_ON(!hlist_unhashed(&ifp->addr_lst)); #ifdef NET_REFCNT_DEBUG @@ -556,21 +558,21 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) static void ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) { - struct inet6_ifaddr *ifa, **ifap; + struct list_head *p; int ifp_scope = ipv6_addr_src_scope(&ifp->addr); /* * Each device address list is sorted in order of scope - * global before linklocal. 
*/ - for (ifap = &idev->addr_list; (ifa = *ifap) != NULL; - ifap = &ifa->if_next) { + list_for_each(p, &idev->addr_list) { + struct inet6_ifaddr *ifa + = list_entry(p, struct inet6_ifaddr, if_list); if (ifp_scope >= ipv6_addr_src_scope(&ifa->addr)) break; } - ifp->if_next = *ifap; - *ifap = ifp; + list_add(&ifp->if_list, p); } static u32 ipv6_addr_hash(const struct in6_addr *addr) @@ -703,7 +705,7 @@ out: static void ipv6_del_addr(struct inet6_ifaddr *ifp) { - struct inet6_ifaddr *ifa, **ifap; + struct inet6_ifaddr *ifa, *ifn; struct inet6_dev *idev = ifp->idev; int hash; int deleted = 0, onlink = 0; @@ -730,11 +732,11 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) } #endif - for (ifap = &idev->addr_list; (ifa=*ifap) != NULL;) { + list_for_each_entry_safe(ifa, ifn, &idev->addr_list, if_list) { if (ifa == ifp) { - *ifap = ifa->if_next; + list_del_init(&ifp->if_list); __in6_ifa_put(ifp); - ifa->if_next = NULL; + if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0) break; deleted = 1; @@ -767,7 +769,6 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) } } } - ifap = &ifa->if_next; } write_unlock_bh(&idev->lock); @@ -1146,7 +1147,7 @@ int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev, continue; read_lock_bh(&idev->lock); - for (score->ifa = idev->addr_list; score->ifa; score->ifa = score->ifa->if_next) { + list_for_each_entry(score->ifa, &idev->addr_list, if_list) { int i; /* @@ -1238,8 +1239,9 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, struct inet6_ifaddr *ifp; read_lock_bh(&idev->lock); - for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { - if (ifp->scope == IFA_LINK && !(ifp->flags & banned_flags)) { + list_for_each_entry(ifp, &idev->addr_list, if_list) { + if (ifp->scope == IFA_LINK && + !(ifp->flags & banned_flags)) { ipv6_addr_copy(addr, &ifp->addr); err = 0; break; @@ -1257,7 +1259,7 @@ static int ipv6_count_addresses(struct inet6_dev *idev) struct inet6_ifaddr *ifp; read_lock_bh(&idev->lock); - for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) + list_for_each_entry(ifp, &idev->addr_list, if_list) cnt++; read_unlock_bh(&idev->lock); return cnt; @@ -1317,7 +1319,7 @@ int ipv6_chk_prefix(struct in6_addr *addr, struct net_device *dev) idev = __in6_dev_get(dev); if (idev) { read_lock_bh(&idev->lock); - for (ifa = idev->addr_list; ifa; ifa = ifa->if_next) { + list_for_each_entry(ifa, &idev->addr_list, if_list) { onlink = ipv6_prefix_equal(addr, &ifa->addr, ifa->prefix_len); if (onlink) @@ -1555,7 +1557,7 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev) struct inet6_ifaddr *ifp; read_lock_bh(&idev->lock); - for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { + list_for_each_entry(ifp, &idev->addr_list, if_list) { if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) { memcpy(eui, ifp->addr.s6_addr+8, 8); err = 0; @@ -2159,7 +2161,7 @@ static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx, return -ENXIO; read_lock_bh(&idev->lock); - for (ifp = idev->addr_list; ifp; ifp=ifp->if_next) { + list_for_each_entry(ifp, &idev->addr_list, if_list) { if (ifp->prefix_len == plen && ipv6_addr_equal(pfx, &ifp->addr)) { in6_ifa_hold(ifp); @@ -2170,7 +2172,7 @@ static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx, /* If the last address is deleted administratively, disable IPv6 on this interface. 
*/ - if (idev->addr_list == NULL) + if (list_empty(&idev->addr_list)) addrconf_ifdown(idev->dev, 1); return 0; } @@ -2602,9 +2604,10 @@ static void addrconf_type_change(struct net_device *dev, unsigned long event) static int addrconf_ifdown(struct net_device *dev, int how) { - struct inet6_dev *idev; - struct inet6_ifaddr *ifa, *keep_list, **bifa; struct net *net = dev_net(dev); + struct inet6_dev *idev; + struct inet6_ifaddr *ifa; + LIST_HEAD(keep_list); ASSERT_RTNL(); @@ -2658,12 +2661,10 @@ static int addrconf_ifdown(struct net_device *dev, int how) write_lock_bh(&idev->lock); } #endif - keep_list = NULL; - bifa = &keep_list; - while ((ifa = idev->addr_list) != NULL) { - idev->addr_list = ifa->if_next; - ifa->if_next = NULL; + while (!list_empty(&idev->addr_list)) { + ifa = list_first_entry(&idev->addr_list, + struct inet6_ifaddr, if_list); addrconf_del_timer(ifa); /* If just doing link down, and address is permanent @@ -2671,10 +2672,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) if (how == 0 && (ifa->flags&IFA_F_PERMANENT) && !(ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) { - - /* Move to holding list */ - *bifa = ifa; - bifa = &ifa->if_next; + list_move_tail(&ifa->if_list, &keep_list); /* If not doing DAD on this address, just keep it. */ if ((dev->flags&(IFF_NOARP|IFF_LOOPBACK)) || @@ -2690,6 +2688,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) ifa->flags |= IFA_F_TENTATIVE; in6_ifa_hold(ifa); } else { + list_del(&ifa->if_list); ifa->dead = 1; } write_unlock_bh(&idev->lock); @@ -2707,7 +2706,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) write_lock_bh(&idev->lock); } - idev->addr_list = keep_list; + list_splice(&keep_list, &idev->addr_list); write_unlock_bh(&idev->lock); @@ -2917,7 +2916,7 @@ static void addrconf_dad_run(struct inet6_dev *idev) { struct inet6_ifaddr *ifp; read_lock_bh(&idev->lock); - for (ifp = idev->addr_list; ifp; ifp = ifp->if_next) { + list_for_each_entry(ifp, &idev->addr_list, if_list) { spin_lock(&ifp->lock); if (!(ifp->flags & IFA_F_TENTATIVE)) { spin_unlock(&ifp->lock); @@ -3500,7 +3499,6 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, struct netlink_callback *cb, enum addr_type_t type, int s_ip_idx, int *p_ip_idx) { - struct inet6_ifaddr *ifa; struct ifmcaddr6 *ifmca; struct ifacaddr6 *ifaca; int err = 1; @@ -3508,11 +3506,12 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, read_lock_bh(&idev->lock); switch (type) { - case UNICAST_ADDR: + case UNICAST_ADDR: { + struct inet6_ifaddr *ifa; + /* unicast address incl. temp addr */ - for (ifa = idev->addr_list; ifa; - ifa = ifa->if_next, ip_idx++) { - if (ip_idx < s_ip_idx) + list_for_each_entry(ifa, &idev->addr_list, if_list) { + if (++ip_idx < s_ip_idx) continue; err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, @@ -3523,6 +3522,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, break; } break; + } case MULTICAST_ADDR: /* multicast address */ for (ifmca = idev->mc_list; ifmca; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 1d7ac70ba39f..240dceba06e5 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -371,7 +371,7 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, } read_lock_bh(&in6_dev->lock); - for (ifp = in6_dev->addr_list; ifp; ifp = ifp->if_next) { + list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { /* Add the address to the local list. 
*/ addr = t_new(struct sctp_sockaddr_entry, GFP_ATOMIC); if (addr) { -- cgit v1.2.2 From bcdd553fd3037d8700082ec4cbb6b25437ea06d6 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 20 Mar 2010 16:08:18 -0700 Subject: IPv6: addrconf cleanups Some minor stuff, reformat comments and add whitespace for clarity Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 51 ++++++++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6dbf0f79b762..bcb55b78746f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2456,6 +2456,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, return notifier_from_errno(-ENOMEM); } break; + case NETDEV_UP: case NETDEV_CHANGE: if (dev->flags & IFF_SLAVE) @@ -2485,10 +2486,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, } if (idev) { - if (idev->if_flags & IF_READY) { + if (idev->if_flags & IF_READY) /* device is already configured. */ break; - } idev->if_flags |= IF_READY; } @@ -2517,25 +2517,30 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, addrconf_dev_config(dev); break; } + if (idev) { if (run_pending) addrconf_dad_run(idev); - /* If the MTU changed during the interface down, when the - interface up, the changed MTU must be reflected in the - idev as well as routers. + /* + * If the MTU changed during the interface down, + * when the interface up, the changed MTU must be + * reflected in the idev as well as routers. */ - if (idev->cnf.mtu6 != dev->mtu && dev->mtu >= IPV6_MIN_MTU) { + if (idev->cnf.mtu6 != dev->mtu && + dev->mtu >= IPV6_MIN_MTU) { rt6_mtu_change(dev, dev->mtu); idev->cnf.mtu6 = dev->mtu; } idev->tstamp = jiffies; inet6_ifinfo_notify(RTM_NEWLINK, idev); - /* If the changed mtu during down is lower than IPV6_MIN_MTU - stop IPv6 on this interface. + + /* + * If the changed mtu during down is lower than + * IPV6_MIN_MTU stop IPv6 on this interface. */ if (dev->mtu < IPV6_MIN_MTU) - addrconf_ifdown(dev, event != NETDEV_DOWN); + addrconf_ifdown(dev, 1); } break; @@ -2552,7 +2557,10 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, break; } - /* MTU falled under IPV6_MIN_MTU. Stop IPv6 on this interface. */ + /* + * MTU falled under IPV6_MIN_MTU. + * Stop IPv6 on this interface. + */ case NETDEV_DOWN: case NETDEV_UNREGISTER: @@ -2572,6 +2580,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, return notifier_from_errno(err); } break; + case NETDEV_PRE_TYPE_CHANGE: case NETDEV_POST_TYPE_CHANGE: addrconf_type_change(dev, event); @@ -2586,7 +2595,6 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, */ static struct notifier_block ipv6_dev_notf = { .notifier_call = addrconf_notify, - .priority = 0 }; static void addrconf_type_change(struct net_device *dev, unsigned long event) @@ -2618,8 +2626,9 @@ static int addrconf_ifdown(struct net_device *dev, int how) if (idev == NULL) return -ENODEV; - /* Step 1: remove reference to ipv6 device from parent device. - Do not dev_put! + /* + * Step 1: remove reference to ipv6 device from parent device. + * Do not dev_put! 
*/ if (how) { idev->dead = 1; @@ -2634,16 +2643,15 @@ static int addrconf_ifdown(struct net_device *dev, int how) write_lock_bh(&idev->lock); - /* Step 3: clear flags for stateless addrconf */ + /* Step 2: clear flags for stateless addrconf */ if (!how) idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY); - /* Step 4: clear address list */ #ifdef CONFIG_IPV6_PRIVACY if (how && del_timer(&idev->regen_timer)) in6_dev_put(idev); - /* clear tempaddr list */ + /* Step 3: clear tempaddr list */ while (!list_empty(&idev->tempaddr_list)) { ifa = list_first_entry(&idev->tempaddr_list, struct inet6_ifaddr, tmp_list); @@ -2669,7 +2677,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) /* If just doing link down, and address is permanent and not link-local, then retain it. */ - if (how == 0 && + if (!how && (ifa->flags&IFA_F_PERMANENT) && !(ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) { list_move_tail(&ifa->if_list, &keep_list); @@ -2711,7 +2719,6 @@ static int addrconf_ifdown(struct net_device *dev, int how) write_unlock_bh(&idev->lock); /* Step 5: Discard multicast list */ - if (how) ipv6_mc_destroy_dev(idev); else @@ -2719,8 +2726,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) idev->tstamp = jiffies; - /* Shot the device (if unregistered) */ - + /* Last: Shot the device (if unregistered) */ if (how) { addrconf_sysctl_unregister(idev); neigh_parms_release(&nd_tbl, idev->nd_parms); @@ -3108,8 +3114,7 @@ static void addrconf_verify(unsigned long foo) del_timer(&addr_chk_timer); - for (i=0; i < IN6_ADDR_HSIZE; i++) { - + for (i = 0; i < IN6_ADDR_HSIZE; i++) { restart: hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[i], addr_lst) { @@ -4376,7 +4381,7 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, if (t == NULL) goto out; - for (i=0; t->addrconf_vars[i].data; i++) { + for (i = 0; t->addrconf_vars[i].data; i++) { t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf; t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */ t->addrconf_vars[i].extra2 = net; -- cgit v1.2.2 From e21e8467d3188a36f7f0af0d4b9aae74e23fda0e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 20 Mar 2010 16:09:01 -0700 Subject: addrconf: checkpatch fixes Fix some of the checkpatch complaints. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 352 ++++++++++++++++++++++++++-------------------------- 1 file changed, 179 insertions(+), 173 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index bcb55b78746f..279580eab309 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -81,7 +81,7 @@ #include #endif -#include +#include #include #include @@ -97,7 +97,7 @@ #endif #define INFINITY_LIFE_TIME 0xFFFFFFFF -#define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b))) +#define TIME_DELTA(a, b) ((unsigned long)((long)(a) - (long)(b))) #ifdef CONFIG_SYSCTL static void addrconf_sysctl_register(struct inet6_dev *idev); @@ -249,8 +249,7 @@ static void addrconf_del_timer(struct inet6_ifaddr *ifp) __in6_ifa_put(ifp); } -enum addrconf_timer_t -{ +enum addrconf_timer_t { AC_NONE, AC_DAD, AC_RS, @@ -270,7 +269,8 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp, case AC_RS: ifp->timer.function = addrconf_rs_timer; break; - default:; + default: + break; } ifp->timer.expires = jiffies + when; add_timer(&ifp->timer); @@ -325,7 +325,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev) #endif dev_put(dev); if (!idev->dead) { - printk("Freeing alive inet6 device %p\n", idev); + pr_warning("Freeing alive inet6 device %p\n", idev); return; } snmp6_free_dev(idev); @@ -441,8 +441,10 @@ static struct inet6_dev * ipv6_find_idev(struct net_device *dev) ASSERT_RTNL(); - if ((idev = __in6_dev_get(dev)) == NULL) { - if ((idev = ipv6_add_dev(dev)) == NULL) + idev = __in6_dev_get(dev); + if (!idev) { + idev = ipv6_add_dev(dev); + if (!idev) return NULL; } @@ -544,10 +546,10 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) in6_dev_put(ifp->idev); if (del_timer(&ifp->timer)) - printk("Timer is still running, when freeing ifa=%p\n", ifp); + pr_notice("Timer is still running, when freeing ifa=%p\n", ifp); if (!ifp->dead) { - printk("Freeing alive inet6 address %p\n", ifp); + pr_warning("Freeing alive inet6 address %p\n", ifp); return; } dst_release(&ifp->rt->u.dst); @@ -1225,7 +1227,6 @@ try_nextdev: in6_ifa_put(hiscore->ifa); return 0; } - EXPORT_SYMBOL(ipv6_dev_get_saddr); int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, @@ -1235,7 +1236,8 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, int err = -EADDRNOTAVAIL; rcu_read_lock(); - if ((idev = __in6_dev_get(dev)) != NULL) { + idev = __in6_dev_get(dev); + if (idev) { struct inet6_ifaddr *ifp; read_lock_bh(&idev->lock); @@ -1725,7 +1727,8 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev) ASSERT_RTNL(); - if ((idev = ipv6_find_idev(dev)) == NULL) + idev = ipv6_find_idev(dev); + if (!idev) return NULL; /* Add default multicast route */ @@ -2433,7 +2436,8 @@ static void addrconf_ip6_tnl_config(struct net_device *dev) ASSERT_RTNL(); - if ((idev = addrconf_add_dev(dev)) == NULL) { + idev = addrconf_add_dev(dev); + if (!idev) { printk(KERN_DEBUG "init ip6-ip6: add_dev failed\n"); return; } @@ -2448,7 +2452,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, int run_pending = 0; int err; - switch(event) { + switch (event) { case NETDEV_REGISTER: if (!idev && dev->mtu >= IPV6_MIN_MTU) { idev = ipv6_add_dev(dev); @@ -2500,7 +2504,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, run_pending = 1; } - switch(dev->type) { + switch (dev->type) { #if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) case ARPHRD_SIT: addrconf_sit_config(dev); @@ -2837,7 +2841,7 @@ static void 
addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags) * Optimistic nodes can start receiving * Frames right away */ - if(ifp->flags & IFA_F_OPTIMISTIC) + if (ifp->flags & IFA_F_OPTIMISTIC) ip6_ins_rt(ifp->rt); addrconf_dad_kick(ifp); @@ -2887,7 +2891,7 @@ out: static void addrconf_dad_completed(struct inet6_ifaddr *ifp) { - struct net_device * dev = ifp->idev->dev; + struct net_device *dev = ifp->idev->dev; /* * Configure the address for reception. Now it is valid. @@ -2918,7 +2922,8 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) } } -static void addrconf_dad_run(struct inet6_dev *idev) { +static void addrconf_dad_run(struct inet6_dev *idev) +{ struct inet6_ifaddr *ifp; read_lock_bh(&idev->lock); @@ -2983,7 +2988,7 @@ static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos) struct inet6_ifaddr *ifa = if6_get_first(seq); if (ifa) - while(pos && (ifa = if6_get_next(seq, ifa)) != NULL) + while (pos && (ifa = if6_get_next(seq, ifa)) != NULL) --pos; return pos ? NULL : ifa; } @@ -3492,8 +3497,7 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, return nlmsg_end(skb, nlh); } -enum addr_type_t -{ +enum addr_type_t { UNICAST_ADDR, MULTICAST_ADDR, ANYCAST_ADDR, @@ -3592,7 +3596,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, if (idx > s_idx) s_ip_idx = 0; ip_idx = 0; - if ((idev = __in6_dev_get(dev)) == NULL) + idev = __in6_dev_get(dev); + if (!idev) goto cont; if (in6_dump_addrs(idev, skb, cb, type, @@ -3659,12 +3664,14 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, if (ifm->ifa_index) dev = __dev_get_by_index(net, ifm->ifa_index); - if ((ifa = ipv6_get_ifaddr(net, addr, dev, 1)) == NULL) { + ifa = ipv6_get_ifaddr(net, addr, dev, 1); + if (!ifa) { err = -EADDRNOTAVAIL; goto errout; } - if ((skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL)) == NULL) { + skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL); + if (!skb) { err = -ENOBUFS; goto errout_ifa; } @@ -3789,7 +3796,7 @@ static inline void __snmp6_fill_stats(u64 *stats, void __percpu **mib, static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, int bytes) { - switch(attrtype) { + switch (attrtype) { case IFLA_INET6_STATS: __snmp6_fill_stats(stats, (void __percpu **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes); break; @@ -4141,211 +4148,211 @@ static struct addrconf_sysctl_table .sysctl_header = NULL, .addrconf_vars = { { - .procname = "forwarding", - .data = &ipv6_devconf.forwarding, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = addrconf_sysctl_forward, + .procname = "forwarding", + .data = &ipv6_devconf.forwarding, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_forward, }, { - .procname = "hop_limit", - .data = &ipv6_devconf.hop_limit, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "hop_limit", + .data = &ipv6_devconf.hop_limit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "mtu", - .data = &ipv6_devconf.mtu6, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "mtu", + .data = &ipv6_devconf.mtu6, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "accept_ra", - .data = &ipv6_devconf.accept_ra, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "accept_ra", + .data = &ipv6_devconf.accept_ra, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = 
proc_dointvec, }, { - .procname = "accept_redirects", - .data = &ipv6_devconf.accept_redirects, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "accept_redirects", + .data = &ipv6_devconf.accept_redirects, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "autoconf", - .data = &ipv6_devconf.autoconf, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "autoconf", + .data = &ipv6_devconf.autoconf, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "dad_transmits", - .data = &ipv6_devconf.dad_transmits, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "dad_transmits", + .data = &ipv6_devconf.dad_transmits, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "router_solicitations", - .data = &ipv6_devconf.rtr_solicits, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "router_solicitations", + .data = &ipv6_devconf.rtr_solicits, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "router_solicitation_interval", - .data = &ipv6_devconf.rtr_solicit_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, + .procname = "router_solicitation_interval", + .data = &ipv6_devconf.rtr_solicit_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, }, { - .procname = "router_solicitation_delay", - .data = &ipv6_devconf.rtr_solicit_delay, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, + .procname = "router_solicitation_delay", + .data = &ipv6_devconf.rtr_solicit_delay, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, }, { - .procname = "force_mld_version", - .data = &ipv6_devconf.force_mld_version, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "force_mld_version", + .data = &ipv6_devconf.force_mld_version, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, #ifdef CONFIG_IPV6_PRIVACY { - .procname = "use_tempaddr", - .data = &ipv6_devconf.use_tempaddr, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "use_tempaddr", + .data = &ipv6_devconf.use_tempaddr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "temp_valid_lft", - .data = &ipv6_devconf.temp_valid_lft, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "temp_valid_lft", + .data = &ipv6_devconf.temp_valid_lft, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "temp_prefered_lft", - .data = &ipv6_devconf.temp_prefered_lft, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "temp_prefered_lft", + .data = &ipv6_devconf.temp_prefered_lft, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "regen_max_retry", - .data = &ipv6_devconf.regen_max_retry, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "regen_max_retry", + .data = &ipv6_devconf.regen_max_retry, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "max_desync_factor", - .data = &ipv6_devconf.max_desync_factor, - .maxlen = sizeof(int), - .mode = 0644, - 
.proc_handler = proc_dointvec, + .procname = "max_desync_factor", + .data = &ipv6_devconf.max_desync_factor, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, #endif { - .procname = "max_addresses", - .data = &ipv6_devconf.max_addresses, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "max_addresses", + .data = &ipv6_devconf.max_addresses, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "accept_ra_defrtr", - .data = &ipv6_devconf.accept_ra_defrtr, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "accept_ra_defrtr", + .data = &ipv6_devconf.accept_ra_defrtr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "accept_ra_pinfo", - .data = &ipv6_devconf.accept_ra_pinfo, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "accept_ra_pinfo", + .data = &ipv6_devconf.accept_ra_pinfo, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, #ifdef CONFIG_IPV6_ROUTER_PREF { - .procname = "accept_ra_rtr_pref", - .data = &ipv6_devconf.accept_ra_rtr_pref, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "accept_ra_rtr_pref", + .data = &ipv6_devconf.accept_ra_rtr_pref, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "router_probe_interval", - .data = &ipv6_devconf.rtr_probe_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, + .procname = "router_probe_interval", + .data = &ipv6_devconf.rtr_probe_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, }, #ifdef CONFIG_IPV6_ROUTE_INFO { - .procname = "accept_ra_rt_info_max_plen", - .data = &ipv6_devconf.accept_ra_rt_info_max_plen, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "accept_ra_rt_info_max_plen", + .data = &ipv6_devconf.accept_ra_rt_info_max_plen, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, #endif #endif { - .procname = "proxy_ndp", - .data = &ipv6_devconf.proxy_ndp, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "proxy_ndp", + .data = &ipv6_devconf.proxy_ndp, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "accept_source_route", - .data = &ipv6_devconf.accept_source_route, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "accept_source_route", + .data = &ipv6_devconf.accept_source_route, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, #ifdef CONFIG_IPV6_OPTIMISTIC_DAD { - .procname = "optimistic_dad", - .data = &ipv6_devconf.optimistic_dad, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "optimistic_dad", + .data = &ipv6_devconf.optimistic_dad, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, #endif #ifdef CONFIG_IPV6_MROUTE { - .procname = "mc_forwarding", - .data = &ipv6_devconf.mc_forwarding, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, + .procname = "mc_forwarding", + .data = &ipv6_devconf.mc_forwarding, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = proc_dointvec, }, #endif { - .procname = "disable_ipv6", - .data = &ipv6_devconf.disable_ipv6, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = 
addrconf_sysctl_disable, + .procname = "disable_ipv6", + .data = &ipv6_devconf.disable_ipv6, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_disable, }, { - .procname = "accept_dad", - .data = &ipv6_devconf.accept_dad, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "accept_dad", + .data = &ipv6_devconf.accept_dad, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { .procname = "force_tllao", @@ -4382,7 +4389,7 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, goto out; for (i = 0; t->addrconf_vars[i].data; i++) { - t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf; + t->addrconf_vars[i].data += (char *)p - (char *)&ipv6_devconf; t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */ t->addrconf_vars[i].extra2 = net; } @@ -4519,14 +4526,12 @@ int register_inet6addr_notifier(struct notifier_block *nb) { return atomic_notifier_chain_register(&inet6addr_chain, nb); } - EXPORT_SYMBOL(register_inet6addr_notifier); int unregister_inet6addr_notifier(struct notifier_block *nb) { - return atomic_notifier_chain_unregister(&inet6addr_chain,nb); + return atomic_notifier_chain_unregister(&inet6addr_chain, nb); } - EXPORT_SYMBOL(unregister_inet6addr_notifier); /* @@ -4537,9 +4542,10 @@ int __init addrconf_init(void) { int i, err; - if ((err = ipv6_addr_label_init()) < 0) { - printk(KERN_CRIT "IPv6 Addrconf: cannot initialize default policy table: %d.\n", - err); + err = ipv6_addr_label_init(); + if (err < 0) { + printk(KERN_CRIT "IPv6 Addrconf:" + " cannot initialize default policy table: %d.\n", err); return err; } -- cgit v1.2.2 From 88949cf484bfc399e1d662b5dda6892aaca21aae Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 17 Mar 2010 20:31:17 +0000 Subject: IPv6: addrconf cleanup addrconf_verify The variable regen_advance is only used in the privacy case. Move it to simplify code and eliminate ifdef's Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 279580eab309..36ebb4ad8a0a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3124,9 +3124,6 @@ restart: hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[i], addr_lst) { unsigned long age; -#ifdef CONFIG_IPV6_PRIVACY - unsigned long regen_advance; -#endif if (ifp->flags & IFA_F_PERMANENT) continue; @@ -3134,12 +3131,6 @@ restart: spin_lock(&ifp->lock); age = (now - ifp->tstamp) / HZ; -#ifdef CONFIG_IPV6_PRIVACY - regen_advance = ifp->idev->cnf.regen_max_retry * - ifp->idev->cnf.dad_transmits * - ifp->idev->nd_parms->retrans_time / HZ; -#endif - if (ifp->valid_lft != INFINITY_LIFE_TIME && age >= ifp->valid_lft) { spin_unlock(&ifp->lock); @@ -3173,6 +3164,10 @@ restart: #ifdef CONFIG_IPV6_PRIVACY } else if ((ifp->flags&IFA_F_TEMPORARY) && !(ifp->flags&IFA_F_TENTATIVE)) { + unsigned long regen_advance = ifp->idev->cnf.regen_max_retry * + ifp->idev->cnf.dad_transmits * + ifp->idev->nd_parms->retrans_time / HZ; + if (age >= ifp->prefered_lft - regen_advance) { struct inet6_ifaddr *ifpub = ifp->ifpub; if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next)) -- cgit v1.2.2 From b2db756449f63f98049587f7ede4a8e85e0c79b1 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Sat, 20 Mar 2010 16:11:12 -0700 Subject: ipv6: Reduce timer events for addrconf_verify(). 
This patch reduces timer events while keeping accuracy by rounding our timer and/or batching several address validations in addrconf_verify(). addrconf_verify() is called at earliest timeout among interface addresses' timeouts, but at maximum ADDR_CHECK_FREQUENCY (120 secs). In most cases, all of timeouts of interface addresses are long enough (e.g. several hours or days vs 2 minutes), this timer is usually called every ADDR_CHECK_FREQUENCY, and it is okay to be lazy. (Note this timer could be eliminated if all code paths which modifies variables related to timeouts call us manually, but it is another story.) However, in other least but important cases, we try keeping accuracy. When the real interface address timeout is coming, and the timeout is just before the rounded timeout, we accept some error. When a timeout has been reached, we also try batching other several events in very near future. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 36ebb4ad8a0a..7d7d4b17c0f0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -99,6 +99,10 @@ #define INFINITY_LIFE_TIME 0xFFFFFFFF #define TIME_DELTA(a, b) ((unsigned long)((long)(a) - (long)(b))) +#define ADDRCONF_TIMER_FUZZ_MINUS (HZ > 50 ? HZ/50 : 1) +#define ADDRCONF_TIMER_FUZZ (HZ / 4) +#define ADDRCONF_TIMER_FUZZ_MAX (HZ) + #ifdef CONFIG_SYSCTL static void addrconf_sysctl_register(struct inet6_dev *idev); static void addrconf_sysctl_unregister(struct inet6_dev *idev); @@ -3107,15 +3111,15 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr) static void addrconf_verify(unsigned long foo) { + unsigned long now, next, next_sec, next_sched; struct inet6_ifaddr *ifp; struct hlist_node *node; - unsigned long now, next; int i; rcu_read_lock_bh(); spin_lock(&addrconf_verify_lock); now = jiffies; - next = now + ADDR_CHECK_FREQUENCY; + next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); del_timer(&addr_chk_timer); @@ -3129,7 +3133,8 @@ restart: continue; spin_lock(&ifp->lock); - age = (now - ifp->tstamp) / HZ; + /* We try to batch several events at once. */ + age = (now - ifp->tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; if (ifp->valid_lft != INFINITY_LIFE_TIME && age >= ifp->valid_lft) { @@ -3199,7 +3204,21 @@ restart: } } - addr_chk_timer.expires = time_before(next, jiffies + HZ) ? jiffies + HZ : next; + next_sec = round_jiffies_up(next); + next_sched = next; + + /* If rounded timeout is accurate enough, accept it. */ + if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ)) + next_sched = next_sec; + + /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */ + if (time_before(next_sched, jiffies + ADDRCONF_TIMER_FUZZ_MAX)) + next_sched = jiffies + ADDRCONF_TIMER_FUZZ_MAX; + + ADBG((KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n", + now, next, next_sec, next_sched)); + + addr_chk_timer.expires = next_sched; add_timer(&addr_chk_timer); spin_unlock(&addrconf_verify_lock); rcu_read_unlock_bh(); -- cgit v1.2.2 From 3e81c6da39a265e11ef48f52bd15bf7ca0068c75 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 20 Mar 2010 16:18:00 -0700 Subject: ipv6: Fix bug in ipv6_chk_same_addr(). hlist_for_each_entry(p...) will not necessarily initialize 'p' to anything if the hlist is empty. GCC notices this and emits a warning. 
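As a minimal illustration of the problematic shape (the struct and function names below are invented for this sketch and are not part of the patch):

    #include <linux/list.h>
    #include <linux/types.h>

    struct item {
            struct hlist_node node;
            int key;
    };

    /* Buggy shape: with an empty hlist the loop body never runs, so "it"
     * is never assigned and the final test reads an uninitialized pointer;
     * the loop also never sets it to NULL when no entry matches. */
    static bool lookup_buggy(struct hlist_head *head, int key)
    {
            struct item *it;
            struct hlist_node *pos;

            hlist_for_each_entry(it, pos, head, node) {
                    if (it->key == key)
                            break;
            }
            return it != NULL;
    }

    /* Fixed shape: report the match from inside the loop. */
    static bool lookup_fixed(struct hlist_head *head, int key)
    {
            struct item *it;
            struct hlist_node *pos;

            hlist_for_each_entry(it, pos, head, node) {
                    if (it->key == key)
                            return true;
            }
            return false;
    }
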
Just return true explicitly when we hit a match, and return false is we fall out of the loop without one. Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 7d7d4b17c0f0..68e5809a2153 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -155,8 +155,8 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); static void inet6_prefix_notify(int event, struct inet6_dev *idev, struct prefix_info *pinfo); -static int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, - struct net_device *dev); +static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, + struct net_device *dev); static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); @@ -1295,23 +1295,22 @@ int ipv6_chk_addr(struct net *net, struct in6_addr *addr, } EXPORT_SYMBOL(ipv6_chk_addr); -static -int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, - struct net_device *dev) +static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, + struct net_device *dev) { + unsigned int hash = ipv6_addr_hash(addr); struct inet6_ifaddr *ifp; struct hlist_node *node; - unsigned int hash = ipv6_addr_hash(addr); hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr)) { if (dev == NULL || ifp->idev->dev == dev) - break; + return true; } } - return ifp != NULL; + return false; } int ipv6_chk_prefix(struct in6_addr *addr, struct net_device *dev) -- cgit v1.2.2 From 101545f6fef4a0a3ea8daf0b5b880df2c6a92a69 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 15 Mar 2010 14:12:58 -0700 Subject: Bluetooth: Fix potential bad memory access with sysfs files When creating a high number of Bluetooth sockets (L2CAP, SCO and RFCOMM) it is possible to scribble repeatedly on arbitrary pages of memory. Ensure that the content of these sysfs files is always less than one page. Even if this means truncating. The files in question are scheduled to be moved over to debugfs in the future anyway. 
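The bounded-write pattern applied in each show function is, in sketch form (a userspace analogue with a made-up record list, not the kernel code itself):

    #include <stdio.h>

    #define PAGE_SIZE 4096

    /* Hypothetical stand-in for the per-socket lines being dumped. */
    static const char *records[] = {
            "00:11:22:33:44:55 66:77:88:99:aa:bb 1 3",
            "00:11:22:33:44:55 cc:dd:ee:ff:00:11 2 5",
    };

    static size_t fill_page(char *buf)
    {
            char *str = buf;
            int size = PAGE_SIZE;
            size_t i;

            for (i = 0; i < sizeof(records) / sizeof(records[0]); i++) {
                    /* snprintf() never writes more than "size" bytes and
                     * returns the length it wanted to write. */
                    int len = snprintf(str, size, "%s\n", records[i]);

                    size -= len;
                    if (size <= 0)  /* page full: stop before overrunning buf */
                            break;
                    str += len;
            }
            return str - buf;       /* bytes actually reported, always < PAGE_SIZE */
    }

    int main(void)
    {
            char page[PAGE_SIZE];

            printf("%zu bytes\n", fill_page(page));
            return 0;
    }
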
Based on initial patches from Neil Brown and Linus Torvalds Reported-by: Neil Brown Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 10 +++++++++- net/bluetooth/rfcomm/core.c | 13 ++++++++++++- net/bluetooth/rfcomm/sock.c | 11 ++++++++++- net/bluetooth/sco.c | 11 ++++++++++- 4 files changed, 41 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 4db7ae2fe07d..27551820741e 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3944,16 +3944,24 @@ static ssize_t l2cap_sysfs_show(struct class *dev, struct sock *sk; struct hlist_node *node; char *str = buf; + int size = PAGE_SIZE; read_lock_bh(&l2cap_sk_list.lock); sk_for_each(sk, node, &l2cap_sk_list.head) { struct l2cap_pinfo *pi = l2cap_pi(sk); + int len; - str += sprintf(str, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d %d\n", + len = snprintf(str, size, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d %d\n", batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), sk->sk_state, __le16_to_cpu(pi->psm), pi->scid, pi->dcid, pi->imtu, pi->omtu, pi->sec_level); + + size -= len; + if (size <= 0) + break; + + str += len; } read_unlock_bh(&l2cap_sk_list.lock); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index db8a68e1a5ba..cf164073269d 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -2105,6 +2105,7 @@ static ssize_t rfcomm_dlc_sysfs_show(struct class *dev, struct rfcomm_session *s; struct list_head *pp, *p; char *str = buf; + int size = PAGE_SIZE; rfcomm_lock(); @@ -2113,11 +2114,21 @@ static ssize_t rfcomm_dlc_sysfs_show(struct class *dev, list_for_each(pp, &s->dlcs) { struct sock *sk = s->sock->sk; struct rfcomm_dlc *d = list_entry(pp, struct rfcomm_dlc, list); + int len; - str += sprintf(str, "%s %s %ld %d %d %d %d\n", + len = snprintf(str, size, "%s %s %ld %d %d %d %d\n", batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), d->state, d->dlci, d->mtu, d->rx_credits, d->tx_credits); + + size -= len; + if (size <= 0) + break; + + str += len; } + + if (size <= 0) + break; } rfcomm_unlock(); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index ca87d6ac6a20..8d0ee0b8a6b6 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -1068,13 +1068,22 @@ static ssize_t rfcomm_sock_sysfs_show(struct class *dev, struct sock *sk; struct hlist_node *node; char *str = buf; + int size = PAGE_SIZE; read_lock_bh(&rfcomm_sk_list.lock); sk_for_each(sk, node, &rfcomm_sk_list.head) { - str += sprintf(str, "%s %s %d %d\n", + int len; + + len = snprintf(str, size, "%s %s %d %d\n", batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), sk->sk_state, rfcomm_pi(sk)->channel); + + size -= len; + if (size <= 0) + break; + + str += len; } read_unlock_bh(&rfcomm_sk_list.lock); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index f93b939539bc..967a75175c66 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -960,13 +960,22 @@ static ssize_t sco_sysfs_show(struct class *dev, struct sock *sk; struct hlist_node *node; char *str = buf; + int size = PAGE_SIZE; read_lock_bh(&sco_sk_list.lock); sk_for_each(sk, node, &sco_sk_list.head) { - str += sprintf(str, "%s %s %d\n", + int len; + + len = snprintf(str, size, "%s %s %d\n", batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), sk->sk_state); + + size -= len; + if (size <= 0) + break; + + str += len; } read_unlock_bh(&sco_sk_list.lock); -- cgit v1.2.2 From aef7d97cc604309b66f6f45cce02cd734934cd4e Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 21 Mar 2010 
05:27:45 +0100 Subject: Bluetooth: Convert debug files to actually use debugfs instead of sysfs Some of the debug files ended up wrongly in sysfs, because at that point of time, debugfs didn't exist. Convert these files to use debugfs and also seq_file. This patch converts all of these files at once and then removes the exported symbol for the Bluetooth sysfs class. Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_sysfs.c | 3 +-- net/bluetooth/l2cap.c | 51 ++++++++++++++++++++++++++------------------ net/bluetooth/rfcomm/core.c | 52 +++++++++++++++++++++++++-------------------- net/bluetooth/rfcomm/sock.c | 47 +++++++++++++++++++++++----------------- net/bluetooth/sco.c | 47 ++++++++++++++++++++++------------------ 5 files changed, 113 insertions(+), 87 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index cafb55b0cea5..05fd125f74fe 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -8,8 +8,7 @@ #include #include -struct class *bt_class = NULL; -EXPORT_SYMBOL_GPL(bt_class); +static struct class *bt_class; struct dentry *bt_debugfs = NULL; EXPORT_SYMBOL_GPL(bt_debugfs); diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 27551820741e..43e17f7d7ecd 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -40,6 +40,8 @@ #include #include #include +#include +#include #include #include #include @@ -3937,39 +3939,42 @@ drop: return 0; } -static ssize_t l2cap_sysfs_show(struct class *dev, - struct class_attribute *attr, - char *buf) +static int l2cap_debugfs_show(struct seq_file *f, void *p) { struct sock *sk; struct hlist_node *node; - char *str = buf; - int size = PAGE_SIZE; read_lock_bh(&l2cap_sk_list.lock); sk_for_each(sk, node, &l2cap_sk_list.head) { struct l2cap_pinfo *pi = l2cap_pi(sk); - int len; - - len = snprintf(str, size, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d %d\n", - batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), - sk->sk_state, __le16_to_cpu(pi->psm), pi->scid, - pi->dcid, pi->imtu, pi->omtu, pi->sec_level); - - size -= len; - if (size <= 0) - break; - str += len; + seq_printf(f, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d %d\n", + batostr(&bt_sk(sk)->src), + batostr(&bt_sk(sk)->dst), + sk->sk_state, __le16_to_cpu(pi->psm), + pi->scid, pi->dcid, + pi->imtu, pi->omtu, pi->sec_level); } read_unlock_bh(&l2cap_sk_list.lock); - return str - buf; + return 0; } -static CLASS_ATTR(l2cap, S_IRUGO, l2cap_sysfs_show, NULL); +static int l2cap_debugfs_open(struct inode *inode, struct file *file) +{ + return single_open(file, l2cap_debugfs_show, inode->i_private); +} + +static const struct file_operations l2cap_debugfs_fops = { + .open = l2cap_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static struct dentry *l2cap_debugfs; static const struct proto_ops l2cap_sock_ops = { .family = PF_BLUETOOTH, @@ -4029,8 +4034,12 @@ static int __init l2cap_init(void) goto error; } - if (class_create_file(bt_class, &class_attr_l2cap) < 0) - BT_ERR("Failed to create L2CAP info file"); + if (bt_debugfs) { + l2cap_debugfs = debugfs_create_file("l2cap", 0444, + bt_debugfs, NULL, &l2cap_debugfs_fops); + if (!l2cap_debugfs) + BT_ERR("Failed to create L2CAP debug file"); + } BT_INFO("L2CAP ver %s", VERSION); BT_INFO("L2CAP socket layer initialized"); @@ -4044,7 +4053,7 @@ error: static void __exit l2cap_exit(void) { - class_remove_file(bt_class, &class_attr_l2cap); + debugfs_remove(l2cap_debugfs); if (bt_sock_unregister(BTPROTO_L2CAP) < 0) BT_ERR("L2CAP socket unregistration 
failed"); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index cf164073269d..13f114e8b0f9 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -33,6 +33,8 @@ #include #include #include +#include +#include #include #include #include @@ -2098,14 +2100,10 @@ static struct hci_cb rfcomm_cb = { .security_cfm = rfcomm_security_cfm }; -static ssize_t rfcomm_dlc_sysfs_show(struct class *dev, - struct class_attribute *attr, - char *buf) +static int rfcomm_dlc_debugfs_show(struct seq_file *f, void *x) { struct rfcomm_session *s; struct list_head *pp, *p; - char *str = buf; - int size = PAGE_SIZE; rfcomm_lock(); @@ -2114,29 +2112,33 @@ static ssize_t rfcomm_dlc_sysfs_show(struct class *dev, list_for_each(pp, &s->dlcs) { struct sock *sk = s->sock->sk; struct rfcomm_dlc *d = list_entry(pp, struct rfcomm_dlc, list); - int len; - len = snprintf(str, size, "%s %s %ld %d %d %d %d\n", - batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), - d->state, d->dlci, d->mtu, d->rx_credits, d->tx_credits); - - size -= len; - if (size <= 0) - break; - - str += len; + seq_printf(f, "%s %s %ld %d %d %d %d\n", + batostr(&bt_sk(sk)->src), + batostr(&bt_sk(sk)->dst), + d->state, d->dlci, d->mtu, + d->rx_credits, d->tx_credits); } - - if (size <= 0) - break; } rfcomm_unlock(); - return (str - buf); + return 0; +} + +static int rfcomm_dlc_debugfs_open(struct inode *inode, struct file *file) +{ + return single_open(file, rfcomm_dlc_debugfs_show, inode->i_private); } -static CLASS_ATTR(rfcomm_dlc, S_IRUGO, rfcomm_dlc_sysfs_show, NULL); +static const struct file_operations rfcomm_dlc_debugfs_fops = { + .open = rfcomm_dlc_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static struct dentry *rfcomm_dlc_debugfs; /* ---- Initialization ---- */ static int __init rfcomm_init(void) @@ -2153,8 +2155,12 @@ static int __init rfcomm_init(void) goto unregister; } - if (class_create_file(bt_class, &class_attr_rfcomm_dlc) < 0) - BT_ERR("Failed to create RFCOMM info file"); + if (bt_debugfs) { + rfcomm_dlc_debugfs = debugfs_create_file("rfcomm_dlc", 0444, + bt_debugfs, NULL, &rfcomm_dlc_debugfs_fops); + if (!rfcomm_dlc_debugfs) + BT_ERR("Failed to create RFCOMM debug file"); + } err = rfcomm_init_ttys(); if (err < 0) @@ -2182,7 +2188,7 @@ unregister: static void __exit rfcomm_exit(void) { - class_remove_file(bt_class, &class_attr_rfcomm_dlc); + debugfs_remove(rfcomm_dlc_debugfs); hci_unregister_cb(&rfcomm_cb); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 8d0ee0b8a6b6..7f439765403d 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -40,6 +40,8 @@ #include #include #include +#include +#include #include #include @@ -1061,37 +1063,38 @@ done: return result; } -static ssize_t rfcomm_sock_sysfs_show(struct class *dev, - struct class_attribute *attr, - char *buf) +static int rfcomm_sock_debugfs_show(struct seq_file *f, void *p) { struct sock *sk; struct hlist_node *node; - char *str = buf; - int size = PAGE_SIZE; read_lock_bh(&rfcomm_sk_list.lock); sk_for_each(sk, node, &rfcomm_sk_list.head) { - int len; - - len = snprintf(str, size, "%s %s %d %d\n", - batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), + seq_printf(f, "%s %s %d %d\n", + batostr(&bt_sk(sk)->src), + batostr(&bt_sk(sk)->dst), sk->sk_state, rfcomm_pi(sk)->channel); - - size -= len; - if (size <= 0) - break; - - str += len; } read_unlock_bh(&rfcomm_sk_list.lock); - return (str - buf); + return 0; } -static CLASS_ATTR(rfcomm, S_IRUGO, 
rfcomm_sock_sysfs_show, NULL); +static int rfcomm_sock_debugfs_open(struct inode *inode, struct file *file) +{ + return single_open(file, rfcomm_sock_debugfs_show, inode->i_private); +} + +static const struct file_operations rfcomm_sock_debugfs_fops = { + .open = rfcomm_sock_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static struct dentry *rfcomm_sock_debugfs; static const struct proto_ops rfcomm_sock_ops = { .family = PF_BLUETOOTH, @@ -1131,8 +1134,12 @@ int __init rfcomm_init_sockets(void) if (err < 0) goto error; - if (class_create_file(bt_class, &class_attr_rfcomm) < 0) - BT_ERR("Failed to create RFCOMM info file"); + if (bt_debugfs) { + rfcomm_sock_debugfs = debugfs_create_file("rfcomm", 0444, + bt_debugfs, NULL, &rfcomm_sock_debugfs_fops); + if (!rfcomm_sock_debugfs) + BT_ERR("Failed to create RFCOMM debug file"); + } BT_INFO("RFCOMM socket layer initialized"); @@ -1146,7 +1153,7 @@ error: void rfcomm_cleanup_sockets(void) { - class_remove_file(bt_class, &class_attr_rfcomm); + debugfs_remove(rfcomm_sock_debugfs); if (bt_sock_unregister(BTPROTO_RFCOMM) < 0) BT_ERR("RFCOMM socket layer unregistration failed"); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 967a75175c66..e5b16b76b22e 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -38,6 +38,8 @@ #include #include #include +#include +#include #include #include @@ -953,37 +955,36 @@ drop: return 0; } -static ssize_t sco_sysfs_show(struct class *dev, - struct class_attribute *attr, - char *buf) +static int sco_debugfs_show(struct seq_file *f, void *p) { struct sock *sk; struct hlist_node *node; - char *str = buf; - int size = PAGE_SIZE; read_lock_bh(&sco_sk_list.lock); sk_for_each(sk, node, &sco_sk_list.head) { - int len; - - len = snprintf(str, size, "%s %s %d\n", - batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), - sk->sk_state); - - size -= len; - if (size <= 0) - break; - - str += len; + seq_printf(f, "%s %s %d\n", batostr(&bt_sk(sk)->src), + batostr(&bt_sk(sk)->dst), sk->sk_state); } read_unlock_bh(&sco_sk_list.lock); - return (str - buf); + return 0; } -static CLASS_ATTR(sco, S_IRUGO, sco_sysfs_show, NULL); +static int sco_debugfs_open(struct inode *inode, struct file *file) +{ + return single_open(file, sco_debugfs_show, inode->i_private); +} + +static const struct file_operations sco_debugfs_fops = { + .open = sco_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static struct dentry *sco_debugfs; static const struct proto_ops sco_sock_ops = { .family = PF_BLUETOOTH, @@ -1041,8 +1042,12 @@ static int __init sco_init(void) goto error; } - if (class_create_file(bt_class, &class_attr_sco) < 0) - BT_ERR("Failed to create SCO info file"); + if (bt_debugfs) { + sco_debugfs = debugfs_create_file("sco", 0444, + bt_debugfs, NULL, &sco_debugfs_fops); + if (!sco_debugfs) + BT_ERR("Failed to create SCO debug file"); + } BT_INFO("SCO (Voice Link) ver %s", VERSION); BT_INFO("SCO socket layer initialized"); @@ -1056,7 +1061,7 @@ error: static void __exit sco_exit(void) { - class_remove_file(bt_class, &class_attr_sco); + debugfs_remove(sco_debugfs); if (bt_sock_unregister(BTPROTO_SCO) < 0) BT_ERR("SCO socket unregistration failed"); -- cgit v1.2.2 From c2c77ec83bdad17fb688557b5b3fdc36661dd1c6 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Fri, 19 Mar 2010 10:26:28 +0200 Subject: Bluetooth: Fix kernel crash on L2CAP stress tests Added very simple check that req buffer has enough space to fit configuration parameters. 
Shall be enough to reject packets with configuration size more than req buffer. Crash trace below [ 6069.659393] Unable to handle kernel paging request at virtual address 02000205 [ 6069.673034] Internal error: Oops: 805 [#1] PREEMPT ... [ 6069.727172] PC is at l2cap_add_conf_opt+0x70/0xf0 [l2cap] [ 6069.732604] LR is at l2cap_recv_frame+0x1350/0x2e78 [l2cap] ... [ 6070.030303] Backtrace: [ 6070.032806] [] (l2cap_add_conf_opt+0x0/0xf0 [l2cap]) from [] (l2cap_recv_frame+0x1350/0x2e78 [l2cap]) [ 6070.043823] r8:dc5d3100 r7:df2a91d6 r6:00000001 r5:df2a8000 r4:00000200 [ 6070.050659] [] (l2cap_recv_frame+0x0/0x2e78 [l2cap]) from [] (l2cap_recv_acldata+0x2bc/0x350 [l2cap]) [ 6070.061798] [] (l2cap_recv_acldata+0x0/0x350 [l2cap]) from [] (hci_rx_task+0x244/0x478 [bluetooth]) [ 6070.072631] r6:dc647700 r5:00000001 r4:df2ab740 [ 6070.077362] [] (hci_rx_task+0x0/0x478 [bluetooth]) from [] (tasklet_action+0x78/0xd8) [ 6070.087005] [] (tasklet_action+0x0/0xd8) from [] Signed-off-by: Andrei Emeltchenko Acked-by: Gustavo F. Padovan Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 43e17f7d7ecd..7794a2e2adce 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -2832,6 +2832,11 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr int len = cmd->len - sizeof(*rsp); char req[64]; + if (len > sizeof(req) - sizeof(struct l2cap_conf_req)) { + l2cap_send_disconn_req(conn, sk); + goto done; + } + /* throw out any old stored conf requests */ result = L2CAP_CONF_SUCCESS; len = l2cap_parse_conf_rsp(sk, rsp->data, -- cgit v1.2.2 From cdead7cf12896c0e50a8be2e52de52c364603095 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Mar 2010 15:36:22 -0400 Subject: SUNRPC: Fix a potential memory leak in auth_gss The function alloc_enc_pages() currently fails to release the pointer rqstp->rq_enc_pages in the error path. Signed-off-by: Trond Myklebust Acked-by: J. Bruce Fields Cc: stable@kernel.org --- net/sunrpc/auth_gss/auth_gss.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 0cfccc2a0297..c389ccf6437d 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1280,9 +1280,8 @@ alloc_enc_pages(struct rpc_rqst *rqstp) rqstp->rq_release_snd_buf = priv_release_snd_buf; return 0; out_free: - for (i--; i >= 0; i--) { - __free_page(rqstp->rq_enc_pages[i]); - } + rqstp->rq_enc_pages_num = i; + priv_release_snd_buf(rqstp); out: return -EAGAIN; } -- cgit v1.2.2 From 634a4b2038a6eba4c211fb906fa2f6ec9a4bbfc7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 21 Mar 2010 18:01:05 -0700 Subject: net: suppress lockdep-RCU false positive in FIB trie. Allow fib_find_node() to be called either under rcu_read_lock() protection or with RTNL held. Signed-off-by: Paul E. McKenney Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/fib_trie.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index af5d89792860..01ef8ba9025c 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -961,7 +961,9 @@ fib_find_node(struct trie *t, u32 key) struct node *n; pos = 0; - n = rcu_dereference(t->trie); + n = rcu_dereference_check(t->trie, + rcu_read_lock_held() || + lockdep_rtnl_is_held()); while (n != NULL && NODE_TYPE(n) == T_TNODE) { tn = (struct tnode *) n; -- cgit v1.2.2 From 755d0e77ac9c8d125388922dc33434ed5b2ebe80 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 19 Mar 2010 04:42:24 +0000 Subject: net: rtnetlink: ignore NETDEV_PRE_TYPE_CHANGE in rtnetlink_event() Ignore the new NETDEV_PRE_TYPE_CHANGE event in rtnetlink_event() since there have been no changes userspace needs to be notified of. Also add a comment to the netdev notifier event definitions to remind people to update the exclusion list when adding new event types. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e1121f0bca6a..ffc6cf3495ac 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1513,6 +1513,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_POST_INIT: case NETDEV_REGISTER: case NETDEV_CHANGE: + case NETDEV_PRE_TYPE_CHANGE: case NETDEV_GOING_DOWN: case NETDEV_UNREGISTER: case NETDEV_UNREGISTER_BATCH: -- cgit v1.2.2 From 32a806c194ea112cfab00f558482dd97bee5e44e Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 19 Mar 2010 04:00:23 +0000 Subject: bonding: flush unicast and multicast lists when changing type After the type change, addresses in unicast and multicast lists wouldn't make sense, not to mention possible different lenghts. So flush both lists here. Note "dev_addr_discard" will be very soon replaced by "dev_mc_flush" (once mc_list conversion will be done). Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/dev.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index c0e260870c0a..fe2a754238a9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4457,12 +4457,13 @@ void dev_unicast_unsync(struct net_device *to, struct net_device *from) } EXPORT_SYMBOL(dev_unicast_unsync); -static void dev_unicast_flush(struct net_device *dev) +void dev_unicast_flush(struct net_device *dev) { netif_addr_lock_bh(dev); __hw_addr_flush(&dev->uc); netif_addr_unlock_bh(dev); } +EXPORT_SYMBOL(dev_unicast_flush); static void dev_unicast_init(struct net_device *dev) { @@ -4484,7 +4485,7 @@ static void __dev_addr_discard(struct dev_addr_list **list) } } -static void dev_addr_discard(struct net_device *dev) +void dev_addr_discard(struct net_device *dev) { netif_addr_lock_bh(dev); @@ -4493,6 +4494,7 @@ static void dev_addr_discard(struct net_device *dev) netif_addr_unlock_bh(dev); } +EXPORT_SYMBOL(dev_addr_discard); /** * dev_get_flags - get flags reported to userspace -- cgit v1.2.2 From 907cdda5205b012eec7513f66713749b293188c9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Mar 2010 05:37:18 +0000 Subject: tcp: Add SNMP counter for DEFER_ACCEPT Its currently hard to diagnose when ACK frames are dropped because an application set TCP_DEFER_ACCEPT on its listening socket. 
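For context, an application opts into this behaviour with a setsockopt() call along these lines; the helper below is an illustrative snippet, not part of the patch:

    #include <netinet/in.h>
    #include <netinet/tcp.h>
    #include <sys/socket.h>

    /* Ask the kernel to hold the connection until data arrives (or the
     * timeout in seconds expires) instead of completing accept() on the
     * bare ACK; such dropped ACKs are what the new counter accounts for. */
    static int enable_defer_accept(int listen_fd, int timeout_secs)
    {
            return setsockopt(listen_fd, IPPROTO_TCP, TCP_DEFER_ACCEPT,
                              &timeout_secs, sizeof(timeout_secs));
    }
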
See http://bugzilla.kernel.org/show_bug.cgi?id=15507 This patch adds a SNMP value, named TCPDeferAcceptDrop netstat -s | grep TCPDeferAcceptDrop TCPDeferAcceptDrop: 0 This counter is incremented every time we drop a pure ACK frame received by a socket in SYN_RECV state because its SYNACK retrans count is lower than defer_accept value. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/proc.c | 1 + net/ipv4/tcp_minisocks.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 4f1f337f4337..3dc9914c1dce 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -251,6 +251,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK), SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP), SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP), + SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 4199bc6915c5..32f96278a24a 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -671,6 +671,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { inet_rsk(req)->acked = 1; + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); return NULL; } -- cgit v1.2.2 From 283f2fe87e980d8af5ad8aa63751e7e3258ee05a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Mar 2010 13:37:40 +0000 Subject: net: speedup netdev_set_master() We currently force a synchronize_net() in netdev_set_master() This seems necessary only when a slave had a master and we dismantle it. In the other case ("ifenslave bond0 ethO"), we dont need this long delay. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index fe2a754238a9..2d01f18f303a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3757,11 +3757,10 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) slave->master = master; - synchronize_net(); - - if (old) + if (old) { + synchronize_net(); dev_put(old); - + } if (master) slave->flags |= IFF_SLAVE; else -- cgit v1.2.2 From 62c97ac04a67c120ec37a9bfd445a8d5dbbc1ed2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Mar 2010 13:48:26 +0000 Subject: atm: Use kasprintf Use kasprintf in atm_proc_dev_register() Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/atm/proc.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'net') diff --git a/net/atm/proc.c b/net/atm/proc.c index 7a96b2376bd7..f188a399c679 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -406,7 +406,6 @@ EXPORT_SYMBOL(atm_proc_root); int atm_proc_dev_register(struct atm_dev *dev) { - int digits, num; int error; /* No proc info */ @@ -414,16 +413,9 @@ int atm_proc_dev_register(struct atm_dev *dev) return 0; error = -ENOMEM; - digits = 0; - for (num = dev->number; num; num /= 10) - digits++; - if (!digits) - digits++; - - dev->proc_name = kmalloc(strlen(dev->type) + digits + 2, GFP_KERNEL); + dev->proc_name = kasprintf(GFP_KERNEL, "%s:%d", dev->type, dev->number); if (!dev->proc_name) goto err_out; - sprintf(dev->proc_name, "%s:%d", dev->type, dev->number); dev->proc_entry = proc_create_data(dev->proc_name, 0, atm_proc_root, &proc_atm_dev_ops, dev); -- cgit v1.2.2 From ec733b15a3ef0b5759141a177f8044a2f40c41e7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Mar 2010 20:36:06 +0000 Subject: net: snmp mib cleanup There is no point to align or pad mibs to cache lines, they are per cpu allocated with a 8 bytes alignment anyway. This wastes space for no gain. This patch removes __SNMP_MIB_ALIGN__ Since SNMP mibs contain "unsigned long" fields only, we can relax the allocation alignment from "unsigned long long" to "unsigned long" Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/dccp/dccp.h | 2 +- net/ipv4/af_inet.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 5ef32c2f0d6a..53f8e12d0c10 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -189,7 +189,7 @@ enum { #define DCCP_MIB_MAX __DCCP_MIB_MAX struct dccp_mib { unsigned long mibs[DCCP_MIB_MAX]; -} __SNMP_MIB_ALIGN__; +}; DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics); #define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 33b7dffa7732..55e11906a73a 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1401,10 +1401,10 @@ EXPORT_SYMBOL_GPL(snmp_fold_field); int snmp_mib_init(void __percpu *ptr[2], size_t mibsize) { BUG_ON(ptr == NULL); - ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long long)); + ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long)); if (!ptr[0]) goto err0; - ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long long)); + ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long)); if (!ptr[1]) goto err1; return 0; -- cgit v1.2.2 From 99fe3c391d50d381687fd84ed0ab22d57079e41f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Mar 2010 11:27:25 +0000 Subject: net: dev_getfirstbyhwtype() optimization Use RCU to avoid RTNL use in dev_getfirstbyhwtype() Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/dev.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 2d01f18f303a..a03aab45e84f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -772,14 +772,17 @@ EXPORT_SYMBOL(__dev_getfirstbyhwtype); struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) { - struct net_device *dev; + struct net_device *dev, *ret = NULL; - rtnl_lock(); - dev = __dev_getfirstbyhwtype(net, type); - if (dev) - dev_hold(dev); - rtnl_unlock(); - return dev; + rcu_read_lock(); + for_each_netdev_rcu(net, dev) + if (dev->type == type) { + dev_hold(dev); + ret = dev; + break; + } + rcu_read_unlock(); + return ret; } EXPORT_SYMBOL(dev_getfirstbyhwtype); -- cgit v1.2.2 From e99b99b471c21b071132e51bb7aa6b7a8796dc02 Mon Sep 17 00:00:00 2001 From: Robert Olsson Date: Thu, 18 Mar 2010 22:44:30 +0000 Subject: pktgen node allocation Here is patch to manipulate packet node allocation and implicitly how packets are DMA'd etc. The flag NODE_ALLOC enables the function and numa_node_id(); when enabled it can also be explicitly controlled via a new node parameter Tested this with 10 Intel 82599 ports w. TYAN S7025 E5520 CPU's. Was able to TX/DMA ~80 Gbit/s to Ethernet wires. Signed-off-by: Robert Olsson Signed-off-by: David S. Miller --- net/core/pktgen.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 53 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 43923811bd6a..2ad68da418df 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -169,7 +169,7 @@ #include #include /* do_div */ -#define VERSION "2.72" +#define VERSION "2.73" #define IP_NAME_SZ 32 #define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ #define MPLS_STACK_BOTTOM htonl(0x00000100) @@ -190,6 +190,7 @@ #define F_IPSEC_ON (1<<12) /* ipsec on for flows */ #define F_QUEUE_MAP_RND (1<<13) /* queue map Random */ #define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */ +#define F_NODE (1<<15) /* Node memory alloc*/ /* Thread control flag bits */ #define T_STOP (1<<0) /* Stop run */ @@ -372,6 +373,7 @@ struct pktgen_dev { u16 queue_map_min; u16 queue_map_max; + int node; /* Memory node */ #ifdef CONFIG_XFRM __u8 ipsmode; /* IPSEC mode (config) */ @@ -607,6 +609,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v) if (pkt_dev->traffic_class) seq_printf(seq, " traffic_class: 0x%02x\n", pkt_dev->traffic_class); + if (pkt_dev->node >= 0) + seq_printf(seq, " node: %d\n", pkt_dev->node); + seq_printf(seq, " Flags: "); if (pkt_dev->flags & F_IPV6) @@ -660,6 +665,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v) if (pkt_dev->flags & F_SVID_RND) seq_printf(seq, "SVID_RND "); + if (pkt_dev->flags & F_NODE) + seq_printf(seq, "NODE_ALLOC "); + seq_puts(seq, "\n"); /* not really stopped, more like last-running-at */ @@ -1074,6 +1082,21 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->dst_mac_count); return count; } + if (!strcmp(name, "node")) { + len = num_arg(&user_buffer[i], 10, &value); + if (len < 0) + return len; + + i += len; + + if (node_possible(value)) { + pkt_dev->node = value; + sprintf(pg_result, "OK: node=%d", pkt_dev->node); + } + else + sprintf(pg_result, "ERROR: node not possible"); + return count; + } if (!strcmp(name, "flag")) { char f[32]; memset(f, 0, 32); @@ -1166,12 +1189,18 @@ static ssize_t pktgen_if_write(struct file *file, else if (strcmp(f, "!IPV6") == 0) pkt_dev->flags &= ~F_IPV6; 
+ else if (strcmp(f, "NODE_ALLOC") == 0) + pkt_dev->flags |= F_NODE; + + else if (strcmp(f, "!NODE_ALLOC") == 0) + pkt_dev->flags &= ~F_NODE; + else { sprintf(pg_result, "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", f, "IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, " - "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC\n"); + "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC, NODE_ALLOC\n"); return count; } sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags); @@ -2572,9 +2601,27 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, mod_cur_headers(pkt_dev); datalen = (odev->hard_header_len + 16) & ~0xf; - skb = __netdev_alloc_skb(odev, - pkt_dev->cur_pkt_size + 64 - + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT); + + if (pkt_dev->flags & F_NODE) { + int node; + + if (pkt_dev->node >= 0) + node = pkt_dev->node; + else + node = numa_node_id(); + + skb = __alloc_skb(NET_SKB_PAD + pkt_dev->cur_pkt_size + 64 + + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT, 0, node); + if (likely(skb)) { + skb_reserve(skb, NET_SKB_PAD); + skb->dev = odev; + } + } + else + skb = __netdev_alloc_skb(odev, + pkt_dev->cur_pkt_size + 64 + + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT); + if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; @@ -3674,6 +3721,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev->svlan_p = 0; pkt_dev->svlan_cfi = 0; pkt_dev->svlan_id = 0xffff; + pkt_dev->node = -1; err = pktgen_setup_dev(pkt_dev, ifname); if (err) -- cgit v1.2.2 From 5e016cbf6cffd4a53b7922e0c91b775399d7fe47 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 21 Mar 2010 20:55:13 -0700 Subject: ipv4: Don't drop redirected route cache entry unless PTMU actually expired TCP sessions over IPv4 can get stuck if routers between endpoints do not fragment packets but implement PMTU instead, and we are using those routers because of an ICMP redirect. Setup is as follows MTU1 MTU2 MTU1 A--------B------C------D with MTU1 > MTU2. A and D are endpoints, B and C are routers. B and C implement PMTU and drop packets larger than MTU2 (for example because DF is set on all packets). TCP sessions are initiated between A and D. There is packet loss between A and D, causing frequent TCP retransmits. After the number of retransmits on a TCP session reaches tcp_retries1, tcp calls dst_negative_advice() prior to each retransmit. This results in route cache entries for the peer to be deleted in ipv4_negative_advice() if the Path MTU is set. If the outstanding data on an affected TCP session is larger than MTU2, packets sent from the endpoints will be dropped by B or C, and ICMP NEEDFRAG will be returned. A and D receive NEEDFRAG messages and update PMTU. Before the next retransmit, tcp will again call dst_negative_advice(), causing the route cache entry (with correct PMTU) to be deleted. The retransmitted packet will be larger than MTU2, causing it to be dropped again. This sequence repeats until the TCP session aborts or is terminated. Problem is fixed by removing redirected route cache entries in ipv4_negative_advice() only if the PMTU is expired. Signed-off-by: Guenter Roeck Signed-off-by: David S. 
Miller --- net/ipv4/route.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 32d396196df8..54fd68c14c87 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1510,7 +1510,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) ip_rt_put(rt); ret = NULL; } else if ((rt->rt_flags & RTCF_REDIRECTED) || - rt->u.dst.expires) { + (rt->u.dst.expires && + time_after_eq(jiffies, rt->u.dst.expires))) { unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, rt->fl.oif, rt_genid(dev_net(dst->dev))); -- cgit v1.2.2 From 7668448ea91cda36661878da54c851f8eb239d8e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 20 Mar 2010 01:20:49 +0000 Subject: bridge: cleanup: remove unused assignment We never actually use iph again so this assignment can be removed. Signed-off-by: Dan Carpenter Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 6980625537ca..9f0c4f065604 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1003,8 +1003,6 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, if (!pskb_may_pull(skb2, sizeof(*ih))) goto out; - iph = ip_hdr(skb2); - switch (skb2->ip_summed) { case CHECKSUM_COMPLETE: if (!csum_fold(skb2->csum)) -- cgit v1.2.2 From 243aad830e8a4cdda261626fbaeddde16b08d04a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Sat, 20 Mar 2010 02:27:58 +0000 Subject: ip_gre: include route header_len in max_headroom calculation Taking route's header_len into account, and updating gre device needed_headroom will give better hints on upper bound of required headroom. This is useful if the gre traffic is xfrm'ed. Signed-off-by: Timo Teras Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f47c9f76754b..f78402d097b3 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -810,11 +810,13 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev tunnel->err_count = 0; } - max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen; + max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->u.dst.header_len; if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); + if (max_headroom > dev->needed_headroom) + dev->needed_headroom = max_headroom; if (!new_skb) { ip_rt_put(rt); txq->tx_dropped++; -- cgit v1.2.2 From c9acb42ef1904d15d0fb315061cefbe638f67f3a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Mar 2010 15:36:22 -0400 Subject: SUNRPC: Fix a use after free bug with the NFSv4.1 backchannel The ->release_request() callback was designed to allow the transport layer to do housekeeping after the RPC call is done. It cannot be used to free the request itself, and doing so leads to a use-after-free bug in xprt_release(). 
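To illustrate the hazard described above in isolation (an editorial sketch with hypothetical names, not code from this patch): a housekeeping hook such as ->release_request() must not free an object that its caller continues to dereference afterwards.

/* Hypothetical sketch of the use-after-free pattern; struct my_req,
 * ->release and finish_request() do not exist in the kernel. */
struct my_req {
	void (*release)(struct my_req *req);	/* housekeeping hook */
	unsigned long flags;
};

static void finish_request(struct my_req *req)
{
	if (req->release)
		req->release(req);	/* if this kfree()s req ... */
	req->flags = 0;			/* ... this access is a use-after-free */
}
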
Signed-off-by: Trond Myklebust --- net/sunrpc/bc_svc.c | 15 --------------- net/sunrpc/xprt.c | 22 +++++++++------------- net/sunrpc/xprtsock.c | 3 --- 3 files changed, 9 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c index 13f214f53120..f0c05d3311c1 100644 --- a/net/sunrpc/bc_svc.c +++ b/net/sunrpc/bc_svc.c @@ -37,21 +37,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define RPCDBG_FACILITY RPCDBG_SVCDSP -void bc_release_request(struct rpc_task *task) -{ - struct rpc_rqst *req = task->tk_rqstp; - - dprintk("RPC: bc_release_request: task= %p\n", task); - - /* - * Release this request only if it's a backchannel - * preallocated request - */ - if (!bc_prealloc(req)) - return; - xprt_free_bc_request(req); -} - /* Empty callback ops */ static const struct rpc_call_ops nfs41_callback_ops = { }; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 469de292c23c..42f09ade0044 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -46,6 +46,7 @@ #include #include +#include #include "sunrpc.h" @@ -1032,21 +1033,16 @@ void xprt_release(struct rpc_task *task) if (req->rq_release_snd_buf) req->rq_release_snd_buf(req); - /* - * Early exit if this is a backchannel preallocated request. - * There is no need to have it added to the RPC slot list. - */ - if (is_bc_request) - return; - - memset(req, 0, sizeof(*req)); /* mark unused */ - dprintk("RPC: %5u release request %p\n", task->tk_pid, req); + if (likely(!is_bc_request)) { + memset(req, 0, sizeof(*req)); /* mark unused */ - spin_lock(&xprt->reserve_lock); - list_add(&req->rq_list, &xprt->free); - rpc_wake_up_next(&xprt->backlog); - spin_unlock(&xprt->reserve_lock); + spin_lock(&xprt->reserve_lock); + list_add(&req->rq_list, &xprt->free); + rpc_wake_up_next(&xprt->backlog); + spin_unlock(&xprt->reserve_lock); + } else + xprt_free_bc_request(req); } /** diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index e4839c07c913..9847c30b5001 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2251,9 +2251,6 @@ static struct rpc_xprt_ops xs_tcp_ops = { .buf_free = rpc_free, .send_request = xs_tcp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, -#if defined(CONFIG_NFS_V4_1) - .release_request = bc_release_request, -#endif /* CONFIG_NFS_V4_1 */ .close = xs_tcp_close, .destroy = xs_destroy, .print_stats = xs_tcp_print_stats, -- cgit v1.2.2 From ff0901f8036a1586037c30a365c9666e946af0f1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Mar 2010 16:17:45 -0400 Subject: SUNRPC: Fix the return value of rpc_run_bc_task() Currently rpc_run_bc_task() will return NULL if the task allocation failed. However the only caller is bc_send, which assumes that the return value will be an ERR_PTR. Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 154034b675bd..19c9983d5360 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -659,6 +659,7 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, task = rpc_new_task(&task_setup_data); if (!task) { xprt_free_bc_request(req); + task = ERR_PTR(-ENOMEM); goto out; } task->tk_rqstp = req; -- cgit v1.2.2 From f1f0abe192a72e75d7c59972e30784d043fd8d73 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sun, 21 Mar 2010 12:10:34 -0400 Subject: sunrpc: handle allocation errors from __rpc_lookup_create() __rpc_lookup_create() can return ERR_PTR(-ENOMEM). 
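For context, the fix below relies on the standard ERR_PTR()/IS_ERR() idiom from <linux/err.h>: a pointer-returning function can encode a negative errno in the pointer value, and callers must test for that before dereferencing. A minimal caller-side sketch (the helper lookup_thing() is hypothetical, standing in for __rpc_lookup_create(); this is not code from the patch):

#include <linux/err.h>
#include <linux/dcache.h>

/* Hypothetical helper that can fail the same way __rpc_lookup_create() does. */
static struct dentry *lookup_thing(struct dentry *parent, const char *name)
{
	return ERR_PTR(-ENOMEM);	/* e.g. allocation failure */
}

static struct dentry *use_lookup(struct dentry *parent, const char *name)
{
	struct dentry *dentry = lookup_thing(parent, name);

	if (IS_ERR(dentry))
		return dentry;	/* propagate the encoded errno; never dereference it */
	/* only now is dentry a real pointer whose fields may be touched */
	return dentry;
}
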
Signed-off-by: Dan Carpenter Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- net/sunrpc/rpc_pipe.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 8d63f8fd29b7..20e30c6f8355 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -587,6 +587,8 @@ static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent, struct dentry *dentry; dentry = __rpc_lookup_create(parent, name); + if (IS_ERR(dentry)) + return dentry; if (dentry->d_inode == NULL) return dentry; dput(dentry); -- cgit v1.2.2 From c3824d21eb653fe7017476724257ccaa8bf3d9e1 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 22 Mar 2010 13:50:19 +0000 Subject: rxrpc: Check allocation failure. alloc_skb() can return NULL. Signed-off-by: Tetsuo Handa Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- net/rxrpc/ar-accept.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/rxrpc/ar-accept.c b/net/rxrpc/ar-accept.c index 77228f28fa36..2d744f22a9a1 100644 --- a/net/rxrpc/ar-accept.c +++ b/net/rxrpc/ar-accept.c @@ -88,6 +88,11 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local, /* get a notification message to send to the server app */ notification = alloc_skb(0, GFP_NOFS); + if (!notification) { + _debug("no memory"); + ret = -ENOMEM; + goto error_nofree; + } rxrpc_new_skb(notification); notification->mark = RXRPC_SKB_MARK_NEW_CALL; @@ -189,6 +194,7 @@ invalid_service: ret = -ECONNREFUSED; error: rxrpc_free_skb(notification); +error_nofree: _leave(" = %d", ret); return ret; } -- cgit v1.2.2 From ef1691504c83ba3eb636c0cfd3ed33f7a6d0b4ee Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 22 Mar 2010 18:25:20 +0100 Subject: netfilter: xt_recent: fix regression in rules using a zero hit_count Commit 8ccb92ad (netfilter: xt_recent: fix false match) fixed supposedly false matches in rules using a zero hit_count. As it turns out there is nothing false about these matches and people are actually using entries with a hit_count of zero to make rules dependant on addresses inserted manually through /proc. Since this slipped past the eyes of three reviewers, instead of reverting the commit in question, this patch explicitly checks for a hit_count of zero to make the intentions more clear. Reported-by: Thomas Jarosch Tested-by: Thomas Jarosch Cc: stable@kernel.org Signed-off-by: Patrick McHardy --- net/netfilter/xt_recent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 7073dbb8100c..971d172afece 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -267,7 +267,7 @@ recent_mt(const struct sk_buff *skb, const struct xt_match_param *par) for (i = 0; i < e->nstamps; i++) { if (info->seconds && time_after(time, e->stamps[i])) continue; - if (info->hit_count && ++hits >= info->hit_count) { + if (!info->hit_count || ++hits >= info->hit_count) { ret = !ret; break; } -- cgit v1.2.2 From e880eb6c5c9d98e389ffc0d8947f75d70785361a Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 22 Mar 2010 18:06:47 -0700 Subject: rps: Fix build with CONFIG_SYSFS enabled Fix build with CONFIG_SYSFS not enabled. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- net/core/net-sysfs.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 7a46343d5ae3..f6b6bfee72ae 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -739,7 +739,9 @@ void netdev_unregister_kobject(struct net_device * net) if (!net_eq(dev_net(net), &init_net)) return; +#ifdef CONFIG_SYSFS rx_queue_remove_kobjects(net); +#endif device_del(dev); } @@ -780,11 +782,13 @@ int netdev_register_kobject(struct net_device *net) if (error) return error; +#ifdef CONFIG_SYSFS error = rx_queue_register_kobjects(net); if (error) { device_del(dev); return error; } +#endif return error; } -- cgit v1.2.2 From 5fc05f8764f301138003ff562a31ad3721f1675f Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Sun, 21 Mar 2010 22:59:58 +0000 Subject: netpoll: warn when there are spaces in parameters v2: update according to Frans' comments. Currently, if we leave spaces before dst port, netconsole will silently accept it as 0. Warn about this. Also, when spaces appear in other places, make them visible in error messages. Signed-off-by: WANG Cong Cc: David Miller Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/core/netpoll.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index d4ec38fa64e6..6f9206b36dc2 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -614,7 +614,7 @@ void netpoll_print_options(struct netpoll *np) np->name, np->local_port); printk(KERN_INFO "%s: local IP %pI4\n", np->name, &np->local_ip); - printk(KERN_INFO "%s: interface %s\n", + printk(KERN_INFO "%s: interface '%s'\n", np->name, np->dev_name); printk(KERN_INFO "%s: remote port %d\n", np->name, np->remote_port); @@ -661,6 +661,9 @@ int netpoll_parse_options(struct netpoll *np, char *opt) if ((delim = strchr(cur, '@')) == NULL) goto parse_failed; *delim = 0; + if (*cur == ' ' || *cur == '\t') + printk(KERN_INFO "%s: warning: whitespace" + "is not allowed\n", np->name); np->remote_port = simple_strtol(cur, NULL, 10); cur = delim; } @@ -708,7 +711,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt) return 0; parse_failed: - printk(KERN_INFO "%s: couldn't parse config at %s!\n", + printk(KERN_INFO "%s: couldn't parse config at '%s'!\n", np->name, cur); return -1; } -- cgit v1.2.2 From 7316ae88c43d47f6503f4c29b4973204e33c3411 Mon Sep 17 00:00:00 2001 From: Tom Goff Date: Fri, 19 Mar 2010 15:40:13 +0000 Subject: net_sched: make traffic control network namespace aware Mostly minor changes to add a net argument to various functions and remove initial network namespace checks. Make /proc/net/psched per network namespace. Signed-off-by: Tom Goff Signed-off-by: David S. 
Miller --- net/sched/act_api.c | 45 +++++++++++---------- net/sched/cls_api.c | 30 ++++++-------- net/sched/sch_api.c | 112 +++++++++++++++++++++++++++++++++------------------- 3 files changed, 107 insertions(+), 80 deletions(-) (limited to 'net') diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 64f5e328cee9..7a558da99bb6 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -667,7 +667,8 @@ nlmsg_failure: } static int -act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event) +act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n, + struct tc_action *a, int event) { struct sk_buff *skb; @@ -679,7 +680,7 @@ act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event) return -EINVAL; } - return rtnl_unicast(skb, &init_net, pid); + return rtnl_unicast(skb, net, pid); } static struct tc_action * @@ -749,7 +750,8 @@ static struct tc_action *create_a(int i) return act; } -static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid) +static int tca_action_flush(struct net *net, struct nlattr *nla, + struct nlmsghdr *n, u32 pid) { struct sk_buff *skb; unsigned char *b; @@ -808,7 +810,7 @@ static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid) nlh->nlmsg_flags |= NLM_F_ROOT; module_put(a->ops->owner); kfree(a); - err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); if (err > 0) return 0; @@ -825,7 +827,8 @@ noflush_out: } static int -tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event) +tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, + u32 pid, int event) { int i, ret; struct nlattr *tb[TCA_ACT_MAX_PRIO+1]; @@ -837,7 +840,7 @@ tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event) if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) { if (tb[1] != NULL) - return tca_action_flush(tb[1], n, pid); + return tca_action_flush(net, tb[1], n, pid); else return -EINVAL; } @@ -858,7 +861,7 @@ tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event) } if (event == RTM_GETACTION) - ret = act_get_notify(pid, n, head, event); + ret = act_get_notify(net, pid, n, head, event); else { /* delete */ struct sk_buff *skb; @@ -877,7 +880,7 @@ tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event) /* now do the delete */ tcf_action_destroy(head, 0); - ret = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, + ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); if (ret > 0) return 0; @@ -888,8 +891,8 @@ err: return ret; } -static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event, - u16 flags) +static int tcf_add_notify(struct net *net, struct tc_action *a, + u32 pid, u32 seq, int event, u16 flags) { struct tcamsg *t; struct nlmsghdr *nlh; @@ -922,7 +925,7 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event, nlh->nlmsg_len = skb_tail_pointer(skb) - b; NETLINK_CB(skb).dst_group = RTNLGRP_TC; - err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, flags&NLM_F_ECHO); + err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags&NLM_F_ECHO); if (err > 0) err = 0; return err; @@ -935,7 +938,8 @@ nlmsg_failure: static int -tcf_action_add(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int ovr) +tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, + u32 pid, int ovr) { int ret = 0; struct tc_action *act; @@ -953,7 +957,7 @@ tcf_action_add(struct 
nlattr *nla, struct nlmsghdr *n, u32 pid, int ovr) /* dump then free all the actions after update; inserted policy * stays intact * */ - ret = tcf_add_notify(act, pid, seq, RTM_NEWACTION, n->nlmsg_flags); + ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags); for (a = act; a; a = act) { act = a->next; kfree(a); @@ -969,9 +973,6 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) u32 pid = skb ? NETLINK_CB(skb).pid : 0; int ret = 0, ovr = 0; - if (!net_eq(net, &init_net)) - return -EINVAL; - ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL); if (ret < 0) return ret; @@ -994,15 +995,17 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) if (n->nlmsg_flags&NLM_F_REPLACE) ovr = 1; replay: - ret = tcf_action_add(tca[TCA_ACT_TAB], n, pid, ovr); + ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr); if (ret == -EAGAIN) goto replay; break; case RTM_DELACTION: - ret = tca_action_gd(tca[TCA_ACT_TAB], n, pid, RTM_DELACTION); + ret = tca_action_gd(net, tca[TCA_ACT_TAB], n, + pid, RTM_DELACTION); break; case RTM_GETACTION: - ret = tca_action_gd(tca[TCA_ACT_TAB], n, pid, RTM_GETACTION); + ret = tca_action_gd(net, tca[TCA_ACT_TAB], n, + pid, RTM_GETACTION); break; default: BUG(); @@ -1042,7 +1045,6 @@ find_dump_kind(const struct nlmsghdr *n) static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = sock_net(skb->sk); struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; @@ -1052,9 +1054,6 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh); struct nlattr *kind = find_dump_kind(cb->nlh); - if (!net_eq(net, &init_net)) - return 0; - if (kind == NULL) { printk("tc_dump_action: action bad kind\n"); return 0; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 3725d8fa29db..4a795d966172 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -98,8 +98,9 @@ out: } EXPORT_SYMBOL(unregister_tcf_proto_ops); -static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n, - struct tcf_proto *tp, unsigned long fh, int event); +static int tfilter_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, struct tcf_proto *tp, + unsigned long fh, int event); /* Select new prio value from the range, managed by kernel. */ @@ -137,9 +138,6 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) int err; int tp_created = 0; - if (!net_eq(net, &init_net)) - return -EINVAL; - replay: t = NLMSG_DATA(n); protocol = TC_H_MIN(t->tcm_info); @@ -158,7 +156,7 @@ replay: /* Find head of filter chain. 
*/ /* Find link */ - dev = __dev_get_by_index(&init_net, t->tcm_ifindex); + dev = __dev_get_by_index(net, t->tcm_ifindex); if (dev == NULL) return -ENODEV; @@ -282,7 +280,7 @@ replay: *back = tp->next; spin_unlock_bh(root_lock); - tfilter_notify(skb, n, tp, fh, RTM_DELTFILTER); + tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER); tcf_destroy(tp); err = 0; goto errout; @@ -305,10 +303,10 @@ replay: case RTM_DELTFILTER: err = tp->ops->delete(tp, fh); if (err == 0) - tfilter_notify(skb, n, tp, fh, RTM_DELTFILTER); + tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER); goto errout; case RTM_GETTFILTER: - err = tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER); + err = tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER); goto errout; default: err = -EINVAL; @@ -324,7 +322,7 @@ replay: *back = tp; spin_unlock_bh(root_lock); } - tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER); + tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER); } else { if (tp_created) tcf_destroy(tp); @@ -370,8 +368,9 @@ nla_put_failure: return -1; } -static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n, - struct tcf_proto *tp, unsigned long fh, int event) +static int tfilter_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, struct tcf_proto *tp, + unsigned long fh, int event) { struct sk_buff *skb; u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; @@ -385,7 +384,7 @@ static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n, return -EINVAL; } - return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, + return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); } @@ -418,12 +417,9 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) const struct Qdisc_class_ops *cops; struct tcf_dump_args arg; - if (!net_eq(net, &init_net)) - return 0; - if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) return skb->len; - if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) return skb->len; if (!tcm->tcm_parent) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 6cd491013b50..6d6fe16289f3 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -34,10 +34,12 @@ #include #include -static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid, +static int qdisc_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, u32 clid, struct Qdisc *old, struct Qdisc *new); -static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, - struct Qdisc *q, unsigned long cl, int event); +static int tclass_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, struct Qdisc *q, + unsigned long cl, int event); /* @@ -638,11 +640,12 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) } EXPORT_SYMBOL(qdisc_tree_decrease_qlen); -static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid, +static void notify_and_destroy(struct net *net, struct sk_buff *skb, + struct nlmsghdr *n, u32 clid, struct Qdisc *old, struct Qdisc *new) { if (new || old) - qdisc_notify(skb, n, clid, old, new); + qdisc_notify(net, skb, n, clid, old, new); if (old) qdisc_destroy(old); @@ -662,6 +665,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, struct Qdisc *new, struct Qdisc *old) { struct Qdisc *q = old; + struct net *net = dev_net(dev); int err = 0; if (parent == NULL) { @@ -698,12 +702,13 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, } if (!ingress) { - notify_and_destroy(skb, n, 
classid, dev->qdisc, new); + notify_and_destroy(net, skb, n, classid, + dev->qdisc, new); if (new && !new->ops->attach) atomic_inc(&new->refcnt); dev->qdisc = new ? : &noop_qdisc; } else { - notify_and_destroy(skb, n, classid, old, new); + notify_and_destroy(net, skb, n, classid, old, new); } if (dev->flags & IFF_UP) @@ -721,7 +726,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, err = -ENOENT; } if (!err) - notify_and_destroy(skb, n, classid, old, new); + notify_and_destroy(net, skb, n, classid, old, new); } return err; } @@ -947,10 +952,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) struct Qdisc *p = NULL; int err; - if (!net_eq(net, &init_net)) - return -EINVAL; - - if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) return -ENODEV; err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); @@ -990,7 +992,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0) return err; } else { - qdisc_notify(skb, n, clid, NULL, q); + qdisc_notify(net, skb, n, clid, NULL, q); } return 0; } @@ -1009,16 +1011,13 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) struct Qdisc *q, *p; int err; - if (!net_eq(net, &init_net)) - return -EINVAL; - replay: /* Reinit, just in case something touches this. */ tcm = NLMSG_DATA(n); clid = tcm->tcm_parent; q = p = NULL; - if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) return -ENODEV; err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); @@ -1105,7 +1104,7 @@ replay: return -EINVAL; err = qdisc_change(q, tca); if (err == 0) - qdisc_notify(skb, n, clid, NULL, q); + qdisc_notify(net, skb, n, clid, NULL, q); return err; create_n_graft: @@ -1195,8 +1194,9 @@ nla_put_failure: return -1; } -static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, - u32 clid, struct Qdisc *old, struct Qdisc *new) +static int qdisc_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, u32 clid, + struct Qdisc *old, struct Qdisc *new) { struct sk_buff *skb; u32 pid = oskb ? 
NETLINK_CB(oskb).pid : 0; @@ -1215,7 +1215,7 @@ static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, } if (skb->len) - return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); err_out: kfree_skb(skb); @@ -1274,15 +1274,12 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) int s_idx, s_q_idx; struct net_device *dev; - if (!net_eq(net, &init_net)) - return 0; - s_idx = cb->args[0]; s_q_idx = q_idx = cb->args[1]; rcu_read_lock(); idx = 0; - for_each_netdev_rcu(&init_net, dev) { + for_each_netdev_rcu(net, dev) { struct netdev_queue *dev_queue; if (idx < s_idx) @@ -1334,10 +1331,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) u32 qid = TC_H_MAJ(clid); int err; - if (!net_eq(net, &init_net)) - return -EINVAL; - - if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) return -ENODEV; err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); @@ -1418,10 +1412,10 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) if (cops->delete) err = cops->delete(q, cl); if (err == 0) - tclass_notify(skb, n, q, cl, RTM_DELTCLASS); + tclass_notify(net, skb, n, q, cl, RTM_DELTCLASS); goto out; case RTM_GETTCLASS: - err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS); + err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS); goto out; default: err = -EINVAL; @@ -1434,7 +1428,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) if (cops->change) err = cops->change(q, clid, pid, tca, &new_cl); if (err == 0) - tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS); + tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS); out: if (cl) @@ -1486,8 +1480,9 @@ nla_put_failure: return -1; } -static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, - struct Qdisc *q, unsigned long cl, int event) +static int tclass_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, struct Qdisc *q, + unsigned long cl, int event) { struct sk_buff *skb; u32 pid = oskb ? 
NETLINK_CB(oskb).pid : 0; @@ -1501,7 +1496,7 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, return -EINVAL; } - return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); } struct qdisc_dump_args @@ -1576,12 +1571,9 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *dev; int t, s_t; - if (!net_eq(net, &init_net)) - return 0; - if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) return 0; - if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) + if ((dev = dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) return 0; s_t = cb->args[0]; @@ -1701,15 +1693,55 @@ static const struct file_operations psched_fops = { .llseek = seq_lseek, .release = single_release, }; + +static int __net_init psched_net_init(struct net *net) +{ + struct proc_dir_entry *e; + + e = proc_net_fops_create(net, "psched", 0, &psched_fops); + if (e == NULL) + return -ENOMEM; + + return 0; +} + +static void __net_exit psched_net_exit(struct net *net) +{ + proc_net_remove(net, "psched"); + + return; +} +#else +static int __net_init psched_net_init(struct net *net) +{ + return 0; +} + +static void __net_exit psched_net_exit(struct net *net) +{ +} #endif +static struct pernet_operations psched_net_ops = { + .init = psched_net_init, + .exit = psched_net_exit, +}; + static int __init pktsched_init(void) { + int err; + + err = register_pernet_subsys(&psched_net_ops); + if (err) { + printk(KERN_ERR "pktsched_init: " + "cannot initialize per netns operations\n"); + return err; + } + register_qdisc(&pfifo_qdisc_ops); register_qdisc(&bfifo_qdisc_ops); register_qdisc(&pfifo_head_drop_qdisc_ops); register_qdisc(&mq_qdisc_ops); - proc_net_fops_create(&init_net, "psched", 0, &psched_fops); rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL); rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL); -- cgit v1.2.2 From 8e039d84b323c4503c4d56863faa47c783660826 Mon Sep 17 00:00:00 2001 From: Ben Blum Date: Tue, 23 Mar 2010 05:24:03 +0000 Subject: cgroups: net_cls as module Allows the net_cls cgroup subsystem to be compiled as a module This patch modifies net/sched/cls_cgroup.c to allow the net_cls subsystem to be optionally compiled as a module instead of builtin. The cgroup_subsys struct is moved around a bit to allow the subsys_id to be either declared as a compile-time constant by the cgroup_subsys.h include in cgroup.h, or, if it's a module, initialized within the struct by cgroup_load_subsys. Signed-off-by: Ben Blum Acked-by: Li Zefan Cc: Paul Menage Cc: "David S. Miller" Cc: KAMEZAWA Hiroyuki Cc: Lai Jiangshan Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/sched/Kconfig | 5 ++++- net/sched/cls_cgroup.c | 36 +++++++++++++++++++++++++++--------- 2 files changed, 31 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 21f9c7678aa3..2f691fb180d1 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -328,13 +328,16 @@ config NET_CLS_FLOW module will be called cls_flow. config NET_CLS_CGROUP - bool "Control Group Classifier" + tristate "Control Group Classifier" select NET_CLS depends on CGROUPS ---help--- Say Y here if you want to classify packets based on the control cgroup of their process. + To compile this code as a module, choose M here: the + module will be called cls_cgroup. 
+ config NET_EMATCH bool "Extended Matches" select NET_CLS diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index e4877ca6727c..7f27d2c15e08 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -24,6 +24,25 @@ struct cgroup_cls_state u32 classid; }; +static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, + struct cgroup *cgrp); +static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp); +static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp); + +struct cgroup_subsys net_cls_subsys = { + .name = "net_cls", + .create = cgrp_create, + .destroy = cgrp_destroy, + .populate = cgrp_populate, +#ifdef CONFIG_NET_CLS_CGROUP + .subsys_id = net_cls_subsys_id, +#else +#define net_cls_subsys_id net_cls_subsys.subsys_id +#endif + .module = THIS_MODULE, +}; + + static inline struct cgroup_cls_state *cgrp_cls_state(struct cgroup *cgrp) { return container_of(cgroup_subsys_state(cgrp, net_cls_subsys_id), @@ -79,14 +98,6 @@ static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files)); } -struct cgroup_subsys net_cls_subsys = { - .name = "net_cls", - .create = cgrp_create, - .destroy = cgrp_destroy, - .populate = cgrp_populate, - .subsys_id = net_cls_subsys_id, -}; - struct cls_cgroup_head { u32 handle; @@ -277,12 +288,19 @@ static struct tcf_proto_ops cls_cgroup_ops __read_mostly = { static int __init init_cgroup_cls(void) { - return register_tcf_proto_ops(&cls_cgroup_ops); + int ret = register_tcf_proto_ops(&cls_cgroup_ops); + if (ret) + return ret; + ret = cgroup_load_subsys(&net_cls_subsys); + if (ret) + unregister_tcf_proto_ops(&cls_cgroup_ops); + return ret; } static void __exit exit_cgroup_cls(void) { unregister_tcf_proto_ops(&cls_cgroup_ops); + cgroup_unload_subsys(&net_cls_subsys); } module_init(init_cgroup_cls); -- cgit v1.2.2 From 44608f801283f0f69d8a04d9976837748e410084 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 18 Mar 2010 18:29:35 -0700 Subject: net/wireless/wext_core.c: Use IW_IOCTL_IDX macro There's a wireless.h macro for this, might as well use it. Signed-off-by: Joe Perches Signed-off-by: John W. Linville --- net/wireless/wext-core.c | 112 +++++++++++++++++++++++------------------------ 1 file changed, 56 insertions(+), 56 deletions(-) (limited to 'net') diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 5e1656bdf23b..dbde22b8f30f 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -28,226 +28,226 @@ typedef int (*wext_ioctl_func)(struct net_device *, struct iwreq *, * know about. 
*/ static const struct iw_ioctl_description standard_ioctl[] = { - [SIOCSIWCOMMIT - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCSIWCOMMIT)] = { .header_type = IW_HEADER_TYPE_NULL, }, - [SIOCGIWNAME - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCGIWNAME)] = { .header_type = IW_HEADER_TYPE_CHAR, .flags = IW_DESCR_FLAG_DUMP, }, - [SIOCSIWNWID - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCSIWNWID)] = { .header_type = IW_HEADER_TYPE_PARAM, .flags = IW_DESCR_FLAG_EVENT, }, - [SIOCGIWNWID - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCGIWNWID)] = { .header_type = IW_HEADER_TYPE_PARAM, .flags = IW_DESCR_FLAG_DUMP, }, - [SIOCSIWFREQ - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCSIWFREQ)] = { .header_type = IW_HEADER_TYPE_FREQ, .flags = IW_DESCR_FLAG_EVENT, }, - [SIOCGIWFREQ - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCGIWFREQ)] = { .header_type = IW_HEADER_TYPE_FREQ, .flags = IW_DESCR_FLAG_DUMP, }, - [SIOCSIWMODE - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCSIWMODE)] = { .header_type = IW_HEADER_TYPE_UINT, .flags = IW_DESCR_FLAG_EVENT, }, - [SIOCGIWMODE - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCGIWMODE)] = { .header_type = IW_HEADER_TYPE_UINT, .flags = IW_DESCR_FLAG_DUMP, }, - [SIOCSIWSENS - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCSIWSENS)] = { .header_type = IW_HEADER_TYPE_PARAM, }, - [SIOCGIWSENS - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCGIWSENS)] = { .header_type = IW_HEADER_TYPE_PARAM, }, - [SIOCSIWRANGE - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCSIWRANGE)] = { .header_type = IW_HEADER_TYPE_NULL, }, - [SIOCGIWRANGE - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCGIWRANGE)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = sizeof(struct iw_range), .flags = IW_DESCR_FLAG_DUMP, }, - [SIOCSIWPRIV - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCSIWPRIV)] = { .header_type = IW_HEADER_TYPE_NULL, }, - [SIOCGIWPRIV - SIOCIWFIRST] = { /* (handled directly by us) */ + [IW_IOCTL_IDX(SIOCGIWPRIV)] = { /* (handled directly by us) */ .header_type = IW_HEADER_TYPE_POINT, .token_size = sizeof(struct iw_priv_args), .max_tokens = 16, .flags = IW_DESCR_FLAG_NOMAX, }, - [SIOCSIWSTATS - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCSIWSTATS)] = { .header_type = IW_HEADER_TYPE_NULL, }, - [SIOCGIWSTATS - SIOCIWFIRST] = { /* (handled directly by us) */ + [IW_IOCTL_IDX(SIOCGIWSTATS)] = { /* (handled directly by us) */ .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = sizeof(struct iw_statistics), .flags = IW_DESCR_FLAG_DUMP, }, - [SIOCSIWSPY - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCSIWSPY)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = sizeof(struct sockaddr), .max_tokens = IW_MAX_SPY, }, - [SIOCGIWSPY - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCGIWSPY)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = sizeof(struct sockaddr) + sizeof(struct iw_quality), .max_tokens = IW_MAX_SPY, }, - [SIOCSIWTHRSPY - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCSIWTHRSPY)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = sizeof(struct iw_thrspy), .min_tokens = 1, .max_tokens = 1, }, - [SIOCGIWTHRSPY - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCGIWTHRSPY)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = sizeof(struct iw_thrspy), .min_tokens = 1, .max_tokens = 1, }, - [SIOCSIWAP - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCSIWAP)] = { .header_type = IW_HEADER_TYPE_ADDR, }, - [SIOCGIWAP - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCGIWAP)] = { .header_type = IW_HEADER_TYPE_ADDR, .flags = IW_DESCR_FLAG_DUMP, }, - [SIOCSIWMLME - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCSIWMLME)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .min_tokens = sizeof(struct iw_mlme), .max_tokens = 
sizeof(struct iw_mlme), }, - [SIOCGIWAPLIST - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCGIWAPLIST)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = sizeof(struct sockaddr) + sizeof(struct iw_quality), .max_tokens = IW_MAX_AP, .flags = IW_DESCR_FLAG_NOMAX, }, - [SIOCSIWSCAN - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCSIWSCAN)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .min_tokens = 0, .max_tokens = sizeof(struct iw_scan_req), }, - [SIOCGIWSCAN - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCGIWSCAN)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = IW_SCAN_MAX_DATA, .flags = IW_DESCR_FLAG_NOMAX, }, - [SIOCSIWESSID - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCSIWESSID)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = IW_ESSID_MAX_SIZE, .flags = IW_DESCR_FLAG_EVENT, }, - [SIOCGIWESSID - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCGIWESSID)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = IW_ESSID_MAX_SIZE, .flags = IW_DESCR_FLAG_DUMP, }, - [SIOCSIWNICKN - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCSIWNICKN)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = IW_ESSID_MAX_SIZE, }, - [SIOCGIWNICKN - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCGIWNICKN)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = IW_ESSID_MAX_SIZE, }, - [SIOCSIWRATE - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCSIWRATE)] = { .header_type = IW_HEADER_TYPE_PARAM, }, - [SIOCGIWRATE - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCGIWRATE)] = { .header_type = IW_HEADER_TYPE_PARAM, }, - [SIOCSIWRTS - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCSIWRTS)] = { .header_type = IW_HEADER_TYPE_PARAM, }, - [SIOCGIWRTS - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCGIWRTS)] = { .header_type = IW_HEADER_TYPE_PARAM, }, - [SIOCSIWFRAG - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCSIWFRAG)] = { .header_type = IW_HEADER_TYPE_PARAM, }, - [SIOCGIWFRAG - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCGIWFRAG)] = { .header_type = IW_HEADER_TYPE_PARAM, }, - [SIOCSIWTXPOW - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCSIWTXPOW)] = { .header_type = IW_HEADER_TYPE_PARAM, }, - [SIOCGIWTXPOW - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCGIWTXPOW)] = { .header_type = IW_HEADER_TYPE_PARAM, }, - [SIOCSIWRETRY - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCSIWRETRY)] = { .header_type = IW_HEADER_TYPE_PARAM, }, - [SIOCGIWRETRY - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCGIWRETRY)] = { .header_type = IW_HEADER_TYPE_PARAM, }, - [SIOCSIWENCODE - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCSIWENCODE)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = IW_ENCODING_TOKEN_MAX, .flags = IW_DESCR_FLAG_EVENT | IW_DESCR_FLAG_RESTRICT, }, - [SIOCGIWENCODE - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCGIWENCODE)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = IW_ENCODING_TOKEN_MAX, .flags = IW_DESCR_FLAG_DUMP | IW_DESCR_FLAG_RESTRICT, }, - [SIOCSIWPOWER - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCSIWPOWER)] = { .header_type = IW_HEADER_TYPE_PARAM, }, - [SIOCGIWPOWER - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCGIWPOWER)] = { .header_type = IW_HEADER_TYPE_PARAM, }, - [SIOCSIWGENIE - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCSIWGENIE)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = IW_GENERIC_IE_MAX, }, - [SIOCGIWGENIE - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCGIWGENIE)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = IW_GENERIC_IE_MAX, }, - [SIOCSIWAUTH - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCSIWAUTH)] = { .header_type = IW_HEADER_TYPE_PARAM, }, - [SIOCGIWAUTH - SIOCIWFIRST] = { + 
[IW_IOCTL_IDX(SIOCGIWAUTH)] = { .header_type = IW_HEADER_TYPE_PARAM, }, - [SIOCSIWENCODEEXT - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCSIWENCODEEXT)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .min_tokens = sizeof(struct iw_encode_ext), .max_tokens = sizeof(struct iw_encode_ext) + IW_ENCODING_TOKEN_MAX, }, - [SIOCGIWENCODEEXT - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCGIWENCODEEXT)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .min_tokens = sizeof(struct iw_encode_ext), .max_tokens = sizeof(struct iw_encode_ext) + IW_ENCODING_TOKEN_MAX, }, - [SIOCSIWPMKSA - SIOCIWFIRST] = { + [IW_IOCTL_IDX(SIOCSIWPMKSA)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .min_tokens = sizeof(struct iw_pmksa), @@ -449,7 +449,7 @@ void wireless_send_event(struct net_device * dev, /* Get the description of the Event */ if (cmd <= SIOCIWLAST) { - cmd_index = cmd - SIOCIWFIRST; + cmd_index = IW_IOCTL_IDX(cmd); if (cmd_index < standard_ioctl_num) descr = &(standard_ioctl[cmd_index]); } else { @@ -662,7 +662,7 @@ static iw_handler get_handler(struct net_device *dev, unsigned int cmd) return NULL; /* Try as a standard command */ - index = cmd - SIOCIWFIRST; + index = IW_IOCTL_IDX(cmd); if (index < handlers->num_standard) return handlers->standard[index]; @@ -954,9 +954,9 @@ static int ioctl_standard_call(struct net_device * dev, int ret = -EINVAL; /* Get the description of the IOCTL */ - if ((cmd - SIOCIWFIRST) >= standard_ioctl_num) + if (IW_IOCTL_IDX(cmd) >= standard_ioctl_num) return -EOPNOTSUPP; - descr = &(standard_ioctl[cmd - SIOCIWFIRST]); + descr = &(standard_ioctl[IW_IOCTL_IDX(cmd)]); /* Check if we have a pointer to user space data or not */ if (descr->header_type != IW_HEADER_TYPE_POINT) { @@ -1012,7 +1012,7 @@ static int compat_standard_call(struct net_device *dev, struct iw_point iwp; int err; - descr = standard_ioctl + (cmd - SIOCIWFIRST); + descr = standard_ioctl + IW_IOCTL_IDX(cmd); if (descr->header_type != IW_HEADER_TYPE_POINT) return ioctl_standard_call(dev, iwr, cmd, info, handler); -- cgit v1.2.2 From 76326f1d4c98fe01daf363e3d07f84bafed1feec Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 18 Mar 2010 18:29:36 -0700 Subject: net/wireless/wext-core.c: Use IW_EVENT_IDX macro There's a wireless.h macro for this, might as well use it. Signed-off-by: Joe Perches Signed-off-by: John W. Linville --- net/wireless/wext-core.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index dbde22b8f30f..bfcbeee23f9c 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -261,44 +261,44 @@ static const unsigned standard_ioctl_num = ARRAY_SIZE(standard_ioctl); * we know about. 
*/ static const struct iw_ioctl_description standard_event[] = { - [IWEVTXDROP - IWEVFIRST] = { + [IW_EVENT_IDX(IWEVTXDROP)] = { .header_type = IW_HEADER_TYPE_ADDR, }, - [IWEVQUAL - IWEVFIRST] = { + [IW_EVENT_IDX(IWEVQUAL)] = { .header_type = IW_HEADER_TYPE_QUAL, }, - [IWEVCUSTOM - IWEVFIRST] = { + [IW_EVENT_IDX(IWEVCUSTOM)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = IW_CUSTOM_MAX, }, - [IWEVREGISTERED - IWEVFIRST] = { + [IW_EVENT_IDX(IWEVREGISTERED)] = { .header_type = IW_HEADER_TYPE_ADDR, }, - [IWEVEXPIRED - IWEVFIRST] = { + [IW_EVENT_IDX(IWEVEXPIRED)] = { .header_type = IW_HEADER_TYPE_ADDR, }, - [IWEVGENIE - IWEVFIRST] = { + [IW_EVENT_IDX(IWEVGENIE)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = IW_GENERIC_IE_MAX, }, - [IWEVMICHAELMICFAILURE - IWEVFIRST] = { + [IW_EVENT_IDX(IWEVMICHAELMICFAILURE)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = sizeof(struct iw_michaelmicfailure), }, - [IWEVASSOCREQIE - IWEVFIRST] = { + [IW_EVENT_IDX(IWEVASSOCREQIE)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = IW_GENERIC_IE_MAX, }, - [IWEVASSOCRESPIE - IWEVFIRST] = { + [IW_EVENT_IDX(IWEVASSOCRESPIE)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = IW_GENERIC_IE_MAX, }, - [IWEVPMKIDCAND - IWEVFIRST] = { + [IW_EVENT_IDX(IWEVPMKIDCAND)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = sizeof(struct iw_pmkid_cand), @@ -453,7 +453,7 @@ void wireless_send_event(struct net_device * dev, if (cmd_index < standard_ioctl_num) descr = &(standard_ioctl[cmd_index]); } else { - cmd_index = cmd - IWEVFIRST; + cmd_index = IW_EVENT_IDX(cmd); if (cmd_index < standard_event_num) descr = &(standard_event[cmd_index]); } -- cgit v1.2.2 From 1e4dcd012423b6a28f968a55886d2b27896a1586 Mon Sep 17 00:00:00 2001 From: Juuso Oikarinen Date: Fri, 19 Mar 2010 07:14:53 +0200 Subject: mac80211: Add support for connection monitor in hardware This patch is based on a RFC patch by Kalle Valo. The wl1271 has a feature which handles the connection monitor logic in hardware, basically sending periodically nullfunc frames and reporting to the host if AP is lost, after attempting to recover by sending probe-requests to the AP. Add support to mac80211 by adding a new flag IEEE80211_HW_CONNECTION_MONITOR which prevents conn_mon_timer from triggering during idle periods, and prevents sending probe-requests to the AP if beacon-loss is indicated by the hardware. Cc: Kalle Valo Signed-off-by: Juuso Oikarinen Reviewed-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/ieee80211_i.h | 4 +-- net/mac80211/iface.c | 2 +- net/mac80211/mlme.c | 64 +++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 60 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b84126491ab1..ab369e2a5282 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -327,7 +327,7 @@ struct ieee80211_if_managed { struct work_struct work; struct work_struct monitor_work; struct work_struct chswitch_work; - struct work_struct beacon_loss_work; + struct work_struct beacon_connection_loss_work; unsigned long probe_timeout; int probe_send_count; @@ -1156,7 +1156,7 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, int powersave); void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr); -void ieee80211_beacon_loss_work(struct work_struct *work); +void ieee80211_beacon_connection_loss_work(struct work_struct *work); void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, enum queue_stop_reason reason); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index d5571b9420cd..b4ec59a8dc03 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -486,7 +486,7 @@ static int ieee80211_stop(struct net_device *dev) cancel_work_sync(&sdata->u.mgd.work); cancel_work_sync(&sdata->u.mgd.chswitch_work); cancel_work_sync(&sdata->u.mgd.monitor_work); - cancel_work_sync(&sdata->u.mgd.beacon_loss_work); + cancel_work_sync(&sdata->u.mgd.beacon_connection_loss_work); /* * When we get here, the interface is marked down. diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0ab284c32135..865ea1cfb7bb 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -851,6 +851,9 @@ void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, if (is_multicast_ether_addr(hdr->addr1)) return; + if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) + return; + mod_timer(&sdata->u.mgd.conn_mon_timer, round_jiffies_up(jiffies + IEEE80211_CONNECTION_IDLE_TIME)); } @@ -928,23 +931,68 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, mutex_unlock(&ifmgd->mtx); } -void ieee80211_beacon_loss_work(struct work_struct *work) +static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + struct ieee80211_local *local = sdata->local; + u8 bssid[ETH_ALEN]; + + mutex_lock(&ifmgd->mtx); + if (!ifmgd->associated) { + mutex_unlock(&ifmgd->mtx); + return; + } + + memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN); + + printk(KERN_DEBUG "Connection to AP %pM lost.\n", bssid); + + ieee80211_set_disassoc(sdata); + ieee80211_recalc_idle(local); + mutex_unlock(&ifmgd->mtx); + /* + * must be outside lock due to cfg80211, + * but that's not a problem. 
+ */ + ieee80211_send_deauth_disassoc(sdata, bssid, + IEEE80211_STYPE_DEAUTH, + WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, + NULL); +} + +void ieee80211_beacon_connection_loss_work(struct work_struct *work) { struct ieee80211_sub_if_data *sdata = container_of(work, struct ieee80211_sub_if_data, - u.mgd.beacon_loss_work); + u.mgd.beacon_connection_loss_work); - ieee80211_mgd_probe_ap(sdata, true); + if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) + __ieee80211_connection_loss(sdata); + else + ieee80211_mgd_probe_ap(sdata, true); } void ieee80211_beacon_loss(struct ieee80211_vif *vif) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_hw *hw = &sdata->local->hw; - ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.beacon_loss_work); + WARN_ON(hw->flags & IEEE80211_HW_CONNECTION_MONITOR); + ieee80211_queue_work(hw, &sdata->u.mgd.beacon_connection_loss_work); } EXPORT_SYMBOL(ieee80211_beacon_loss); +void ieee80211_connection_loss(struct ieee80211_vif *vif) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_hw *hw = &sdata->local->hw; + + WARN_ON(!(hw->flags & IEEE80211_HW_CONNECTION_MONITOR)); + ieee80211_queue_work(hw, &sdata->u.mgd.beacon_connection_loss_work); +} +EXPORT_SYMBOL(ieee80211_connection_loss); + + static enum rx_mgmt_action __must_check ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len) @@ -1634,7 +1682,8 @@ static void ieee80211_sta_bcn_mon_timer(unsigned long data) if (local->quiescing) return; - ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.beacon_loss_work); + ieee80211_queue_work(&sdata->local->hw, + &sdata->u.mgd.beacon_connection_loss_work); } static void ieee80211_sta_conn_mon_timer(unsigned long data) @@ -1686,7 +1735,7 @@ void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata) */ cancel_work_sync(&ifmgd->work); - cancel_work_sync(&ifmgd->beacon_loss_work); + cancel_work_sync(&ifmgd->beacon_connection_loss_work); if (del_timer_sync(&ifmgd->timer)) set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running); @@ -1720,7 +1769,8 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) INIT_WORK(&ifmgd->work, ieee80211_sta_work); INIT_WORK(&ifmgd->monitor_work, ieee80211_sta_monitor_work); INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work); - INIT_WORK(&ifmgd->beacon_loss_work, ieee80211_beacon_loss_work); + INIT_WORK(&ifmgd->beacon_connection_loss_work, + ieee80211_beacon_connection_loss_work); setup_timer(&ifmgd->timer, ieee80211_sta_timer, (unsigned long) sdata); setup_timer(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer, -- cgit v1.2.2 From e51d739ab79110c43ca03daf3ddb3c52dadd38b7 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 23 Mar 2010 13:39:19 +0000 Subject: net: Fix locking in flush_backlog Need to take spinlocks when dequeuing from input_pkt_queue in flush_backlog. Also, flush_backlog can now be called directly from netdev_run_todo. Signed-off-by: Tom Herbert Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/dev.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index a03aab45e84f..5e3dc28cbf5a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2766,17 +2766,19 @@ int netif_receive_skb(struct sk_buff *skb) EXPORT_SYMBOL(netif_receive_skb); /* Network device is going away, flush any packets still pending */ -static void flush_backlog(void *arg) +static void flush_backlog(struct net_device *dev, int cpu) { - struct net_device *dev = arg; - struct softnet_data *queue = &__get_cpu_var(softnet_data); + struct softnet_data *queue = &per_cpu(softnet_data, cpu); struct sk_buff *skb, *tmp; + unsigned long flags; + spin_lock_irqsave(&queue->input_pkt_queue.lock, flags); skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp) if (skb->dev == dev) { __skb_unlink(skb, &queue->input_pkt_queue); kfree_skb(skb); } + spin_unlock_irqrestore(&queue->input_pkt_queue.lock, flags); } static int napi_gro_complete(struct sk_buff *skb) @@ -5545,6 +5547,7 @@ void netdev_run_todo(void) while (!list_empty(&list)) { struct net_device *dev = list_first_entry(&list, struct net_device, todo_list); + int i; list_del(&dev->todo_list); if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { @@ -5556,7 +5559,8 @@ void netdev_run_todo(void) dev->reg_state = NETREG_UNREGISTERED; - on_each_cpu(flush_backlog, dev, 1); + for_each_online_cpu(i) + flush_backlog(dev, i); netdev_wait_allrefs(dev); -- cgit v1.2.2 From a5990ea1254cd186b38744507aeec3136a0c1c95 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 11 Mar 2010 14:08:10 -0800 Subject: sunrpc/cache: fix module refcnt leak in a failure path Don't forget to release the module refcnt if seq_open() returns failure. Signed-off-by: Li Zefan Cc: J. Bruce Fields Cc: Neil Brown Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: J. Bruce Fields --- net/sunrpc/cache.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 3e1ef8bf4dc2..a3f340c8b79a 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1244,8 +1244,10 @@ static int content_open(struct inode *inode, struct file *file, if (!cd || !try_module_get(cd->owner)) return -EACCES; han = __seq_open_private(file, &cache_content_op, sizeof(*han)); - if (han == NULL) + if (han == NULL) { + module_put(cd->owner); return -ENOMEM; + } han->cd = cd; return 0; -- cgit v1.2.2 From 669d3e0babb40018dd6e78f4093c13a2eac73866 Mon Sep 17 00:00:00 2001 From: Vasu Dev Date: Tue, 23 Mar 2010 14:41:45 +0000 Subject: vlan: adds vlan_dev_select_queue This is required to correctly select vlan tx queue for a driver supporting multi tx queue with ndo_select_queue implemented since currently selected vlan tx queue is unaligned to selected queue by real net_devce ndo_select_queue. Unaligned vlan tx queue selection causes thrash with higher vlan tx lock contention for least fcoe traffic and wrong socket tx queue_mapping for ixgbe having ndo_select_queue implemented. -v2 As per Eric Dumazet comments, mirrored vlan net_device_ops to have them with and without vlan_dev_select_queue and then select according to real dev ndo_select_queue present or not for a vlan net_device. This is to completely skip vlan_dev_select_queue calling for real net_device not supporting ndo_select_queue. Signed-off-by: Vasu Dev Signed-off-by: Jeff Kirsher Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/8021q/vlan_dev.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 68 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 9e83272fc5b0..2fd057c81bbf 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -361,6 +361,14 @@ static netdev_tx_t vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, return ret; } +static u16 vlan_dev_select_queue(struct net_device *dev, struct sk_buff *skb) +{ + struct net_device *rdev = vlan_dev_info(dev)->real_dev; + const struct net_device_ops *ops = rdev->netdev_ops; + + return ops->ndo_select_queue(rdev, skb); +} + static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu) { /* TODO: gotta make sure the underlying layer can handle it, @@ -688,7 +696,8 @@ static const struct header_ops vlan_header_ops = { .parse = eth_header_parse, }; -static const struct net_device_ops vlan_netdev_ops, vlan_netdev_accel_ops; +static const struct net_device_ops vlan_netdev_ops, vlan_netdev_accel_ops, + vlan_netdev_ops_sq, vlan_netdev_accel_ops_sq; static int vlan_dev_init(struct net_device *dev) { @@ -722,11 +731,17 @@ static int vlan_dev_init(struct net_device *dev) if (real_dev->features & NETIF_F_HW_VLAN_TX) { dev->header_ops = real_dev->header_ops; dev->hard_header_len = real_dev->hard_header_len; - dev->netdev_ops = &vlan_netdev_accel_ops; + if (real_dev->netdev_ops->ndo_select_queue) + dev->netdev_ops = &vlan_netdev_accel_ops_sq; + else + dev->netdev_ops = &vlan_netdev_accel_ops; } else { dev->header_ops = &vlan_header_ops; dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN; - dev->netdev_ops = &vlan_netdev_ops; + if (real_dev->netdev_ops->ndo_select_queue) + dev->netdev_ops = &vlan_netdev_ops_sq; + else + dev->netdev_ops = &vlan_netdev_ops; } if (is_vlan_dev(real_dev)) @@ -865,6 +880,56 @@ static const struct net_device_ops vlan_netdev_accel_ops = { #endif }; +static const struct net_device_ops vlan_netdev_ops_sq = { + .ndo_select_queue = vlan_dev_select_queue, + .ndo_change_mtu = vlan_dev_change_mtu, + .ndo_init = vlan_dev_init, + .ndo_uninit = vlan_dev_uninit, + .ndo_open = vlan_dev_open, + .ndo_stop = vlan_dev_stop, + .ndo_start_xmit = vlan_dev_hard_start_xmit, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = vlan_dev_set_mac_address, + .ndo_set_rx_mode = vlan_dev_set_rx_mode, + .ndo_set_multicast_list = vlan_dev_set_rx_mode, + .ndo_change_rx_flags = vlan_dev_change_rx_flags, + .ndo_do_ioctl = vlan_dev_ioctl, + .ndo_neigh_setup = vlan_dev_neigh_setup, + .ndo_get_stats = vlan_dev_get_stats, +#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) + .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup, + .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, + .ndo_fcoe_enable = vlan_dev_fcoe_enable, + .ndo_fcoe_disable = vlan_dev_fcoe_disable, + .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn, +#endif +}; + +static const struct net_device_ops vlan_netdev_accel_ops_sq = { + .ndo_select_queue = vlan_dev_select_queue, + .ndo_change_mtu = vlan_dev_change_mtu, + .ndo_init = vlan_dev_init, + .ndo_uninit = vlan_dev_uninit, + .ndo_open = vlan_dev_open, + .ndo_stop = vlan_dev_stop, + .ndo_start_xmit = vlan_dev_hwaccel_hard_start_xmit, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = vlan_dev_set_mac_address, + .ndo_set_rx_mode = vlan_dev_set_rx_mode, + .ndo_set_multicast_list = vlan_dev_set_rx_mode, + .ndo_change_rx_flags = vlan_dev_change_rx_flags, + .ndo_do_ioctl = vlan_dev_ioctl, + .ndo_neigh_setup = vlan_dev_neigh_setup, + 
.ndo_get_stats = vlan_dev_get_stats, +#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) + .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup, + .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, + .ndo_fcoe_enable = vlan_dev_fcoe_enable, + .ndo_fcoe_disable = vlan_dev_fcoe_disable, + .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn, +#endif +}; + void vlan_setup(struct net_device *dev) { ether_setup(dev); -- cgit v1.2.2 From f6b9f4b263f3178fc0f23f0e67d04386528cc727 Mon Sep 17 00:00:00 2001 From: Vasu Dev Date: Tue, 23 Mar 2010 14:42:05 +0000 Subject: vlan: updates vlan real_num_tx_queues Updates real_num_tx_queues in case underlying real device has changed real_num_tx_queues. -v2 As per Eric Dumazet comment:- -- adds BUG_ON to catch case of real_num_tx_queues exceeding num_tx_queues. -- created this self contained patch to just update real_num_tx_queues. Signed-off-by: Vasu Dev Signed-off-by: Jeff Kirsher Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/8021q/vlan.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 453512266ea1..db783d7af5a3 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -378,6 +378,8 @@ static void vlan_transfer_features(struct net_device *dev, #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid; #endif + vlandev->real_num_tx_queues = dev->real_num_tx_queues; + BUG_ON(vlandev->real_num_tx_queues > vlandev->num_tx_queues); if (old_features != vlandev->features) netdev_features_change(vlandev); -- cgit v1.2.2 From d6dc1a386358979e12366d1f35eeb68fc181e101 Mon Sep 17 00:00:00 2001 From: Juuso Oikarinen Date: Tue, 23 Mar 2010 09:02:33 +0200 Subject: cfg80211: Add connection quality monitoring support to nl80211 Add support for basic configuration of a connection quality monitoring to the nl80211 interface, and basic support for notifying about triggered monitoring events. Via this interface a user-space connection manager may configure and receive pre-warning events of deteriorating WLAN connection quality, and start preparing for roaming in advance, before the connection is already lost. An example usage of such a trigger is starting scanning for nearby AP's in an attempt to find one with better connection quality, and associate to it before the connection characteristics of the existing connection become too bad or the association is even lost, leading in a prolonged delay in connectivity. The interface currently supports only RSSI, but it could be later extended to include other parameters, such as signal-to-noise ratio, if need for that arises. Signed-off-by: Juuso Oikarinen Reviewed-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/wireless/mlme.c | 13 +++++ net/wireless/nl80211.c | 131 +++++++++++++++++++++++++++++++++++++++++++++++++ net/wireless/nl80211.h | 6 +++ 3 files changed, 150 insertions(+) (limited to 'net') diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 62bc8855e123..0855f0d32349 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -894,3 +894,16 @@ void cfg80211_action_tx_status(struct net_device *dev, u64 cookie, nl80211_send_action_tx_status(rdev, dev, cookie, buf, len, ack, gfp); } EXPORT_SYMBOL(cfg80211_action_tx_status); + +void cfg80211_cqm_rssi_notify(struct net_device *dev, + enum nl80211_cqm_rssi_threshold_event rssi_event, + gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + /* Indicate roaming trigger event to user space */ + nl80211_send_cqm_rssi_notify(rdev, dev, rssi_event, gfp); +} +EXPORT_SYMBOL(cfg80211_cqm_rssi_notify); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e447db04cf76..a7fc3d83f5f6 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -149,6 +149,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { .len = IEEE80211_MAX_DATA_LEN }, [NL80211_ATTR_FRAME_MATCH] = { .type = NLA_BINARY, }, [NL80211_ATTR_PS_STATE] = { .type = NLA_U32 }, + [NL80211_ATTR_CQM] = { .type = NLA_NESTED, }, }; /* policy for the attributes */ @@ -4778,6 +4779,84 @@ unlock_rtnl: return err; } +static struct nla_policy +nl80211_attr_cqm_policy[NL80211_ATTR_CQM_MAX + 1] __read_mostly = { + [NL80211_ATTR_CQM_RSSI_THOLD] = { .type = NLA_U32 }, + [NL80211_ATTR_CQM_RSSI_HYST] = { .type = NLA_U32 }, + [NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT] = { .type = NLA_U32 }, +}; + +static int nl80211_set_cqm_rssi(struct genl_info *info, + s32 threshold, u32 hysteresis) +{ + struct cfg80211_registered_device *rdev; + struct wireless_dev *wdev; + struct net_device *dev; + int err; + + if (threshold > 0) + return -EINVAL; + + rtnl_lock(); + + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); + if (err) + goto unlock_rdev; + + wdev = dev->ieee80211_ptr; + + if (!rdev->ops->set_cqm_rssi_config) { + err = -EOPNOTSUPP; + goto unlock_rdev; + } + + if (wdev->iftype != NL80211_IFTYPE_STATION) { + err = -EOPNOTSUPP; + goto unlock_rdev; + } + + err = rdev->ops->set_cqm_rssi_config(wdev->wiphy, dev, + threshold, hysteresis); + +unlock_rdev: + cfg80211_unlock_rdev(rdev); + dev_put(dev); + rtnl_unlock(); + + return err; +} + +static int nl80211_set_cqm(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *attrs[NL80211_ATTR_CQM_MAX + 1]; + struct nlattr *cqm; + int err; + + cqm = info->attrs[NL80211_ATTR_CQM]; + if (!cqm) { + err = -EINVAL; + goto out; + } + + err = nla_parse_nested(attrs, NL80211_ATTR_CQM_MAX, cqm, + nl80211_attr_cqm_policy); + if (err) + goto out; + + if (attrs[NL80211_ATTR_CQM_RSSI_THOLD] && + attrs[NL80211_ATTR_CQM_RSSI_HYST]) { + s32 threshold; + u32 hysteresis; + threshold = nla_get_u32(attrs[NL80211_ATTR_CQM_RSSI_THOLD]); + hysteresis = nla_get_u32(attrs[NL80211_ATTR_CQM_RSSI_HYST]); + err = nl80211_set_cqm_rssi(info, threshold, hysteresis); + } else + err = -EINVAL; + +out: + return err; +} + static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -5082,6 +5161,12 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, /* can be retrieved by unprivileged users */ }, + { + .cmd = NL80211_CMD_SET_CQM, + .doit = nl80211_set_cqm, + .policy = nl80211_policy, + 
.flags = GENL_ADMIN_PERM, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { @@ -5832,6 +5917,52 @@ void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } +void +nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, + struct net_device *netdev, + enum nl80211_cqm_rssi_threshold_event rssi_event, + gfp_t gfp) +{ + struct sk_buff *msg; + struct nlattr *pinfoattr; + void *hdr; + + msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NOTIFY_CQM); + if (!hdr) { + nlmsg_free(msg); + return; + } + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + + pinfoattr = nla_nest_start(msg, NL80211_ATTR_CQM); + if (!pinfoattr) + goto nla_put_failure; + + NLA_PUT_U32(msg, NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT, + rssi_event); + + nla_nest_end(msg, pinfoattr); + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + static int nl80211_netlink_notify(struct notifier_block * nb, unsigned long state, void *_notify) diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 4ca511102c6c..2ad7fbc7d9f1 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -82,4 +82,10 @@ void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev, const u8 *buf, size_t len, bool ack, gfp_t gfp); +void +nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, + struct net_device *netdev, + enum nl80211_cqm_rssi_threshold_event rssi_event, + gfp_t gfp); + #endif /* __NET_WIRELESS_NL80211_H */ -- cgit v1.2.2 From a97c13c34509be460dea23c86f31c02daa2428b5 Mon Sep 17 00:00:00 2001 From: Juuso Oikarinen Date: Tue, 23 Mar 2010 09:02:34 +0200 Subject: mac80211: Add support for connection quality monitoring Add support for the set_cqm_config op. This op function configures the requested connection quality monitor rssi threshold and rssi hysteresis values to the hardware if the hardware supports IEEE80211_HW_SUPPORTS_CQM. For unsupported hardware, currently -EOPNOTSUPP is returned, so the mac80211 is currently not doing connection quality monitoring on the host. This could be added later, if needed. Signed-off-by: Juuso Oikarinen Reviewed-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/cfg.c | 27 +++++++++++++++++++++++++++ net/mac80211/mlme.c | 15 +++++++++++++++ 2 files changed, 42 insertions(+) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index b7116ef84a3b..c8f520529eec 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1402,6 +1402,32 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, return 0; } +static int ieee80211_set_cqm_rssi_config(struct wiphy *wiphy, + struct net_device *dev, + s32 rssi_thold, u32 rssi_hyst) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_vif *vif = &sdata->vif; + struct ieee80211_bss_conf *bss_conf = &vif->bss_conf; + + if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI)) + return -EOPNOTSUPP; + + if (rssi_thold == bss_conf->cqm_rssi_thold && + rssi_hyst == bss_conf->cqm_rssi_hyst) + return 0; + + bss_conf->cqm_rssi_thold = rssi_thold; + bss_conf->cqm_rssi_hyst = rssi_hyst; + + /* tell the driver upon association, unless already associated */ + if (sdata->u.mgd.associated) + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_CQM); + + return 0; +} + static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, struct net_device *dev, const u8 *addr, @@ -1506,4 +1532,5 @@ struct cfg80211_ops mac80211_config_ops = { .remain_on_channel = ieee80211_remain_on_channel, .cancel_remain_on_channel = ieee80211_cancel_remain_on_channel, .action = ieee80211_action, + .set_cqm_rssi_config = ieee80211_set_cqm_rssi_config, }; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 865ea1cfb7bb..65eafda5738a 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -750,6 +750,11 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, /* And the BSSID changed - we're associated now */ bss_info_changed |= BSS_CHANGED_BSSID; + /* Tell the driver to monitor connection quality (if supported) */ + if ((local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI) && + sdata->vif.bss_conf.cqm_rssi_thold) + bss_info_changed |= BSS_CHANGED_CQM; + ieee80211_bss_info_change_notify(sdata, bss_info_changed); mutex_lock(&local->iflist_mtx); @@ -2182,3 +2187,13 @@ int ieee80211_mgd_action(struct ieee80211_sub_if_data *sdata, *cookie = (unsigned long) skb; return 0; } + +void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, + enum nl80211_cqm_rssi_threshold_event rssi_event, + gfp_t gfp) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + cfg80211_cqm_rssi_notify(sdata->dev, rssi_event, gfp); +} +EXPORT_SYMBOL(ieee80211_cqm_rssi_notify); -- cgit v1.2.2 From 14b44974d5a3c1ca59f6809b7313d7229eb55fd8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 24 Mar 2010 14:56:41 +0300 Subject: mac80211: remove unneed variable from ieee80211_tx_pending() We don't need "sdata" any more after: d84f323477260e773d5317ad7cbe50f76115cb47 mac80211: remove dev_hold/put calls Signed-off-by: Dan Carpenter Signed-off-by: John W. 
Linville --- net/mac80211/tx.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index cbe53ed4fb0b..08e1f17a4226 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2010,14 +2010,12 @@ void ieee80211_tx_pending(unsigned long data) while (!skb_queue_empty(&local->pending[i])) { struct sk_buff *skb = __skb_dequeue(&local->pending[i]); struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_sub_if_data *sdata; if (WARN_ON(!info->control.vif)) { kfree_skb(skb); continue; } - sdata = vif_to_sdata(info->control.vif); spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); -- cgit v1.2.2 From 9a127aad4d60968fba96622008ea0d243688f2b0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 24 Mar 2010 01:47:00 +0000 Subject: af_key: return error if pfkey_xfrm_policy2msg_prep() fails The original code saved the error value but just returned 0 in the end. Signed-off-by: Dan Carpenter Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/key/af_key.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 368707882647..344145f23c34 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2129,10 +2129,9 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c int err; out_skb = pfkey_xfrm_policy2msg_prep(xp); - if (IS_ERR(out_skb)) { - err = PTR_ERR(out_skb); - goto out; - } + if (IS_ERR(out_skb)) + return PTR_ERR(out_skb); + err = pfkey_xfrm_policy2msg(out_skb, xp, dir); if (err < 0) return err; @@ -2148,7 +2147,6 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c out_hdr->sadb_msg_seq = c->seq; out_hdr->sadb_msg_pid = c->pid; pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xp_net(xp)); -out: return 0; } -- cgit v1.2.2 From a3dcce97b285ba54810f38fe2eccc295d69a76ce Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 24 Mar 2010 01:55:10 +0000 Subject: llc: cleanup: remove dead code from llc_init() We don't need "dev" any more after: a5a04819c5740cb1aa217af2cc8f5ef26f33d744 [LLC]: station source mac address Signed-off-by: Dan Carpenter Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/llc/llc_core.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net') diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 78167e81dfeb..2bb0ddff8c0f 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -144,12 +144,6 @@ static struct packet_type llc_tr_packet_type __read_mostly = { static int __init llc_init(void) { - struct net_device *dev; - - dev = first_net_device(&init_net); - if (dev != NULL) - dev = next_net_device(dev); - dev_add_pack(&llc_packet_type); dev_add_pack(&llc_tr_packet_type); return 0; -- cgit v1.2.2 From a424077a0a48d5b2e1bdbb8cc56fd43abfd7fd6c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 24 Mar 2010 01:56:03 +0000 Subject: wimax: remove unneeded variable We never actually use "dev" so I removed it. Signed-off-by: Dan Carpenter Signed-off-by: David S. 
Miller --- net/wimax/op-reset.c | 2 -- net/wimax/op-state-get.c | 2 -- 2 files changed, 4 deletions(-) (limited to 'net') diff --git a/net/wimax/op-reset.c b/net/wimax/op-reset.c index 4dc82a54ba30..68bedf3e5443 100644 --- a/net/wimax/op-reset.c +++ b/net/wimax/op-reset.c @@ -110,7 +110,6 @@ int wimax_gnl_doit_reset(struct sk_buff *skb, struct genl_info *info) { int result, ifindex; struct wimax_dev *wimax_dev; - struct device *dev; d_fnstart(3, NULL, "(skb %p info %p)\n", skb, info); result = -ENODEV; @@ -123,7 +122,6 @@ int wimax_gnl_doit_reset(struct sk_buff *skb, struct genl_info *info) wimax_dev = wimax_dev_get_by_genl_info(info, ifindex); if (wimax_dev == NULL) goto error_no_wimax_dev; - dev = wimax_dev_to_dev(wimax_dev); /* Execute the operation and send the result back to user space */ result = wimax_reset(wimax_dev); dev_put(wimax_dev->net_dev); diff --git a/net/wimax/op-state-get.c b/net/wimax/op-state-get.c index 11ad3356eb56..aff8776e2d41 100644 --- a/net/wimax/op-state-get.c +++ b/net/wimax/op-state-get.c @@ -53,7 +53,6 @@ int wimax_gnl_doit_state_get(struct sk_buff *skb, struct genl_info *info) { int result, ifindex; struct wimax_dev *wimax_dev; - struct device *dev; d_fnstart(3, NULL, "(skb %p info %p)\n", skb, info); result = -ENODEV; @@ -66,7 +65,6 @@ int wimax_gnl_doit_state_get(struct sk_buff *skb, struct genl_info *info) wimax_dev = wimax_dev_get_by_genl_info(info, ifindex); if (wimax_dev == NULL) goto error_no_wimax_dev; - dev = wimax_dev_to_dev(wimax_dev); /* Execute the operation and send the result back to user space */ result = wimax_state_get(wimax_dev); dev_put(wimax_dev->net_dev); -- cgit v1.2.2 From 18062ca94714a66e75da8a22e010d0e8e61ab4cd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 24 Mar 2010 01:57:30 +0000 Subject: rds: cleanup: remove unneeded variable We never use "sk" so this patch removes it. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/rds/af_rds.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 937ecda4abe7..c2e45e8efa20 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -451,7 +451,6 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len, struct rds_info_lengths *lens) { struct rds_sock *rs; - struct sock *sk; struct rds_incoming *inc; unsigned long flags; unsigned int total = 0; @@ -461,7 +460,6 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len, spin_lock_irqsave(&rds_sock_lock, flags); list_for_each_entry(rs, &rds_sock_list, rs_item) { - sk = rds_rs_to_sk(rs); read_lock(&rs->rs_recv_lock); /* XXX too lazy to maintain counts.. */ -- cgit v1.2.2 From b138338056fc423c61a583d45f8aa64cfad87131 Mon Sep 17 00:00:00 2001 From: Frans Pop Date: Wed, 24 Mar 2010 07:57:28 +0000 Subject: net: remove trailing space in messages Signed-off-by: Frans Pop Signed-off-by: David S. 
Miller --- net/dccp/ccids/ccid3.c | 2 +- net/dccp/input.c | 2 +- net/ipv4/ipconfig.c | 2 +- net/ipv4/tcp_input.c | 2 +- net/ipv6/netfilter/ip6t_hbh.c | 4 ++-- net/irda/ircomm/ircomm_param.c | 2 +- net/sched/cls_u32.c | 4 ++-- net/sunrpc/auth_gss/gss_spkm3_token.c | 2 +- net/sunrpc/bc_svc.c | 2 +- 9 files changed, 11 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index bcd7632299f5..d3235899c7e3 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -208,7 +208,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) goto restart_timer; } - ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk, + ccid3_pr_debug("%s(%p, state=%s) - entry\n", dccp_role(sk), sk, ccid3_tx_state_name(hc->tx_state)); if (hc->tx_state == TFRC_SSTATE_FBACK) diff --git a/net/dccp/input.c b/net/dccp/input.c index 7648f316310f..5daa4bdfdb10 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -414,7 +414,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh)) { dccp_pr_debug("invalid ackno: S.AWL=%llu, " - "P.ackno=%llu, S.AWH=%llu \n", + "P.ackno=%llu, S.AWH=%llu\n", (unsigned long long)dp->dccps_awl, (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq, (unsigned long long)dp->dccps_awh); diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 678909281648..bf12d2a7a0c7 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -975,7 +975,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str /* Is it a reply for the device we are configuring? */ if (b->xid != ic_dev_xid) { if (net_ratelimit()) - printk(KERN_ERR "DHCP/BOOTP: Ignoring delayed packet \n"); + printk(KERN_ERR "DHCP/BOOTP: Ignoring delayed packet\n"); goto drop_unlock; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c096a4218b8f..7b31476a4063 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4318,7 +4318,7 @@ static void tcp_ofo_queue(struct sock *sk) } if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { - SOCK_DEBUG(sk, "ofo packet was already received \n"); + SOCK_DEBUG(sk, "ofo packet was already received\n"); __skb_unlink(skb, &tp->out_of_order_queue); __kfree_skb(skb); continue; diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index cbe8dec9744b..e60677519e40 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -141,11 +141,11 @@ hbh_mt6(const struct sk_buff *skb, const struct xt_match_param *par) } /* Step to the next */ - pr_debug("len%04X \n", optlen); + pr_debug("len%04X\n", optlen); if ((ptr > skb->len - optlen || hdrlen < optlen) && temp < optinfo->optsnr - 1) { - pr_debug("new pointer is too large! 
\n"); + pr_debug("new pointer is too large!\n"); break; } ptr += optlen; diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c index d57aefd9fe77..0804532d970f 100644 --- a/net/irda/ircomm/ircomm_param.c +++ b/net/irda/ircomm/ircomm_param.c @@ -474,7 +474,7 @@ static int ircomm_param_dce(void *instance, irda_param_t *param, int get) /* Check if any of the settings have changed */ if (dce & 0x0f) { if (dce & IRCOMM_DELTA_CTS) { - IRDA_DEBUG(2, "%s(), CTS \n", __func__ ); + IRDA_DEBUG(2, "%s(), CTS\n", __func__ ); } } diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 07372f60bee3..1ef76871a57b 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -772,10 +772,10 @@ static int __init init_u32(void) printk(" Performance counters on\n"); #endif #ifdef CONFIG_NET_CLS_IND - printk(" input device check on \n"); + printk(" input device check on\n"); #endif #ifdef CONFIG_NET_CLS_ACT - printk(" Actions configured \n"); + printk(" Actions configured\n"); #endif return register_tcf_proto_ops(&cls_u32_ops); } diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c index 3308157436d2..a99825d7caa0 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_token.c +++ b/net/sunrpc/auth_gss/gss_spkm3_token.c @@ -223,7 +223,7 @@ spkm3_verify_mic_token(unsigned char **tokp, int *mic_hdrlen, unsigned char **ck /* only support SPKM_MIC_TOK */ if((ptr[6] != 0x01) || (ptr[7] != 0x01)) { - dprintk("RPC: ERROR unsupported SPKM3 token \n"); + dprintk("RPC: ERROR unsupported SPKM3 token\n"); goto out; } diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c index 13f214f53120..8a610fb0cfec 100644 --- a/net/sunrpc/bc_svc.c +++ b/net/sunrpc/bc_svc.c @@ -75,7 +75,7 @@ int bc_send(struct rpc_rqst *req) rpc_put_task(task); } return ret; - dprintk("RPC: bc_send ret= %d \n", ret); + dprintk("RPC: bc_send ret= %d\n", ret); } #endif /* CONFIG_NFS_V4_1 */ -- cgit v1.2.2 From a570f095eac34b7439eed2df6728381708c55bdc Mon Sep 17 00:00:00 2001 From: Frans Pop Date: Wed, 24 Mar 2010 07:57:29 +0000 Subject: tipc: remove trailing space in messages Signed-off-by: Frans Pop Cc: Per Liden Cc: Jon Maloy Cc: Allan Stephens Signed-off-by: David S. 
Miller --- net/tipc/link.c | 8 ++++---- net/tipc/net.c | 2 +- net/tipc/node.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index 49f2be8622a9..c76e82e5f982 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -877,7 +877,7 @@ static void link_state_event(struct link *l_ptr, unsigned event) case TIMEOUT_EVT: dbg_link("TIM "); if (l_ptr->next_in_no != l_ptr->checkpoint) { - dbg_link("-> WW \n"); + dbg_link("-> WW\n"); l_ptr->state = WORKING_WORKING; l_ptr->fsm_msg_cnt = 0; l_ptr->checkpoint = l_ptr->next_in_no; @@ -934,7 +934,7 @@ static void link_state_event(struct link *l_ptr, unsigned event) link_set_timer(l_ptr, cont_intv); break; case RESET_MSG: - dbg_link("RES \n"); + dbg_link("RES\n"); dbg_link(" -> RR\n"); l_ptr->state = RESET_RESET; l_ptr->fsm_msg_cnt = 0; @@ -947,7 +947,7 @@ static void link_state_event(struct link *l_ptr, unsigned event) l_ptr->started = 1; /* fall through */ case TIMEOUT_EVT: - dbg_link("TIM \n"); + dbg_link("TIM\n"); tipc_link_send_proto_msg(l_ptr, RESET_MSG, 0, 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv); @@ -3295,7 +3295,7 @@ static void link_dump_rec_queue(struct link *l_ptr) info("buffer %x invalid\n", crs); return; } - msg_dbg(buf_msg(crs), "In rec queue: \n"); + msg_dbg(buf_msg(crs), "In rec queue:\n"); crs = crs->next; } } diff --git a/net/tipc/net.c b/net/tipc/net.c index f25b1cdb64eb..79ce8fa2b77a 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -291,6 +291,6 @@ void tipc_net_stop(void) tipc_bclink_stop(); net_stop(); write_unlock_bh(&tipc_net_lock); - info("Left network mode \n"); + info("Left network mode\n"); } diff --git a/net/tipc/node.c b/net/tipc/node.c index 2c24e7d6d950..17cc394f424f 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -278,7 +278,7 @@ struct tipc_node *tipc_node_attach_link(struct link *l_ptr) n_ptr->link_cnt++; return n_ptr; } - err("Attempt to establish second link on <%s> to %s \n", + err("Attempt to establish second link on <%s> to %s\n", l_ptr->b_ptr->publ.name, addr_string_fill(addr_string, l_ptr->addr)); } -- cgit v1.2.2 From 0d34545563f99886b997be7da63f0e8084af3bc5 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 19 Mar 2010 18:47:51 +0100 Subject: netfilter: xtables: make use of caller family rather than target family Supplement to aa5fa3185791aac71c9172d4fda3e8729164b5d1. 
The semantic patch for this change is: // @@ struct xt_target_param *par; @@ -par->target->family +par->family @@ struct xt_tgchk_param *par; @@ -par->target->family +par->family @@ struct xt_tgdtor_param *par; @@ -par->target->family +par->family // Signed-off-by: Jan Engelhardt --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 6 +++--- net/netfilter/xt_NFQUEUE.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 0886f96c736b..a20bee75b02c 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -401,9 +401,9 @@ static bool clusterip_tg_check(const struct xt_tgchk_param *par) } cipinfo->config = config; - if (nf_ct_l3proto_try_module_get(par->target->family) < 0) { + if (nf_ct_l3proto_try_module_get(par->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", par->target->family); + "proto=%u\n", par->family); return false; } @@ -421,7 +421,7 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par) clusterip_config_put(cipinfo->config); - nf_ct_l3proto_module_put(par->target->family); + nf_ct_l3proto_module_put(par->family); } #ifdef CONFIG_COMPAT diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index a37e2166858e..7cc0de63aa0f 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -71,10 +71,10 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct xt_target_param *par) u32 queue = info->queuenum; if (info->queues_total > 1) { - if (par->target->family == NFPROTO_IPV4) + if (par->family == NFPROTO_IPV4) queue = hash_v4(skb) % info->queues_total + queue; #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) - else if (par->target->family == NFPROTO_IPV6) + else if (par->family == NFPROTO_IPV6) queue = hash_v6(skb) % info->queues_total + queue; #endif } -- cgit v1.2.2 From 55e0d7cf279177dfe320f54816320558bc370f24 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 25 Mar 2010 11:00:22 +0100 Subject: netfilter: xt_hashlimit: dl_seq_stop() fix If dl_seq_start() memory allocation fails, we crash later in dl_seq_stop(), trying to kfree(ERR_PTR(-ENOMEM)) Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/netfilter/xt_hashlimit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 9e9c48963942..70d561a2d9e0 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -879,7 +879,8 @@ static void dl_seq_stop(struct seq_file *s, void *v) struct xt_hashlimit_htable *htable = s->private; unsigned int *bucket = (unsigned int *)v; - kfree(bucket); + if (!IS_ERR(bucket)) + kfree(bucket); spin_unlock_bh(&htable->lock); } -- cgit v1.2.2 From 9c13886665c43600bd0af4b38e33c654e648e078 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Thu, 25 Mar 2010 11:17:26 +0100 Subject: netfilter: ip6table_raw: fix table priority The order of the IPv6 raw table is currently reversed, that makes impossible to use the NOTRACK target in IPv6: for example if someone enters ip6tables -t raw -A PREROUTING -p tcp --dport 80 -j NOTRACK and if we receive fragmented packets then the first fragment will be untracked and thus skip nf_ct_frag6_gather (and conntrack), while all subsequent fragments enter nf_ct_frag6_gather and reassembly will never successfully be finished. 
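The fix works because of where NF_IP6_PRI_RAW sits relative to the defragmentation hook. Roughly, the relevant IPv6 hook priorities (quoted here only for reference, as assumed from the include/linux/netfilter_ipv6.h of this period; exact values may differ in other trees) are:

enum nf_ip6_hook_priorities {
	NF_IP6_PRI_FIRST = INT_MIN,
	NF_IP6_PRI_CONNTRACK_DEFRAG = -400,	/* nf_ct_frag6_gather() hooks here */
	NF_IP6_PRI_RAW = -300,			/* raw table: after defrag... */
	NF_IP6_PRI_CONNTRACK = -200,		/* ...but still before conntrack */
	/* remaining priorities omitted */
};

At NF_IP6_PRI_FIRST the raw table ran ahead of defragmentation, so the NOTRACK rule above only ever saw individual fragments; at NF_IP6_PRI_RAW it evaluates the reassembled packet and can untrack the whole flow before connection tracking runs.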
Singed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/ip6table_raw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index aef31a29de9e..b9cf7cd61923 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -13,7 +13,7 @@ static const struct xt_table packet_raw = { .valid_hooks = RAW_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, - .priority = NF_IP6_PRI_FIRST, + .priority = NF_IP6_PRI_RAW, }; /* The work comes in here from netfilter.c. */ -- cgit v1.2.2 From ff67e4e42bd178b1179c4d8e5c1fde18758ce84f Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 19 Mar 2010 21:08:16 +0100 Subject: netfilter: xt extensions: use pr_ (2) Supplement to 1159683ef48469de71dc26f0ee1a9c30d131cf89. Downgrade the log level to INFO for most checkentry messages as they are, IMO, just an extra information to the -EINVAL code that is returned as part of a parameter "constraint violation". Leave errors to real errors, such as being unable to create a LED trigger. Signed-off-by: Jan Engelhardt --- net/bridge/netfilter/ebt_among.c | 15 ++++++------- net/bridge/netfilter/ebt_limit.c | 3 ++- net/bridge/netfilter/ebt_ulog.c | 23 ++++++++------------ net/bridge/netfilter/ebt_vlan.c | 36 ++++++++++++------------------- net/bridge/netfilter/ebtables.c | 5 ++--- net/ipv4/netfilter/ip_tables.c | 18 +++++++--------- net/ipv4/netfilter/ipt_CLUSTERIP.c | 42 ++++++++++++++++++------------------- net/ipv4/netfilter/ipt_ECN.c | 11 ++++------ net/ipv4/netfilter/ipt_LOG.c | 7 +++---- net/ipv4/netfilter/ipt_MASQUERADE.c | 8 +++---- net/ipv4/netfilter/ipt_NETMAP.c | 6 +++--- net/ipv4/netfilter/ipt_REDIRECT.c | 6 +++--- net/ipv4/netfilter/ipt_REJECT.c | 6 +++--- net/ipv4/netfilter/ipt_ULOG.c | 37 +++++++++++++------------------- net/ipv4/netfilter/ipt_addrtype.c | 14 ++++++------- net/ipv4/netfilter/ipt_ecn.c | 5 ++--- net/ipv4/netfilter/nf_nat_rule.c | 5 +++-- net/ipv6/netfilter/ip6_tables.c | 9 ++++---- net/ipv6/netfilter/ip6t_LOG.c | 7 +++---- net/ipv6/netfilter/ip6t_REJECT.c | 20 +++++++++--------- net/ipv6/netfilter/ip6t_ah.c | 6 +++--- net/ipv6/netfilter/ip6t_frag.c | 6 +++--- net/ipv6/netfilter/ip6t_hbh.c | 6 +++--- net/ipv6/netfilter/ip6t_rt.c | 6 +++--- net/netfilter/xt_TCPMSS.c | 7 +++---- net/netfilter/xt_TPROXY.c | 4 ++-- net/netfilter/xt_esp.c | 2 +- net/netfilter/xt_iprange.c | 1 + net/netfilter/xt_recent.c | 6 +++--- net/netfilter/xt_socket.c | 5 ++--- net/netfilter/xt_time.c | 4 ++-- 31 files changed, 151 insertions(+), 185 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c index b595f091f35b..60ad6308bc1f 100644 --- a/net/bridge/netfilter/ebt_among.c +++ b/net/bridge/netfilter/ebt_among.c @@ -7,6 +7,7 @@ * August, 2003 * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -186,21 +187,17 @@ static bool ebt_among_mt_check(const struct xt_mtchk_param *par) expected_length += ebt_mac_wormhash_size(wh_src); if (em->match_size != EBT_ALIGN(expected_length)) { - printk(KERN_WARNING - "ebtables: among: wrong size: %d " - "against expected %d, rounded to %Zd\n", - em->match_size, expected_length, - EBT_ALIGN(expected_length)); + pr_info("wrong size: %d against expected %d, rounded to %Zd\n", + em->match_size, expected_length, + EBT_ALIGN(expected_length)); return false; } if (wh_dst && (err = ebt_mac_wormhash_check_integrity(wh_dst))) { - printk(KERN_WARNING - 
"ebtables: among: dst integrity fail: %x\n", -err); + pr_info("dst integrity fail: %x\n", -err); return false; } if (wh_src && (err = ebt_mac_wormhash_check_integrity(wh_src))) { - printk(KERN_WARNING - "ebtables: among: src integrity fail: %x\n", -err); + pr_info("src integrity fail: %x\n", -err); return false; } return true; diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c index 7a8182710eb3..5b7330b62541 100644 --- a/net/bridge/netfilter/ebt_limit.c +++ b/net/bridge/netfilter/ebt_limit.c @@ -10,6 +10,7 @@ * September, 2003 * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -71,7 +72,7 @@ static bool ebt_limit_mt_check(const struct xt_mtchk_param *par) /* Check for overflow. */ if (info->burst == 0 || user2credits(info->avg * info->burst) < user2credits(info->avg)) { - printk("Overflow in ebt_limit, try lower: %u/%u\n", + pr_info("overflow, try lower: %u/%u\n", info->avg, info->burst); return false; } diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 84340ab30ed3..789ea36f1db1 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -27,7 +27,7 @@ * flushed even if it is not full yet. * */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -43,9 +43,6 @@ #include #include "../br_private.h" -#define PRINTR(format, args...) do { if (net_ratelimit()) \ - printk(format , ## args); } while (0) - static unsigned int nlbufsiz = NLMSG_GOODSIZE; module_param(nlbufsiz, uint, 0600); MODULE_PARM_DESC(nlbufsiz, "netlink buffer size (number of bytes) " @@ -106,15 +103,14 @@ static struct sk_buff *ulog_alloc_skb(unsigned int size) n = max(size, nlbufsiz); skb = alloc_skb(n, GFP_ATOMIC); if (!skb) { - PRINTR(KERN_ERR "ebt_ulog: can't alloc whole buffer " - "of size %ub!\n", n); + pr_debug("cannot alloc whole buffer of size %ub!\n", n); if (n > size) { /* try to allocate only as much as we need for * current packet */ skb = alloc_skb(size, GFP_ATOMIC); if (!skb) - PRINTR(KERN_ERR "ebt_ulog: can't even allocate " - "buffer of size %ub\n", size); + pr_debug("cannot even allocate " + "buffer of size %ub\n", size); } } @@ -141,8 +137,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, size = NLMSG_SPACE(sizeof(*pm) + copy_len); if (size > nlbufsiz) { - PRINTR("ebt_ulog: Size %Zd needed, but nlbufsiz=%d\n", - size, nlbufsiz); + pr_debug("Size %Zd needed, but nlbufsiz=%d\n", size, nlbufsiz); return; } @@ -216,8 +211,8 @@ unlock: return; nlmsg_failure: - printk(KERN_CRIT "ebt_ulog: error during NLMSG_PUT. This should " - "not happen, please report to author.\n"); + pr_debug("error during NLMSG_PUT. 
This should " + "not happen, please report to author.\n"); goto unlock; alloc_failure: goto unlock; @@ -291,8 +286,8 @@ static int __init ebt_ulog_init(void) int i; if (nlbufsiz >= 128*1024) { - printk(KERN_NOTICE "ebt_ulog: Netlink buffer has to be <= 128kB," - " please try a smaller nlbufsiz parameter.\n"); + pr_warning("Netlink buffer has to be <= 128kB," + " please try a smaller nlbufsiz parameter.\n"); return -EINVAL; } diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c index be1dd2e1f615..5c44f51063c3 100644 --- a/net/bridge/netfilter/ebt_vlan.c +++ b/net/bridge/netfilter/ebt_vlan.c @@ -26,17 +26,12 @@ #include #include -static int debug; #define MODULE_VERS "0.6" -module_param(debug, int, 0); -MODULE_PARM_DESC(debug, "debug=1 is turn on debug messages"); MODULE_AUTHOR("Nick Fedchik "); MODULE_DESCRIPTION("Ebtables: 802.1Q VLAN tag match"); MODULE_LICENSE("GPL"); - -#define DEBUG_MSG(args...) if (debug) printk (KERN_DEBUG "ebt_vlan: " args) #define GET_BITMASK(_BIT_MASK_) info->bitmask & _BIT_MASK_ #define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return false; } @@ -91,24 +86,23 @@ static bool ebt_vlan_mt_check(const struct xt_mtchk_param *par) /* Is it 802.1Q frame checked? */ if (e->ethproto != htons(ETH_P_8021Q)) { - DEBUG_MSG - ("passed entry proto %2.4X is not 802.1Q (8100)\n", - (unsigned short) ntohs(e->ethproto)); + pr_debug("passed entry proto %2.4X is not 802.1Q (8100)\n", + ntohs(e->ethproto)); return false; } /* Check for bitmask range * True if even one bit is out of mask */ if (info->bitmask & ~EBT_VLAN_MASK) { - DEBUG_MSG("bitmask %2X is out of mask (%2X)\n", - info->bitmask, EBT_VLAN_MASK); + pr_debug("bitmask %2X is out of mask (%2X)\n", + info->bitmask, EBT_VLAN_MASK); return false; } /* Check for inversion flags range */ if (info->invflags & ~EBT_VLAN_MASK) { - DEBUG_MSG("inversion flags %2X is out of mask (%2X)\n", - info->invflags, EBT_VLAN_MASK); + pr_debug("inversion flags %2X is out of mask (%2X)\n", + info->invflags, EBT_VLAN_MASK); return false; } @@ -121,9 +115,8 @@ static bool ebt_vlan_mt_check(const struct xt_mtchk_param *par) if (GET_BITMASK(EBT_VLAN_ID)) { if (!!info->id) { /* if id!=0 => check vid range */ if (info->id > VLAN_GROUP_ARRAY_LEN) { - DEBUG_MSG - ("id %d is out of range (1-4096)\n", - info->id); + pr_debug("id %d is out of range (1-4096)\n", + info->id); return false; } /* Note: This is valid VLAN-tagged frame point. @@ -137,8 +130,8 @@ static bool ebt_vlan_mt_check(const struct xt_mtchk_param *par) if (GET_BITMASK(EBT_VLAN_PRIO)) { if ((unsigned char) info->prio > 7) { - DEBUG_MSG("prio %d is out of range (0-7)\n", - info->prio); + pr_debug("prio %d is out of range (0-7)\n", + info->prio); return false; } } @@ -147,9 +140,8 @@ static bool ebt_vlan_mt_check(const struct xt_mtchk_param *par) * if_ether.h: ETH_ZLEN 60 - Min. 
octets in frame sans FCS */ if (GET_BITMASK(EBT_VLAN_ENCAP)) { if ((unsigned short) ntohs(info->encap) < ETH_ZLEN) { - DEBUG_MSG - ("encap frame length %d is less than minimal\n", - ntohs(info->encap)); + pr_debug("encap frame length %d is less than " + "minimal\n", ntohs(info->encap)); return false; } } @@ -169,9 +161,7 @@ static struct xt_match ebt_vlan_mt_reg __read_mostly = { static int __init ebt_vlan_init(void) { - DEBUG_MSG("ebtables 802.1Q extension module v" - MODULE_VERS "\n"); - DEBUG_MSG("module debug=%d\n", !!debug); + pr_debug("ebtables 802.1Q extension module v" MODULE_VERS "\n"); return xt_register_match(&ebt_vlan_mt_reg); } diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index dfb58056a89a..989d72cc8148 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -14,8 +14,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ - - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -2127,7 +2126,7 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, return ret; new_offset += ret; if (offsets_update && new_offset) { - pr_debug("ebtables: change offset %d to %d\n", + pr_debug("change offset %d to %d\n", offsets_update[i], offsets[j] + new_offset); offsets_update[i] = offsets[j] + new_offset; } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index b29c66df8d1f..73fdf20263ed 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -39,13 +39,13 @@ MODULE_DESCRIPTION("IPv4 packet filter"); /*#define DEBUG_IP_FIREWALL_USER*/ #ifdef DEBUG_IP_FIREWALL -#define dprintf(format, args...) printk(format , ## args) +#define dprintf(format, args...) pr_info(format , ## args) #else #define dprintf(format, args...) #endif #ifdef DEBUG_IP_FIREWALL_USER -#define duprintf(format, args...) printk(format , ## args) +#define duprintf(format, args...) pr_info(format , ## args) #else #define duprintf(format, args...) 
#endif @@ -168,8 +168,7 @@ static unsigned int ipt_error(struct sk_buff *skb, const struct xt_target_param *par) { if (net_ratelimit()) - printk("ip_tables: error: `%s'\n", - (const char *)par->targinfo); + pr_info("error: `%s'\n", (const char *)par->targinfo); return NF_DROP; } @@ -591,7 +590,7 @@ check_entry(const struct ipt_entry *e, const char *name) const struct ipt_entry_target *t; if (!ip_checkentry(&e->ip)) { - duprintf("ip_tables: ip check failed %p %s.\n", e, name); + duprintf("ip check failed %p %s.\n", e, name); return -EINVAL; } @@ -618,8 +617,7 @@ check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par) ret = xt_check_match(par, m->u.match_size - sizeof(*m), ip->proto, ip->invflags & IPT_INV_PROTO); if (ret < 0) { - duprintf("ip_tables: check failed for `%s'.\n", - par.match->name); + duprintf("check failed for `%s'.\n", par.match->name); return ret; } return 0; @@ -667,7 +665,7 @@ static int check_target(struct ipt_entry *e, struct net *net, const char *name) ret = xt_check_target(&par, t->u.target_size - sizeof(*t), e->ip.proto, e->ip.invflags & IPT_INV_PROTO); if (ret < 0) { - duprintf("ip_tables: check failed for `%s'.\n", + duprintf("check failed for `%s'.\n", t->u.kernel.target->name); return ret; } @@ -1311,7 +1309,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len) if (ret != 0) goto free_newinfo; - duprintf("ip_tables: Translated table\n"); + duprintf("Translated table\n"); ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, tmp.counters); @@ -2276,7 +2274,7 @@ static int __init ip_tables_init(void) if (ret < 0) goto err5; - printk(KERN_INFO "ip_tables: (C) 2000-2006 Netfilter Core Team\n"); + pr_info("(C) 2000-2006 Netfilter Core Team\n"); return 0; err5: diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index a20bee75b02c..fcaa0dc8e075 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -9,6 +9,7 @@ * published by the Free Software Foundation. 
* */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -238,8 +239,7 @@ clusterip_hashfn(const struct sk_buff *skb, break; default: if (net_ratelimit()) - printk(KERN_NOTICE "CLUSTERIP: unknown protocol `%u'\n", - iph->protocol); + pr_info("unknown protocol %u\n", iph->protocol); sport = dport = 0; } @@ -261,7 +261,7 @@ clusterip_hashfn(const struct sk_buff *skb, hashval = 0; /* This cannot happen, unless the check function wasn't called * at rule load time */ - printk("CLUSTERIP: unknown mode `%u'\n", config->hash_mode); + pr_info("unknown mode %u\n", config->hash_mode); BUG(); break; } @@ -294,7 +294,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par) ct = nf_ct_get(skb, &ctinfo); if (ct == NULL) { - printk(KERN_ERR "CLUSTERIP: no conntrack!\n"); + pr_info("no conntrack!\n"); /* FIXME: need to drop invalid ones, since replies * to outgoing connections of other nodes will be * marked as INVALID */ @@ -357,14 +357,13 @@ static bool clusterip_tg_check(const struct xt_tgchk_param *par) if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP && cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT && cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) { - printk(KERN_WARNING "CLUSTERIP: unknown mode `%u'\n", - cipinfo->hash_mode); + pr_info("unknown mode %u\n", cipinfo->hash_mode); return false; } if (e->ip.dmsk.s_addr != htonl(0xffffffff) || e->ip.dst.s_addr == 0) { - printk(KERN_ERR "CLUSTERIP: Please specify destination IP\n"); + pr_info("Please specify destination IP\n"); return false; } @@ -373,26 +372,28 @@ static bool clusterip_tg_check(const struct xt_tgchk_param *par) config = clusterip_config_find_get(e->ip.dst.s_addr, 1); if (!config) { if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { - printk(KERN_WARNING "CLUSTERIP: no config found for %pI4, need 'new'\n", &e->ip.dst.s_addr); + pr_info("no config found for %pI4, need 'new'\n", + &e->ip.dst.s_addr); return false; } else { struct net_device *dev; if (e->ip.iniface[0] == '\0') { - printk(KERN_WARNING "CLUSTERIP: Please specify an interface name\n"); + pr_info("Please specify an interface name\n"); return false; } dev = dev_get_by_name(&init_net, e->ip.iniface); if (!dev) { - printk(KERN_WARNING "CLUSTERIP: no such interface %s\n", e->ip.iniface); + pr_info("no such interface %s\n", + e->ip.iniface); return false; } config = clusterip_config_init(cipinfo, e->ip.dst.s_addr, dev); if (!config) { - printk(KERN_WARNING "CLUSTERIP: cannot allocate config\n"); + pr_info("cannot allocate config\n"); dev_put(dev); return false; } @@ -402,8 +403,8 @@ static bool clusterip_tg_check(const struct xt_tgchk_param *par) cipinfo->config = config; if (nf_ct_l3proto_try_module_get(par->family) < 0) { - printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", par->family); + pr_info("cannot load conntrack support for proto=%u\n", + par->family); return false; } @@ -478,8 +479,8 @@ static void arp_print(struct arp_payload *payload) } hbuffer[--k]='\0'; - printk("src %pI4@%s, dst %pI4\n", - &payload->src_ip, hbuffer, &payload->dst_ip); + pr_debug("src %pI4@%s, dst %pI4\n", + &payload->src_ip, hbuffer, &payload->dst_ip); } #endif @@ -518,7 +519,7 @@ arp_mangle(unsigned int hook, * this wouldn't work, since we didn't subscribe the mcast group on * other interfaces */ if (c->dev != out) { - pr_debug("CLUSTERIP: not mangling arp reply on different " + pr_debug("not mangling arp reply on different " "interface: cip'%s'-skb'%s'\n", c->dev->name, out->name); clusterip_config_put(c); @@ -529,7 +530,7 @@ 
arp_mangle(unsigned int hook, memcpy(payload->src_hw, c->clustermac, arp->ar_hln); #ifdef DEBUG - pr_debug(KERN_DEBUG "CLUSTERIP mangled arp reply: "); + pr_debug("mangled arp reply: "); arp_print(payload); #endif @@ -705,13 +706,13 @@ static int __init clusterip_tg_init(void) #ifdef CONFIG_PROC_FS clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", init_net.proc_net); if (!clusterip_procdir) { - printk(KERN_ERR "CLUSTERIP: Unable to proc dir entry\n"); + pr_err("Unable to proc dir entry\n"); ret = -ENOMEM; goto cleanup_hook; } #endif /* CONFIG_PROC_FS */ - printk(KERN_NOTICE "ClusterIP Version %s loaded successfully\n", + pr_info("ClusterIP Version %s loaded successfully\n", CLUSTERIP_VERSION); return 0; @@ -726,8 +727,7 @@ cleanup_target: static void __exit clusterip_tg_exit(void) { - printk(KERN_NOTICE "ClusterIP Version %s unloading\n", - CLUSTERIP_VERSION); + pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION); #ifdef CONFIG_PROC_FS remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent); #endif diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index ea5cea2415c1..01988752547e 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -6,7 +6,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -99,19 +99,16 @@ static bool ecn_tg_check(const struct xt_tgchk_param *par) const struct ipt_entry *e = par->entryinfo; if (einfo->operation & IPT_ECN_OP_MASK) { - printk(KERN_WARNING "ECN: unsupported ECN operation %x\n", - einfo->operation); + pr_info("unsupported ECN operation %x\n", einfo->operation); return false; } if (einfo->ip_ect & ~IPT_ECN_IP_MASK) { - printk(KERN_WARNING "ECN: new ECT codepoint %x out of mask\n", - einfo->ip_ect); + pr_info("new ECT codepoint %x out of mask\n", einfo->ip_ect); return false; } if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) && (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) { - printk(KERN_WARNING "ECN: cannot use TCP operations on a " - "non-tcp rule\n"); + pr_info("cannot use TCP operations on a non-tcp rule\n"); return false; } return true; diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index ee128efa1c8d..b3bf623fa222 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -9,7 +9,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -444,12 +444,11 @@ static bool log_tg_check(const struct xt_tgchk_param *par) const struct ipt_log_info *loginfo = par->targinfo; if (loginfo->level >= 8) { - pr_debug("LOG: level %u >= 8\n", loginfo->level); + pr_debug("level %u >= 8\n", loginfo->level); return false; } if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') { - pr_debug("LOG: prefix term %i\n", - loginfo->prefix[sizeof(loginfo->prefix)-1]); + pr_debug("prefix is not null-terminated\n"); return false; } return true; diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 650b54042b01..5063ddac7c04 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -8,7 +8,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. 
*/ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -33,11 +33,11 @@ static bool masquerade_tg_check(const struct xt_tgchk_param *par) const struct nf_nat_multi_range_compat *mr = par->targinfo; if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { - pr_debug("masquerade_check: bad MAP_IPS.\n"); + pr_debug("bad MAP_IPS.\n"); return false; } if (mr->rangesize != 1) { - pr_debug("masquerade_check: bad rangesize %u\n", mr->rangesize); + pr_debug("bad rangesize %u\n", mr->rangesize); return false; } return true; @@ -72,7 +72,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par) rt = skb_rtable(skb); newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE); if (!newsrc) { - printk("MASQUERADE: %s ate my IP address\n", par->out->name); + pr_info("%s ate my IP address\n", par->out->name); return NF_DROP; } diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 7c29582d4ec8..51ab01a0a95d 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -9,7 +9,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -27,11 +27,11 @@ static bool netmap_tg_check(const struct xt_tgchk_param *par) const struct nf_nat_multi_range_compat *mr = par->targinfo; if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) { - pr_debug("NETMAP:check: bad MAP_IPS.\n"); + pr_debug("bad MAP_IPS.\n"); return false; } if (mr->rangesize != 1) { - pr_debug("NETMAP:check: bad rangesize %u.\n", mr->rangesize); + pr_debug("bad rangesize %u.\n", mr->rangesize); return false; } return true; diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 698e5e78685b..74f1f55fd61a 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -6,7 +6,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -31,11 +31,11 @@ static bool redirect_tg_check(const struct xt_tgchk_param *par) const struct nf_nat_multi_range_compat *mr = par->targinfo; if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { - pr_debug("redirect_check: bad MAP_IPS.\n"); + pr_debug("bad MAP_IPS.\n"); return false; } if (mr->rangesize != 1) { - pr_debug("redirect_check: bad rangesize %u.\n", mr->rangesize); + pr_debug("bad rangesize %u.\n", mr->rangesize); return false; } return true; diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 5113b8f1a379..ff32252bad59 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -9,7 +9,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. 
*/ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -180,13 +180,13 @@ static bool reject_tg_check(const struct xt_tgchk_param *par) const struct ipt_entry *e = par->entryinfo; if (rejinfo->with == IPT_ICMP_ECHOREPLY) { - printk("ipt_REJECT: ECHOREPLY no longer supported.\n"); + pr_info("ECHOREPLY no longer supported.\n"); return false; } else if (rejinfo->with == IPT_TCP_RESET) { /* Must specify that it's a TCP packet */ if (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO)) { - printk("ipt_REJECT: TCP_RESET invalid for non-tcp\n"); + pr_info("TCP_RESET invalid for non-tcp\n"); return false; } } diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 09a5d3f7cc41..d926201560dd 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -29,7 +29,7 @@ * Specify, after how many hundredths of a second the queue should be * flushed even if it is not full yet. */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -56,8 +56,6 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG); #define ULOG_NL_EVENT 111 /* Harald's favorite number */ #define ULOG_MAXNLGROUPS 32 /* numer of nlgroups */ -#define PRINTR(format, args...) do { if (net_ratelimit()) printk(format , ## args); } while (0) - static unsigned int nlbufsiz = NLMSG_GOODSIZE; module_param(nlbufsiz, uint, 0400); MODULE_PARM_DESC(nlbufsiz, "netlink buffer size"); @@ -90,12 +88,12 @@ static void ulog_send(unsigned int nlgroupnum) ulog_buff_t *ub = &ulog_buffers[nlgroupnum]; if (timer_pending(&ub->timer)) { - pr_debug("ipt_ULOG: ulog_send: timer was pending, deleting\n"); + pr_debug("ulog_send: timer was pending, deleting\n"); del_timer(&ub->timer); } if (!ub->skb) { - pr_debug("ipt_ULOG: ulog_send: nothing to send\n"); + pr_debug("ulog_send: nothing to send\n"); return; } @@ -104,7 +102,7 @@ static void ulog_send(unsigned int nlgroupnum) ub->lastnlh->nlmsg_type = NLMSG_DONE; NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1; - pr_debug("ipt_ULOG: throwing %d packets to netlink group %u\n", + pr_debug("throwing %d packets to netlink group %u\n", ub->qlen, nlgroupnum + 1); netlink_broadcast(nflognl, ub->skb, 0, nlgroupnum + 1, GFP_ATOMIC); @@ -117,7 +115,7 @@ static void ulog_send(unsigned int nlgroupnum) /* timer function to flush queue in flushtimeout time */ static void ulog_timer(unsigned long data) { - pr_debug("ipt_ULOG: timer function called, calling ulog_send\n"); + pr_debug("timer function called, calling ulog_send\n"); /* lock to protect against somebody modifying our structure * from ipt_ulog_target at the same time */ @@ -138,7 +136,7 @@ static struct sk_buff *ulog_alloc_skb(unsigned int size) n = max(size, nlbufsiz); skb = alloc_skb(n, GFP_ATOMIC); if (!skb) { - PRINTR("ipt_ULOG: can't alloc whole buffer %ub!\n", n); + pr_debug("cannot alloc whole buffer %ub!\n", n); if (n > size) { /* try to allocate only as much as we need for @@ -146,8 +144,7 @@ static struct sk_buff *ulog_alloc_skb(unsigned int size) skb = alloc_skb(size, GFP_ATOMIC); if (!skb) - PRINTR("ipt_ULOG: can't even allocate %ub\n", - size); + pr_debug("cannot even allocate %ub\n", size); } } @@ -198,8 +195,7 @@ static void ipt_ulog_packet(unsigned int hooknum, goto alloc_failure; } - pr_debug("ipt_ULOG: qlen %d, qthreshold %Zu\n", ub->qlen, - loginfo->qthreshold); + pr_debug("qlen %d, qthreshold %Zu\n", ub->qlen, loginfo->qthreshold); /* NLMSG_PUT contains a hidden goto nlmsg_failure !!! 
*/ nlh = NLMSG_PUT(ub->skb, 0, ub->qlen, ULOG_NL_EVENT, @@ -272,11 +268,9 @@ static void ipt_ulog_packet(unsigned int hooknum, return; nlmsg_failure: - PRINTR("ipt_ULOG: error during NLMSG_PUT\n"); - + pr_debug("error during NLMSG_PUT\n"); alloc_failure: - PRINTR("ipt_ULOG: Error building netlink message\n"); - + pr_debug("Error building netlink message\n"); spin_unlock_bh(&ulog_lock); } @@ -318,12 +312,11 @@ static bool ulog_tg_check(const struct xt_tgchk_param *par) const struct ipt_ulog_info *loginfo = par->targinfo; if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') { - pr_debug("ipt_ULOG: prefix term %i\n", - loginfo->prefix[sizeof(loginfo->prefix) - 1]); + pr_debug("prefix not null-terminated\n"); return false; } if (loginfo->qthreshold > ULOG_MAX_QLEN) { - pr_debug("ipt_ULOG: queue threshold %Zu > MAX_QLEN\n", + pr_debug("queue threshold %Zu > MAX_QLEN\n", loginfo->qthreshold); return false; } @@ -389,10 +382,10 @@ static int __init ulog_tg_init(void) { int ret, i; - pr_debug("ipt_ULOG: init module\n"); + pr_debug("init module\n"); if (nlbufsiz > 128*1024) { - printk("Netlink buffer has to be <= 128kB\n"); + pr_warning("Netlink buffer has to be <= 128kB\n"); return -EINVAL; } @@ -422,7 +415,7 @@ static void __exit ulog_tg_exit(void) ulog_buff_t *ub; int i; - pr_debug("ipt_ULOG: cleanup_module\n"); + pr_debug("cleanup_module\n"); if (nflog) nf_log_unregister(&ipt_ulog_logger); diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c index 3b216be3bc9f..ea4f58a46c6e 100644 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ b/net/ipv4/netfilter/ipt_addrtype.c @@ -8,7 +8,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -76,24 +76,24 @@ static bool addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN && info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { - printk(KERN_ERR "ipt_addrtype: both incoming and outgoing " - "interface limitation cannot be selected\n"); + pr_info("both incoming and outgoing " + "interface limitation cannot be selected\n"); return false; } if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN)) && info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { - printk(KERN_ERR "ipt_addrtype: output interface limitation " - "not valid in PRE_ROUTING and INPUT\n"); + pr_info("output interface limitation " + "not valid in PREROUTING and INPUT\n"); return false; } if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_OUT)) && info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) { - printk(KERN_ERR "ipt_addrtype: input interface limitation " - "not valid in POST_ROUTING and OUTPUT\n"); + pr_info("input interface limitation " + "not valid in POSTROUTING and OUTPUT\n"); return false; } diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index 2a1e56b71908..e661108c73f1 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -6,7 +6,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. 
*/ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -98,8 +98,7 @@ static bool ecn_mt_check(const struct xt_mtchk_param *par) if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) && ip->proto != IPPROTO_TCP) { - printk(KERN_WARNING "ipt_ecn: can't match TCP bits in rule for" - " non-tcp packets\n"); + pr_info("cannot match TCP bits in rule for non-tcp packets\n"); return false; } diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index ab74cc0535e2..7d6345e416c7 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -7,6 +7,7 @@ */ /* Everything about the rules for NAT. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -79,7 +80,7 @@ static bool ipt_snat_checkentry(const struct xt_tgchk_param *par) /* Must be a valid range */ if (mr->rangesize != 1) { - printk("SNAT: multiple ranges no longer supported\n"); + pr_info("SNAT: multiple ranges no longer supported\n"); return false; } return true; @@ -91,7 +92,7 @@ static bool ipt_dnat_checkentry(const struct xt_tgchk_param *par) /* Must be a valid range */ if (mr->rangesize != 1) { - printk("DNAT: multiple ranges no longer supported\n"); + pr_info("DNAT: multiple ranges no longer supported\n"); return false; } return true; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 9210e312edf1..1b2414e03a34 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -40,13 +40,13 @@ MODULE_DESCRIPTION("IPv6 packet filter"); /*#define DEBUG_IP_FIREWALL_USER*/ #ifdef DEBUG_IP_FIREWALL -#define dprintf(format, args...) printk(format , ## args) +#define dprintf(format, args...) pr_info(format , ## args) #else #define dprintf(format, args...) #endif #ifdef DEBUG_IP_FIREWALL_USER -#define duprintf(format, args...) printk(format , ## args) +#define duprintf(format, args...) pr_info(format , ## args) #else #define duprintf(format, args...) #endif @@ -200,8 +200,7 @@ static unsigned int ip6t_error(struct sk_buff *skb, const struct xt_target_param *par) { if (net_ratelimit()) - printk("ip6_tables: error: `%s'\n", - (const char *)par->targinfo); + pr_info("error: `%s'\n", (const char *)par->targinfo); return NF_DROP; } @@ -2308,7 +2307,7 @@ static int __init ip6_tables_init(void) if (ret < 0) goto err5; - printk(KERN_INFO "ip6_tables: (C) 2000-2006 Netfilter Core Team\n"); + pr_info("(C) 2000-2006 Netfilter Core Team\n"); return 0; err5: diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index e16c0c7d086d..5a79883220e0 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -9,7 +9,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. 
*/ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -456,12 +456,11 @@ static bool log_tg6_check(const struct xt_tgchk_param *par) const struct ip6t_log_info *loginfo = par->targinfo; if (loginfo->level >= 8) { - pr_debug("LOG: level %u >= 8\n", loginfo->level); + pr_debug("level %u >= 8\n", loginfo->level); return false; } if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') { - pr_debug("LOG: prefix term %i\n", - loginfo->prefix[sizeof(loginfo->prefix)-1]); + pr_debug("prefix not null-terminated\n"); return false; } return true; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index dd8afbaf00a8..45efb9f38fcb 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -14,7 +14,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -49,7 +49,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { - pr_debug("ip6t_REJECT: addr is not unicast.\n"); + pr_debug("addr is not unicast.\n"); return; } @@ -57,7 +57,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto); if ((tcphoff < 0) || (tcphoff > oldskb->len)) { - pr_debug("ip6t_REJECT: Can't get TCP header.\n"); + pr_debug("Cannot get TCP header.\n"); return; } @@ -65,7 +65,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) /* IP header checks: fragment, too short. */ if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) { - pr_debug("ip6t_REJECT: proto(%d) != IPPROTO_TCP, " + pr_debug("proto(%d) != IPPROTO_TCP, " "or too short. otcplen = %d\n", proto, otcplen); return; @@ -76,14 +76,14 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) /* No RST for RST. */ if (otcph.rst) { - pr_debug("ip6t_REJECT: RST is set\n"); + pr_debug("RST is set\n"); return; } /* Check checksum. 
*/ if (csum_ipv6_magic(&oip6h->saddr, &oip6h->daddr, otcplen, IPPROTO_TCP, skb_checksum(oldskb, tcphoff, otcplen, 0))) { - pr_debug("ip6t_REJECT: TCP checksum is invalid\n"); + pr_debug("TCP checksum is invalid\n"); return; } @@ -107,7 +107,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) if (!nskb) { if (net_ratelimit()) - printk("ip6t_REJECT: Can't alloc skb\n"); + pr_debug("cannot alloc skb\n"); dst_release(dst); return; } @@ -206,7 +206,7 @@ reject_tg6(struct sk_buff *skb, const struct xt_target_param *par) break; default: if (net_ratelimit()) - printk(KERN_WARNING "ip6t_REJECT: case %u not handled yet\n", reject->with); + pr_info("case %u not handled yet\n", reject->with); break; } @@ -219,13 +219,13 @@ static bool reject_tg6_check(const struct xt_tgchk_param *par) const struct ip6t_entry *e = par->entryinfo; if (rejinfo->with == IP6T_ICMP6_ECHOREPLY) { - printk("ip6t_REJECT: ECHOREPLY is not supported.\n"); + pr_info("ECHOREPLY is not supported.\n"); return false; } else if (rejinfo->with == IP6T_TCP_RESET) { /* Must specify that it's a TCP packet */ if (e->ipv6.proto != IPPROTO_TCP || (e->ipv6.invflags & XT_INV_PROTO)) { - printk("ip6t_REJECT: TCP_RESET illegal for non-tcp\n"); + pr_info("TCP_RESET illegal for non-tcp\n"); return false; } } diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index ac0b7c629d78..4429bfd39e11 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -6,7 +6,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -29,7 +29,7 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) { bool r; - pr_debug("ah spi_match:%c 0x%x <= 0x%x <= 0x%x", + pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n", invert ? '!' : ' ', min, spi, max); r = (spi >= min && spi <= max) ^ invert; pr_debug(" result %s\n", r ? "PASS" : "FAILED"); @@ -92,7 +92,7 @@ static bool ah_mt6_check(const struct xt_mtchk_param *par) const struct ip6t_ah *ahinfo = par->matchinfo; if (ahinfo->invflags & ~IP6T_AH_INV_MASK) { - pr_debug("ip6t_ah: unknown flags %X\n", ahinfo->invflags); + pr_debug("unknown flags %X\n", ahinfo->invflags); return false; } return true; diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index 7b91c2598ed5..5c0da913b4ab 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -6,7 +6,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -27,7 +27,7 @@ static inline bool id_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) { bool r; - pr_debug("frag id_match:%c 0x%x <= 0x%x <= 0x%x", invert ? '!' : ' ', + pr_debug("id_match:%c 0x%x <= 0x%x <= 0x%x\n", invert ? '!' : ' ', min, id, max); r = (id >= min && id <= max) ^ invert; pr_debug(" result %s\n", r ? 
"PASS" : "FAILED"); @@ -107,7 +107,7 @@ static bool frag_mt6_check(const struct xt_mtchk_param *par) const struct ip6t_frag *fraginfo = par->matchinfo; if (fraginfo->invflags & ~IP6T_FRAG_INV_MASK) { - pr_debug("ip6t_frag: unknown flags %X\n", fraginfo->invflags); + pr_debug("unknown flags %X\n", fraginfo->invflags); return false; } return true; diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 82593c8bdc3e..f4b73889d00a 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -6,7 +6,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -169,12 +169,12 @@ static bool hbh_mt6_check(const struct xt_mtchk_param *par) const struct ip6t_opts *optsinfo = par->matchinfo; if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) { - pr_debug("ip6t_opts: unknown flags %X\n", optsinfo->invflags); + pr_debug("unknown flags %X\n", optsinfo->invflags); return false; } if (optsinfo->flags & IP6T_OPTS_NSTRICT) { - pr_debug("ip6t_opts: Not strict - not implemented"); + pr_debug("Not strict - not implemented"); return false; } diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index b77307fc8743..c58d65336577 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -6,7 +6,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -29,7 +29,7 @@ static inline bool segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) { bool r; - pr_debug("rt segsleft_match:%c 0x%x <= 0x%x <= 0x%x", + pr_debug("segsleft_match:%c 0x%x <= 0x%x <= 0x%x\n", invert ? '!' : ' ', min, id, max); r = (id >= min && id <= max) ^ invert; pr_debug(" result %s\n", r ? "PASS" : "FAILED"); @@ -188,7 +188,7 @@ static bool rt_mt6_check(const struct xt_mtchk_param *par) const struct ip6t_rt *rtinfo = par->matchinfo; if (rtinfo->invflags & ~IP6T_RT_INV_MASK) { - pr_debug("ip6t_rt: unknown flags %X\n", rtinfo->invflags); + pr_debug("unknown flags %X\n", rtinfo->invflags); return false; } if ((rtinfo->flags & (IP6T_RT_RES | IP6T_RT_FST_MASK)) && diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 2077da31c973..45161d9a9f23 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -67,15 +67,14 @@ tcpmss_mangle_packet(struct sk_buff *skb, if (info->mss == XT_TCPMSS_CLAMP_PMTU) { if (dst_mtu(skb_dst(skb)) <= minlen) { if (net_ratelimit()) - pr_err("xt_TCPMSS: " - "unknown or invalid path-MTU (%u)\n", + pr_err("unknown or invalid path-MTU (%u)\n", dst_mtu(skb_dst(skb))); return -1; } if (in_mtu <= minlen) { if (net_ratelimit()) - pr_err("xt_TCPMSS: unknown or " - "invalid path-MTU (%u)\n", in_mtu); + pr_err("unknown or invalid path-MTU (%u)\n", + in_mtu); return -1; } newmss = min(dst_mtu(skb_dst(skb)), in_mtu) - minlen; diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 1340c2fa3621..e9244fdc123a 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -9,7 +9,7 @@ * published by the Free Software Foundation. 
* */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -67,7 +67,7 @@ static bool tproxy_tg_check(const struct xt_tgchk_param *par) && !(i->invflags & IPT_INV_PROTO)) return true; - pr_info("xt_TPROXY: Can be used only in combination with " + pr_info("Can be used only in combination with " "either -p tcp or -p udp\n"); return false; } diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c index f9deecbef875..1a446d626769 100644 --- a/net/netfilter/xt_esp.c +++ b/net/netfilter/xt_esp.c @@ -29,7 +29,7 @@ static inline bool spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) { bool r; - pr_debug("esp spi_match:%c 0x%x <= 0x%x <= 0x%x\n", + pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n", invert ? '!' : ' ', min, spi, max); r = (spi >= min && spi <= max) ^ invert; pr_debug(" result %s\n", r ? "PASS" : "FAILED"); diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c index ffc96387d556..8471d9715bde 100644 --- a/net/netfilter/xt_iprange.c +++ b/net/netfilter/xt_iprange.c @@ -8,6 +8,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index aa9817e91338..72cbced48a8d 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -321,8 +321,8 @@ static bool recent_mt_check(const struct xt_mtchk_param *par) hash_rnd_inited = true; } if (info->check_set & ~XT_RECENT_VALID_FLAGS) { - pr_info(KBUILD_MODNAME ": Unsupported user space flags " - "(%08x)\n", info->check_set); + pr_info("Unsupported user space flags (%08x)\n", + info->check_set); return false; } if (hweight8(info->check_set & @@ -336,7 +336,7 @@ static bool recent_mt_check(const struct xt_mtchk_param *par) if ((info->check_set & XT_RECENT_REAP) && !info->seconds) return false; if (info->hit_count > ip_pkt_list_tot) { - pr_info(KBUILD_MODNAME ": hitcount (%u) is larger than " + pr_info("hitcount (%u) is larger than " "packets to be remembered (%u)\n", info->hit_count, ip_pkt_list_tot); return false; diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 6a902564d24f..a9b16867e1f7 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -9,7 +9,7 @@ * published by the Free Software Foundation. * */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -165,8 +165,7 @@ socket_match(const struct sk_buff *skb, const struct xt_match_param *par, sk = NULL; } - pr_debug("socket match: proto %u %08x:%u -> %08x:%u " - "(orig %08x:%u) sock %p\n", + pr_debug("proto %u %08x:%u -> %08x:%u (orig %08x:%u) sock %p\n", protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), ntohl(iph->daddr), hp ? 
ntohs(hp->dest) : 0, sk); diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index 9a9c9a3b0a5d..45ed05b5161f 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -223,8 +223,8 @@ static bool time_mt_check(const struct xt_mtchk_param *par) if (info->daytime_start > XT_TIME_MAX_DAYTIME || info->daytime_stop > XT_TIME_MAX_DAYTIME) { - printk(KERN_WARNING "xt_time: invalid argument - start or " - "stop time greater than 23:59:59\n"); + pr_info("invalid argument - start or " + "stop time greater than 23:59:59\n"); return false; } -- cgit v1.2.2 From d2a7b6bad2c38e41eddb0b24d03627d9e7aa3f7b Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 10 Jul 2009 18:55:11 +0200 Subject: netfilter: xtables: make use of xt_request_find_target Signed-off-by: Jan Engelhardt --- net/bridge/netfilter/ebtables.c | 13 ++----------- net/ipv4/netfilter/arp_tables.c | 20 ++++++++------------ net/ipv4/netfilter/ip_tables.c | 20 ++++++++------------ net/ipv6/netfilter/ip6_tables.c | 20 ++++++++------------ net/netfilter/x_tables.c | 4 +--- net/sched/act_ipt.c | 4 ++-- 6 files changed, 29 insertions(+), 52 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 989d72cc8148..6d3b256d2f61 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -395,13 +395,9 @@ ebt_check_watcher(struct ebt_entry_watcher *w, struct xt_tgchk_param *par, left - sizeof(struct ebt_entry_watcher) < w->watcher_size) return -EINVAL; - watcher = try_then_request_module( - xt_find_target(NFPROTO_BRIDGE, w->u.name, 0), - "ebt_%s", w->u.name); + watcher = xt_request_find_target(NFPROTO_BRIDGE, w->u.name, 0); if (IS_ERR(watcher)) return PTR_ERR(watcher); - if (watcher == NULL) - return -ENOENT; w->u.watcher = watcher; par->target = watcher; @@ -714,15 +710,10 @@ ebt_check_entry(struct ebt_entry *e, struct net *net, t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); gap = e->next_offset - e->target_offset; - target = try_then_request_module( - xt_find_target(NFPROTO_BRIDGE, t->u.name, 0), - "ebt_%s", t->u.name); + target = xt_request_find_target(NFPROTO_BRIDGE, t->u.name, 0); if (IS_ERR(target)) { ret = PTR_ERR(target); goto cleanup_watchers; - } else if (target == NULL) { - ret = -ENOENT; - goto cleanup_watchers; } t->u.target = target; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index f07d77f65751..e8e363d90365 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -523,13 +523,11 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) return ret; t = arpt_get_target(e); - target = try_then_request_module(xt_find_target(NFPROTO_ARP, - t->u.user.name, - t->u.user.revision), - "arpt_%s", t->u.user.name); - if (IS_ERR(target) || !target) { + target = xt_request_find_target(NFPROTO_ARP, t->u.user.name, + t->u.user.revision); + if (IS_ERR(target)) { duprintf("find_check_entry: `%s' not found\n", t->u.user.name); - ret = target ? 
PTR_ERR(target) : -ENOENT; + ret = PTR_ERR(target); goto out; } t->u.kernel.target = target; @@ -1252,14 +1250,12 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, entry_offset = (void *)e - (void *)base; t = compat_arpt_get_target(e); - target = try_then_request_module(xt_find_target(NFPROTO_ARP, - t->u.user.name, - t->u.user.revision), - "arpt_%s", t->u.user.name); - if (IS_ERR(target) || !target) { + target = xt_request_find_target(NFPROTO_ARP, t->u.user.name, + t->u.user.revision); + if (IS_ERR(target)) { duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", t->u.user.name); - ret = target ? PTR_ERR(target) : -ENOENT; + ret = PTR_ERR(target); goto out; } t->u.kernel.target = target; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 73fdf20263ed..e24ec48ee8cd 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -701,13 +701,11 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, } t = ipt_get_target(e); - target = try_then_request_module(xt_find_target(AF_INET, - t->u.user.name, - t->u.user.revision), - "ipt_%s", t->u.user.name); - if (IS_ERR(target) || !target) { + target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name, + t->u.user.revision); + if (IS_ERR(target)) { duprintf("find_check_entry: `%s' not found\n", t->u.user.name); - ret = target ? PTR_ERR(target) : -ENOENT; + ret = PTR_ERR(target); goto cleanup_matches; } t->u.kernel.target = target; @@ -1547,14 +1545,12 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, } t = compat_ipt_get_target(e); - target = try_then_request_module(xt_find_target(AF_INET, - t->u.user.name, - t->u.user.revision), - "ipt_%s", t->u.user.name); - if (IS_ERR(target) || !target) { + target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name, + t->u.user.revision); + if (IS_ERR(target)) { duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", t->u.user.name); - ret = target ? PTR_ERR(target) : -ENOENT; + ret = PTR_ERR(target); goto release_matches; } t->u.kernel.target = target; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 1b2414e03a34..842bef374dcc 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -733,13 +733,11 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, } t = ip6t_get_target(e); - target = try_then_request_module(xt_find_target(AF_INET6, - t->u.user.name, - t->u.user.revision), - "ip6t_%s", t->u.user.name); - if (IS_ERR(target) || !target) { + target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name, + t->u.user.revision); + if (IS_ERR(target)) { duprintf("find_check_entry: `%s' not found\n", t->u.user.name); - ret = target ? PTR_ERR(target) : -ENOENT; + ret = PTR_ERR(target); goto cleanup_matches; } t->u.kernel.target = target; @@ -1581,14 +1579,12 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, } t = compat_ip6t_get_target(e); - target = try_then_request_module(xt_find_target(AF_INET6, - t->u.user.name, - t->u.user.revision), - "ip6t_%s", t->u.user.name); - if (IS_ERR(target) || !target) { + target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name, + t->u.user.revision); + if (IS_ERR(target)) { duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", t->u.user.name); - ret = target ? 
PTR_ERR(target) : -ENOENT; + ret = PTR_ERR(target); goto release_matches; } t->u.kernel.target = target; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 9a248d4a877f..bf2806afd920 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -250,9 +250,7 @@ struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision) target = try_then_request_module(xt_find_target(af, name, revision), "%st_%s", xt_prefix[af], name); - if (IS_ERR(target) || !target) - return NULL; - return target; + return (target != NULL) ? target : ERR_PTR(-ENOENT); } EXPORT_SYMBOL_GPL(xt_request_find_target); diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 082c520b0def..b9f79c251d75 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -46,8 +46,8 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int target = xt_request_find_target(AF_INET, t->u.user.name, t->u.user.revision); - if (!target) - return -ENOENT; + if (IS_ERR(target)) + return PTR_ERR(target); t->u.kernel.target = target; par.table = table; -- cgit v1.2.2 From fd0ec0e6216baea854465bbdb177f2d1b2ccaf22 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 10 Jul 2009 19:27:47 +0200 Subject: netfilter: xtables: consolidate code into xt_request_find_match Signed-off-by: Jan Engelhardt --- net/bridge/netfilter/ebtables.c | 5 +---- net/ipv4/netfilter/ip_tables.c | 18 ++++++++---------- net/ipv6/netfilter/ip6_tables.c | 18 ++++++++---------- net/netfilter/x_tables.c | 11 +++++++++++ 4 files changed, 28 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 6d3b256d2f61..c41f3fad0587 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -361,12 +361,9 @@ ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par, left - sizeof(struct ebt_entry_match) < m->match_size) return -EINVAL; - match = try_then_request_module(xt_find_match(NFPROTO_BRIDGE, - m->u.name, 0), "ebt_%s", m->u.name); + match = xt_request_find_match(NFPROTO_BRIDGE, m->u.name, 0); if (IS_ERR(match)) return PTR_ERR(match); - if (match == NULL) - return -ENOENT; m->u.match = match; par->match = match; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index e24ec48ee8cd..09f6567a85b7 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -629,12 +629,11 @@ find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par) struct xt_match *match; int ret; - match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name, - m->u.user.revision), - "ipt_%s", m->u.user.name); - if (IS_ERR(match) || !match) { + match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name, + m->u.user.revision); + if (IS_ERR(match)) { duprintf("find_check_match: `%s' not found\n", m->u.user.name); - return match ? PTR_ERR(match) : -ENOENT; + return PTR_ERR(match); } m->u.kernel.match = match; @@ -1472,13 +1471,12 @@ compat_find_calc_match(struct ipt_entry_match *m, { struct xt_match *match; - match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name, - m->u.user.revision), - "ipt_%s", m->u.user.name); - if (IS_ERR(match) || !match) { + match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name, + m->u.user.revision); + if (IS_ERR(match)) { duprintf("compat_check_calc_match: `%s' not found\n", m->u.user.name); - return match ? 
PTR_ERR(match) : -ENOENT; + return PTR_ERR(match); } m->u.kernel.match = match; *size += xt_compat_match_offset(match); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 842bef374dcc..41e2429c0163 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -660,12 +660,11 @@ find_check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par) struct xt_match *match; int ret; - match = try_then_request_module(xt_find_match(AF_INET6, m->u.user.name, - m->u.user.revision), - "ip6t_%s", m->u.user.name); - if (IS_ERR(match) || !match) { + match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name, + m->u.user.revision); + if (IS_ERR(match)) { duprintf("find_check_match: `%s' not found\n", m->u.user.name); - return match ? PTR_ERR(match) : -ENOENT; + return PTR_ERR(match); } m->u.kernel.match = match; @@ -1506,13 +1505,12 @@ compat_find_calc_match(struct ip6t_entry_match *m, { struct xt_match *match; - match = try_then_request_module(xt_find_match(AF_INET6, m->u.user.name, - m->u.user.revision), - "ip6t_%s", m->u.user.name); - if (IS_ERR(match) || !match) { + match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name, + m->u.user.revision); + if (IS_ERR(match)) { duprintf("compat_check_calc_match: `%s' not found\n", m->u.user.name); - return match ? PTR_ERR(match) : -ENOENT; + return PTR_ERR(match); } m->u.kernel.match = match; *size += xt_compat_match_offset(match); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index bf2806afd920..ee7fe215b3e1 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -214,6 +214,17 @@ struct xt_match *xt_find_match(u8 af, const char *name, u8 revision) } EXPORT_SYMBOL(xt_find_match); +struct xt_match * +xt_request_find_match(uint8_t nfproto, const char *name, uint8_t revision) +{ + struct xt_match *match; + + match = try_then_request_module(xt_find_match(nfproto, name, revision), + "%st_%s", xt_prefix[nfproto], name); + return (match != NULL) ? match : ERR_PTR(-ENOENT); +} +EXPORT_SYMBOL_GPL(xt_request_find_match); + /* Find target, grabs ref. Returns ERR_PTR() on error. */ struct xt_target *xt_find_target(u8 af, const char *name, u8 revision) { -- cgit v1.2.2 From 5dc7a6d5749d3ddbf9fbea9512cb45762428512c Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 19 Mar 2010 21:29:08 +0100 Subject: netfilter: xt_recent: allow changing ip_list_[ug]id at runtime Signed-off-by: Jan Engelhardt --- net/netfilter/xt_recent.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 72cbced48a8d..85309448c5e7 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -51,14 +51,14 @@ module_param(ip_list_tot, uint, 0400); module_param(ip_pkt_list_tot, uint, 0400); module_param(ip_list_hash_size, uint, 0400); module_param(ip_list_perms, uint, 0400); -module_param(ip_list_uid, uint, 0400); -module_param(ip_list_gid, uint, 0400); +module_param(ip_list_uid, uint, S_IRUGO | S_IWUSR); +module_param(ip_list_gid, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list"); MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP address to remember (max. 
255)"); MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs"); MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/xt_recent/* files"); -MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/xt_recent/* files"); -MODULE_PARM_DESC(ip_list_gid,"owning group of /proc/net/xt_recent/* files"); +MODULE_PARM_DESC(ip_list_uid, "default owner of /proc/net/xt_recent/* files"); +MODULE_PARM_DESC(ip_list_gid, "default owning group of /proc/net/xt_recent/* files"); struct recent_entry { struct list_head list; -- cgit v1.2.2 From 713aefa3fb3929ce36305d4d1b7b4059d87ed115 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 23 Mar 2010 04:07:21 +0100 Subject: netfilter: bridge: use NFPROTO values for NF_HOOK invocation The first argument to NF_HOOK* is an nfproto since quite some time. Commit v2.6.27-2457-gfdc9314 was the first to practically start using the new names. Do that now for the remaining NF_HOOK calls. The semantic patch used was: // @@ @@ (NF_HOOK |NF_HOOK_THRESH )( -PF_BRIDGE, +NFPROTO_BRIDGE, ...) @@ @@ NF_HOOK( -PF_INET6, +NFPROTO_IPV6, ...) @@ @@ NF_HOOK( -PF_INET, +NFPROTO_IPV4, ...) // Signed-off-by: Jan Engelhardt --- net/bridge/br_forward.c | 10 +++++----- net/bridge/br_input.c | 6 +++--- net/bridge/br_multicast.c | 2 +- net/bridge/br_netfilter.c | 15 ++++++++------- net/bridge/br_stp_bpdu.c | 2 +- 5 files changed, 18 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 8dbec83e50ca..7ab52d07b477 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -58,7 +58,7 @@ int br_dev_queue_push_xmit(struct sk_buff *skb) int br_forward_finish(struct sk_buff *skb) { - return NF_HOOK(PF_BRIDGE, NF_BR_POST_ROUTING, skb, NULL, skb->dev, + return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, skb, NULL, skb->dev, br_dev_queue_push_xmit); } @@ -66,8 +66,8 @@ int br_forward_finish(struct sk_buff *skb) static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) { skb->dev = to->dev; - NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, - br_forward_finish); + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, + br_forward_finish); } static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) @@ -83,8 +83,8 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) skb->dev = to->dev; skb_forward_csum(skb); - NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev, - br_forward_finish); + NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev, + br_forward_finish); } /* called with rcu_read_lock */ diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 333dfb7c5886..c9018fc72d24 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -32,7 +32,7 @@ static int br_pass_frame_up(struct sk_buff *skb) indev = skb->dev; skb->dev = brdev; - return NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL, + return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL, netif_receive_skb); } @@ -155,7 +155,7 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) if (p->br->stp_enabled == BR_NO_STP && dest[5] == 0) goto forward; - if (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, + if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, NULL, br_handle_local_finish)) return NULL; /* frame consumed by filter */ else @@ -176,7 +176,7 @@ forward: if (!compare_ether_addr(p->br->dev->dev_addr, dest)) skb->pkt_type = PACKET_HOST; - NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, 
skb->dev, NULL, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, br_handle_frame_finish); break; default: diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 6980625537ca..ed19b0a730ab 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -608,7 +608,7 @@ static void br_multicast_send_query(struct net_bridge *br, if (port) { __skb_push(skb, sizeof(struct ethhdr)); skb->dev = port->dev; - NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, dev_queue_xmit); } else netif_rx(skb); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 268e2e725888..bc6b57248494 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -246,7 +246,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) skb->dev = nf_bridge->physindev; nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, br_handle_frame_finish, 1); return 0; @@ -396,7 +396,8 @@ bridged_dnat: nf_bridge->mask |= BRNF_BRIDGED_DNAT; skb->dev = nf_bridge->physindev; nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, + NF_HOOK_THRESH(NFPROTO_BRIDGE, + NF_BR_PRE_ROUTING, skb, skb->dev, NULL, br_nf_pre_routing_finish_bridge, 1); @@ -417,7 +418,7 @@ bridged_dnat: skb->dev = nf_bridge->physindev; nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, br_handle_frame_finish, 1); return 0; @@ -534,7 +535,7 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook, if (!setup_pre_routing(skb)) return NF_DROP; - NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, br_nf_pre_routing_finish_ipv6); return NF_STOLEN; @@ -607,7 +608,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, return NF_DROP; store_orig_dstaddr(skb); - NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, br_nf_pre_routing_finish); return NF_STOLEN; @@ -655,7 +656,7 @@ static int br_nf_forward_finish(struct sk_buff *skb) in = *((struct net_device **)(skb->cb)); } nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(PF_BRIDGE, NF_BR_FORWARD, skb, in, + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, in, skb->dev, br_forward_finish, 1); return 0; } @@ -786,7 +787,7 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff *skb, } nf_bridge_push_encap_header(skb); - NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev, br_forward_finish); return NF_STOLEN; } diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 81ae40b3f655..11b0157f69c3 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -49,7 +49,7 @@ static void br_send_bpdu(struct net_bridge_port *p, llc_mac_hdr_init(skb, p->dev->dev_addr, p->br->group_addr); - NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, dev_queue_xmit); } -- cgit v1.2.2 From 9bbc768aa911a3ef336272eaa6d220abfba8ce50 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 23 Mar 2010 04:07:29 +0100 Subject: netfilter: ipv4: use NFPROTO values for NF_HOOK 
invocation The semantic patch that was used: // @@ @@ (NF_HOOK |NF_HOOK_COND |nf_hook )( -PF_INET, +NFPROTO_IPV4, ...) // Signed-off-by: Jan Engelhardt --- net/ipv4/ip_forward.c | 4 ++-- net/ipv4/ip_input.c | 4 ++-- net/ipv4/ip_output.c | 18 +++++++++--------- net/ipv4/ipmr.c | 2 +- net/ipv4/raw.c | 4 ++-- net/ipv4/xfrm4_input.c | 2 +- net/ipv4/xfrm4_output.c | 2 +- 7 files changed, 18 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index a2991bc8e32e..9f2cd47ceeb7 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -111,8 +111,8 @@ int ip_forward(struct sk_buff *skb) skb->priority = rt_tos2priority(iph->tos); - return NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, rt->u.dst.dev, - ip_forward_finish); + return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, + rt->u.dst.dev, ip_forward_finish); sr_failed: /* diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index c29de9879fda..091b5c7e04e1 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -265,7 +265,7 @@ int ip_local_deliver(struct sk_buff *skb) return 0; } - return NF_HOOK(PF_INET, NF_INET_LOCAL_IN, skb, skb->dev, NULL, + return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN, skb, skb->dev, NULL, ip_local_deliver_finish); } @@ -443,7 +443,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, /* Must drop socket now because of tproxy. */ skb_orphan(skb); - return NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, dev, NULL, + return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, dev, NULL, ip_rcv_finish); inhdr_error: diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 3451799e3dbf..f09135e1e14f 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -95,8 +95,8 @@ int __ip_local_out(struct sk_buff *skb) iph->tot_len = htons(skb->len); ip_send_check(iph); - return nf_hook(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, - dst_output); + return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL, + skb_dst(skb)->dev, dst_output); } int ip_local_out(struct sk_buff *skb) @@ -271,8 +271,8 @@ int ip_mc_output(struct sk_buff *skb) ) { struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); if (newskb) - NF_HOOK(PF_INET, NF_INET_POST_ROUTING, newskb, - NULL, newskb->dev, + NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, + newskb, NULL, newskb->dev, ip_dev_loopback_xmit); } @@ -287,12 +287,12 @@ int ip_mc_output(struct sk_buff *skb) if (rt->rt_flags&RTCF_BROADCAST) { struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); if (newskb) - NF_HOOK(PF_INET, NF_INET_POST_ROUTING, newskb, NULL, - newskb->dev, ip_dev_loopback_xmit); + NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, newskb, + NULL, newskb->dev, ip_dev_loopback_xmit); } - return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb, NULL, skb->dev, - ip_finish_output, + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, + skb->dev, ip_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } @@ -305,7 +305,7 @@ int ip_output(struct sk_buff *skb) skb->dev = dev; skb->protocol = htons(ETH_P_IP); - return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb, NULL, dev, + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, dev, ip_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8582e12e4a62..1d42f6103c8d 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1319,7 +1319,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) * not mrouter) cannot join to more 
than one interface - it will * result in receiving multiple packets. */ - NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev, + NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev, ipmr_forward_finish); return; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index ce154b47f1da..34d9adb83590 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -382,8 +382,8 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, icmp_out_count(net, ((struct icmphdr *) skb_transport_header(skb))->type); - err = NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev, - dst_output); + err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL, + rt->u.dst.dev, dst_output); if (err > 0) err = net_xmit_errno(err); if (err) diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index f9f922a0ba88..c3969e0f96c3 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -60,7 +60,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) iph->tot_len = htons(skb->len); ip_send_check(iph); - NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, xfrm4_rcv_encap_finish); return 0; } diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index c908bd99bcba..571aa96a175c 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -86,7 +86,7 @@ static int xfrm4_output_finish(struct sk_buff *skb) int xfrm4_output(struct sk_buff *skb) { - return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb, + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, skb_dst(skb)->dev, xfrm4_output_finish, !(IPCB(skb)->flags & IPSKB_REROUTED)); } -- cgit v1.2.2 From b2e0b385d77069031edb957839aaaa8441b47287 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 23 Mar 2010 04:09:07 +0100 Subject: netfilter: ipv6: use NFPROTO values for NF_HOOK invocation The semantic patch that was used: // @@ @@ (NF_HOOK |NF_HOOK_THRESH |nf_hook )( -PF_INET6, +NFPROTO_IPV6, ...) // Signed-off-by: Jan Engelhardt --- net/ipv6/ip6_input.c | 4 ++-- net/ipv6/ip6_output.c | 16 ++++++++-------- net/ipv6/ip6mr.c | 2 +- net/ipv6/mcast.c | 4 ++-- net/ipv6/ndisc.c | 4 ++-- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- net/ipv6/raw.c | 4 ++-- net/ipv6/xfrm6_input.c | 2 +- net/ipv6/xfrm6_output.c | 4 ++-- 9 files changed, 21 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index e28f9203deca..2c01dc65794d 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -142,7 +142,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt /* Must drop socket now because of tproxy. 
*/ skb_orphan(skb); - return NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, dev, NULL, + return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, dev, NULL, ip6_rcv_finish); err: IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); @@ -235,7 +235,7 @@ discard: int ip6_input(struct sk_buff *skb) { - return NF_HOOK(PF_INET6, NF_INET_LOCAL_IN, skb, skb->dev, NULL, + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, skb, skb->dev, NULL, ip6_input_finish); } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index dabf108ad811..4535b7a0169b 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -66,8 +66,8 @@ int __ip6_local_out(struct sk_buff *skb) len = 0; ipv6_hdr(skb)->payload_len = htons(len); - return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, - dst_output); + return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, + skb_dst(skb)->dev, dst_output); } int ip6_local_out(struct sk_buff *skb) @@ -134,8 +134,8 @@ static int ip6_output2(struct sk_buff *skb) is not supported in any case. */ if (newskb) - NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb, - NULL, newskb->dev, + NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, + newskb, NULL, newskb->dev, ip6_dev_loopback_xmit); if (ipv6_hdr(skb)->hop_limit == 0) { @@ -150,7 +150,7 @@ static int ip6_output2(struct sk_buff *skb) skb->len); } - return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev, + return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, skb->dev, ip6_output_finish); } @@ -260,8 +260,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUT, skb->len); - return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, - dst_output); + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, + dst->dev, dst_output); } if (net_ratelimit()) @@ -537,7 +537,7 @@ int ip6_forward(struct sk_buff *skb) hdr->hop_limit--; IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); - return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev, + return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish); error: diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 52e0f74fdfe0..430372e0bf24 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1566,7 +1566,7 @@ static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi) IP6CB(skb)->flags |= IP6SKB_FORWARDED; - return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev, + return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev, ip6mr_forward2_finish); out_free: diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index bcd971915969..773b9d18b748 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1479,7 +1479,7 @@ static void mld_sendpack(struct sk_buff *skb) payload_len = skb->len; - err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev, + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev, dst_output); out: if (!err) { @@ -1847,7 +1847,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) goto err_out; skb_dst_set(skb, dst); - err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev, + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev, dst_output); out: if (!err) { diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 8bcc4b7db3bf..8e96a350f52f 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -535,7 +535,7 @@ void 
ndisc_send_skb(struct sk_buff *skb, idev = in6_dev_get(dst->dev); IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); - err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, dst_output); if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); @@ -1617,7 +1617,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, skb_dst_set(buff, dst); idev = in6_dev_get(dst->dev); IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); - err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev, + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev, dst_output); if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, NDISC_REDIRECT); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index f1171b744650..8f80e245f370 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -643,7 +643,7 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, s2 = s->next; s->next = NULL; - NF_HOOK_THRESH(PF_INET6, hooknum, s, in, out, okfn, + NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s, in, out, okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1); s = s2; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ed31c37c6e39..e9e1f774b0b7 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -636,8 +636,8 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, goto error_fault; IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); - err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev, - dst_output); + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, + rt->u.dst.dev, dst_output); if (err > 0) err = net_xmit_errno(err); if (err) diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 2bc98ede1235..f8c3cf842f53 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -42,7 +42,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) ipv6_hdr(skb)->payload_len = htons(skb->len); __skb_push(skb, skb->data - skb_network_header(skb)); - NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, ip6_rcv_finish); return -1; } diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 0c92112dcba3..6434bd5ce088 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -90,6 +90,6 @@ static int xfrm6_output_finish(struct sk_buff *skb) int xfrm6_output(struct sk_buff *skb) { - return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb_dst(skb)->dev, - xfrm6_output_finish); + return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, + skb_dst(skb)->dev, xfrm6_output_finish); } -- cgit v1.2.2 From 5d877d876cfb96c0c3254184171b4767501f4f95 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 23 Mar 2010 04:09:14 +0100 Subject: netfilter: decnet: use NFPROTO values for NF_HOOK invocation The semantic patch used was: // @@ @@ NF_HOOK( -PF_DECnet, +NFPROTO_DECNET, ...) 
// Signed-off-by: Jan Engelhardt --- net/decnet/dn_neigh.c | 9 ++++++--- net/decnet/dn_nsp_in.c | 3 ++- net/decnet/dn_route.c | 28 ++++++++++++++++++++-------- 3 files changed, 28 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 794b5bf95af1..76622c0442be 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -265,7 +265,8 @@ static int dn_long_output(struct sk_buff *skb) skb_reset_network_header(skb); - return NF_HOOK(PF_DECnet, NF_DN_POST_ROUTING, skb, NULL, neigh->dev, dn_neigh_output_packet); + return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, skb, NULL, + neigh->dev, dn_neigh_output_packet); } static int dn_short_output(struct sk_buff *skb) @@ -304,7 +305,8 @@ static int dn_short_output(struct sk_buff *skb) skb_reset_network_header(skb); - return NF_HOOK(PF_DECnet, NF_DN_POST_ROUTING, skb, NULL, neigh->dev, dn_neigh_output_packet); + return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, skb, NULL, + neigh->dev, dn_neigh_output_packet); } /* @@ -346,7 +348,8 @@ static int dn_phase3_output(struct sk_buff *skb) skb_reset_network_header(skb); - return NF_HOOK(PF_DECnet, NF_DN_POST_ROUTING, skb, NULL, neigh->dev, dn_neigh_output_packet); + return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, skb, NULL, + neigh->dev, dn_neigh_output_packet); } /* diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index 932408dca86d..65531ad96e70 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -809,7 +809,8 @@ free_out: int dn_nsp_rx(struct sk_buff *skb) { - return NF_HOOK(PF_DECnet, NF_DN_LOCAL_IN, skb, skb->dev, NULL, dn_nsp_rx_packet); + return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_IN, skb, skb->dev, NULL, + dn_nsp_rx_packet); } /* diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index a7bf03ca0a36..86eca5f7f678 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -517,7 +517,8 @@ static int dn_route_rx_long(struct sk_buff *skb) ptr++; cb->hops = *ptr++; /* Visit Count */ - return NF_HOOK(PF_DECnet, NF_DN_PRE_ROUTING, skb, skb->dev, NULL, dn_route_rx_packet); + return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, skb, skb->dev, NULL, + dn_route_rx_packet); drop_it: kfree_skb(skb); @@ -543,7 +544,8 @@ static int dn_route_rx_short(struct sk_buff *skb) ptr += 2; cb->hops = *ptr & 0x3f; - return NF_HOOK(PF_DECnet, NF_DN_PRE_ROUTING, skb, skb->dev, NULL, dn_route_rx_packet); + return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, skb, skb->dev, NULL, + dn_route_rx_packet); drop_it: kfree_skb(skb); @@ -645,16 +647,24 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type switch(flags & DN_RT_CNTL_MSK) { case DN_RT_PKT_HELO: - return NF_HOOK(PF_DECnet, NF_DN_HELLO, skb, skb->dev, NULL, dn_route_ptp_hello); + return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, + skb, skb->dev, NULL, + dn_route_ptp_hello); case DN_RT_PKT_L1RT: case DN_RT_PKT_L2RT: - return NF_HOOK(PF_DECnet, NF_DN_ROUTE, skb, skb->dev, NULL, dn_route_discard); + return NF_HOOK(NFPROTO_DECNET, NF_DN_ROUTE, + skb, skb->dev, NULL, + dn_route_discard); case DN_RT_PKT_ERTH: - return NF_HOOK(PF_DECnet, NF_DN_HELLO, skb, skb->dev, NULL, dn_neigh_router_hello); + return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, + skb, skb->dev, NULL, + dn_neigh_router_hello); case DN_RT_PKT_EEDH: - return NF_HOOK(PF_DECnet, NF_DN_HELLO, skb, skb->dev, NULL, dn_neigh_endnode_hello); + return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, + skb, skb->dev, NULL, + dn_neigh_endnode_hello); } } else { if (dn->parms.state != DN_DEV_S_RU) @@ -703,7 +713,8 @@ 
static int dn_output(struct sk_buff *skb) cb->rt_flags |= DN_RT_F_IE; cb->hops = 0; - return NF_HOOK(PF_DECnet, NF_DN_LOCAL_OUT, skb, NULL, dev, neigh->output); + return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT, skb, NULL, dev, + neigh->output); error: if (net_ratelimit()) @@ -752,7 +763,8 @@ static int dn_forward(struct sk_buff *skb) if (rt->rt_flags & RTCF_DOREDIRECT) cb->rt_flags |= DN_RT_F_IE; - return NF_HOOK(PF_DECnet, NF_DN_FORWARD, skb, dev, skb->dev, neigh->output); + return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD, skb, dev, skb->dev, + neigh->output); drop: kfree_skb(skb); -- cgit v1.2.2 From 7911b5c75b613f533b6cb6f999041dd5ea3bb004 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 23 Mar 2010 04:08:46 +0100 Subject: netfilter: ipvs: use NFPROTO values for NF_HOOK invocation Semantic patch: // @@ @@ IP_VS_XMIT( -PF_INET6, +NFPROTO_IPV6, ...) @@ @@ IP_VS_XMIT( -PF_INET, +NFPROTO_IPV4, ...) // Signed-off-by: Jan Engelhardt --- net/netfilter/ipvs/ip_vs_xmit.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 223b5018c7dc..d0a7b7b05ddb 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -269,7 +269,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(PF_INET, skb, rt); + IP_VS_XMIT(NFPROTO_IPV4, skb, rt); LeaveFunction(10); return NF_STOLEN; @@ -333,7 +333,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(PF_INET6, skb, rt); + IP_VS_XMIT(NFPROTO_IPV6, skb, rt); LeaveFunction(10); return NF_STOLEN; @@ -409,7 +409,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(PF_INET, skb, rt); + IP_VS_XMIT(NFPROTO_IPV4, skb, rt); LeaveFunction(10); return NF_STOLEN; @@ -485,7 +485,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(PF_INET6, skb, rt); + IP_VS_XMIT(NFPROTO_IPV6, skb, rt); LeaveFunction(10); return NF_STOLEN; @@ -784,7 +784,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(PF_INET, skb, rt); + IP_VS_XMIT(NFPROTO_IPV4, skb, rt); LeaveFunction(10); return NF_STOLEN; @@ -837,7 +837,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(PF_INET6, skb, rt); + IP_VS_XMIT(NFPROTO_IPV6, skb, rt); LeaveFunction(10); return NF_STOLEN; @@ -911,7 +911,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(PF_INET, skb, rt); + IP_VS_XMIT(NFPROTO_IPV4, skb, rt); rc = NF_STOLEN; goto out; @@ -986,7 +986,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(PF_INET6, skb, rt); + IP_VS_XMIT(NFPROTO_IPV6, skb, rt); rc = NF_STOLEN; goto out; -- cgit v1.2.2 From 9f5673174161cc026a6c87f70d9b457e7ad82a80 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 23 Mar 2010 17:40:13 +0100 Subject: netfilter: xtables: untangle spaghetti if clauses in checkentry As I'm changing the return 
values soon, I want to have a clear visual path. Signed-off-by: Jan Engelhardt --- net/netfilter/xt_dccp.c | 10 +++++++--- net/netfilter/xt_sctp.c | 20 ++++++++++++-------- 2 files changed, 19 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index 0989f29ade2e..8f6014f7c881 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c @@ -127,9 +127,13 @@ static bool dccp_mt_check(const struct xt_mtchk_param *par) { const struct xt_dccp_info *info = par->matchinfo; - return !(info->flags & ~XT_DCCP_VALID_FLAGS) - && !(info->invflags & ~XT_DCCP_VALID_FLAGS) - && !(info->invflags & ~info->flags); + if (info->flags & ~XT_DCCP_VALID_FLAGS) + return false; + if (info->invflags & ~XT_DCCP_VALID_FLAGS) + return false; + if (info->invflags & ~info->flags) + return false; + return true; } static struct xt_match dccp_mt_reg[] __read_mostly = { diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index 43c7e1de532c..977b182dea59 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -148,14 +148,18 @@ static bool sctp_mt_check(const struct xt_mtchk_param *par) { const struct xt_sctp_info *info = par->matchinfo; - return !(info->flags & ~XT_SCTP_VALID_FLAGS) - && !(info->invflags & ~XT_SCTP_VALID_FLAGS) - && !(info->invflags & ~info->flags) - && ((!(info->flags & XT_SCTP_CHUNK_TYPES)) || - (info->chunk_match_type & - (SCTP_CHUNK_MATCH_ALL - | SCTP_CHUNK_MATCH_ANY - | SCTP_CHUNK_MATCH_ONLY))); + if (info->flags & ~XT_SCTP_VALID_FLAGS) + return false; + if (info->invflags & ~XT_SCTP_VALID_FLAGS) + return false; + if (info->invflags & ~info->flags) + return false; + if (!(info->flags & XT_SCTP_CHUNK_TYPES)) + return true; + if (info->chunk_match_type & (SCTP_CHUNK_MATCH_ALL | + SCTP_CHUNK_MATCH_ANY | SCTP_CHUNK_MATCH_ONLY)) + return true; + return false; } static struct xt_match sctp_mt_reg[] __read_mostly = { -- cgit v1.2.2 From b0f38452ff73da7e9e0ddc68cd5c6b93c897ca0d Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 19 Mar 2010 17:16:42 +0100 Subject: netfilter: xtables: change xt_match.checkentry return type Restore function signatures from bool to int so that we can report memory allocation failures or similar using -ENOMEM rather than always having to pass -EINVAL back. This semantic patch may not be too precise (checking for functions that use xt_mtchk_param rather than functions referenced by xt_match.checkentry), but reviewed, it produced the intended result. // @@ type bool; identifier check, par; @@ -bool check +int check (struct xt_mtchk_param *par) { ... 
} // Signed-off-by: Jan Engelhardt --- net/bridge/netfilter/ebt_802_3.c | 2 +- net/bridge/netfilter/ebt_among.c | 2 +- net/bridge/netfilter/ebt_arp.c | 2 +- net/bridge/netfilter/ebt_ip.c | 2 +- net/bridge/netfilter/ebt_ip6.c | 2 +- net/bridge/netfilter/ebt_limit.c | 2 +- net/bridge/netfilter/ebt_mark_m.c | 2 +- net/bridge/netfilter/ebt_pkttype.c | 2 +- net/bridge/netfilter/ebt_stp.c | 2 +- net/bridge/netfilter/ebt_vlan.c | 2 +- net/ipv4/netfilter/ip_tables.c | 2 +- net/ipv4/netfilter/ipt_addrtype.c | 2 +- net/ipv4/netfilter/ipt_ah.c | 2 +- net/ipv4/netfilter/ipt_ecn.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 2 +- net/ipv6/netfilter/ip6t_ah.c | 2 +- net/ipv6/netfilter/ip6t_frag.c | 2 +- net/ipv6/netfilter/ip6t_hbh.c | 2 +- net/ipv6/netfilter/ip6t_ipv6header.c | 2 +- net/ipv6/netfilter/ip6t_mh.c | 2 +- net/ipv6/netfilter/ip6t_rt.c | 2 +- net/netfilter/xt_cluster.c | 2 +- net/netfilter/xt_connbytes.c | 2 +- net/netfilter/xt_connlimit.c | 2 +- net/netfilter/xt_connmark.c | 2 +- net/netfilter/xt_conntrack.c | 2 +- net/netfilter/xt_dccp.c | 2 +- net/netfilter/xt_dscp.c | 2 +- net/netfilter/xt_esp.c | 2 +- net/netfilter/xt_hashlimit.c | 4 ++-- net/netfilter/xt_helper.c | 2 +- net/netfilter/xt_limit.c | 2 +- net/netfilter/xt_multiport.c | 8 ++++---- net/netfilter/xt_physdev.c | 2 +- net/netfilter/xt_policy.c | 2 +- net/netfilter/xt_quota.c | 2 +- net/netfilter/xt_rateest.c | 2 +- net/netfilter/xt_recent.c | 2 +- net/netfilter/xt_sctp.c | 2 +- net/netfilter/xt_state.c | 2 +- net/netfilter/xt_statistic.c | 2 +- net/netfilter/xt_string.c | 2 +- net/netfilter/xt_tcpudp.c | 4 ++-- net/netfilter/xt_time.c | 2 +- 44 files changed, 49 insertions(+), 49 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c index 5d1176758ca5..7b6f4c4cccb7 100644 --- a/net/bridge/netfilter/ebt_802_3.c +++ b/net/bridge/netfilter/ebt_802_3.c @@ -36,7 +36,7 @@ ebt_802_3_mt(const struct sk_buff *skb, const struct xt_match_param *par) return true; } -static bool ebt_802_3_mt_check(const struct xt_mtchk_param *par) +static int ebt_802_3_mt_check(const struct xt_mtchk_param *par) { const struct ebt_802_3_info *info = par->matchinfo; diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c index 60ad6308bc1f..8a75d399b510 100644 --- a/net/bridge/netfilter/ebt_among.c +++ b/net/bridge/netfilter/ebt_among.c @@ -172,7 +172,7 @@ ebt_among_mt(const struct sk_buff *skb, const struct xt_match_param *par) return true; } -static bool ebt_among_mt_check(const struct xt_mtchk_param *par) +static int ebt_among_mt_check(const struct xt_mtchk_param *par) { const struct ebt_among_info *info = par->matchinfo; const struct ebt_entry_match *em = diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c index e727697c5847..fc62055adb17 100644 --- a/net/bridge/netfilter/ebt_arp.c +++ b/net/bridge/netfilter/ebt_arp.c @@ -100,7 +100,7 @@ ebt_arp_mt(const struct sk_buff *skb, const struct xt_match_param *par) return true; } -static bool ebt_arp_mt_check(const struct xt_mtchk_param *par) +static int ebt_arp_mt_check(const struct xt_mtchk_param *par) { const struct ebt_arp_info *info = par->matchinfo; const struct ebt_entry *e = par->entryinfo; diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c index 5de6df6f86b8..d1a555dc8878 100644 --- a/net/bridge/netfilter/ebt_ip.c +++ b/net/bridge/netfilter/ebt_ip.c @@ -77,7 +77,7 @@ ebt_ip_mt(const struct sk_buff *skb, const struct xt_match_param *par) return true; } -static bool 
ebt_ip_mt_check(const struct xt_mtchk_param *par) +static int ebt_ip_mt_check(const struct xt_mtchk_param *par) { const struct ebt_ip_info *info = par->matchinfo; const struct ebt_entry *e = par->entryinfo; diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c index 63e3888d20cf..fa4ecf50fdc9 100644 --- a/net/bridge/netfilter/ebt_ip6.c +++ b/net/bridge/netfilter/ebt_ip6.c @@ -80,7 +80,7 @@ ebt_ip6_mt(const struct sk_buff *skb, const struct xt_match_param *par) return true; } -static bool ebt_ip6_mt_check(const struct xt_mtchk_param *par) +static int ebt_ip6_mt_check(const struct xt_mtchk_param *par) { const struct ebt_entry *e = par->entryinfo; struct ebt_ip6_info *info = par->matchinfo; diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c index 5b7330b62541..abfb0ecd7c17 100644 --- a/net/bridge/netfilter/ebt_limit.c +++ b/net/bridge/netfilter/ebt_limit.c @@ -65,7 +65,7 @@ user2credits(u_int32_t user) return (user * HZ * CREDITS_PER_JIFFY) / EBT_LIMIT_SCALE; } -static bool ebt_limit_mt_check(const struct xt_mtchk_param *par) +static int ebt_limit_mt_check(const struct xt_mtchk_param *par) { struct ebt_limit_info *info = par->matchinfo; diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c index 8de8c396d913..1e5b0b316fbe 100644 --- a/net/bridge/netfilter/ebt_mark_m.c +++ b/net/bridge/netfilter/ebt_mark_m.c @@ -22,7 +22,7 @@ ebt_mark_mt(const struct sk_buff *skb, const struct xt_match_param *par) return ((skb->mark & info->mask) == info->mark) ^ info->invert; } -static bool ebt_mark_mt_check(const struct xt_mtchk_param *par) +static int ebt_mark_mt_check(const struct xt_mtchk_param *par) { const struct ebt_mark_m_info *info = par->matchinfo; diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c index e2a07e6cbef3..9b3c64516605 100644 --- a/net/bridge/netfilter/ebt_pkttype.c +++ b/net/bridge/netfilter/ebt_pkttype.c @@ -20,7 +20,7 @@ ebt_pkttype_mt(const struct sk_buff *skb, const struct xt_match_param *par) return (skb->pkt_type == info->pkt_type) ^ info->invert; } -static bool ebt_pkttype_mt_check(const struct xt_mtchk_param *par) +static int ebt_pkttype_mt_check(const struct xt_mtchk_param *par) { const struct ebt_pkttype_info *info = par->matchinfo; diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c index 92a93d363765..521186fa6994 100644 --- a/net/bridge/netfilter/ebt_stp.c +++ b/net/bridge/netfilter/ebt_stp.c @@ -153,7 +153,7 @@ ebt_stp_mt(const struct sk_buff *skb, const struct xt_match_param *par) return true; } -static bool ebt_stp_mt_check(const struct xt_mtchk_param *par) +static int ebt_stp_mt_check(const struct xt_mtchk_param *par) { const struct ebt_stp_info *info = par->matchinfo; const uint8_t bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00}; diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c index 5c44f51063c3..04a9575389d8 100644 --- a/net/bridge/netfilter/ebt_vlan.c +++ b/net/bridge/netfilter/ebt_vlan.c @@ -79,7 +79,7 @@ ebt_vlan_mt(const struct sk_buff *skb, const struct xt_match_param *par) return true; } -static bool ebt_vlan_mt_check(const struct xt_mtchk_param *par) +static int ebt_vlan_mt_check(const struct xt_mtchk_param *par) { struct ebt_vlan_info *info = par->matchinfo; const struct ebt_entry *e = par->entryinfo; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 09f6567a85b7..771ffa7b9aff 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ 
b/net/ipv4/netfilter/ip_tables.c @@ -2176,7 +2176,7 @@ icmp_match(const struct sk_buff *skb, const struct xt_match_param *par) !!(icmpinfo->invflags&IPT_ICMP_INV)); } -static bool icmp_checkentry(const struct xt_mtchk_param *par) +static int icmp_checkentry(const struct xt_mtchk_param *par) { const struct ipt_icmp *icmpinfo = par->matchinfo; diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c index ea4f58a46c6e..81197f456d7f 100644 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ b/net/ipv4/netfilter/ipt_addrtype.c @@ -70,7 +70,7 @@ addrtype_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) return ret; } -static bool addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) +static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) { struct ipt_addrtype_info_v1 *info = par->matchinfo; diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index 4f27e170c630..667ded16e120 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c @@ -55,7 +55,7 @@ static bool ah_mt(const struct sk_buff *skb, const struct xt_match_param *par) !!(ahinfo->invflags & IPT_AH_INV_SPI)); } -static bool ah_mt_check(const struct xt_mtchk_param *par) +static int ah_mt_check(const struct xt_mtchk_param *par) { const struct ipt_ah *ahinfo = par->matchinfo; diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index e661108c73f1..d1e234fe7f1a 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -85,7 +85,7 @@ static bool ecn_mt(const struct sk_buff *skb, const struct xt_match_param *par) return true; } -static bool ecn_mt_check(const struct xt_mtchk_param *par) +static int ecn_mt_check(const struct xt_mtchk_param *par) { const struct ipt_ecn_info *info = par->matchinfo; const struct ipt_ip *ip = par->entryinfo; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 41e2429c0163..595b45d52ff3 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -2209,7 +2209,7 @@ icmp6_match(const struct sk_buff *skb, const struct xt_match_param *par) } /* Called when user tries to insert an entry of this type. 
*/ -static bool icmp6_checkentry(const struct xt_mtchk_param *par) +static int icmp6_checkentry(const struct xt_mtchk_param *par) { const struct ip6t_icmp *icmpinfo = par->matchinfo; diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index 4429bfd39e11..3d570446deef 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -87,7 +87,7 @@ static bool ah_mt6(const struct sk_buff *skb, const struct xt_match_param *par) !(ahinfo->hdrres && ah->reserved); } -static bool ah_mt6_check(const struct xt_mtchk_param *par) +static int ah_mt6_check(const struct xt_mtchk_param *par) { const struct ip6t_ah *ahinfo = par->matchinfo; diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index 5c0da913b4ab..c2dba2701fa3 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -102,7 +102,7 @@ frag_mt6(const struct sk_buff *skb, const struct xt_match_param *par) (ntohs(fh->frag_off) & IP6_MF)); } -static bool frag_mt6_check(const struct xt_mtchk_param *par) +static int frag_mt6_check(const struct xt_mtchk_param *par) { const struct ip6t_frag *fraginfo = par->matchinfo; diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index f4b73889d00a..1b294317707b 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -164,7 +164,7 @@ hbh_mt6(const struct sk_buff *skb, const struct xt_match_param *par) return false; } -static bool hbh_mt6_check(const struct xt_mtchk_param *par) +static int hbh_mt6_check(const struct xt_mtchk_param *par) { const struct ip6t_opts *optsinfo = par->matchinfo; diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index 91490ad9302c..90e1e04b7932 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -118,7 +118,7 @@ ipv6header_mt6(const struct sk_buff *skb, const struct xt_match_param *par) } } -static bool ipv6header_mt6_check(const struct xt_mtchk_param *par) +static int ipv6header_mt6_check(const struct xt_mtchk_param *par) { const struct ip6t_ipv6header_info *info = par->matchinfo; diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c index 0181eb81d24b..d9408045994c 100644 --- a/net/ipv6/netfilter/ip6t_mh.c +++ b/net/ipv6/netfilter/ip6t_mh.c @@ -62,7 +62,7 @@ static bool mh_mt6(const struct sk_buff *skb, const struct xt_match_param *par) !!(mhinfo->invflags & IP6T_MH_INV_TYPE)); } -static bool mh_mt6_check(const struct xt_mtchk_param *par) +static int mh_mt6_check(const struct xt_mtchk_param *par) { const struct ip6t_mh *mhinfo = par->matchinfo; diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index c58d65336577..76397f35eafd 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -183,7 +183,7 @@ static bool rt_mt6(const struct sk_buff *skb, const struct xt_match_param *par) return false; } -static bool rt_mt6_check(const struct xt_mtchk_param *par) +static int rt_mt6_check(const struct xt_mtchk_param *par) { const struct ip6t_rt *rtinfo = par->matchinfo; diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index 4c273e871301..1f2c35ef1427 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -132,7 +132,7 @@ xt_cluster_mt(const struct sk_buff *skb, const struct xt_match_param *par) !!(info->flags & XT_CLUSTER_F_INV); } -static bool xt_cluster_mt_checkentry(const struct xt_mtchk_param *par) +static int xt_cluster_mt_checkentry(const struct xt_mtchk_param *par) { 
struct xt_cluster_match_info *info = par->matchinfo; diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index edb7bbd9ae54..136ef4ccdacb 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -93,7 +93,7 @@ connbytes_mt(const struct sk_buff *skb, const struct xt_match_param *par) return what >= sinfo->count.from; } -static bool connbytes_mt_check(const struct xt_mtchk_param *par) +static int connbytes_mt_check(const struct xt_mtchk_param *par) { const struct xt_connbytes_info *sinfo = par->matchinfo; diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index d5b26dab9e26..a9fec38ab029 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -216,7 +216,7 @@ connlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) return false; } -static bool connlimit_mt_check(const struct xt_mtchk_param *par) +static int connlimit_mt_check(const struct xt_mtchk_param *par) { struct xt_connlimit_info *info = par->matchinfo; unsigned int i; diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 7a51ba63f545..df7eaff874f1 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -103,7 +103,7 @@ connmark_mt(const struct sk_buff *skb, const struct xt_match_param *par) return ((ct->mark & info->mask) == info->mark) ^ info->invert; } -static bool connmark_mt_check(const struct xt_mtchk_param *par) +static int connmark_mt_check(const struct xt_mtchk_param *par) { if (nf_ct_l3proto_try_module_get(par->family) < 0) { pr_info("cannot load conntrack support for proto=%u\n", diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 387172b6b0d8..500e0338a187 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -206,7 +206,7 @@ conntrack_mt_v2(const struct sk_buff *skb, const struct xt_match_param *par) return conntrack_mt(skb, par, info->state_mask, info->status_mask); } -static bool conntrack_mt_check(const struct xt_mtchk_param *par) +static int conntrack_mt_check(const struct xt_mtchk_param *par) { if (nf_ct_l3proto_try_module_get(par->family) < 0) { pr_info("cannot load conntrack support for proto=%u\n", diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index 8f6014f7c881..da8c301d24ea 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c @@ -123,7 +123,7 @@ dccp_mt(const struct sk_buff *skb, const struct xt_match_param *par) XT_DCCP_OPTION, info->flags, info->invflags); } -static bool dccp_mt_check(const struct xt_mtchk_param *par) +static int dccp_mt_check(const struct xt_mtchk_param *par) { const struct xt_dccp_info *info = par->matchinfo; diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c index 6ecedc13db0c..295da4ce822c 100644 --- a/net/netfilter/xt_dscp.c +++ b/net/netfilter/xt_dscp.c @@ -42,7 +42,7 @@ dscp_mt6(const struct sk_buff *skb, const struct xt_match_param *par) return (dscp == info->dscp) ^ !!info->invert; } -static bool dscp_mt_check(const struct xt_mtchk_param *par) +static int dscp_mt_check(const struct xt_mtchk_param *par) { const struct xt_dscp_info *info = par->matchinfo; diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c index 1a446d626769..9f5da9795674 100644 --- a/net/netfilter/xt_esp.c +++ b/net/netfilter/xt_esp.c @@ -60,7 +60,7 @@ static bool esp_mt(const struct sk_buff *skb, const struct xt_match_param *par) !!(espinfo->invflags & XT_ESP_INV_SPI)); } -static bool esp_mt_check(const struct xt_mtchk_param *par) +static int esp_mt_check(const 
struct xt_mtchk_param *par) { const struct xt_esp *espinfo = par->matchinfo; diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 8f3e0c02ca54..d13800c95930 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -671,7 +671,7 @@ hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) return false; } -static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par) +static int hashlimit_mt_check_v0(const struct xt_mtchk_param *par) { struct net *net = par->net; struct xt_hashlimit_info *r = par->matchinfo; @@ -707,7 +707,7 @@ static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par) return true; } -static bool hashlimit_mt_check(const struct xt_mtchk_param *par) +static int hashlimit_mt_check(const struct xt_mtchk_param *par) { struct net *net = par->net; struct xt_hashlimit_mtinfo1 *info = par->matchinfo; diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index 482aff2ccf7c..6e177b279f90 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -54,7 +54,7 @@ helper_mt(const struct sk_buff *skb, const struct xt_match_param *par) return ret; } -static bool helper_mt_check(const struct xt_mtchk_param *par) +static int helper_mt_check(const struct xt_mtchk_param *par) { struct xt_helper_info *info = par->matchinfo; diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index b3dfca63fa52..138a324df8df 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -97,7 +97,7 @@ user2credits(u_int32_t user) return (user * HZ * CREDITS_PER_JIFFY) / XT_LIMIT_SCALE; } -static bool limit_mt_check(const struct xt_mtchk_param *par) +static int limit_mt_check(const struct xt_mtchk_param *par) { struct xt_rateinfo *r = par->matchinfo; struct xt_limit_priv *priv; diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c index 4fa90c86fdb5..b446738eab1a 100644 --- a/net/netfilter/xt_multiport.c +++ b/net/netfilter/xt_multiport.c @@ -152,7 +152,7 @@ check(u_int16_t proto, && count <= XT_MULTI_PORTS; } -static bool multiport_mt_check_v0(const struct xt_mtchk_param *par) +static int multiport_mt_check_v0(const struct xt_mtchk_param *par) { const struct ipt_ip *ip = par->entryinfo; const struct xt_multiport *multiinfo = par->matchinfo; @@ -161,7 +161,7 @@ static bool multiport_mt_check_v0(const struct xt_mtchk_param *par) multiinfo->count); } -static bool multiport_mt_check(const struct xt_mtchk_param *par) +static int multiport_mt_check(const struct xt_mtchk_param *par) { const struct ipt_ip *ip = par->entryinfo; const struct xt_multiport_v1 *multiinfo = par->matchinfo; @@ -170,7 +170,7 @@ static bool multiport_mt_check(const struct xt_mtchk_param *par) multiinfo->count); } -static bool multiport_mt6_check_v0(const struct xt_mtchk_param *par) +static int multiport_mt6_check_v0(const struct xt_mtchk_param *par) { const struct ip6t_ip6 *ip = par->entryinfo; const struct xt_multiport *multiinfo = par->matchinfo; @@ -179,7 +179,7 @@ static bool multiport_mt6_check_v0(const struct xt_mtchk_param *par) multiinfo->count); } -static bool multiport_mt6_check(const struct xt_mtchk_param *par) +static int multiport_mt6_check(const struct xt_mtchk_param *par) { const struct ip6t_ip6 *ip = par->entryinfo; const struct xt_multiport_v1 *multiinfo = par->matchinfo; diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 3d42a278408f..850e412c83ef 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -83,7 +83,7 @@ match_outdev: return 
(!!ret ^ !(info->invert & XT_PHYSDEV_OP_OUT)); } -static bool physdev_mt_check(const struct xt_mtchk_param *par) +static int physdev_mt_check(const struct xt_mtchk_param *par) { const struct xt_physdev_info *info = par->matchinfo; diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index de3aded6afb8..c9965b640b16 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -128,7 +128,7 @@ policy_mt(const struct sk_buff *skb, const struct xt_match_param *par) return ret; } -static bool policy_mt_check(const struct xt_mtchk_param *par) +static int policy_mt_check(const struct xt_mtchk_param *par) { const struct xt_policy_info *info = par->matchinfo; diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index 390b7d09fe51..2861fac5f2e1 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -43,7 +43,7 @@ quota_mt(const struct sk_buff *skb, const struct xt_match_param *par) return ret; } -static bool quota_mt_check(const struct xt_mtchk_param *par) +static int quota_mt_check(const struct xt_mtchk_param *par) { struct xt_quota_info *q = par->matchinfo; diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index 4fc6a917f6de..3b5e3d613b18 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -74,7 +74,7 @@ xt_rateest_mt(const struct sk_buff *skb, const struct xt_match_param *par) return ret; } -static bool xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) +static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) { struct xt_rateest_match_info *info = par->matchinfo; struct xt_rateest *est1, *est2; diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 85309448c5e7..52042c8bf7f2 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -305,7 +305,7 @@ out: return ret; } -static bool recent_mt_check(const struct xt_mtchk_param *par) +static int recent_mt_check(const struct xt_mtchk_param *par) { struct recent_net *recent_net = recent_pernet(par->net); const struct xt_recent_mtinfo *info = par->matchinfo; diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index 977b182dea59..5037a7a0059c 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -144,7 +144,7 @@ sctp_mt(const struct sk_buff *skb, const struct xt_match_param *par) XT_SCTP_CHUNK_TYPES, info->flags, info->invflags); } -static bool sctp_mt_check(const struct xt_mtchk_param *par) +static int sctp_mt_check(const struct xt_mtchk_param *par) { const struct xt_sctp_info *info = par->matchinfo; diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index 94893be80276..8b15b1317f1f 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -37,7 +37,7 @@ state_mt(const struct sk_buff *skb, const struct xt_match_param *par) return (sinfo->statemask & statebit); } -static bool state_mt_check(const struct xt_mtchk_param *par) +static int state_mt_check(const struct xt_mtchk_param *par) { if (nf_ct_l3proto_try_module_get(par->family) < 0) { pr_info("cannot load conntrack support for proto=%u\n", diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index 51ac1bbb4f52..a577ab008f57 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c @@ -52,7 +52,7 @@ statistic_mt(const struct sk_buff *skb, const struct xt_match_param *par) return ret; } -static bool statistic_mt_check(const struct xt_mtchk_param *par) +static int statistic_mt_check(const struct xt_mtchk_param *par) { struct xt_statistic_info *info = 
par->matchinfo; diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index b4d774111311..7d1412154e27 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -40,7 +40,7 @@ string_mt(const struct sk_buff *skb, const struct xt_match_param *par) #define STRING_TEXT_PRIV(m) ((struct xt_string_info *)(m)) -static bool string_mt_check(const struct xt_mtchk_param *par) +static int string_mt_check(const struct xt_mtchk_param *par) { struct xt_string_info *conf = par->matchinfo; struct ts_config *ts_conf; diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index b53887f83c44..00728410099f 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c @@ -120,7 +120,7 @@ static bool tcp_mt(const struct sk_buff *skb, const struct xt_match_param *par) return true; } -static bool tcp_mt_check(const struct xt_mtchk_param *par) +static int tcp_mt_check(const struct xt_mtchk_param *par) { const struct xt_tcp *tcpinfo = par->matchinfo; @@ -155,7 +155,7 @@ static bool udp_mt(const struct sk_buff *skb, const struct xt_match_param *par) !!(udpinfo->invflags & XT_UDP_INV_DSTPT)); } -static bool udp_mt_check(const struct xt_mtchk_param *par) +static int udp_mt_check(const struct xt_mtchk_param *par) { const struct xt_udp *udpinfo = par->matchinfo; diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index 45ed05b5161f..db74f4fd57df 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -217,7 +217,7 @@ time_mt(const struct sk_buff *skb, const struct xt_match_param *par) return true; } -static bool time_mt_check(const struct xt_mtchk_param *par) +static int time_mt_check(const struct xt_mtchk_param *par) { const struct xt_time_info *info = par->matchinfo; -- cgit v1.2.2 From 135367b8f6a18507af6b9a6910a14b5699415309 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 19 Mar 2010 17:16:42 +0100 Subject: netfilter: xtables: change xt_target.checkentry return type Restore function signatures from bool to int so that we can report memory allocation failures or similar using -ENOMEM rather than always having to pass -EINVAL back. // @@ type bool; identifier check, par; @@ -bool check +int check (struct xt_tgchk_param *par) { ... } // Minus the change it does to xt_ct_find_proto. 
Signed-off-by: Jan Engelhardt --- net/bridge/netfilter/ebt_arpreply.c | 2 +- net/bridge/netfilter/ebt_dnat.c | 2 +- net/bridge/netfilter/ebt_log.c | 2 +- net/bridge/netfilter/ebt_mark.c | 2 +- net/bridge/netfilter/ebt_nflog.c | 2 +- net/bridge/netfilter/ebt_redirect.c | 2 +- net/bridge/netfilter/ebt_snat.c | 2 +- net/bridge/netfilter/ebt_ulog.c | 2 +- net/ipv4/netfilter/arpt_mangle.c | 2 +- net/ipv4/netfilter/ipt_CLUSTERIP.c | 2 +- net/ipv4/netfilter/ipt_ECN.c | 2 +- net/ipv4/netfilter/ipt_LOG.c | 2 +- net/ipv4/netfilter/ipt_MASQUERADE.c | 2 +- net/ipv4/netfilter/ipt_NETMAP.c | 2 +- net/ipv4/netfilter/ipt_REDIRECT.c | 2 +- net/ipv4/netfilter/ipt_REJECT.c | 2 +- net/ipv4/netfilter/ipt_ULOG.c | 2 +- net/ipv4/netfilter/nf_nat_rule.c | 4 ++-- net/ipv6/netfilter/ip6t_LOG.c | 2 +- net/ipv6/netfilter/ip6t_REJECT.c | 2 +- net/netfilter/xt_CONNSECMARK.c | 2 +- net/netfilter/xt_CT.c | 2 +- net/netfilter/xt_DSCP.c | 2 +- net/netfilter/xt_HL.c | 4 ++-- net/netfilter/xt_LED.c | 2 +- net/netfilter/xt_NFLOG.c | 2 +- net/netfilter/xt_NFQUEUE.c | 2 +- net/netfilter/xt_RATEEST.c | 2 +- net/netfilter/xt_SECMARK.c | 2 +- net/netfilter/xt_TCPMSS.c | 4 ++-- net/netfilter/xt_TPROXY.c | 2 +- net/netfilter/xt_connmark.c | 2 +- 32 files changed, 35 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c index f392e9d93f53..2491564e9e08 100644 --- a/net/bridge/netfilter/ebt_arpreply.c +++ b/net/bridge/netfilter/ebt_arpreply.c @@ -57,7 +57,7 @@ ebt_arpreply_tg(struct sk_buff *skb, const struct xt_target_param *par) return info->target; } -static bool ebt_arpreply_tg_check(const struct xt_tgchk_param *par) +static int ebt_arpreply_tg_check(const struct xt_tgchk_param *par) { const struct ebt_arpreply_info *info = par->targinfo; const struct ebt_entry *e = par->entryinfo; diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c index 2bb40d728a35..5fddebea45c2 100644 --- a/net/bridge/netfilter/ebt_dnat.c +++ b/net/bridge/netfilter/ebt_dnat.c @@ -26,7 +26,7 @@ ebt_dnat_tg(struct sk_buff *skb, const struct xt_target_param *par) return info->target; } -static bool ebt_dnat_tg_check(const struct xt_tgchk_param *par) +static int ebt_dnat_tg_check(const struct xt_tgchk_param *par) { const struct ebt_nat_info *info = par->targinfo; unsigned int hook_mask; diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index e873924ddb5d..a0aeac6176ee 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -24,7 +24,7 @@ static DEFINE_SPINLOCK(ebt_log_lock); -static bool ebt_log_tg_check(const struct xt_tgchk_param *par) +static int ebt_log_tg_check(const struct xt_tgchk_param *par) { struct ebt_log_info *info = par->targinfo; diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c index 2b5ce533d6b9..dd94dafa6155 100644 --- a/net/bridge/netfilter/ebt_mark.c +++ b/net/bridge/netfilter/ebt_mark.c @@ -36,7 +36,7 @@ ebt_mark_tg(struct sk_buff *skb, const struct xt_target_param *par) return info->target | ~EBT_VERDICT_BITS; } -static bool ebt_mark_tg_check(const struct xt_tgchk_param *par) +static int ebt_mark_tg_check(const struct xt_tgchk_param *par) { const struct ebt_mark_t_info *info = par->targinfo; int tmp; diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c index 40dbd248b9ae..1f2b7bbdde73 100644 --- a/net/bridge/netfilter/ebt_nflog.c +++ b/net/bridge/netfilter/ebt_nflog.c @@ -35,7 +35,7 @@ ebt_nflog_tg(struct sk_buff *skb, const 
struct xt_target_param *par) return EBT_CONTINUE; } -static bool ebt_nflog_tg_check(const struct xt_tgchk_param *par) +static int ebt_nflog_tg_check(const struct xt_tgchk_param *par) { struct ebt_nflog_info *info = par->targinfo; diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c index 9be8fbcd370b..73c4d3ac6f2e 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -32,7 +32,7 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_target_param *par) return info->target; } -static bool ebt_redirect_tg_check(const struct xt_tgchk_param *par) +static int ebt_redirect_tg_check(const struct xt_tgchk_param *par) { const struct ebt_redirect_info *info = par->targinfo; unsigned int hook_mask; diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c index 9c7b520765a2..94bcecd90d74 100644 --- a/net/bridge/netfilter/ebt_snat.c +++ b/net/bridge/netfilter/ebt_snat.c @@ -42,7 +42,7 @@ out: return info->target | ~EBT_VERDICT_BITS; } -static bool ebt_snat_tg_check(const struct xt_tgchk_param *par) +static int ebt_snat_tg_check(const struct xt_tgchk_param *par) { const struct ebt_nat_info *info = par->targinfo; int tmp; diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 789ea36f1db1..f554bc2515d6 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -249,7 +249,7 @@ ebt_ulog_tg(struct sk_buff *skb, const struct xt_target_param *par) return EBT_CONTINUE; } -static bool ebt_ulog_tg_check(const struct xt_tgchk_param *par) +static int ebt_ulog_tg_check(const struct xt_tgchk_param *par) { struct ebt_ulog_info *uloginfo = par->targinfo; diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index b0d5b1d0a769..4b51a027f307 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -54,7 +54,7 @@ target(struct sk_buff *skb, const struct xt_target_param *par) return mangle->target; } -static bool checkentry(const struct xt_tgchk_param *par) +static int checkentry(const struct xt_tgchk_param *par) { const struct arpt_mangle *mangle = par->targinfo; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index fcaa0dc8e075..290a7b9b393e 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -347,7 +347,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par) return XT_CONTINUE; } -static bool clusterip_tg_check(const struct xt_tgchk_param *par) +static int clusterip_tg_check(const struct xt_tgchk_param *par) { struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; const struct ipt_entry *e = par->entryinfo; diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 01988752547e..9d96500a4157 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -93,7 +93,7 @@ ecn_tg(struct sk_buff *skb, const struct xt_target_param *par) return XT_CONTINUE; } -static bool ecn_tg_check(const struct xt_tgchk_param *par) +static int ecn_tg_check(const struct xt_tgchk_param *par) { const struct ipt_ECN_info *einfo = par->targinfo; const struct ipt_entry *e = par->entryinfo; diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index b3bf623fa222..c9ee5c40d1bb 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -439,7 +439,7 @@ log_tg(struct sk_buff *skb, const struct xt_target_param *par) return XT_CONTINUE; } -static bool log_tg_check(const 
struct xt_tgchk_param *par) +static int log_tg_check(const struct xt_tgchk_param *par) { const struct ipt_log_info *loginfo = par->targinfo; diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 5063ddac7c04..5a182f6de5d5 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -28,7 +28,7 @@ MODULE_AUTHOR("Netfilter Core Team "); MODULE_DESCRIPTION("Xtables: automatic-address SNAT"); /* FIXME: Multiple targets. --RR */ -static bool masquerade_tg_check(const struct xt_tgchk_param *par) +static int masquerade_tg_check(const struct xt_tgchk_param *par) { const struct nf_nat_multi_range_compat *mr = par->targinfo; diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 51ab01a0a95d..cbfe5f7e082a 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -22,7 +22,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Svenning Soerensen "); MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets"); -static bool netmap_tg_check(const struct xt_tgchk_param *par) +static int netmap_tg_check(const struct xt_tgchk_param *par) { const struct nf_nat_multi_range_compat *mr = par->targinfo; diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 74f1f55fd61a..f8daec20fb04 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -26,7 +26,7 @@ MODULE_AUTHOR("Netfilter Core Team "); MODULE_DESCRIPTION("Xtables: Connection redirection to localhost"); /* FIXME: Take multiple ranges --RR */ -static bool redirect_tg_check(const struct xt_tgchk_param *par) +static int redirect_tg_check(const struct xt_tgchk_param *par) { const struct nf_nat_multi_range_compat *mr = par->targinfo; diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index ff32252bad59..cf76f1bc3f10 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -174,7 +174,7 @@ reject_tg(struct sk_buff *skb, const struct xt_target_param *par) return NF_DROP; } -static bool reject_tg_check(const struct xt_tgchk_param *par) +static int reject_tg_check(const struct xt_tgchk_param *par) { const struct ipt_reject_info *rejinfo = par->targinfo; const struct ipt_entry *e = par->entryinfo; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index d926201560dd..7f73bbe2193c 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -307,7 +307,7 @@ static void ipt_logfn(u_int8_t pf, ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); } -static bool ulog_tg_check(const struct xt_tgchk_param *par) +static int ulog_tg_check(const struct xt_tgchk_param *par) { const struct ipt_ulog_info *loginfo = par->targinfo; diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 7d6345e416c7..117226708738 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -74,7 +74,7 @@ ipt_dnat_target(struct sk_buff *skb, const struct xt_target_param *par) return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST); } -static bool ipt_snat_checkentry(const struct xt_tgchk_param *par) +static int ipt_snat_checkentry(const struct xt_tgchk_param *par) { const struct nf_nat_multi_range_compat *mr = par->targinfo; @@ -86,7 +86,7 @@ static bool ipt_snat_checkentry(const struct xt_tgchk_param *par) return true; } -static bool ipt_dnat_checkentry(const struct xt_tgchk_param *par) +static int ipt_dnat_checkentry(const struct 
xt_tgchk_param *par) { const struct nf_nat_multi_range_compat *mr = par->targinfo; diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 5a79883220e0..bcc3fc19374a 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -451,7 +451,7 @@ log_tg6(struct sk_buff *skb, const struct xt_target_param *par) } -static bool log_tg6_check(const struct xt_tgchk_param *par) +static int log_tg6_check(const struct xt_tgchk_param *par) { const struct ip6t_log_info *loginfo = par->targinfo; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 45efb9f38fcb..8d5141ece671 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -213,7 +213,7 @@ reject_tg6(struct sk_buff *skb, const struct xt_target_param *par) return NF_DROP; } -static bool reject_tg6_check(const struct xt_tgchk_param *par) +static int reject_tg6_check(const struct xt_tgchk_param *par) { const struct ip6t_reject_info *rejinfo = par->targinfo; const struct ip6t_entry *e = par->entryinfo; diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index 6812865488d6..3f9d0f4f852d 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -84,7 +84,7 @@ connsecmark_tg(struct sk_buff *skb, const struct xt_target_param *par) return XT_CONTINUE; } -static bool connsecmark_tg_check(const struct xt_tgchk_param *par) +static int connsecmark_tg_check(const struct xt_tgchk_param *par) { const struct xt_connsecmark_target_info *info = par->targinfo; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 6509e03f1e62..c1553bf06cf6 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -53,7 +53,7 @@ static u8 xt_ct_find_proto(const struct xt_tgchk_param *par) return 0; } -static bool xt_ct_tg_check(const struct xt_tgchk_param *par) +static int xt_ct_tg_check(const struct xt_tgchk_param *par) { struct xt_ct_target_info *info = par->targinfo; struct nf_conntrack_tuple t; diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index bbf08a91c600..1fa7b67bf225 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -60,7 +60,7 @@ dscp_tg6(struct sk_buff *skb, const struct xt_target_param *par) return XT_CONTINUE; } -static bool dscp_tg_check(const struct xt_tgchk_param *par) +static int dscp_tg_check(const struct xt_tgchk_param *par) { const struct xt_DSCP_info *info = par->targinfo; diff --git a/net/netfilter/xt_HL.c b/net/netfilter/xt_HL.c index 7004ed2ffa44..15ba16108182 100644 --- a/net/netfilter/xt_HL.c +++ b/net/netfilter/xt_HL.c @@ -101,7 +101,7 @@ hl_tg6(struct sk_buff *skb, const struct xt_target_param *par) return XT_CONTINUE; } -static bool ttl_tg_check(const struct xt_tgchk_param *par) +static int ttl_tg_check(const struct xt_tgchk_param *par) { const struct ipt_TTL_info *info = par->targinfo; @@ -114,7 +114,7 @@ static bool ttl_tg_check(const struct xt_tgchk_param *par) return true; } -static bool hl_tg6_check(const struct xt_tgchk_param *par) +static int hl_tg6_check(const struct xt_tgchk_param *par) { const struct ip6t_HL_info *info = par->targinfo; diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c index f511bea9464a..1a3e3dd5a774 100644 --- a/net/netfilter/xt_LED.c +++ b/net/netfilter/xt_LED.c @@ -80,7 +80,7 @@ static void led_timeout_callback(unsigned long data) led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF); } -static bool led_tg_check(const struct xt_tgchk_param *par) +static int led_tg_check(const struct 
xt_tgchk_param *par) { struct xt_led_info *ledinfo = par->targinfo; struct xt_led_info_internal *ledinternal; diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index a57c5cf018ec..13e6c0002c8a 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -37,7 +37,7 @@ nflog_tg(struct sk_buff *skb, const struct xt_target_param *par) return XT_CONTINUE; } -static bool nflog_tg_check(const struct xt_tgchk_param *par) +static int nflog_tg_check(const struct xt_tgchk_param *par) { const struct xt_nflog_info *info = par->targinfo; diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 7cc0de63aa0f..d435579a64ca 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -81,7 +81,7 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct xt_target_param *par) return NF_QUEUE_NR(queue); } -static bool nfqueue_tg_v1_check(const struct xt_tgchk_param *par) +static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par) { const struct xt_NFQ_info_v1 *info = par->targinfo; u32 maxid; diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 87ae97e5516f..9743e50be8ef 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -85,7 +85,7 @@ xt_rateest_tg(struct sk_buff *skb, const struct xt_target_param *par) return XT_CONTINUE; } -static bool xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) +static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) { struct xt_rateest_target_info *info = par->targinfo; struct xt_rateest *est; diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index 4855fd9d7c6f..48f8e4f7ea8a 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -80,7 +80,7 @@ static bool checkentry_selinux(struct xt_secmark_target_info *info) return true; } -static bool secmark_tg_check(const struct xt_tgchk_param *par) +static int secmark_tg_check(const struct xt_tgchk_param *par) { struct xt_secmark_target_info *info = par->targinfo; diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 45161d9a9f23..70288dc31583 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -234,7 +234,7 @@ static inline bool find_syn_match(const struct xt_entry_match *m) return false; } -static bool tcpmss_tg4_check(const struct xt_tgchk_param *par) +static int tcpmss_tg4_check(const struct xt_tgchk_param *par) { const struct xt_tcpmss_info *info = par->targinfo; const struct ipt_entry *e = par->entryinfo; @@ -256,7 +256,7 @@ static bool tcpmss_tg4_check(const struct xt_tgchk_param *par) } #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) -static bool tcpmss_tg6_check(const struct xt_tgchk_param *par) +static int tcpmss_tg6_check(const struct xt_tgchk_param *par) { const struct xt_tcpmss_info *info = par->targinfo; const struct ip6t_entry *e = par->entryinfo; diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index e9244fdc123a..189df9af4de6 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -59,7 +59,7 @@ tproxy_tg(struct sk_buff *skb, const struct xt_target_param *par) return NF_DROP; } -static bool tproxy_tg_check(const struct xt_tgchk_param *par) +static int tproxy_tg_check(const struct xt_tgchk_param *par) { const struct ipt_ip *i = par->entryinfo; diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index df7eaff874f1..0e69427f8cda 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -74,7 +74,7 @@ connmark_tg(struct 
sk_buff *skb, const struct xt_target_param *par) return XT_CONTINUE; } -static bool connmark_tg_check(const struct xt_tgchk_param *par) +static int connmark_tg_check(const struct xt_tgchk_param *par) { if (nf_ct_l3proto_try_module_get(par->family) < 0) { pr_info("cannot load conntrack support for proto=%u\n", -- cgit v1.2.2 From bd414ee605ff3ac5fcd79f57269a897879ee4cde Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 23 Mar 2010 16:35:56 +0100 Subject: netfilter: xtables: change matches to return error code The following semantic patch does part of the transformation: // @ rule1 @ struct xt_match ops; identifier check; @@ ops.checkentry = check; @@ identifier rule1.check; @@ check(...) { <... -return true; +return 0; ...> } @@ identifier rule1.check; @@ check(...) { <... -return false; +return -EINVAL; ...> } // Signed-off-by: Jan Engelhardt --- net/bridge/netfilter/ebt_802_3.c | 4 ++-- net/bridge/netfilter/ebt_among.c | 8 ++++---- net/bridge/netfilter/ebt_arp.c | 6 +++--- net/bridge/netfilter/ebt_ip.c | 14 +++++++------- net/bridge/netfilter/ebt_ip6.c | 14 +++++++------- net/bridge/netfilter/ebt_limit.c | 4 ++-- net/bridge/netfilter/ebt_mark_m.c | 8 ++++---- net/bridge/netfilter/ebt_pkttype.c | 4 ++-- net/bridge/netfilter/ebt_stp.c | 6 +++--- net/bridge/netfilter/ebt_vlan.c | 14 +++++++------- net/ipv4/netfilter/ip_tables.c | 2 +- net/ipv4/netfilter/ipt_addrtype.c | 8 ++++---- net/ipv4/netfilter/ipt_ah.c | 4 ++-- net/ipv4/netfilter/ipt_ecn.c | 8 ++++---- net/ipv6/netfilter/ip6_tables.c | 2 +- net/ipv6/netfilter/ip6t_ah.c | 4 ++-- net/ipv6/netfilter/ip6t_frag.c | 4 ++-- net/ipv6/netfilter/ip6t_hbh.c | 6 +++--- net/ipv6/netfilter/ip6t_ipv6header.c | 4 ++-- net/ipv6/netfilter/ip6t_mh.c | 2 +- net/ipv6/netfilter/ip6t_rt.c | 6 +++--- net/netfilter/x_tables.c | 12 ++++++++++-- net/netfilter/xt_cluster.c | 6 +++--- net/netfilter/xt_connbytes.c | 8 ++++---- net/netfilter/xt_connlimit.c | 6 +++--- net/netfilter/xt_connmark.c | 8 ++++---- net/netfilter/xt_conntrack.c | 4 ++-- net/netfilter/xt_dccp.c | 8 ++++---- net/netfilter/xt_dscp.c | 4 ++-- net/netfilter/xt_esp.c | 4 ++-- net/netfilter/xt_hashlimit.c | 29 ++++++++++++++--------------- net/netfilter/xt_helper.c | 4 ++-- net/netfilter/xt_limit.c | 6 +++--- net/netfilter/xt_physdev.c | 6 +++--- net/netfilter/xt_policy.c | 10 +++++----- net/netfilter/xt_quota.c | 6 +++--- net/netfilter/xt_rateest.c | 4 ++-- net/netfilter/xt_recent.c | 18 +++++++++--------- net/netfilter/xt_sctp.c | 12 ++++++------ net/netfilter/xt_state.c | 4 ++-- net/netfilter/xt_statistic.c | 6 +++--- net/netfilter/xt_string.c | 13 ++++++------- net/netfilter/xt_tcpudp.c | 4 ++-- net/netfilter/xt_time.c | 4 ++-- 44 files changed, 162 insertions(+), 156 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c index 7b6f4c4cccb7..f7de8dbc3422 100644 --- a/net/bridge/netfilter/ebt_802_3.c +++ b/net/bridge/netfilter/ebt_802_3.c @@ -41,9 +41,9 @@ static int ebt_802_3_mt_check(const struct xt_mtchk_param *par) const struct ebt_802_3_info *info = par->matchinfo; if (info->bitmask & ~EBT_802_3_MASK || info->invflags & ~EBT_802_3_MASK) - return false; + return -EINVAL; - return true; + return 0; } static struct xt_match ebt_802_3_mt_reg __read_mostly = { diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c index 8a75d399b510..20068e03fa81 100644 --- a/net/bridge/netfilter/ebt_among.c +++ b/net/bridge/netfilter/ebt_among.c @@ -190,17 +190,17 @@ static int ebt_among_mt_check(const struct 
xt_mtchk_param *par) pr_info("wrong size: %d against expected %d, rounded to %Zd\n", em->match_size, expected_length, EBT_ALIGN(expected_length)); - return false; + return -EINVAL; } if (wh_dst && (err = ebt_mac_wormhash_check_integrity(wh_dst))) { pr_info("dst integrity fail: %x\n", -err); - return false; + return -EINVAL; } if (wh_src && (err = ebt_mac_wormhash_check_integrity(wh_src))) { pr_info("src integrity fail: %x\n", -err); - return false; + return -EINVAL; } - return true; + return 0; } static struct xt_match ebt_among_mt_reg __read_mostly = { diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c index fc62055adb17..952150cd5e7d 100644 --- a/net/bridge/netfilter/ebt_arp.c +++ b/net/bridge/netfilter/ebt_arp.c @@ -108,10 +108,10 @@ static int ebt_arp_mt_check(const struct xt_mtchk_param *par) if ((e->ethproto != htons(ETH_P_ARP) && e->ethproto != htons(ETH_P_RARP)) || e->invflags & EBT_IPROTO) - return false; + return -EINVAL; if (info->bitmask & ~EBT_ARP_MASK || info->invflags & ~EBT_ARP_MASK) - return false; - return true; + return -EINVAL; + return 0; } static struct xt_match ebt_arp_mt_reg __read_mostly = { diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c index d1a555dc8878..a1c76c7e5219 100644 --- a/net/bridge/netfilter/ebt_ip.c +++ b/net/bridge/netfilter/ebt_ip.c @@ -84,24 +84,24 @@ static int ebt_ip_mt_check(const struct xt_mtchk_param *par) if (e->ethproto != htons(ETH_P_IP) || e->invflags & EBT_IPROTO) - return false; + return -EINVAL; if (info->bitmask & ~EBT_IP_MASK || info->invflags & ~EBT_IP_MASK) - return false; + return -EINVAL; if (info->bitmask & (EBT_IP_DPORT | EBT_IP_SPORT)) { if (info->invflags & EBT_IP_PROTO) - return false; + return -EINVAL; if (info->protocol != IPPROTO_TCP && info->protocol != IPPROTO_UDP && info->protocol != IPPROTO_UDPLITE && info->protocol != IPPROTO_SCTP && info->protocol != IPPROTO_DCCP) - return false; + return -EINVAL; } if (info->bitmask & EBT_IP_DPORT && info->dport[0] > info->dport[1]) - return false; + return -EINVAL; if (info->bitmask & EBT_IP_SPORT && info->sport[0] > info->sport[1]) - return false; - return true; + return -EINVAL; + return 0; } static struct xt_match ebt_ip_mt_reg __read_mostly = { diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c index fa4ecf50fdc9..33f8413f05ad 100644 --- a/net/bridge/netfilter/ebt_ip6.c +++ b/net/bridge/netfilter/ebt_ip6.c @@ -86,24 +86,24 @@ static int ebt_ip6_mt_check(const struct xt_mtchk_param *par) struct ebt_ip6_info *info = par->matchinfo; if (e->ethproto != htons(ETH_P_IPV6) || e->invflags & EBT_IPROTO) - return false; + return -EINVAL; if (info->bitmask & ~EBT_IP6_MASK || info->invflags & ~EBT_IP6_MASK) - return false; + return -EINVAL; if (info->bitmask & (EBT_IP6_DPORT | EBT_IP6_SPORT)) { if (info->invflags & EBT_IP6_PROTO) - return false; + return -EINVAL; if (info->protocol != IPPROTO_TCP && info->protocol != IPPROTO_UDP && info->protocol != IPPROTO_UDPLITE && info->protocol != IPPROTO_SCTP && info->protocol != IPPROTO_DCCP) - return false; + return -EINVAL; } if (info->bitmask & EBT_IP6_DPORT && info->dport[0] > info->dport[1]) - return false; + return -EINVAL; if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1]) - return false; - return true; + return -EINVAL; + return 0; } static struct xt_match ebt_ip6_mt_reg __read_mostly = { diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c index abfb0ecd7c17..4b0e2e53fa57 100644 --- a/net/bridge/netfilter/ebt_limit.c 
+++ b/net/bridge/netfilter/ebt_limit.c @@ -74,7 +74,7 @@ static int ebt_limit_mt_check(const struct xt_mtchk_param *par) user2credits(info->avg * info->burst) < user2credits(info->avg)) { pr_info("overflow, try lower: %u/%u\n", info->avg, info->burst); - return false; + return -EINVAL; } /* User avg in seconds * EBT_LIMIT_SCALE: convert to jiffies * 128. */ @@ -82,7 +82,7 @@ static int ebt_limit_mt_check(const struct xt_mtchk_param *par) info->credit = user2credits(info->avg * info->burst); info->credit_cap = user2credits(info->avg * info->burst); info->cost = user2credits(info->avg); - return true; + return 0; } diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c index 1e5b0b316fbe..e4366c0a1a43 100644 --- a/net/bridge/netfilter/ebt_mark_m.c +++ b/net/bridge/netfilter/ebt_mark_m.c @@ -27,12 +27,12 @@ static int ebt_mark_mt_check(const struct xt_mtchk_param *par) const struct ebt_mark_m_info *info = par->matchinfo; if (info->bitmask & ~EBT_MARK_MASK) - return false; + return -EINVAL; if ((info->bitmask & EBT_MARK_OR) && (info->bitmask & EBT_MARK_AND)) - return false; + return -EINVAL; if (!info->bitmask) - return false; - return true; + return -EINVAL; + return 0; } diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c index 9b3c64516605..f34bcc3197bd 100644 --- a/net/bridge/netfilter/ebt_pkttype.c +++ b/net/bridge/netfilter/ebt_pkttype.c @@ -25,9 +25,9 @@ static int ebt_pkttype_mt_check(const struct xt_mtchk_param *par) const struct ebt_pkttype_info *info = par->matchinfo; if (info->invert != 0 && info->invert != 1) - return false; + return -EINVAL; /* Allow any pkt_type value */ - return true; + return 0; } static struct xt_match ebt_pkttype_mt_reg __read_mostly = { diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c index 521186fa6994..02f28fdda393 100644 --- a/net/bridge/netfilter/ebt_stp.c +++ b/net/bridge/netfilter/ebt_stp.c @@ -162,13 +162,13 @@ static int ebt_stp_mt_check(const struct xt_mtchk_param *par) if (info->bitmask & ~EBT_STP_MASK || info->invflags & ~EBT_STP_MASK || !(info->bitmask & EBT_STP_MASK)) - return false; + return -EINVAL; /* Make sure the match only receives stp frames */ if (compare_ether_addr(e->destmac, bridge_ula) || compare_ether_addr(e->destmsk, msk) || !(e->bitmask & EBT_DESTMAC)) - return false; + return -EINVAL; - return true; + return 0; } static struct xt_match ebt_stp_mt_reg __read_mostly = { diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c index 04a9575389d8..bf8ae5c7a0c5 100644 --- a/net/bridge/netfilter/ebt_vlan.c +++ b/net/bridge/netfilter/ebt_vlan.c @@ -88,7 +88,7 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par) if (e->ethproto != htons(ETH_P_8021Q)) { pr_debug("passed entry proto %2.4X is not 802.1Q (8100)\n", ntohs(e->ethproto)); - return false; + return -EINVAL; } /* Check for bitmask range @@ -96,14 +96,14 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par) if (info->bitmask & ~EBT_VLAN_MASK) { pr_debug("bitmask %2X is out of mask (%2X)\n", info->bitmask, EBT_VLAN_MASK); - return false; + return -EINVAL; } /* Check for inversion flags range */ if (info->invflags & ~EBT_VLAN_MASK) { pr_debug("inversion flags %2X is out of mask (%2X)\n", info->invflags, EBT_VLAN_MASK); - return false; + return -EINVAL; } /* Reserved VLAN ID (VID) values @@ -117,7 +117,7 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par) if (info->id > VLAN_GROUP_ARRAY_LEN) { pr_debug("id %d is out of range 
(1-4096)\n", info->id); - return false; + return -EINVAL; } /* Note: This is valid VLAN-tagged frame point. * Any value of user_priority are acceptable, @@ -132,7 +132,7 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par) if ((unsigned char) info->prio > 7) { pr_debug("prio %d is out of range (0-7)\n", info->prio); - return false; + return -EINVAL; } } /* Check for encapsulated proto range - it is possible to be @@ -142,11 +142,11 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par) if ((unsigned short) ntohs(info->encap) < ETH_ZLEN) { pr_debug("encap frame length %d is less than " "minimal\n", ntohs(info->encap)); - return false; + return -EINVAL; } } - return true; + return 0; } static struct xt_match ebt_vlan_mt_reg __read_mostly = { diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 771ffa7b9aff..18c5b1573f3e 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -2181,7 +2181,7 @@ static int icmp_checkentry(const struct xt_mtchk_param *par) const struct ipt_icmp *icmpinfo = par->matchinfo; /* Must specify no unknown invflags */ - return !(icmpinfo->invflags & ~IPT_ICMP_INV); + return (icmpinfo->invflags & ~IPT_ICMP_INV) ? -EINVAL : 0; } /* The built-in targets: standard (NULL) and error. */ diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c index 81197f456d7f..e4b8f2bf8aaa 100644 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ b/net/ipv4/netfilter/ipt_addrtype.c @@ -78,7 +78,7 @@ static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { pr_info("both incoming and outgoing " "interface limitation cannot be selected\n"); - return false; + return -EINVAL; } if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | @@ -86,7 +86,7 @@ static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { pr_info("output interface limitation " "not valid in PREROUTING and INPUT\n"); - return false; + return -EINVAL; } if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) | @@ -94,10 +94,10 @@ static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) { pr_info("input interface limitation " "not valid in POSTROUTING and OUTPUT\n"); - return false; + return -EINVAL; } - return true; + return 0; } static struct xt_match addrtype_mt_reg[] __read_mostly = { diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index 667ded16e120..9f9810204892 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c @@ -62,9 +62,9 @@ static int ah_mt_check(const struct xt_mtchk_param *par) /* Must specify no unknown invflags */ if (ahinfo->invflags & ~IPT_AH_INV_MASK) { pr_debug("unknown flags %X\n", ahinfo->invflags); - return false; + return -EINVAL; } - return true; + return 0; } static struct xt_match ah_mt_reg __read_mostly = { diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index d1e234fe7f1a..32e24100d8d1 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -91,18 +91,18 @@ static int ecn_mt_check(const struct xt_mtchk_param *par) const struct ipt_ip *ip = par->entryinfo; if (info->operation & IPT_ECN_OP_MATCH_MASK) - return false; + return -EINVAL; if (info->invert & IPT_ECN_OP_MATCH_MASK) - return false; + return -EINVAL; if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) && ip->proto != IPPROTO_TCP) { pr_info("cannot match TCP bits in rule for 
non-tcp packets\n"); - return false; + return -EINVAL; } - return true; + return 0; } static struct xt_match ecn_mt_reg __read_mostly = { diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 595b45d52ff3..f2b815e72329 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -2214,7 +2214,7 @@ static int icmp6_checkentry(const struct xt_mtchk_param *par) const struct ip6t_icmp *icmpinfo = par->matchinfo; /* Must specify no unknown invflags */ - return !(icmpinfo->invflags & ~IP6T_ICMP_INV); + return (icmpinfo->invflags & ~IP6T_ICMP_INV) ? -EINVAL : 0; } /* The built-in targets: standard (NULL) and error. */ diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index 3d570446deef..1580693c86c1 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -93,9 +93,9 @@ static int ah_mt6_check(const struct xt_mtchk_param *par) if (ahinfo->invflags & ~IP6T_AH_INV_MASK) { pr_debug("unknown flags %X\n", ahinfo->invflags); - return false; + return -EINVAL; } - return true; + return 0; } static struct xt_match ah_mt6_reg __read_mostly = { diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index c2dba2701fa3..a5daf0ffb4ec 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -108,9 +108,9 @@ static int frag_mt6_check(const struct xt_mtchk_param *par) if (fraginfo->invflags & ~IP6T_FRAG_INV_MASK) { pr_debug("unknown flags %X\n", fraginfo->invflags); - return false; + return -EINVAL; } - return true; + return 0; } static struct xt_match frag_mt6_reg __read_mostly = { diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 1b294317707b..5e6acdae6d80 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -170,15 +170,15 @@ static int hbh_mt6_check(const struct xt_mtchk_param *par) if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) { pr_debug("unknown flags %X\n", optsinfo->invflags); - return false; + return -EINVAL; } if (optsinfo->flags & IP6T_OPTS_NSTRICT) { pr_debug("Not strict - not implemented"); - return false; + return -EINVAL; } - return true; + return 0; } static struct xt_match hbh_mt6_reg[] __read_mostly = { diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index 90e1e04b7932..46fbabb493fa 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -125,9 +125,9 @@ static int ipv6header_mt6_check(const struct xt_mtchk_param *par) /* invflags is 0 or 0xff in hard mode */ if ((!info->modeflag) && info->invflags != 0x00 && info->invflags != 0xFF) - return false; + return -EINVAL; - return true; + return 0; } static struct xt_match ipv6header_mt6_reg __read_mostly = { diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c index d9408045994c..c9f443e0138f 100644 --- a/net/ipv6/netfilter/ip6t_mh.c +++ b/net/ipv6/netfilter/ip6t_mh.c @@ -67,7 +67,7 @@ static int mh_mt6_check(const struct xt_mtchk_param *par) const struct ip6t_mh *mhinfo = par->matchinfo; /* Must specify no unknown invflags */ - return !(mhinfo->invflags & ~IP6T_MH_INV_MASK); + return (mhinfo->invflags & ~IP6T_MH_INV_MASK) ? 
-EINVAL : 0; } static struct xt_match mh_mt6_reg __read_mostly = { diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 76397f35eafd..09322720d2a6 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -189,17 +189,17 @@ static int rt_mt6_check(const struct xt_mtchk_param *par) if (rtinfo->invflags & ~IP6T_RT_INV_MASK) { pr_debug("unknown flags %X\n", rtinfo->invflags); - return false; + return -EINVAL; } if ((rtinfo->flags & (IP6T_RT_RES | IP6T_RT_FST_MASK)) && (!(rtinfo->flags & IP6T_RT_TYP) || (rtinfo->rt_type != 0) || (rtinfo->invflags & IP6T_RT_INV_TYP))) { pr_debug("`--rt-type 0' required before `--rt-0-*'"); - return false; + return -EINVAL; } - return true; + return 0; } static struct xt_match rt_mt6_reg __read_mostly = { diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index ee7fe215b3e1..7ee177746172 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -363,6 +363,8 @@ static char *textify_hooks(char *buf, size_t size, unsigned int mask) int xt_check_match(struct xt_mtchk_param *par, unsigned int size, u_int8_t proto, bool inv_proto) { + int ret; + if (XT_ALIGN(par->match->matchsize) != size && par->match->matchsize != -1) { /* @@ -399,8 +401,14 @@ int xt_check_match(struct xt_mtchk_param *par, par->match->proto); return -EINVAL; } - if (par->match->checkentry != NULL && !par->match->checkentry(par)) - return -EINVAL; + if (par->match->checkentry != NULL) { + ret = par->match->checkentry(par); + if (ret < 0) + return ret; + else if (ret > 0) + /* Flag up potential errors. */ + return -EIO; + } return 0; } EXPORT_SYMBOL_GPL(xt_check_match); diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index 1f2c35ef1427..30cb7762fc41 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -140,14 +140,14 @@ static int xt_cluster_mt_checkentry(const struct xt_mtchk_param *par) pr_info("you have exceeded the maximum " "number of cluster nodes (%u > %u)\n", info->total_nodes, XT_CLUSTER_NODES_MAX); - return false; + return -EINVAL; } if (info->node_mask >= (1ULL << info->total_nodes)) { pr_info("this node mask cannot be " "higher than the total number of nodes\n"); - return false; + return -EINVAL; } - return true; + return 0; } static struct xt_match xt_cluster_match __read_mostly = { diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index 136ef4ccdacb..bf8e286361c3 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -100,20 +100,20 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par) if (sinfo->what != XT_CONNBYTES_PKTS && sinfo->what != XT_CONNBYTES_BYTES && sinfo->what != XT_CONNBYTES_AVGPKT) - return false; + return -EINVAL; if (sinfo->direction != XT_CONNBYTES_DIR_ORIGINAL && sinfo->direction != XT_CONNBYTES_DIR_REPLY && sinfo->direction != XT_CONNBYTES_DIR_BOTH) - return false; + return -EINVAL; if (nf_ct_l3proto_try_module_get(par->family) < 0) { pr_info("cannot load conntrack support for proto=%u\n", par->family); - return false; + return -EINVAL; } - return true; + return 0; } static void connbytes_mt_destroy(const struct xt_mtdtor_param *par) diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index a9fec38ab029..68e89f08140b 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -228,21 +228,21 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) if (nf_ct_l3proto_try_module_get(par->family) < 0) { pr_info("cannot load conntrack support 
for " "address family %u\n", par->family); - return false; + return -EINVAL; } /* init private data */ info->data = kmalloc(sizeof(struct xt_connlimit_data), GFP_KERNEL); if (info->data == NULL) { nf_ct_l3proto_module_put(par->family); - return false; + return -EINVAL; } spin_lock_init(&info->data->lock); for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) INIT_LIST_HEAD(&info->data->iphash[i]); - return true; + return 0; } static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 0e69427f8cda..e137af5559e0 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -79,9 +79,9 @@ static int connmark_tg_check(const struct xt_tgchk_param *par) if (nf_ct_l3proto_try_module_get(par->family) < 0) { pr_info("cannot load conntrack support for proto=%u\n", par->family); - return false; + return -EINVAL; } - return true; + return 0; } static void connmark_tg_destroy(const struct xt_tgdtor_param *par) @@ -108,9 +108,9 @@ static int connmark_mt_check(const struct xt_mtchk_param *par) if (nf_ct_l3proto_try_module_get(par->family) < 0) { pr_info("cannot load conntrack support for proto=%u\n", par->family); - return false; + return -EINVAL; } - return true; + return 0; } static void connmark_mt_destroy(const struct xt_mtdtor_param *par) diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 500e0338a187..26e34aa7f8d1 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -211,9 +211,9 @@ static int conntrack_mt_check(const struct xt_mtchk_param *par) if (nf_ct_l3proto_try_module_get(par->family) < 0) { pr_info("cannot load conntrack support for proto=%u\n", par->family); - return false; + return -EINVAL; } - return true; + return 0; } static void conntrack_mt_destroy(const struct xt_mtdtor_param *par) diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index da8c301d24ea..f54699ca5609 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c @@ -128,12 +128,12 @@ static int dccp_mt_check(const struct xt_mtchk_param *par) const struct xt_dccp_info *info = par->matchinfo; if (info->flags & ~XT_DCCP_VALID_FLAGS) - return false; + return -EINVAL; if (info->invflags & ~XT_DCCP_VALID_FLAGS) - return false; + return -EINVAL; if (info->invflags & ~info->flags) - return false; - return true; + return -EINVAL; + return 0; } static struct xt_match dccp_mt_reg[] __read_mostly = { diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c index 295da4ce822c..f355fb9e06fa 100644 --- a/net/netfilter/xt_dscp.c +++ b/net/netfilter/xt_dscp.c @@ -48,10 +48,10 @@ static int dscp_mt_check(const struct xt_mtchk_param *par) if (info->dscp > XT_DSCP_MAX) { pr_info("dscp %x out of range\n", info->dscp); - return false; + return -EINVAL; } - return true; + return 0; } static bool tos_mt(const struct sk_buff *skb, const struct xt_match_param *par) diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c index 9f5da9795674..143bfdc8e38f 100644 --- a/net/netfilter/xt_esp.c +++ b/net/netfilter/xt_esp.c @@ -66,10 +66,10 @@ static int esp_mt_check(const struct xt_mtchk_param *par) if (espinfo->invflags & ~XT_ESP_INV_MASK) { pr_debug("unknown flags %X\n", espinfo->invflags); - return false; + return -EINVAL; } - return true; + return 0; } static struct xt_match esp_mt_reg[] __read_mostly = { diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index d13800c95930..0c0152902b3b 100644 --- a/net/netfilter/xt_hashlimit.c +++ 
b/net/netfilter/xt_hashlimit.c @@ -681,30 +681,29 @@ static int hashlimit_mt_check_v0(const struct xt_mtchk_param *par) user2credits(r->cfg.avg * r->cfg.burst) < user2credits(r->cfg.avg)) { pr_info("overflow, try lower: %u/%u\n", r->cfg.avg, r->cfg.burst); - return false; + return -EINVAL; } if (r->cfg.mode == 0 || r->cfg.mode > (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_DIP | XT_HASHLIMIT_HASH_SIP | XT_HASHLIMIT_HASH_SPT)) - return false; + return -EINVAL; if (!r->cfg.gc_interval) - return false; + return -EINVAL; if (!r->cfg.expire) - return false; + return -EINVAL; if (r->name[sizeof(r->name) - 1] != '\0') - return false; + return -EINVAL; mutex_lock(&hashlimit_mutex); r->hinfo = htable_find_get(net, r->name, par->family); if (!r->hinfo && htable_create_v0(net, r, par->family) != 0) { mutex_unlock(&hashlimit_mutex); - return false; + return -EINVAL; } mutex_unlock(&hashlimit_mutex); - - return true; + return 0; } static int hashlimit_mt_check(const struct xt_mtchk_param *par) @@ -718,28 +717,28 @@ static int hashlimit_mt_check(const struct xt_mtchk_param *par) user2credits(info->cfg.avg)) { pr_info("overflow, try lower: %u/%u\n", info->cfg.avg, info->cfg.burst); - return false; + return -EINVAL; } if (info->cfg.gc_interval == 0 || info->cfg.expire == 0) - return false; + return -EINVAL; if (info->name[sizeof(info->name)-1] != '\0') - return false; + return -EINVAL; if (par->family == NFPROTO_IPV4) { if (info->cfg.srcmask > 32 || info->cfg.dstmask > 32) - return false; + return -EINVAL; } else { if (info->cfg.srcmask > 128 || info->cfg.dstmask > 128) - return false; + return -EINVAL; } mutex_lock(&hashlimit_mutex); info->hinfo = htable_find_get(net, info->name, par->family); if (!info->hinfo && htable_create(net, info, par->family) != 0) { mutex_unlock(&hashlimit_mutex); - return false; + return -EINVAL; } mutex_unlock(&hashlimit_mutex); - return true; + return 0; } static void diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index 6e177b279f90..eb308b32bfe0 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -61,10 +61,10 @@ static int helper_mt_check(const struct xt_mtchk_param *par) if (nf_ct_l3proto_try_module_get(par->family) < 0) { pr_info("cannot load conntrack support for proto=%u\n", par->family); - return false; + return -EINVAL; } info->name[29] = '\0'; - return true; + return 0; } static void helper_mt_destroy(const struct xt_mtdtor_param *par) diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 138a324df8df..5ff0580ce878 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -107,12 +107,12 @@ static int limit_mt_check(const struct xt_mtchk_param *par) || user2credits(r->avg * r->burst) < user2credits(r->avg)) { pr_info("Overflow, try lower: %u/%u\n", r->avg, r->burst); - return false; + return -EINVAL; } priv = kmalloc(sizeof(*priv), GFP_KERNEL); if (priv == NULL) - return false; + return -EINVAL; /* For SMP, we only want to use one set of state. */ r->master = priv; @@ -124,7 +124,7 @@ static int limit_mt_check(const struct xt_mtchk_param *par) r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. 
*/ r->cost = user2credits(r->avg); } - return true; + return 0; } static void limit_mt_destroy(const struct xt_mtdtor_param *par) diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 850e412c83ef..d0bdf3dd4d25 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -89,7 +89,7 @@ static int physdev_mt_check(const struct xt_mtchk_param *par) if (!(info->bitmask & XT_PHYSDEV_OP_MASK) || info->bitmask & ~XT_PHYSDEV_OP_MASK) - return false; + return -EINVAL; if (info->bitmask & XT_PHYSDEV_OP_OUT && (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) || info->invert & XT_PHYSDEV_OP_BRIDGED) && @@ -99,9 +99,9 @@ static int physdev_mt_check(const struct xt_mtchk_param *par) "POSTROUTING chains for non-bridged traffic is not " "supported anymore.\n"); if (par->hook_mask & (1 << NF_INET_LOCAL_OUT)) - return false; + return -EINVAL; } - return true; + return 0; } static struct xt_match physdev_mt_reg __read_mostly = { diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index c9965b640b16..1fa239c1fb93 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -134,23 +134,23 @@ static int policy_mt_check(const struct xt_mtchk_param *par) if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) { pr_info("neither incoming nor outgoing policy selected\n"); - return false; + return -EINVAL; } if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN)) && info->flags & XT_POLICY_MATCH_OUT) { pr_info("output policy not valid in PREROUTING and INPUT\n"); - return false; + return -EINVAL; } if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_OUT)) && info->flags & XT_POLICY_MATCH_IN) { pr_info("input policy not valid in POSTROUTING and OUTPUT\n"); - return false; + return -EINVAL; } if (info->len > XT_POLICY_MAX_ELEM) { pr_info("too many policy elements\n"); - return false; + return -EINVAL; } - return true; + return 0; } static struct xt_match policy_mt_reg[] __read_mostly = { diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index 2861fac5f2e1..766e71c6dc55 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -48,14 +48,14 @@ static int quota_mt_check(const struct xt_mtchk_param *par) struct xt_quota_info *q = par->matchinfo; if (q->flags & ~XT_QUOTA_MASK) - return false; + return -EINVAL; q->master = kmalloc(sizeof(*q->master), GFP_KERNEL); if (q->master == NULL) - return false; + return -EINVAL; q->master->quota = q->quota; - return true; + return 0; } static void quota_mt_destroy(const struct xt_mtdtor_param *par) diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index 3b5e3d613b18..0b5c6122737d 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -109,12 +109,12 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) info->est1 = est1; info->est2 = est2; - return true; + return 0; err2: xt_rateest_put(est1); err1: - return false; + return -EINVAL; } static void xt_rateest_mt_destroy(const struct xt_mtdtor_param *par) diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 52042c8bf7f2..0994ff54a731 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -314,7 +314,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par) struct proc_dir_entry *pde; #endif unsigned i; - bool ret = false; + int ret = -EINVAL; if (unlikely(!hash_rnd_inited)) { get_random_bytes(&hash_rnd, sizeof(hash_rnd)); @@ -323,33 +323,33 @@ static int recent_mt_check(const struct xt_mtchk_param 
*par) if (info->check_set & ~XT_RECENT_VALID_FLAGS) { pr_info("Unsupported user space flags (%08x)\n", info->check_set); - return false; + return -EINVAL; } if (hweight8(info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE | XT_RECENT_CHECK | XT_RECENT_UPDATE)) != 1) - return false; + return -EINVAL; if ((info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE)) && (info->seconds || info->hit_count || (info->check_set & XT_RECENT_MODIFIERS))) - return false; + return -EINVAL; if ((info->check_set & XT_RECENT_REAP) && !info->seconds) - return false; + return -EINVAL; if (info->hit_count > ip_pkt_list_tot) { pr_info("hitcount (%u) is larger than " "packets to be remembered (%u)\n", info->hit_count, ip_pkt_list_tot); - return false; + return -EINVAL; } if (info->name[0] == '\0' || strnlen(info->name, XT_RECENT_NAME_LEN) == XT_RECENT_NAME_LEN) - return false; + return -EINVAL; mutex_lock(&recent_mutex); t = recent_table_lookup(recent_net, info->name); if (t != NULL) { t->refcnt++; - ret = true; + ret = 0; goto out; } @@ -375,7 +375,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par) spin_lock_bh(&recent_lock); list_add_tail(&t->list, &recent_net->tables); spin_unlock_bh(&recent_lock); - ret = true; + ret = 0; out: mutex_unlock(&recent_mutex); return ret; diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index 5037a7a0059c..c3694df54672 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -149,17 +149,17 @@ static int sctp_mt_check(const struct xt_mtchk_param *par) const struct xt_sctp_info *info = par->matchinfo; if (info->flags & ~XT_SCTP_VALID_FLAGS) - return false; + return -EINVAL; if (info->invflags & ~XT_SCTP_VALID_FLAGS) - return false; + return -EINVAL; if (info->invflags & ~info->flags) - return false; + return -EINVAL; if (!(info->flags & XT_SCTP_CHUNK_TYPES)) - return true; + return 0; if (info->chunk_match_type & (SCTP_CHUNK_MATCH_ALL | SCTP_CHUNK_MATCH_ANY | SCTP_CHUNK_MATCH_ONLY)) - return true; - return false; + return 0; + return -EINVAL; } static struct xt_match sctp_mt_reg[] __read_mostly = { diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index 8b15b1317f1f..8e8c9df51784 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -42,9 +42,9 @@ static int state_mt_check(const struct xt_mtchk_param *par) if (nf_ct_l3proto_try_module_get(par->family) < 0) { pr_info("cannot load conntrack support for proto=%u\n", par->family); - return false; + return -EINVAL; } - return true; + return 0; } static void state_mt_destroy(const struct xt_mtdtor_param *par) diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index a577ab008f57..29d76f8f1880 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c @@ -58,14 +58,14 @@ static int statistic_mt_check(const struct xt_mtchk_param *par) if (info->mode > XT_STATISTIC_MODE_MAX || info->flags & ~XT_STATISTIC_MASK) - return false; + return -EINVAL; info->master = kzalloc(sizeof(*info->master), GFP_KERNEL); if (info->master == NULL) - return false; + return -EINVAL; info->master->count = info->u.nth.count; - return true; + return 0; } static void statistic_mt_destroy(const struct xt_mtdtor_param *par) diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index 7d1412154e27..e1f22a7a4152 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -48,26 +48,25 @@ static int string_mt_check(const struct xt_mtchk_param *par) /* Damn, can't handle this case properly with iptables... 
*/ if (conf->from_offset > conf->to_offset) - return false; + return -EINVAL; if (conf->algo[XT_STRING_MAX_ALGO_NAME_SIZE - 1] != '\0') - return false; + return -EINVAL; if (conf->patlen > XT_STRING_MAX_PATTERN_SIZE) - return false; + return -EINVAL; if (par->match->revision == 1) { if (conf->u.v1.flags & ~(XT_STRING_FLAG_IGNORECASE | XT_STRING_FLAG_INVERT)) - return false; + return -EINVAL; if (conf->u.v1.flags & XT_STRING_FLAG_IGNORECASE) flags |= TS_IGNORECASE; } ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen, GFP_KERNEL, flags); if (IS_ERR(ts_conf)) - return false; + return -EINVAL; conf->config = ts_conf; - - return true; + return 0; } static void string_mt_destroy(const struct xt_mtdtor_param *par) diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index 00728410099f..efa2ede24ae6 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c @@ -125,7 +125,7 @@ static int tcp_mt_check(const struct xt_mtchk_param *par) const struct xt_tcp *tcpinfo = par->matchinfo; /* Must specify no unknown invflags */ - return !(tcpinfo->invflags & ~XT_TCP_INV_MASK); + return (tcpinfo->invflags & ~XT_TCP_INV_MASK) ? -EINVAL : 0; } static bool udp_mt(const struct sk_buff *skb, const struct xt_match_param *par) @@ -160,7 +160,7 @@ static int udp_mt_check(const struct xt_mtchk_param *par) const struct xt_udp *udpinfo = par->matchinfo; /* Must specify no unknown invflags */ - return !(udpinfo->invflags & ~XT_UDP_INV_MASK); + return (udpinfo->invflags & ~XT_UDP_INV_MASK) ? -EINVAL : 0; } static struct xt_match tcpudp_mt_reg[] __read_mostly = { diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index db74f4fd57df..8dde5e51ff19 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -225,10 +225,10 @@ static int time_mt_check(const struct xt_mtchk_param *par) info->daytime_stop > XT_TIME_MAX_DAYTIME) { pr_info("invalid argument - start or " "stop time greater than 23:59:59\n"); - return false; + return -EINVAL; } - return true; + return 0; } static struct xt_match xt_time_mt_reg __read_mostly = { -- cgit v1.2.2 From d6b00a5345ce4e86e8b00a88bb84a2c0c1f69ddc Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 25 Mar 2010 16:34:45 +0100 Subject: netfilter: xtables: change targets to return error code Part of the transition of done by this semantic patch: // @ rule1 @ struct xt_target ops; identifier check; @@ ops.checkentry = check; @@ identifier rule1.check; @@ check(...) { <... -return true; +return 0; ...> } @@ identifier rule1.check; @@ check(...) { <... 
-return false; +return -EINVAL; ...> } // Signed-off-by: Jan Engelhardt --- net/bridge/netfilter/ebt_arpreply.c | 6 +++--- net/bridge/netfilter/ebt_dnat.c | 8 ++++---- net/bridge/netfilter/ebt_log.c | 6 +++--- net/bridge/netfilter/ebt_mark.c | 8 ++++---- net/bridge/netfilter/ebt_nflog.c | 4 ++-- net/bridge/netfilter/ebt_redirect.c | 8 ++++---- net/bridge/netfilter/ebt_snat.c | 8 ++++---- net/bridge/netfilter/ebt_ulog.c | 4 ++-- net/ipv4/netfilter/ipt_CLUSTERIP.c | 16 ++++++++-------- net/ipv4/netfilter/ipt_ECN.c | 8 ++++---- net/ipv4/netfilter/ipt_LOG.c | 6 +++--- net/ipv4/netfilter/ipt_MASQUERADE.c | 6 +++--- net/ipv4/netfilter/ipt_NETMAP.c | 6 +++--- net/ipv4/netfilter/ipt_REDIRECT.c | 6 +++--- net/ipv4/netfilter/ipt_REJECT.c | 6 +++--- net/ipv4/netfilter/ipt_ULOG.c | 6 +++--- net/ipv4/netfilter/nf_nat_rule.c | 8 ++++---- net/ipv6/netfilter/ip6t_LOG.c | 6 +++--- net/ipv6/netfilter/ip6t_REJECT.c | 6 +++--- net/netfilter/x_tables.c | 12 ++++++++++-- net/netfilter/xt_CONNSECMARK.c | 6 +++--- net/netfilter/xt_CT.c | 6 +++--- net/netfilter/xt_DSCP.c | 4 ++-- net/netfilter/xt_HL.c | 10 +++++----- net/netfilter/xt_LED.c | 10 ++++------ net/netfilter/xt_NFLOG.c | 6 +++--- net/netfilter/xt_NFQUEUE.c | 6 +++--- net/netfilter/xt_RATEEST.c | 9 ++++----- net/netfilter/xt_SECMARK.c | 10 +++++----- net/netfilter/xt_TCPMSS.c | 12 ++++++------ net/netfilter/xt_TPROXY.c | 4 ++-- 31 files changed, 116 insertions(+), 111 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c index 2491564e9e08..4581adb27583 100644 --- a/net/bridge/netfilter/ebt_arpreply.c +++ b/net/bridge/netfilter/ebt_arpreply.c @@ -63,11 +63,11 @@ static int ebt_arpreply_tg_check(const struct xt_tgchk_param *par) const struct ebt_entry *e = par->entryinfo; if (BASE_CHAIN && info->target == EBT_RETURN) - return false; + return -EINVAL; if (e->ethproto != htons(ETH_P_ARP) || e->invflags & EBT_IPROTO) - return false; - return true; + return -EINVAL; + return 0; } static struct xt_target ebt_arpreply_tg_reg __read_mostly = { diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c index 5fddebea45c2..59d5b7c8a557 100644 --- a/net/bridge/netfilter/ebt_dnat.c +++ b/net/bridge/netfilter/ebt_dnat.c @@ -32,7 +32,7 @@ static int ebt_dnat_tg_check(const struct xt_tgchk_param *par) unsigned int hook_mask; if (BASE_CHAIN && info->target == EBT_RETURN) - return false; + return -EINVAL; hook_mask = par->hook_mask & ~(1 << NF_BR_NUMHOOKS); if ((strcmp(par->table, "nat") != 0 || @@ -40,10 +40,10 @@ static int ebt_dnat_tg_check(const struct xt_tgchk_param *par) (1 << NF_BR_LOCAL_OUT)))) && (strcmp(par->table, "broute") != 0 || hook_mask & ~(1 << NF_BR_BROUTING))) - return false; + return -EINVAL; if (INVALID_TARGET) - return false; - return true; + return -EINVAL; + return 0; } static struct xt_target ebt_dnat_tg_reg __read_mostly = { diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index a0aeac6176ee..c46024156539 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -29,11 +29,11 @@ static int ebt_log_tg_check(const struct xt_tgchk_param *par) struct ebt_log_info *info = par->targinfo; if (info->bitmask & ~EBT_LOG_MASK) - return false; + return -EINVAL; if (info->loglevel >= 8) - return false; + return -EINVAL; info->prefix[EBT_LOG_PREFIX_SIZE - 1] = '\0'; - return true; + return 0; } struct tcpudphdr diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c index dd94dafa6155..126e536ff8f4 
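The target-side conversion follows the same rule1 pattern as the match side, and the xt_check_target() hunk below adds the same safety net in the core: a negative errno is handed back to userspace unchanged, while a positive value, which can only come from a not-yet-converted extension still returning true, is flagged as -EIO. The sketch below condenses that behaviour into a standalone helper; run_checkentry() is a made-up name for illustration, and in the patch the equivalent logic sits directly inside xt_check_target() next to the existing size and protocol checks.

#include <linux/errno.h>
#include <linux/netfilter/x_tables.h>

/* Condensed illustration of how the core consumes a target's checkentry()
 * after this series.  run_checkentry() is a made-up name for this sketch;
 * the real logic lives inline in xt_check_target().
 */
static int run_checkentry(const struct xt_tgchk_param *par)
{
        int ret;

        if (par->target->checkentry == NULL)
                return 0;

        ret = par->target->checkentry(par);
        if (ret < 0)
                return ret;     /* specific errno, reported to userspace */
        if (ret > 0)
                return -EIO;    /* unconverted extension returned "true" */
        return 0;
}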
100644 --- a/net/bridge/netfilter/ebt_mark.c +++ b/net/bridge/netfilter/ebt_mark.c @@ -43,14 +43,14 @@ static int ebt_mark_tg_check(const struct xt_tgchk_param *par) tmp = info->target | ~EBT_VERDICT_BITS; if (BASE_CHAIN && tmp == EBT_RETURN) - return false; + return -EINVAL; if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0) - return false; + return -EINVAL; tmp = info->target & ~EBT_VERDICT_BITS; if (tmp != MARK_SET_VALUE && tmp != MARK_OR_VALUE && tmp != MARK_AND_VALUE && tmp != MARK_XOR_VALUE) - return false; - return true; + return -EINVAL; + return 0; } #ifdef CONFIG_COMPAT struct compat_ebt_mark_t_info { diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c index 1f2b7bbdde73..22e2ad5f23e8 100644 --- a/net/bridge/netfilter/ebt_nflog.c +++ b/net/bridge/netfilter/ebt_nflog.c @@ -40,9 +40,9 @@ static int ebt_nflog_tg_check(const struct xt_tgchk_param *par) struct ebt_nflog_info *info = par->targinfo; if (info->flags & ~EBT_NFLOG_MASK) - return false; + return -EINVAL; info->prefix[EBT_NFLOG_PREFIX_SIZE - 1] = '\0'; - return true; + return 0; } static struct xt_target ebt_nflog_tg_reg __read_mostly = { diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c index 73c4d3ac6f2e..a6044a6f2383 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -38,17 +38,17 @@ static int ebt_redirect_tg_check(const struct xt_tgchk_param *par) unsigned int hook_mask; if (BASE_CHAIN && info->target == EBT_RETURN) - return false; + return -EINVAL; hook_mask = par->hook_mask & ~(1 << NF_BR_NUMHOOKS); if ((strcmp(par->table, "nat") != 0 || hook_mask & ~(1 << NF_BR_PRE_ROUTING)) && (strcmp(par->table, "broute") != 0 || hook_mask & ~(1 << NF_BR_BROUTING))) - return false; + return -EINVAL; if (INVALID_TARGET) - return false; - return true; + return -EINVAL; + return 0; } static struct xt_target ebt_redirect_tg_reg __read_mostly = { diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c index 94bcecd90d74..79caca34ae2b 100644 --- a/net/bridge/netfilter/ebt_snat.c +++ b/net/bridge/netfilter/ebt_snat.c @@ -49,14 +49,14 @@ static int ebt_snat_tg_check(const struct xt_tgchk_param *par) tmp = info->target | ~EBT_VERDICT_BITS; if (BASE_CHAIN && tmp == EBT_RETURN) - return false; + return -EINVAL; if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0) - return false; + return -EINVAL; tmp = info->target | EBT_VERDICT_BITS; if ((tmp & ~NAT_ARP_BIT) != ~NAT_ARP_BIT) - return false; - return true; + return -EINVAL; + return 0; } static struct xt_target ebt_snat_tg_reg __read_mostly = { diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index f554bc2515d6..f77b42d8e87d 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -254,14 +254,14 @@ static int ebt_ulog_tg_check(const struct xt_tgchk_param *par) struct ebt_ulog_info *uloginfo = par->targinfo; if (uloginfo->nlgroup > 31) - return false; + return -EINVAL; uloginfo->prefix[EBT_ULOG_PREFIX_LEN - 1] = '\0'; if (uloginfo->qthreshold > EBT_ULOG_MAX_QLEN) uloginfo->qthreshold = EBT_ULOG_MAX_QLEN; - return true; + return 0; } static struct xt_target ebt_ulog_tg_reg __read_mostly = { diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 290a7b9b393e..1302de2ae0ae 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -358,13 +358,13 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) cipinfo->hash_mode != 
CLUSTERIP_HASHMODE_SIP_SPT && cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) { pr_info("unknown mode %u\n", cipinfo->hash_mode); - return false; + return -EINVAL; } if (e->ip.dmsk.s_addr != htonl(0xffffffff) || e->ip.dst.s_addr == 0) { pr_info("Please specify destination IP\n"); - return false; + return -EINVAL; } /* FIXME: further sanity checks */ @@ -374,20 +374,20 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { pr_info("no config found for %pI4, need 'new'\n", &e->ip.dst.s_addr); - return false; + return -EINVAL; } else { struct net_device *dev; if (e->ip.iniface[0] == '\0') { pr_info("Please specify an interface name\n"); - return false; + return -EINVAL; } dev = dev_get_by_name(&init_net, e->ip.iniface); if (!dev) { pr_info("no such interface %s\n", e->ip.iniface); - return false; + return -EINVAL; } config = clusterip_config_init(cipinfo, @@ -395,7 +395,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) if (!config) { pr_info("cannot allocate config\n"); dev_put(dev); - return false; + return -EINVAL; } dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); } @@ -405,10 +405,10 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) if (nf_ct_l3proto_try_module_get(par->family) < 0) { pr_info("cannot load conntrack support for proto=%u\n", par->family); - return false; + return -EINVAL; } - return true; + return 0; } /* drop reference count of cluster config when rule is deleted */ diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 9d96500a4157..563049f31aef 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -100,18 +100,18 @@ static int ecn_tg_check(const struct xt_tgchk_param *par) if (einfo->operation & IPT_ECN_OP_MASK) { pr_info("unsupported ECN operation %x\n", einfo->operation); - return false; + return -EINVAL; } if (einfo->ip_ect & ~IPT_ECN_IP_MASK) { pr_info("new ECT codepoint %x out of mask\n", einfo->ip_ect); - return false; + return -EINVAL; } if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) && (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) { pr_info("cannot use TCP operations on a non-tcp rule\n"); - return false; + return -EINVAL; } - return true; + return 0; } static struct xt_target ecn_tg_reg __read_mostly = { diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index c9ee5c40d1bb..a6a454b25502 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -445,13 +445,13 @@ static int log_tg_check(const struct xt_tgchk_param *par) if (loginfo->level >= 8) { pr_debug("level %u >= 8\n", loginfo->level); - return false; + return -EINVAL; } if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') { pr_debug("prefix is not null-terminated\n"); - return false; + return -EINVAL; } - return true; + return 0; } static struct xt_target log_tg_reg __read_mostly = { diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 5a182f6de5d5..02b1bc477998 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -34,13 +34,13 @@ static int masquerade_tg_check(const struct xt_tgchk_param *par) if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { pr_debug("bad MAP_IPS.\n"); - return false; + return -EINVAL; } if (mr->rangesize != 1) { pr_debug("bad rangesize %u\n", mr->rangesize); - return false; + return -EINVAL; } - return true; + return 0; } static unsigned int diff --git 
a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index cbfe5f7e082a..708c7f8f7eea 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -28,13 +28,13 @@ static int netmap_tg_check(const struct xt_tgchk_param *par) if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) { pr_debug("bad MAP_IPS.\n"); - return false; + return -EINVAL; } if (mr->rangesize != 1) { pr_debug("bad rangesize %u.\n", mr->rangesize); - return false; + return -EINVAL; } - return true; + return 0; } static unsigned int diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index f8daec20fb04..3cf101916523 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -32,13 +32,13 @@ static int redirect_tg_check(const struct xt_tgchk_param *par) if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { pr_debug("bad MAP_IPS.\n"); - return false; + return -EINVAL; } if (mr->rangesize != 1) { pr_debug("bad rangesize %u.\n", mr->rangesize); - return false; + return -EINVAL; } - return true; + return 0; } static unsigned int diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index cf76f1bc3f10..b026014e7a5b 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -181,16 +181,16 @@ static int reject_tg_check(const struct xt_tgchk_param *par) if (rejinfo->with == IPT_ICMP_ECHOREPLY) { pr_info("ECHOREPLY no longer supported.\n"); - return false; + return -EINVAL; } else if (rejinfo->with == IPT_TCP_RESET) { /* Must specify that it's a TCP packet */ if (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO)) { pr_info("TCP_RESET invalid for non-tcp\n"); - return false; + return -EINVAL; } } - return true; + return 0; } static struct xt_target reject_tg_reg __read_mostly = { diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 7f73bbe2193c..04c86dc5d538 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -313,14 +313,14 @@ static int ulog_tg_check(const struct xt_tgchk_param *par) if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') { pr_debug("prefix not null-terminated\n"); - return false; + return -EINVAL; } if (loginfo->qthreshold > ULOG_MAX_QLEN) { pr_debug("queue threshold %Zu > MAX_QLEN\n", loginfo->qthreshold); - return false; + return -EINVAL; } - return true; + return 0; } #ifdef CONFIG_COMPAT diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 117226708738..b66137c80bc7 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -81,9 +81,9 @@ static int ipt_snat_checkentry(const struct xt_tgchk_param *par) /* Must be a valid range */ if (mr->rangesize != 1) { pr_info("SNAT: multiple ranges no longer supported\n"); - return false; + return -EINVAL; } - return true; + return 0; } static int ipt_dnat_checkentry(const struct xt_tgchk_param *par) @@ -93,9 +93,9 @@ static int ipt_dnat_checkentry(const struct xt_tgchk_param *par) /* Must be a valid range */ if (mr->rangesize != 1) { pr_info("DNAT: multiple ranges no longer supported\n"); - return false; + return -EINVAL; } - return true; + return 0; } unsigned int diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index bcc3fc19374a..439ededd5300 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -457,13 +457,13 @@ static int log_tg6_check(const struct xt_tgchk_param *par) if (loginfo->level >= 8) { pr_debug("level %u >= 8\n", 
loginfo->level); - return false; + return -EINVAL; } if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') { pr_debug("prefix not null-terminated\n"); - return false; + return -EINVAL; } - return true; + return 0; } static struct xt_target log_tg6_reg __read_mostly = { diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 8d5141ece671..55b9b2da1340 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -220,16 +220,16 @@ static int reject_tg6_check(const struct xt_tgchk_param *par) if (rejinfo->with == IP6T_ICMP6_ECHOREPLY) { pr_info("ECHOREPLY is not supported.\n"); - return false; + return -EINVAL; } else if (rejinfo->with == IP6T_TCP_RESET) { /* Must specify that it's a TCP packet */ if (e->ipv6.proto != IPPROTO_TCP || (e->ipv6.invflags & XT_INV_PROTO)) { pr_info("TCP_RESET illegal for non-tcp\n"); - return false; + return -EINVAL; } } - return true; + return 0; } static struct xt_target reject_tg6_reg __read_mostly = { diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 7ee177746172..8e23d8f68459 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -528,6 +528,8 @@ EXPORT_SYMBOL_GPL(xt_compat_match_to_user); int xt_check_target(struct xt_tgchk_param *par, unsigned int size, u_int8_t proto, bool inv_proto) { + int ret; + if (XT_ALIGN(par->target->targetsize) != size) { pr_err("%s_tables: %s.%u target: invalid size " "%u (kernel) != (user) %u\n", @@ -559,8 +561,14 @@ int xt_check_target(struct xt_tgchk_param *par, par->target->proto); return -EINVAL; } - if (par->target->checkentry != NULL && !par->target->checkentry(par)) - return -EINVAL; + if (par->target->checkentry != NULL) { + ret = par->target->checkentry(par); + if (ret < 0) + return ret; + else if (ret > 0) + /* Flag up potential errors. 
*/ + return -EIO; + } return 0; } EXPORT_SYMBOL_GPL(xt_check_target); diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index 3f9d0f4f852d..2287a82a0703 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -92,7 +92,7 @@ static int connsecmark_tg_check(const struct xt_tgchk_param *par) strcmp(par->table, "security") != 0) { pr_info("target only valid in the \'mangle\' " "or \'security\' tables, not \'%s\'.\n", par->table); - return false; + return -EINVAL; } switch (info->mode) { @@ -108,9 +108,9 @@ static int connsecmark_tg_check(const struct xt_tgchk_param *par) if (nf_ct_l3proto_try_module_get(par->family) < 0) { pr_info("cannot load conntrack support for proto=%u\n", par->family); - return false; + return -EINVAL; } - return true; + return 0; } static void connsecmark_tg_destroy(const struct xt_tgdtor_param *par) diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index c1553bf06cf6..ee566e2e4534 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -62,7 +62,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par) u8 proto; if (info->flags & ~XT_CT_NOTRACK) - return false; + return -EINVAL; if (info->flags & XT_CT_NOTRACK) { ct = &nf_conntrack_untracked; @@ -108,14 +108,14 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par) __set_bit(IPS_CONFIRMED_BIT, &ct->status); out: info->ct = ct; - return true; + return 0; err3: nf_conntrack_free(ct); err2: nf_ct_l3proto_module_put(par->family); err1: - return false; + return -EINVAL; } static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par) diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index 1fa7b67bf225..aa263b80f8c0 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -66,9 +66,9 @@ static int dscp_tg_check(const struct xt_tgchk_param *par) if (info->dscp > XT_DSCP_MAX) { pr_info("dscp %x out of range\n", info->dscp); - return false; + return -EINVAL; } - return true; + return 0; } static unsigned int diff --git a/net/netfilter/xt_HL.c b/net/netfilter/xt_HL.c index 15ba16108182..7a47383ec723 100644 --- a/net/netfilter/xt_HL.c +++ b/net/netfilter/xt_HL.c @@ -110,8 +110,8 @@ static int ttl_tg_check(const struct xt_tgchk_param *par) return false; } if (info->mode != IPT_TTL_SET && info->ttl == 0) - return false; - return true; + return -EINVAL; + return 0; } static int hl_tg6_check(const struct xt_tgchk_param *par) @@ -120,14 +120,14 @@ static int hl_tg6_check(const struct xt_tgchk_param *par) if (info->mode > IP6T_HL_MAXMODE) { pr_info("invalid or unknown mode %u\n", info->mode); - return false; + return -EINVAL; } if (info->mode != IP6T_HL_SET && info->hop_limit == 0) { pr_info("increment/decrement does not " "make sense with value 0\n"); - return false; + return -EINVAL; } - return true; + return 0; } static struct xt_target hl_tg_reg[] __read_mostly = { diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c index 1a3e3dd5a774..22b5b7057397 100644 --- a/net/netfilter/xt_LED.c +++ b/net/netfilter/xt_LED.c @@ -88,12 +88,12 @@ static int led_tg_check(const struct xt_tgchk_param *par) if (ledinfo->id[0] == '\0') { pr_info("No 'id' parameter given.\n"); - return false; + return -EINVAL; } ledinternal = kzalloc(sizeof(struct xt_led_info_internal), GFP_KERNEL); if (!ledinternal) - return false; + return -EINVAL; ledinternal->netfilter_led_trigger.name = ledinfo->id; @@ -111,13 +111,11 @@ static int led_tg_check(const struct xt_tgchk_param *par) (unsigned long)ledinfo); ledinfo->internal_data = ledinternal; - 
- return true; + return 0; exit_alloc: kfree(ledinternal); - - return false; + return -EINVAL; } static void led_tg_destroy(const struct xt_tgdtor_param *par) diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index 13e6c0002c8a..42dd8747b421 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -42,10 +42,10 @@ static int nflog_tg_check(const struct xt_tgchk_param *par) const struct xt_nflog_info *info = par->targinfo; if (info->flags & ~XT_NFLOG_MASK) - return false; + return -EINVAL; if (info->prefix[sizeof(info->prefix) - 1] != '\0') - return false; - return true; + return -EINVAL; + return 0; } static struct xt_target nflog_tg_reg __read_mostly = { diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index d435579a64ca..add1789ae4a8 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -92,15 +92,15 @@ static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par) } if (info->queues_total == 0) { pr_err("NFQUEUE: number of total queues is 0\n"); - return false; + return -EINVAL; } maxid = info->queues_total - 1 + info->queuenum; if (maxid > 0xffff) { pr_err("NFQUEUE: number of queues (%u) out of range (got %u)\n", info->queues_total, maxid); - return false; + return -EINVAL; } - return true; + return 0; } static struct xt_target nfqueue_tg_reg[] __read_mostly = { diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 9743e50be8ef..7af5fba39cdd 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -109,10 +109,10 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) (info->interval != est->params.interval || info->ewma_log != est->params.ewma_log)) { xt_rateest_put(est); - return false; + return -EINVAL; } info->est = est; - return true; + return 0; } est = kzalloc(sizeof(*est), GFP_KERNEL); @@ -136,13 +136,12 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) info->est = est; xt_rateest_hash_insert(est); - - return true; + return 0; err2: kfree(est); err1: - return false; + return -EINVAL; } static void xt_rateest_tg_destroy(const struct xt_tgdtor_param *par) diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index 48f8e4f7ea8a..39098fc9887d 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -88,29 +88,29 @@ static int secmark_tg_check(const struct xt_tgchk_param *par) strcmp(par->table, "security") != 0) { pr_info("target only valid in the \'mangle\' " "or \'security\' tables, not \'%s\'.\n", par->table); - return false; + return -EINVAL; } if (mode && mode != info->mode) { pr_info("mode already set to %hu cannot mix with " "rules for mode %hu\n", mode, info->mode); - return false; + return -EINVAL; } switch (info->mode) { case SECMARK_MODE_SEL: if (!checkentry_selinux(info)) - return false; + return -EINVAL; break; default: pr_info("invalid mode: %hu\n", info->mode); - return false; + return -EINVAL; } if (!mode) mode = info->mode; - return true; + return 0; } static void secmark_tg_destroy(const struct xt_tgdtor_param *par) diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 70288dc31583..385677b963d5 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -246,13 +246,13 @@ static int tcpmss_tg4_check(const struct xt_tgchk_param *par) (1 << NF_INET_POST_ROUTING))) != 0) { pr_info("path-MTU clamping only supported in " "FORWARD, OUTPUT and POSTROUTING hooks\n"); - return false; + return -EINVAL; } xt_ematch_foreach(ematch, e) if 
(find_syn_match(ematch)) - return true; + return 0; pr_info("Only works on TCP SYN packets\n"); - return false; + return -EINVAL; } #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) @@ -268,13 +268,13 @@ static int tcpmss_tg6_check(const struct xt_tgchk_param *par) (1 << NF_INET_POST_ROUTING))) != 0) { pr_info("path-MTU clamping only supported in " "FORWARD, OUTPUT and POSTROUTING hooks\n"); - return false; + return -EINVAL; } xt_ematch_foreach(ematch, e) if (find_syn_match(ematch)) - return true; + return 0; pr_info("Only works on TCP SYN packets\n"); - return false; + return -EINVAL; } #endif diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 189df9af4de6..4f246ddc5c48 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -65,11 +65,11 @@ static int tproxy_tg_check(const struct xt_tgchk_param *par) if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) && !(i->invflags & IPT_INV_PROTO)) - return true; + return 0; pr_info("Can be used only in combination with " "either -p tcp or -p udp\n"); - return false; + return -EINVAL; } static struct xt_target tproxy_tg_reg __read_mostly = { -- cgit v1.2.2 From 4a5a5c73b7cfee46a0b1411903cfa0dea532deec Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 19 Mar 2010 17:32:59 +0100 Subject: netfilter: xtables: slightly better error reporting When extended status codes are available, such as ENOMEM on failed allocations, or subsequent functions (e.g. nf_ct_get_l3proto), passing them up to userspace seems like a good idea compared to just always EINVAL. Signed-off-by: Jan Engelhardt --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 11 ++++++----- net/netfilter/xt_CONNSECMARK.c | 8 +++++--- net/netfilter/xt_CT.c | 11 +++++++++-- net/netfilter/xt_DSCP.c | 2 +- net/netfilter/xt_HL.c | 2 +- net/netfilter/xt_LED.c | 4 ++-- net/netfilter/xt_NFQUEUE.c | 2 +- net/netfilter/xt_RATEEST.c | 9 ++++++--- net/netfilter/xt_SECMARK.c | 16 +++++++++------- net/netfilter/xt_cluster.c | 2 +- net/netfilter/xt_connbytes.c | 6 ++++-- net/netfilter/xt_connlimit.c | 8 +++++--- net/netfilter/xt_connmark.c | 14 ++++++++++---- net/netfilter/xt_conntrack.c | 7 +++++-- net/netfilter/xt_dscp.c | 2 +- net/netfilter/xt_hashlimit.c | 32 ++++++++++++++++++++------------ net/netfilter/xt_helper.c | 6 ++++-- net/netfilter/xt_limit.c | 4 ++-- net/netfilter/xt_quota.c | 2 +- net/netfilter/xt_rateest.c | 2 ++ net/netfilter/xt_recent.c | 5 ++++- net/netfilter/xt_state.c | 7 +++++-- net/netfilter/xt_statistic.c | 2 +- net/netfilter/xt_string.c | 2 +- net/netfilter/xt_time.c | 2 +- 25 files changed, 107 insertions(+), 61 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 1302de2ae0ae..1faf5fa06ac8 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -351,8 +351,8 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) { struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; const struct ipt_entry *e = par->entryinfo; - struct clusterip_config *config; + int ret; if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP && cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT && @@ -387,7 +387,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) if (!dev) { pr_info("no such interface %s\n", e->ip.iniface); - return -EINVAL; + return -ENOENT; } config = clusterip_config_init(cipinfo, @@ -395,17 +395,18 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) if (!config) { pr_info("cannot allocate 
config\n"); dev_put(dev); - return -EINVAL; + return -ENOMEM; } dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); } } cipinfo->config = config; - if (nf_ct_l3proto_try_module_get(par->family) < 0) { + ret = nf_ct_l3proto_try_module_get(par->family); + if (ret < 0) { pr_info("cannot load conntrack support for proto=%u\n", par->family); - return -EINVAL; + return ret; } return 0; diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index 2287a82a0703..105a62e985d3 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -87,6 +87,7 @@ connsecmark_tg(struct sk_buff *skb, const struct xt_target_param *par) static int connsecmark_tg_check(const struct xt_tgchk_param *par) { const struct xt_connsecmark_target_info *info = par->targinfo; + int ret; if (strcmp(par->table, "mangle") != 0 && strcmp(par->table, "security") != 0) { @@ -102,13 +103,14 @@ static int connsecmark_tg_check(const struct xt_tgchk_param *par) default: pr_info("invalid mode: %hu\n", info->mode); - return false; + return -EINVAL; } - if (nf_ct_l3proto_try_module_get(par->family) < 0) { + ret = nf_ct_l3proto_try_module_get(par->family); + if (ret < 0) { pr_info("cannot load conntrack support for proto=%u\n", par->family); - return -EINVAL; + return ret; } return 0; } diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index ee566e2e4534..65dd348ae361 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -59,6 +59,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par) struct nf_conntrack_tuple t; struct nf_conn_help *help; struct nf_conn *ct; + int ret = 0; u8 proto; if (info->flags & ~XT_CT_NOTRACK) @@ -75,28 +76,34 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par) goto err1; #endif - if (nf_ct_l3proto_try_module_get(par->family) < 0) + ret = nf_ct_l3proto_try_module_get(par->family); + if (ret < 0) goto err1; memset(&t, 0, sizeof(t)); ct = nf_conntrack_alloc(par->net, info->zone, &t, &t, GFP_KERNEL); + ret = PTR_ERR(ct); if (IS_ERR(ct)) goto err2; + ret = 0; if ((info->ct_events || info->exp_events) && !nf_ct_ecache_ext_add(ct, info->ct_events, info->exp_events, GFP_KERNEL)) goto err3; if (info->helper[0]) { + ret = -ENOENT; proto = xt_ct_find_proto(par); if (!proto) goto err3; + ret = -ENOMEM; help = nf_ct_helper_ext_add(ct, GFP_KERNEL); if (help == NULL) goto err3; + ret = -ENOENT; help->helper = nf_conntrack_helper_try_module_get(info->helper, par->family, proto); @@ -115,7 +122,7 @@ err3: err2: nf_ct_l3proto_module_put(par->family); err1: - return -EINVAL; + return ret; } static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par) diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index aa263b80f8c0..969634f293e5 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -66,7 +66,7 @@ static int dscp_tg_check(const struct xt_tgchk_param *par) if (info->dscp > XT_DSCP_MAX) { pr_info("dscp %x out of range\n", info->dscp); - return -EINVAL; + return -EDOM; } return 0; } diff --git a/net/netfilter/xt_HL.c b/net/netfilter/xt_HL.c index 7a47383ec723..77b99f732711 100644 --- a/net/netfilter/xt_HL.c +++ b/net/netfilter/xt_HL.c @@ -107,7 +107,7 @@ static int ttl_tg_check(const struct xt_tgchk_param *par) if (info->mode > IPT_TTL_MAXMODE) { pr_info("TTL: invalid or unknown mode %u\n", info->mode); - return false; + return -EINVAL; } if (info->mode != IPT_TTL_SET && info->ttl == 0) return -EINVAL; diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c index 22b5b7057397..efcf56db23e8 100644 --- 
a/net/netfilter/xt_LED.c +++ b/net/netfilter/xt_LED.c @@ -93,7 +93,7 @@ static int led_tg_check(const struct xt_tgchk_param *par) ledinternal = kzalloc(sizeof(struct xt_led_info_internal), GFP_KERNEL); if (!ledinternal) - return -EINVAL; + return -ENOMEM; ledinternal->netfilter_led_trigger.name = ledinfo->id; @@ -115,7 +115,7 @@ static int led_tg_check(const struct xt_tgchk_param *par) exit_alloc: kfree(ledinternal); - return -EINVAL; + return err; } static void led_tg_destroy(const struct xt_tgdtor_param *par) diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index add1789ae4a8..f9217cb56fe3 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -98,7 +98,7 @@ static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par) if (maxid > 0xffff) { pr_err("NFQUEUE: number of queues (%u) out of range (got %u)\n", info->queues_total, maxid); - return -EINVAL; + return -ERANGE; } return 0; } diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 7af5fba39cdd..40751c618e70 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -93,6 +93,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) struct nlattr opt; struct gnet_estimator est; } cfg; + int ret; if (unlikely(!rnd_inited)) { get_random_bytes(&jhash_rnd, sizeof(jhash_rnd)); @@ -115,6 +116,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) return 0; } + ret = -ENOMEM; est = kzalloc(sizeof(*est), GFP_KERNEL); if (!est) goto err1; @@ -130,8 +132,9 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) cfg.est.interval = info->interval; cfg.est.ewma_log = info->ewma_log; - if (gen_new_estimator(&est->bstats, &est->rstats, &est->lock, - &cfg.opt) < 0) + ret = gen_new_estimator(&est->bstats, &est->rstats, + &est->lock, &cfg.opt); + if (ret < 0) goto err2; info->est = est; @@ -141,7 +144,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) err2: kfree(est); err1: - return -EINVAL; + return ret; } static void xt_rateest_tg_destroy(const struct xt_tgdtor_param *par) diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index 39098fc9887d..a91d4a7d5a2c 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -50,7 +50,7 @@ secmark_tg(struct sk_buff *skb, const struct xt_target_param *par) return XT_CONTINUE; } -static bool checkentry_selinux(struct xt_secmark_target_info *info) +static int checkentry_selinux(struct xt_secmark_target_info *info) { int err; struct xt_secmark_target_selinux_info *sel = &info->u.sel; @@ -62,27 +62,28 @@ static bool checkentry_selinux(struct xt_secmark_target_info *info) if (err == -EINVAL) pr_info("invalid SELinux context \'%s\'\n", sel->selctx); - return false; + return err; } if (!sel->selsid) { pr_info("unable to map SELinux context \'%s\'\n", sel->selctx); - return false; + return -ENOENT; } err = selinux_secmark_relabel_packet_permission(sel->selsid); if (err) { pr_info("unable to obtain relabeling permission\n"); - return false; + return err; } selinux_secmark_refcount_inc(); - return true; + return 0; } static int secmark_tg_check(const struct xt_tgchk_param *par) { struct xt_secmark_target_info *info = par->targinfo; + int err; if (strcmp(par->table, "mangle") != 0 && strcmp(par->table, "security") != 0) { @@ -99,8 +100,9 @@ static int secmark_tg_check(const struct xt_tgchk_param *par) switch (info->mode) { case SECMARK_MODE_SEL: - if (!checkentry_selinux(info)) - return -EINVAL; + err = checkentry_selinux(info); + 
if (err <= 0) + return err; break; default: diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index 30cb7762fc41..6c941e1c6b9e 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -145,7 +145,7 @@ static int xt_cluster_mt_checkentry(const struct xt_mtchk_param *par) if (info->node_mask >= (1ULL << info->total_nodes)) { pr_info("this node mask cannot be " "higher than the total number of nodes\n"); - return -EINVAL; + return -EDOM; } return 0; } diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index bf8e286361c3..2ff332ecc342 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -96,6 +96,7 @@ connbytes_mt(const struct sk_buff *skb, const struct xt_match_param *par) static int connbytes_mt_check(const struct xt_mtchk_param *par) { const struct xt_connbytes_info *sinfo = par->matchinfo; + int ret; if (sinfo->what != XT_CONNBYTES_PKTS && sinfo->what != XT_CONNBYTES_BYTES && @@ -107,10 +108,11 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par) sinfo->direction != XT_CONNBYTES_DIR_BOTH) return -EINVAL; - if (nf_ct_l3proto_try_module_get(par->family) < 0) { + ret = nf_ct_l3proto_try_module_get(par->family); + if (ret < 0) { pr_info("cannot load conntrack support for proto=%u\n", par->family); - return -EINVAL; + return ret; } return 0; diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 68e89f08140b..370088ec5764 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -220,22 +220,24 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) { struct xt_connlimit_info *info = par->matchinfo; unsigned int i; + int ret; if (unlikely(!connlimit_rnd_inited)) { get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd)); connlimit_rnd_inited = true; } - if (nf_ct_l3proto_try_module_get(par->family) < 0) { + ret = nf_ct_l3proto_try_module_get(par->family); + if (ret < 0) { pr_info("cannot load conntrack support for " "address family %u\n", par->family); - return -EINVAL; + return ret; } /* init private data */ info->data = kmalloc(sizeof(struct xt_connlimit_data), GFP_KERNEL); if (info->data == NULL) { nf_ct_l3proto_module_put(par->family); - return -EINVAL; + return -ENOMEM; } spin_lock_init(&info->data->lock); diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index e137af5559e0..71e38a1fd656 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -76,10 +76,13 @@ connmark_tg(struct sk_buff *skb, const struct xt_target_param *par) static int connmark_tg_check(const struct xt_tgchk_param *par) { - if (nf_ct_l3proto_try_module_get(par->family) < 0) { + int ret; + + ret = nf_ct_l3proto_try_module_get(par->family); + if (ret < 0) { pr_info("cannot load conntrack support for proto=%u\n", par->family); - return -EINVAL; + return ret; } return 0; } @@ -105,10 +108,13 @@ connmark_mt(const struct sk_buff *skb, const struct xt_match_param *par) static int connmark_mt_check(const struct xt_mtchk_param *par) { - if (nf_ct_l3proto_try_module_get(par->family) < 0) { + int ret; + + ret = nf_ct_l3proto_try_module_get(par->family); + if (ret < 0) { pr_info("cannot load conntrack support for proto=%u\n", par->family); - return -EINVAL; + return ret; } return 0; } diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 26e34aa7f8d1..e0bcf8d2cf33 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -208,10 +208,13 @@ conntrack_mt_v2(const struct sk_buff *skb, const 
struct xt_match_param *par) static int conntrack_mt_check(const struct xt_mtchk_param *par) { - if (nf_ct_l3proto_try_module_get(par->family) < 0) { + int ret; + + ret = nf_ct_l3proto_try_module_get(par->family); + if (ret < 0) { pr_info("cannot load conntrack support for proto=%u\n", par->family); - return -EINVAL; + return ret; } return 0; } diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c index f355fb9e06fa..9db51fddbdb8 100644 --- a/net/netfilter/xt_dscp.c +++ b/net/netfilter/xt_dscp.c @@ -48,7 +48,7 @@ static int dscp_mt_check(const struct xt_mtchk_param *par) if (info->dscp > XT_DSCP_MAX) { pr_info("dscp %x out of range\n", info->dscp); - return -EINVAL; + return -EDOM; } return 0; diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 0c0152902b3b..c89fde7d1234 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -214,7 +214,7 @@ static int htable_create_v0(struct net *net, struct xt_hashlimit_info *minfo, u_ hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) + sizeof(struct list_head) * size); if (!hinfo) - return -1; + return -ENOMEM; minfo->hinfo = hinfo; /* copy match config into hashtable config */ @@ -250,7 +250,7 @@ static int htable_create_v0(struct net *net, struct xt_hashlimit_info *minfo, u_ &dl_file_ops, hinfo); if (!hinfo->pde) { vfree(hinfo); - return -1; + return -ENOMEM; } hinfo->net = net; @@ -285,7 +285,7 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) + sizeof(struct list_head) * size); if (hinfo == NULL) - return -1; + return -ENOMEM; minfo->hinfo = hinfo; /* copy match config into hashtable config */ @@ -311,7 +311,7 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, &dl_file_ops, hinfo); if (hinfo->pde == NULL) { vfree(hinfo); - return -1; + return -ENOMEM; } hinfo->net = net; @@ -675,13 +675,14 @@ static int hashlimit_mt_check_v0(const struct xt_mtchk_param *par) { struct net *net = par->net; struct xt_hashlimit_info *r = par->matchinfo; + int ret; /* Check for overflow. */ if (r->cfg.burst == 0 || user2credits(r->cfg.avg * r->cfg.burst) < user2credits(r->cfg.avg)) { pr_info("overflow, try lower: %u/%u\n", r->cfg.avg, r->cfg.burst); - return -EINVAL; + return -ERANGE; } if (r->cfg.mode == 0 || r->cfg.mode > (XT_HASHLIMIT_HASH_DPT | @@ -698,9 +699,12 @@ static int hashlimit_mt_check_v0(const struct xt_mtchk_param *par) mutex_lock(&hashlimit_mutex); r->hinfo = htable_find_get(net, r->name, par->family); - if (!r->hinfo && htable_create_v0(net, r, par->family) != 0) { - mutex_unlock(&hashlimit_mutex); - return -EINVAL; + if (r->hinfo == NULL) { + ret = htable_create_v0(net, r, par->family); + if (ret < 0) { + mutex_unlock(&hashlimit_mutex); + return ret; + } } mutex_unlock(&hashlimit_mutex); return 0; @@ -710,6 +714,7 @@ static int hashlimit_mt_check(const struct xt_mtchk_param *par) { struct net *net = par->net; struct xt_hashlimit_mtinfo1 *info = par->matchinfo; + int ret; /* Check for overflow. 
*/ if (info->cfg.burst == 0 || @@ -717,7 +722,7 @@ static int hashlimit_mt_check(const struct xt_mtchk_param *par) user2credits(info->cfg.avg)) { pr_info("overflow, try lower: %u/%u\n", info->cfg.avg, info->cfg.burst); - return -EINVAL; + return -ERANGE; } if (info->cfg.gc_interval == 0 || info->cfg.expire == 0) return -EINVAL; @@ -733,9 +738,12 @@ static int hashlimit_mt_check(const struct xt_mtchk_param *par) mutex_lock(&hashlimit_mutex); info->hinfo = htable_find_get(net, info->name, par->family); - if (!info->hinfo && htable_create(net, info, par->family) != 0) { - mutex_unlock(&hashlimit_mutex); - return -EINVAL; + if (info->hinfo == NULL) { + ret = htable_create(net, info, par->family); + if (ret < 0) { + mutex_unlock(&hashlimit_mutex); + return ret; + } } mutex_unlock(&hashlimit_mutex); return 0; diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index eb308b32bfe0..b8b3e13dc71e 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -57,11 +57,13 @@ helper_mt(const struct sk_buff *skb, const struct xt_match_param *par) static int helper_mt_check(const struct xt_mtchk_param *par) { struct xt_helper_info *info = par->matchinfo; + int ret; - if (nf_ct_l3proto_try_module_get(par->family) < 0) { + ret = nf_ct_l3proto_try_module_get(par->family); + if (ret < 0) { pr_info("cannot load conntrack support for proto=%u\n", par->family); - return -EINVAL; + return ret; } info->name[29] = '\0'; return 0; diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 5ff0580ce878..e2a284ebb415 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -107,12 +107,12 @@ static int limit_mt_check(const struct xt_mtchk_param *par) || user2credits(r->avg * r->burst) < user2credits(r->avg)) { pr_info("Overflow, try lower: %u/%u\n", r->avg, r->burst); - return -EINVAL; + return -ERANGE; } priv = kmalloc(sizeof(*priv), GFP_KERNEL); if (priv == NULL) - return -EINVAL; + return -ENOMEM; /* For SMP, we only want to use one set of state. 
*/ r->master = priv; diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index 766e71c6dc55..3e5cbd85a65b 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -52,7 +52,7 @@ static int quota_mt_check(const struct xt_mtchk_param *par) q->master = kmalloc(sizeof(*q->master), GFP_KERNEL); if (q->master == NULL) - return -EINVAL; + return -ENOMEM; q->master->quota = q->quota; return 0; diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index 0b5c6122737d..23805f8a444b 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -78,6 +78,7 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) { struct xt_rateest_match_info *info = par->matchinfo; struct xt_rateest *est1, *est2; + int ret = false; if (hweight32(info->flags & (XT_RATEEST_MATCH_ABS | XT_RATEEST_MATCH_REL)) != 1) @@ -95,6 +96,7 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) goto err1; } + ret = -ENOENT; est1 = xt_rateest_lookup(info->name1); if (!est1) goto err1; diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 0994ff54a731..0d9f80b1dd9f 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -355,8 +355,10 @@ static int recent_mt_check(const struct xt_mtchk_param *par) t = kzalloc(sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size, GFP_KERNEL); - if (t == NULL) + if (t == NULL) { + ret = -ENOMEM; goto out; + } t->refcnt = 1; strcpy(t->name, info->name); INIT_LIST_HEAD(&t->lru_list); @@ -367,6 +369,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par) &recent_mt_fops, t); if (pde == NULL) { kfree(t); + ret = -ENOMEM; goto out; } pde->uid = ip_list_uid; diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index 8e8c9df51784..2b75230d15ca 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -39,10 +39,13 @@ state_mt(const struct sk_buff *skb, const struct xt_match_param *par) static int state_mt_check(const struct xt_mtchk_param *par) { - if (nf_ct_l3proto_try_module_get(par->family) < 0) { + int ret; + + ret = nf_ct_l3proto_try_module_get(par->family); + if (ret < 0) { pr_info("cannot load conntrack support for proto=%u\n", par->family); - return -EINVAL; + return ret; } return 0; } diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index 29d76f8f1880..8ed2b2905091 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c @@ -62,7 +62,7 @@ static int statistic_mt_check(const struct xt_mtchk_param *par) info->master = kzalloc(sizeof(*info->master), GFP_KERNEL); if (info->master == NULL) - return -EINVAL; + return -ENOMEM; info->master->count = info->u.nth.count; return 0; diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index e1f22a7a4152..b0f8292db6f8 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -63,7 +63,7 @@ static int string_mt_check(const struct xt_mtchk_param *par) ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen, GFP_KERNEL, flags); if (IS_ERR(ts_conf)) - return -EINVAL; + return PTR_ERR(ts_conf); conf->config = ts_conf; return 0; diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index 8dde5e51ff19..d8556fdda440 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -225,7 +225,7 @@ static int time_mt_check(const struct xt_mtchk_param *par) info->daytime_stop > XT_TIME_MAX_DAYTIME) { pr_info("invalid argument - start or " "stop time greater than 23:59:59\n"); - return -EINVAL; + return -EDOM; } 
return 0; -- cgit v1.2.2 From f95c74e33eff5e3fe9798e2dc0a7749150ea3f80 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sun, 21 Mar 2010 04:05:56 +0100 Subject: netfilter: xtables: shorten up return clause The return value of nf_ct_l3proto_get can directly be returned even in the case of success. Signed-off-by: Jan Engelhardt --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 7 ++----- net/netfilter/xt_CONNSECMARK.c | 6 ++---- net/netfilter/xt_connbytes.c | 7 ++----- net/netfilter/xt_connmark.c | 12 ++++-------- net/netfilter/xt_conntrack.c | 6 ++---- net/netfilter/xt_state.c | 6 ++---- 6 files changed, 14 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 1faf5fa06ac8..5d70c43302bb 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -403,13 +403,10 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) cipinfo->config = config; ret = nf_ct_l3proto_try_module_get(par->family); - if (ret < 0) { + if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); - return ret; - } - - return 0; + return ret; } /* drop reference count of cluster config when rule is deleted */ diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index 105a62e985d3..e953e302141d 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -107,12 +107,10 @@ static int connsecmark_tg_check(const struct xt_tgchk_param *par) } ret = nf_ct_l3proto_try_module_get(par->family); - if (ret < 0) { + if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); - return ret; - } - return 0; + return ret; } static void connsecmark_tg_destroy(const struct xt_tgdtor_param *par) diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index 2ff332ecc342..ff738a5f963a 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -109,13 +109,10 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par) return -EINVAL; ret = nf_ct_l3proto_try_module_get(par->family); - if (ret < 0) { + if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); - return ret; - } - - return 0; + return ret; } static void connbytes_mt_destroy(const struct xt_mtdtor_param *par) diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 71e38a1fd656..ae1015484ae2 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -79,12 +79,10 @@ static int connmark_tg_check(const struct xt_tgchk_param *par) int ret; ret = nf_ct_l3proto_try_module_get(par->family); - if (ret < 0) { + if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); - return ret; - } - return 0; + return ret; } static void connmark_tg_destroy(const struct xt_tgdtor_param *par) @@ -111,12 +109,10 @@ static int connmark_mt_check(const struct xt_mtchk_param *par) int ret; ret = nf_ct_l3proto_try_module_get(par->family); - if (ret < 0) { + if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); - return ret; - } - return 0; + return ret; } static void connmark_mt_destroy(const struct xt_mtdtor_param *par) diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index e0bcf8d2cf33..3348706ce56d 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -211,12 +211,10 @@ static int conntrack_mt_check(const struct xt_mtchk_param *par) int ret; ret = 
nf_ct_l3proto_try_module_get(par->family); - if (ret < 0) { + if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); - return ret; - } - return 0; + return ret; } static void conntrack_mt_destroy(const struct xt_mtdtor_param *par) diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index 2b75230d15ca..be00d7b1f53b 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -42,12 +42,10 @@ static int state_mt_check(const struct xt_mtchk_param *par) int ret; ret = nf_ct_l3proto_try_module_get(par->family); - if (ret < 0) { + if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); - return ret; - } - return 0; + return ret; } static void state_mt_destroy(const struct xt_mtdtor_param *par) -- cgit v1.2.2 From 779dd630d8e198a2dc5581a48efae546161cc726 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 22 Mar 2010 19:28:53 +0100 Subject: netfilter: xtables: remove xt_hashlimit revision 0 Superseded by xt_hashlimit revision 1 (linux v2.6.24-6212-g09e410d, iptables 1.4.1-rc1). Signed-off-by: Jan Engelhardt --- net/netfilter/xt_hashlimit.c | 221 ------------------------------------------- 1 file changed, 221 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index c89fde7d1234..5470bb097c48 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -193,76 +193,6 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent) } static void htable_gc(unsigned long htlong); -static int htable_create_v0(struct net *net, struct xt_hashlimit_info *minfo, u_int8_t family) -{ - struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); - struct xt_hashlimit_htable *hinfo; - unsigned int size; - unsigned int i; - - if (minfo->cfg.size) - size = minfo->cfg.size; - else { - size = ((totalram_pages << PAGE_SHIFT) / 16384) / - sizeof(struct list_head); - if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE)) - size = 8192; - if (size < 16) - size = 16; - } - /* FIXME: don't use vmalloc() here or anywhere else -HW */ - hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) + - sizeof(struct list_head) * size); - if (!hinfo) - return -ENOMEM; - minfo->hinfo = hinfo; - - /* copy match config into hashtable config */ - hinfo->cfg.mode = minfo->cfg.mode; - hinfo->cfg.avg = minfo->cfg.avg; - hinfo->cfg.burst = minfo->cfg.burst; - hinfo->cfg.max = minfo->cfg.max; - hinfo->cfg.gc_interval = minfo->cfg.gc_interval; - hinfo->cfg.expire = minfo->cfg.expire; - - if (family == NFPROTO_IPV4) - hinfo->cfg.srcmask = hinfo->cfg.dstmask = 32; - else - hinfo->cfg.srcmask = hinfo->cfg.dstmask = 128; - - hinfo->cfg.size = size; - if (!hinfo->cfg.max) - hinfo->cfg.max = 8 * hinfo->cfg.size; - else if (hinfo->cfg.max < hinfo->cfg.size) - hinfo->cfg.max = hinfo->cfg.size; - - for (i = 0; i < hinfo->cfg.size; i++) - INIT_HLIST_HEAD(&hinfo->hash[i]); - - hinfo->use = 1; - hinfo->count = 0; - hinfo->family = family; - hinfo->rnd_initialized = false; - spin_lock_init(&hinfo->lock); - hinfo->pde = proc_create_data(minfo->name, 0, - (family == NFPROTO_IPV4) ? 
- hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit, - &dl_file_ops, hinfo); - if (!hinfo->pde) { - vfree(hinfo); - return -ENOMEM; - } - hinfo->net = net; - - setup_timer(&hinfo->timer, htable_gc, (unsigned long )hinfo); - hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval); - add_timer(&hinfo->timer); - - hlist_add_head(&hinfo->node, &hashlimit_net->htables); - - return 0; -} - static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, u_int8_t family) { @@ -570,57 +500,6 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, return 0; } -static bool -hashlimit_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) -{ - const struct xt_hashlimit_info *r = par->matchinfo; - struct xt_hashlimit_htable *hinfo = r->hinfo; - unsigned long now = jiffies; - struct dsthash_ent *dh; - struct dsthash_dst dst; - - if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0) - goto hotdrop; - - spin_lock_bh(&hinfo->lock); - dh = dsthash_find(hinfo, &dst); - if (!dh) { - dh = dsthash_alloc_init(hinfo, &dst); - if (!dh) { - spin_unlock_bh(&hinfo->lock); - goto hotdrop; - } - - dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire); - dh->rateinfo.prev = jiffies; - dh->rateinfo.credit = user2credits(hinfo->cfg.avg * - hinfo->cfg.burst); - dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg * - hinfo->cfg.burst); - dh->rateinfo.cost = user2credits(hinfo->cfg.avg); - } else { - /* update expiration timeout */ - dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); - rateinfo_recalc(dh, now); - } - - if (dh->rateinfo.credit >= dh->rateinfo.cost) { - /* We're underlimit. */ - dh->rateinfo.credit -= dh->rateinfo.cost; - spin_unlock_bh(&hinfo->lock); - return true; - } - - spin_unlock_bh(&hinfo->lock); - - /* default case: we're overlimit, thus don't match */ - return false; - -hotdrop: - *par->hotdrop = true; - return false; -} - static bool hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) { @@ -671,45 +550,6 @@ hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) return false; } -static int hashlimit_mt_check_v0(const struct xt_mtchk_param *par) -{ - struct net *net = par->net; - struct xt_hashlimit_info *r = par->matchinfo; - int ret; - - /* Check for overflow. 
*/ - if (r->cfg.burst == 0 || - user2credits(r->cfg.avg * r->cfg.burst) < user2credits(r->cfg.avg)) { - pr_info("overflow, try lower: %u/%u\n", - r->cfg.avg, r->cfg.burst); - return -ERANGE; - } - if (r->cfg.mode == 0 || - r->cfg.mode > (XT_HASHLIMIT_HASH_DPT | - XT_HASHLIMIT_HASH_DIP | - XT_HASHLIMIT_HASH_SIP | - XT_HASHLIMIT_HASH_SPT)) - return -EINVAL; - if (!r->cfg.gc_interval) - return -EINVAL; - if (!r->cfg.expire) - return -EINVAL; - if (r->name[sizeof(r->name) - 1] != '\0') - return -EINVAL; - - mutex_lock(&hashlimit_mutex); - r->hinfo = htable_find_get(net, r->name, par->family); - if (r->hinfo == NULL) { - ret = htable_create_v0(net, r, par->family); - if (ret < 0) { - mutex_unlock(&hashlimit_mutex); - return ret; - } - } - mutex_unlock(&hashlimit_mutex); - return 0; -} - static int hashlimit_mt_check(const struct xt_mtchk_param *par) { struct net *net = par->net; @@ -749,14 +589,6 @@ static int hashlimit_mt_check(const struct xt_mtchk_param *par) return 0; } -static void -hashlimit_mt_destroy_v0(const struct xt_mtdtor_param *par) -{ - const struct xt_hashlimit_info *r = par->matchinfo; - - htable_put(r->hinfo); -} - static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par) { const struct xt_hashlimit_mtinfo1 *info = par->matchinfo; @@ -764,46 +596,7 @@ static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par) htable_put(info->hinfo); } -#ifdef CONFIG_COMPAT -struct compat_xt_hashlimit_info { - char name[IFNAMSIZ]; - struct hashlimit_cfg cfg; - compat_uptr_t hinfo; - compat_uptr_t master; -}; - -static void hashlimit_mt_compat_from_user(void *dst, const void *src) -{ - int off = offsetof(struct compat_xt_hashlimit_info, hinfo); - - memcpy(dst, src, off); - memset(dst + off, 0, sizeof(struct compat_xt_hashlimit_info) - off); -} - -static int hashlimit_mt_compat_to_user(void __user *dst, const void *src) -{ - int off = offsetof(struct compat_xt_hashlimit_info, hinfo); - - return copy_to_user(dst, src, off) ? -EFAULT : 0; -} -#endif - static struct xt_match hashlimit_mt_reg[] __read_mostly = { - { - .name = "hashlimit", - .revision = 0, - .family = NFPROTO_IPV4, - .match = hashlimit_mt_v0, - .matchsize = sizeof(struct xt_hashlimit_info), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_hashlimit_info), - .compat_from_user = hashlimit_mt_compat_from_user, - .compat_to_user = hashlimit_mt_compat_to_user, -#endif - .checkentry = hashlimit_mt_check_v0, - .destroy = hashlimit_mt_destroy_v0, - .me = THIS_MODULE - }, { .name = "hashlimit", .revision = 1, @@ -815,20 +608,6 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { .me = THIS_MODULE, }, #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) - { - .name = "hashlimit", - .family = NFPROTO_IPV6, - .match = hashlimit_mt_v0, - .matchsize = sizeof(struct xt_hashlimit_info), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_hashlimit_info), - .compat_from_user = hashlimit_mt_compat_from_user, - .compat_to_user = hashlimit_mt_compat_to_user, -#endif - .checkentry = hashlimit_mt_check_v0, - .destroy = hashlimit_mt_destroy_v0, - .me = THIS_MODULE - }, { .name = "hashlimit", .revision = 1, -- cgit v1.2.2 From 110d99ed1c846458654e2771b3ad626319f19394 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 22 Mar 2010 19:35:01 +0100 Subject: netfilter: xtables: remove xt_multiport revision 0 Superseded by xt_multiport revision 1 (introduction already predates linux.git). 
Signed-off-by: Jan Engelhardt --- net/netfilter/xt_multiport.c | 77 -------------------------------------------- 1 file changed, 77 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c index b446738eab1a..83b77ceb264f 100644 --- a/net/netfilter/xt_multiport.c +++ b/net/netfilter/xt_multiport.c @@ -26,23 +26,6 @@ MODULE_DESCRIPTION("Xtables: multiple port matching for TCP, UDP, UDP-Lite, SCTP MODULE_ALIAS("ipt_multiport"); MODULE_ALIAS("ip6t_multiport"); -/* Returns 1 if the port is matched by the test, 0 otherwise. */ -static inline bool -ports_match_v0(const u_int16_t *portlist, enum xt_multiport_flags flags, - u_int8_t count, u_int16_t src, u_int16_t dst) -{ - unsigned int i; - for (i = 0; i < count; i++) { - if (flags != XT_MULTIPORT_DESTINATION && portlist[i] == src) - return true; - - if (flags != XT_MULTIPORT_SOURCE && portlist[i] == dst) - return true; - } - - return false; -} - /* Returns 1 if the port is matched by the test, 0 otherwise. */ static inline bool ports_match_v1(const struct xt_multiport_v1 *minfo, @@ -88,30 +71,6 @@ ports_match_v1(const struct xt_multiport_v1 *minfo, return minfo->invert; } -static bool -multiport_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) -{ - const __be16 *pptr; - __be16 _ports[2]; - const struct xt_multiport *multiinfo = par->matchinfo; - - if (par->fragoff != 0) - return false; - - pptr = skb_header_pointer(skb, par->thoff, sizeof(_ports), _ports); - if (pptr == NULL) { - /* We've been asked to examine this packet, and we - * can't. Hence, no choice but to drop. - */ - pr_debug("Dropping evil offset=0 tinygram.\n"); - *par->hotdrop = true; - return false; - } - - return ports_match_v0(multiinfo->ports, multiinfo->flags, - multiinfo->count, ntohs(pptr[0]), ntohs(pptr[1])); -} - static bool multiport_mt(const struct sk_buff *skb, const struct xt_match_param *par) { @@ -152,15 +111,6 @@ check(u_int16_t proto, && count <= XT_MULTI_PORTS; } -static int multiport_mt_check_v0(const struct xt_mtchk_param *par) -{ - const struct ipt_ip *ip = par->entryinfo; - const struct xt_multiport *multiinfo = par->matchinfo; - - return check(ip->proto, ip->invflags, multiinfo->flags, - multiinfo->count); -} - static int multiport_mt_check(const struct xt_mtchk_param *par) { const struct ipt_ip *ip = par->entryinfo; @@ -170,15 +120,6 @@ static int multiport_mt_check(const struct xt_mtchk_param *par) multiinfo->count); } -static int multiport_mt6_check_v0(const struct xt_mtchk_param *par) -{ - const struct ip6t_ip6 *ip = par->entryinfo; - const struct xt_multiport *multiinfo = par->matchinfo; - - return check(ip->proto, ip->invflags, multiinfo->flags, - multiinfo->count); -} - static int multiport_mt6_check(const struct xt_mtchk_param *par) { const struct ip6t_ip6 *ip = par->entryinfo; @@ -189,15 +130,6 @@ static int multiport_mt6_check(const struct xt_mtchk_param *par) } static struct xt_match multiport_mt_reg[] __read_mostly = { - { - .name = "multiport", - .family = NFPROTO_IPV4, - .revision = 0, - .checkentry = multiport_mt_check_v0, - .match = multiport_mt_v0, - .matchsize = sizeof(struct xt_multiport), - .me = THIS_MODULE, - }, { .name = "multiport", .family = NFPROTO_IPV4, @@ -207,15 +139,6 @@ static struct xt_match multiport_mt_reg[] __read_mostly = { .matchsize = sizeof(struct xt_multiport_v1), .me = THIS_MODULE, }, - { - .name = "multiport", - .family = NFPROTO_IPV6, - .revision = 0, - .checkentry = multiport_mt6_check_v0, - .match = multiport_mt_v0, - .matchsize = sizeof(struct 
xt_multiport), - .me = THIS_MODULE, - }, { .name = "multiport", .family = NFPROTO_IPV6, -- cgit v1.2.2 From d879e19e18ebc69fc20a9b95612e9dd0acf4d7aa Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 22 Mar 2010 19:39:04 +0100 Subject: netfilter: xtables: remove xt_string revision 0 Superseded by xt_string revision 1 (linux v2.6.26-rc8-1127-g4ad3f26, iptables 1.4.2-rc1). Signed-off-by: Jan Engelhardt --- net/netfilter/xt_string.c | 53 ++++++++++++++++------------------------------- 1 file changed, 18 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index b0f8292db6f8..488e368a2c4e 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -26,12 +26,10 @@ string_mt(const struct sk_buff *skb, const struct xt_match_param *par) { const struct xt_string_info *conf = par->matchinfo; struct ts_state state; - int invert; + bool invert; memset(&state, 0, sizeof(struct ts_state)); - - invert = (par->match->revision == 0 ? conf->u.v0.invert : - conf->u.v1.flags & XT_STRING_FLAG_INVERT); + invert = conf->u.v1.flags & XT_STRING_FLAG_INVERT; return (skb_find_text((struct sk_buff *)skb, conf->from_offset, conf->to_offset, conf->config, &state) @@ -53,13 +51,11 @@ static int string_mt_check(const struct xt_mtchk_param *par) return -EINVAL; if (conf->patlen > XT_STRING_MAX_PATTERN_SIZE) return -EINVAL; - if (par->match->revision == 1) { - if (conf->u.v1.flags & - ~(XT_STRING_FLAG_IGNORECASE | XT_STRING_FLAG_INVERT)) - return -EINVAL; - if (conf->u.v1.flags & XT_STRING_FLAG_IGNORECASE) - flags |= TS_IGNORECASE; - } + if (conf->u.v1.flags & + ~(XT_STRING_FLAG_IGNORECASE | XT_STRING_FLAG_INVERT)) + return -EINVAL; + if (conf->u.v1.flags & XT_STRING_FLAG_IGNORECASE) + flags |= TS_IGNORECASE; ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen, GFP_KERNEL, flags); if (IS_ERR(ts_conf)) @@ -74,38 +70,25 @@ static void string_mt_destroy(const struct xt_mtdtor_param *par) textsearch_destroy(STRING_TEXT_PRIV(par->matchinfo)->config); } -static struct xt_match xt_string_mt_reg[] __read_mostly = { - { - .name = "string", - .revision = 0, - .family = NFPROTO_UNSPEC, - .checkentry = string_mt_check, - .match = string_mt, - .destroy = string_mt_destroy, - .matchsize = sizeof(struct xt_string_info), - .me = THIS_MODULE - }, - { - .name = "string", - .revision = 1, - .family = NFPROTO_UNSPEC, - .checkentry = string_mt_check, - .match = string_mt, - .destroy = string_mt_destroy, - .matchsize = sizeof(struct xt_string_info), - .me = THIS_MODULE - }, +static struct xt_match xt_string_mt_reg __read_mostly = { + .name = "string", + .revision = 1, + .family = NFPROTO_UNSPEC, + .checkentry = string_mt_check, + .match = string_mt, + .destroy = string_mt_destroy, + .matchsize = sizeof(struct xt_string_info), + .me = THIS_MODULE, }; static int __init string_mt_init(void) { - return xt_register_matches(xt_string_mt_reg, - ARRAY_SIZE(xt_string_mt_reg)); + return xt_register_match(&xt_string_mt_reg); } static void __exit string_mt_exit(void) { - xt_unregister_matches(xt_string_mt_reg, ARRAY_SIZE(xt_string_mt_reg)); + xt_unregister_match(&xt_string_mt_reg); } module_init(string_mt_init); -- cgit v1.2.2 From b44672889c11e13e4f4dc0a8ee23f0e64f1e57c6 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 24 Mar 2010 22:50:01 +0100 Subject: netfilter: xtables: merge registration structure to NFPROTO_UNSPEC Signed-off-by: Jan Engelhardt --- net/netfilter/xt_state.c | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 
21 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index be00d7b1f53b..bb1271852d50 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -53,35 +53,24 @@ static void state_mt_destroy(const struct xt_mtdtor_param *par) nf_ct_l3proto_module_put(par->family); } -static struct xt_match state_mt_reg[] __read_mostly = { - { - .name = "state", - .family = NFPROTO_IPV4, - .checkentry = state_mt_check, - .match = state_mt, - .destroy = state_mt_destroy, - .matchsize = sizeof(struct xt_state_info), - .me = THIS_MODULE, - }, - { - .name = "state", - .family = NFPROTO_IPV6, - .checkentry = state_mt_check, - .match = state_mt, - .destroy = state_mt_destroy, - .matchsize = sizeof(struct xt_state_info), - .me = THIS_MODULE, - }, +static struct xt_match state_mt_reg __read_mostly = { + .name = "state", + .family = NFPROTO_UNSPEC, + .checkentry = state_mt_check, + .match = state_mt, + .destroy = state_mt_destroy, + .matchsize = sizeof(struct xt_state_info), + .me = THIS_MODULE, }; static int __init state_mt_init(void) { - return xt_register_matches(state_mt_reg, ARRAY_SIZE(state_mt_reg)); + return xt_register_match(&state_mt_reg); } static void __exit state_mt_exit(void) { - xt_unregister_matches(state_mt_reg, ARRAY_SIZE(state_mt_reg)); + xt_unregister_match(&state_mt_reg); } module_init(state_mt_init); -- cgit v1.2.2 From 8f5992291457c8e6de2f5fe39849de6756be1a96 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 25 Mar 2010 17:25:11 +0100 Subject: netfilter: xt_hashlimit: IPV6 bugfix A missing break statement in hashlimit_ipv6_mask(), and masks between /64 and /95 are not working at all... Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/netfilter/xt_hashlimit.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 70d561a2d9e0..215a64835de8 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -493,6 +493,7 @@ static void hashlimit_ipv6_mask(__be32 *i, unsigned int p) case 64 ... 95: i[2] = maskl(i[2], p - 64); i[3] = 0; + break; case 96 ... 127: i[3] = maskl(i[3], p - 96); break; -- cgit v1.2.2 From df3345457a7a174dfb5872a070af80d456985038 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 24 Mar 2010 19:13:54 +0000 Subject: rps: add CONFIG_RPS RPS currently depends on SMP and SYSFS Adding a CONFIG_RPS makes sense in case this requirement changes in the future. This patch saves about 1500 bytes of kernel text in case SMP is on but SYSFS is off. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/Kconfig | 5 +++++ net/core/dev.c | 29 +++++++++++++++++++---------- 2 files changed, 24 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index 041c35edb763..68514644ce91 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -203,6 +203,11 @@ source "net/ieee802154/Kconfig" source "net/sched/Kconfig" source "net/dcb/Kconfig" +config RPS + boolean + depends on SMP && SYSFS + default y + menu "Network testing" config NET_PKTGEN diff --git a/net/core/dev.c b/net/core/dev.c index 5e3dc28cbf5a..bcb3ed26af1c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2177,7 +2177,7 @@ int weight_p __read_mostly = 64; /* old backlog weight */ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; -#ifdef CONFIG_SMP +#ifdef CONFIG_RPS /* * get_rps_cpu is called from netif_receive_skb and returns the target * CPU from the RPS map of the receiving queue for a given skb. 
@@ -2325,7 +2325,7 @@ enqueue: /* Schedule NAPI for backlog device */ if (napi_schedule_prep(&queue->backlog)) { -#ifdef CONFIG_SMP +#ifdef CONFIG_RPS if (cpu != smp_processor_id()) { struct rps_remote_softirq_cpus *rcpus = &__get_cpu_var(rps_remote_softirq_cpus); @@ -2376,7 +2376,7 @@ int netif_rx(struct sk_buff *skb) if (!skb->tstamp.tv64) net_timestamp(skb); -#ifdef CONFIG_SMP +#ifdef CONFIG_RPS cpu = get_rps_cpu(skb->dev, skb); if (cpu < 0) cpu = smp_processor_id(); @@ -2750,7 +2750,7 @@ out: */ int netif_receive_skb(struct sk_buff *skb) { -#ifdef CONFIG_SMP +#ifdef CONFIG_RPS int cpu; cpu = get_rps_cpu(skb->dev, skb); @@ -3189,7 +3189,7 @@ void netif_napi_del(struct napi_struct *napi) } EXPORT_SYMBOL(netif_napi_del); -#ifdef CONFIG_SMP +#ifdef CONFIG_RPS /* * net_rps_action sends any pending IPI's for rps. This is only called from * softirq and interrupts must be enabled. @@ -3214,7 +3214,7 @@ static void net_rx_action(struct softirq_action *h) unsigned long time_limit = jiffies + 2; int budget = netdev_budget; void *have; -#ifdef CONFIG_SMP +#ifdef CONFIG_RPS int select; struct rps_remote_softirq_cpus *rcpus; #endif @@ -3280,7 +3280,7 @@ static void net_rx_action(struct softirq_action *h) netpoll_poll_unlock(have); } out: -#ifdef CONFIG_SMP +#ifdef CONFIG_RPS rcpus = &__get_cpu_var(rps_remote_softirq_cpus); select = rcpus->select; rcpus->select ^= 1; @@ -5277,6 +5277,7 @@ int register_netdevice(struct net_device *dev) dev->iflink = -1; +#ifdef CONFIG_RPS if (!dev->num_rx_queues) { /* * Allocate a single RX queue if driver never called @@ -5293,7 +5294,7 @@ int register_netdevice(struct net_device *dev) atomic_set(&dev->_rx->count, 1); dev->num_rx_queues = 1; } - +#endif /* Init, if this function is available */ if (dev->netdev_ops->ndo_init) { ret = dev->netdev_ops->ndo_init(dev); @@ -5653,11 +5654,13 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, void (*setup)(struct net_device *), unsigned int queue_count) { struct netdev_queue *tx; - struct netdev_rx_queue *rx; struct net_device *dev; size_t alloc_size; struct net_device *p; +#ifdef CONFIG_RPS + struct netdev_rx_queue *rx; int i; +#endif BUG_ON(strlen(name) >= sizeof(dev->name)); @@ -5683,6 +5686,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, goto free_p; } +#ifdef CONFIG_RPS rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL); if (!rx) { printk(KERN_ERR "alloc_netdev: Unable to allocate " @@ -5698,6 +5702,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, */ for (i = 0; i < queue_count; i++) rx[i].first = rx; +#endif dev = PTR_ALIGN(p, NETDEV_ALIGN); dev->padded = (char *)dev - (char *)p; @@ -5713,8 +5718,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, dev->num_tx_queues = queue_count; dev->real_num_tx_queues = queue_count; +#ifdef CONFIG_RPS dev->_rx = rx; dev->num_rx_queues = queue_count; +#endif dev->gso_max_size = GSO_MAX_SIZE; @@ -5731,8 +5738,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, return dev; free_rx: +#ifdef CONFIG_RPS kfree(rx); free_tx: +#endif kfree(tx); free_p: kfree(p); @@ -6236,7 +6245,7 @@ static int __init net_dev_init(void) queue->completion_queue = NULL; INIT_LIST_HEAD(&queue->poll_list); -#ifdef CONFIG_SMP +#ifdef CONFIG_RPS queue->csd.func = trigger_softirq; queue->csd.info = queue; queue->csd.flags = 0; -- cgit v1.2.2 From b54c9b98bbfb4836b1f7441c5a9db24affd3c2e9 Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Thu, 25 Mar 2010 21:25:30 -0700 Subject: ipv6: Preserve pervious behavior in ipv6_link_dev_addr(). Use list_add_tail() to get the behavior we had before the list_head conversion for ipv6 address lists. Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 68e5809a2153..999568344721 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -578,7 +578,7 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) break; } - list_add(&ifp->if_list, p); + list_add_tail(&ifp->if_list, p); } static u32 ipv6_addr_hash(const struct in6_addr *addr) -- cgit v1.2.2 From b79d1d54cf0672f764402fe4711ef5306f917bd3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 25 Mar 2010 21:39:21 -0700 Subject: ipv6: Fix result generation in ipv6_get_ifaddr(). Finishing naturally from hlist_for_each_entry(x, ...) does not result in 'x' being NULL. Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 999568344721..21b4c9e1a682 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1341,9 +1341,9 @@ EXPORT_SYMBOL(ipv6_chk_prefix); struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr, struct net_device *dev, int strict) { - struct inet6_ifaddr *ifp = NULL; - struct hlist_node *node; + struct inet6_ifaddr *ifp, *result = NULL; unsigned int hash = ipv6_addr_hash(addr); + struct hlist_node *node; rcu_read_lock_bh(); hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) { @@ -1352,6 +1352,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add if (ipv6_addr_equal(&ifp->addr, addr)) { if (dev == NULL || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) { + result = ifp; in6_ifa_hold(ifp); break; } @@ -1359,7 +1360,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add } rcu_read_unlock_bh(); - return ifp; + return result; } /* Gets referenced address, destroys ifaddr */ -- cgit v1.2.2 From 66aa4a55fe0548c8b13a195c61774db65c5896cd Mon Sep 17 00:00:00 2001 From: Tom Goff Date: Fri, 19 Mar 2010 15:38:50 +0000 Subject: netlink: use the appropriate namespace pid This was included in OpenVZ kernels but wasn't integrated upstream. >From git://git.openvz.org/pub/linux-2.6.24-openvz: commit 5c69402f18adf7276352e051ece2cf31feefab02 Author: Alexey Dobriyan Date: Mon Dec 24 14:37:45 2007 +0300 netlink: fixup ->tgid to work in multiple PID namespaces Signed-off-by: Tom Goff Acked-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- net/netlink/af_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index acbbae1e89b5..274d977166b7 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -545,7 +545,7 @@ static int netlink_autobind(struct socket *sock) struct hlist_head *head; struct sock *osk; struct hlist_node *node; - s32 pid = current->tgid; + s32 pid = task_tgid_vnr(current); int err; static s32 rover = -4097; -- cgit v1.2.2 From 4b97efdf392563bf03b4917a0b5add2df65de39a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 26 Mar 2010 20:27:49 -0700 Subject: net: fix netlink address dumping in IPv4/IPv6 When a dump is interrupted at the last device in a hash chain and then continued, "idx" won't get incremented past s_idx, so s_ip_idx is not reset when moving on to the next device. This means of all following devices only the last n - s_ip_idx addresses are dumped. Tested-by: Pawel Staszewski Signed-off-by: Patrick McHardy --- net/ipv4/devinet.c | 2 +- net/ipv6/addrconf.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 51ca946e3392..3feb2b390308 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1194,7 +1194,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) hlist_for_each_entry_rcu(dev, node, head, index_hlist) { if (idx < s_idx) goto cont; - if (idx > s_idx) + if (h > s_h || idx > s_idx) s_ip_idx = 0; in_dev = __in_dev_get_rcu(dev); if (!in_dev) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3381b4317c27..7e567ae5eaab 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3610,7 +3610,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, hlist_for_each_entry_rcu(dev, node, head, index_hlist) { if (idx < s_idx) goto cont; - if (idx > s_idx) + if (h > s_h || idx > s_idx) s_ip_idx = 0; ip_idx = 0; if ((idev = __in6_dev_get(dev)) == NULL) -- cgit v1.2.2 From b35ecb5d404c00f2420221ccbb1bbba1139353a4 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 24 Mar 2010 07:43:17 +0000 Subject: ipv4: Cleanup struct net dereference in rt_intern_hash There's no need to get it 3 times, and gcc isn't smart enough to understand this itself. This is just a cleanup before the fix (next patch). Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv4/route.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 54fd68c14c87..124af1605d10 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1212,11 +1212,11 @@ restart: slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) { struct net *net = dev_net(rt->u.dst.dev); int num = ++net->ipv4.current_rt_cache_rebuild_count; - if (!rt_caching(dev_net(rt->u.dst.dev))) { + if (!rt_caching(net)) { printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n", rt->u.dst.dev->name, num); } - rt_emergency_hash_rebuild(dev_net(rt->u.dst.dev)); + rt_emergency_hash_rebuild(net); } } -- cgit v1.2.2 From 6a2bad70d546cf30a46bc6d9ec0cb9a0891a38eb Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 24 Mar 2010 21:51:22 +0000 Subject: ipv4: Restart rt_intern_hash after emergency rebuild (v2) The rebuild changes the genid, which in turn is used in the hash calculation. Thus, if we don't restart and just go on with the insert, the rt will end up in the wrong chain. 
(Fixed Neil's comment about the index passed into the rt_intern_hash) Signed-off-by: Pavel Emelyanov Reviewed-by: Neil Horman Signed-off-by: David S. Miller --- net/ipv4/route.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 124af1605d10..d413b57be9b3 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1097,7 +1097,7 @@ static int slow_chain_length(const struct rtable *head) } static int rt_intern_hash(unsigned hash, struct rtable *rt, - struct rtable **rp, struct sk_buff *skb) + struct rtable **rp, struct sk_buff *skb, int ifindex) { struct rtable *rth, **rthp; unsigned long now; @@ -1217,6 +1217,11 @@ restart: rt->u.dst.dev->name, num); } rt_emergency_hash_rebuild(net); + spin_unlock_bh(rt_hash_lock_addr(hash)); + + hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, + ifindex, rt_genid(net)); + goto restart; } } @@ -1477,7 +1482,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, &netevent); rt_del(hash, rth); - if (!rt_intern_hash(hash, rt, &rt, NULL)) + if (!rt_intern_hash(hash, rt, &rt, NULL, rt->fl.oif)) ip_rt_put(rt); goto do_next; } @@ -1931,7 +1936,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, in_dev_put(in_dev); hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); - return rt_intern_hash(hash, rth, NULL, skb); + return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex); e_nobufs: in_dev_put(in_dev); @@ -2098,7 +2103,7 @@ static int ip_mkroute_input(struct sk_buff *skb, /* put it into the cache */ hash = rt_hash(daddr, saddr, fl->iif, rt_genid(dev_net(rth->u.dst.dev))); - return rt_intern_hash(hash, rth, NULL, skb); + return rt_intern_hash(hash, rth, NULL, skb, fl->iif); } /* @@ -2255,7 +2260,7 @@ local_input: } rth->rt_type = res.type; hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); - err = rt_intern_hash(hash, rth, NULL, skb); + err = rt_intern_hash(hash, rth, NULL, skb, fl.iif); goto done; no_route: @@ -2502,7 +2507,7 @@ static int ip_mkroute_output(struct rtable **rp, if (err == 0) { hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, rt_genid(dev_net(dev_out))); - err = rt_intern_hash(hash, rth, rp, NULL); + err = rt_intern_hash(hash, rth, rp, NULL, oldflp->oif); } return err; -- cgit v1.2.2 From 71c5c1595c04852d6fbf3c4882b47b30b61a4d32 Mon Sep 17 00:00:00 2001 From: Brandon L Black Date: Fri, 26 Mar 2010 16:18:03 +0000 Subject: net: Add MSG_WAITFORONE flag to recvmmsg Add new flag MSG_WAITFORONE for the recvmmsg() syscall. When this flag is specified for a blocking socket, recvmmsg() will only block until at least 1 packet is available. The default behavior is to block until all vlen packets are available. This flag has no effect on non-blocking sockets or when used in combination with MSG_DONTWAIT. Signed-off-by: Brandon L Black Acked-by: Ulrich Drepper Acked-by: Eric Dumazet Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/socket.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 769c386bd428..f55ffe9f8c87 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2135,6 +2135,10 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, break; ++datagrams; + /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */ + if (flags & MSG_WAITFORONE) + flags |= MSG_DONTWAIT; + if (timeout) { ktime_get_ts(timeout); *timeout = timespec_sub(end_time, *timeout); -- cgit v1.2.2 From 7438189baa0a2fe30084bdc97e3d540ebc5444f3 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 25 Mar 2010 23:45:35 +0000 Subject: net: ipmr/ip6mr: prevent out-of-bounds vif_table access When cache is unresolved, c->mf[6]c_parent is set to 65535 and minvif, maxvif are not initialized, hence we must avoid to parse IIF and OIF. A second problem can happen when the user dumps a cache entry where a VIF, that was referenced at creation time, has been removed. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 11 +++++++---- net/ipv6/ip6mr.c | 11 +++++++---- 2 files changed, 14 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 0b9d03c54dc3..d0a6092a67be 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1616,17 +1616,20 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm) int ct; struct rtnexthop *nhp; struct net *net = mfc_net(c); - struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev; u8 *b = skb_tail_pointer(skb); struct rtattr *mp_head; - if (dev) - RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex); + /* If cache is unresolved, don't try to parse IIF and OIF */ + if (c->mfc_parent > MAXVIFS) + return -ENOENT; + + if (VIF_EXISTS(net, c->mfc_parent)) + RTA_PUT(skb, RTA_IIF, 4, &net->ipv4.vif_table[c->mfc_parent].dev->ifindex); mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0)); for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { - if (c->mfc_un.res.ttls[ct] < 255) { + if (VIF_EXISTS(net, ct) && c->mfc_un.res.ttls[ct] < 255) { if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) goto rtattr_failure; nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 23e4ac0cc30e..27acfb58650a 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1695,17 +1695,20 @@ ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm) int ct; struct rtnexthop *nhp; struct net *net = mfc6_net(c); - struct net_device *dev = net->ipv6.vif6_table[c->mf6c_parent].dev; u8 *b = skb_tail_pointer(skb); struct rtattr *mp_head; - if (dev) - RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex); + /* If cache is unresolved, don't try to parse IIF and OIF */ + if (c->mf6c_parent > MAXMIFS) + return -ENOENT; + + if (MIF_EXISTS(net, c->mf6c_parent)) + RTA_PUT(skb, RTA_IIF, 4, &net->ipv6.vif6_table[c->mf6c_parent].dev->ifindex); mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0)); for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { - if (c->mfc_un.res.ttls[ct] < 255) { + if (MIF_EXISTS(net, ct) && c->mfc_un.res.ttls[ct] < 255) { if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) goto rtattr_failure; nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); -- cgit v1.2.2 From 14a4b42bd6082b4ce3b94bad00cd367707cc1e97 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 27 Mar 2010 16:35:50 -0700 Subject: net: fix unaligned access in 
IFLA_STATS64 Tony Luck observes that the original IFLA_STATS64 submission causes unaligned accesses. This is because nla_data() returns a pointer to a memory region that is only aligned to 32 bits. Do some memcpying to workaround this. Signed-off-by: Jan Engelhardt Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 62 ++++++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index ffc6cf3495ac..ed0766f0181a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -602,36 +602,38 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, a->tx_compressed = b->tx_compressed; } -static void copy_rtnl_link_stats64(struct rtnl_link_stats64 *a, - const struct net_device_stats *b) +static void copy_rtnl_link_stats64(void *v, const struct net_device_stats *b) { - a->rx_packets = b->rx_packets; - a->tx_packets = b->tx_packets; - a->rx_bytes = b->rx_bytes; - a->tx_bytes = b->tx_bytes; - a->rx_errors = b->rx_errors; - a->tx_errors = b->tx_errors; - a->rx_dropped = b->rx_dropped; - a->tx_dropped = b->tx_dropped; - - a->multicast = b->multicast; - a->collisions = b->collisions; - - a->rx_length_errors = b->rx_length_errors; - a->rx_over_errors = b->rx_over_errors; - a->rx_crc_errors = b->rx_crc_errors; - a->rx_frame_errors = b->rx_frame_errors; - a->rx_fifo_errors = b->rx_fifo_errors; - a->rx_missed_errors = b->rx_missed_errors; - - a->tx_aborted_errors = b->tx_aborted_errors; - a->tx_carrier_errors = b->tx_carrier_errors; - a->tx_fifo_errors = b->tx_fifo_errors; - a->tx_heartbeat_errors = b->tx_heartbeat_errors; - a->tx_window_errors = b->tx_window_errors; - - a->rx_compressed = b->rx_compressed; - a->tx_compressed = b->tx_compressed; + struct rtnl_link_stats64 a; + + a.rx_packets = b->rx_packets; + a.tx_packets = b->tx_packets; + a.rx_bytes = b->rx_bytes; + a.tx_bytes = b->tx_bytes; + a.rx_errors = b->rx_errors; + a.tx_errors = b->tx_errors; + a.rx_dropped = b->rx_dropped; + a.tx_dropped = b->tx_dropped; + + a.multicast = b->multicast; + a.collisions = b->collisions; + + a.rx_length_errors = b->rx_length_errors; + a.rx_over_errors = b->rx_over_errors; + a.rx_crc_errors = b->rx_crc_errors; + a.rx_frame_errors = b->rx_frame_errors; + a.rx_fifo_errors = b->rx_fifo_errors; + a.rx_missed_errors = b->rx_missed_errors; + + a.tx_aborted_errors = b->tx_aborted_errors; + a.tx_carrier_errors = b->tx_carrier_errors; + a.tx_fifo_errors = b->tx_fifo_errors; + a.tx_heartbeat_errors = b->tx_heartbeat_errors; + a.tx_window_errors = b->tx_window_errors; + + a.rx_compressed = b->rx_compressed; + a.tx_compressed = b->tx_compressed; + memcpy(v, &a, sizeof(a)); } static inline int rtnl_vfinfo_size(const struct net_device *dev) @@ -734,8 +736,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, sizeof(struct rtnl_link_stats64)); if (attr == NULL) goto nla_put_failure; - - stats = dev_get_stats(dev); copy_rtnl_link_stats64(nla_data(attr), stats); if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) { -- cgit v1.2.2 From adcfe1964e627b62fbc6e45609b1f0db2c64dd14 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 27 Mar 2010 17:15:29 -0700 Subject: net: increase preallocated size of nlmsg to accomodate for IFLA_STATS64 When more data is stuffed into an nlmsg than initially projected, an extra allocation needs to be done. Reserve enough for IFLA_STATS64 so that this does not to needlessy happen. Signed-off-by: Jan Engelhardt Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index ed0766f0181a..bf919b6acea2 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -653,6 +653,7 @@ static inline size_t if_nlmsg_size(const struct net_device *dev) + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */ + nla_total_size(sizeof(struct rtnl_link_ifmap)) + nla_total_size(sizeof(struct rtnl_link_stats)) + + nla_total_size(sizeof(struct rtnl_link_stats64)) + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */ + nla_total_size(4) /* IFLA_TXQLEN */ -- cgit v1.2.2 From 54c1a859efd9fd6cda05bc700315ba2519c14eba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?YOSHIFUJI=20Hideaki=20/=20=E5=90=89=E8=97=A4=E8=8B=B1?= =?UTF-8?q?=E6=98=8E?= Date: Sun, 28 Mar 2010 07:15:45 +0000 Subject: ipv6: Don't drop cache route entry unless timer actually expired. This is ipv6 variant of the commit 5e016cbf6.. ("ipv4: Don't drop redirected route cache entry unless PTMU actually expired") by Guenter Roeck . Remove cache route entry in ipv6_negative_advice() only if the timer is expired. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/route.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7fcb0e5d1213..0d7713c5c206 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -890,12 +890,17 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) struct rt6_info *rt = (struct rt6_info *) dst; if (rt) { - if (rt->rt6i_flags & RTF_CACHE) - ip6_del_rt(rt); - else + if (rt->rt6i_flags & RTF_CACHE) { + if (rt6_check_expired(rt)) { + ip6_del_rt(rt); + dst = NULL; + } + } else { dst_release(dst); + dst = NULL; + } } - return NULL; + return dst; } static void ip6_link_failure(struct sk_buff *skb) -- cgit v1.2.2 From 10f744d205dde72a0016dbdb11e239da8269958b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 28 Mar 2010 23:07:20 -0700 Subject: net: __netif_receive_skb should be static Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index bcb3ed26af1c..887aa84fcd46 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2621,7 +2621,7 @@ void netif_nit_deliver(struct sk_buff *skb) rcu_read_unlock(); } -int __netif_receive_skb(struct sk_buff *skb) +static int __netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; struct net_device *orig_dev; -- cgit v1.2.2 From 30bde1f5076a9b6bd4b6a168523930ce242c7449 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 29 Mar 2010 01:00:44 -0700 Subject: rps: fix net-sysfs build for !CONFIG_RPS Signed-off-by: Stephen Rothwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index f6b6bfee72ae..1e7fdd6029a2 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -466,6 +466,7 @@ static struct attribute_group wireless_group = { }; #endif +#ifdef CONFIG_RPS /* * RX queue sysfs structures and functions. 
*/ @@ -675,7 +676,7 @@ static void rx_queue_remove_kobjects(struct net_device *net) kobject_put(&net->_rx[i].kobj); kset_unregister(net->queues_kset); } - +#endif /* CONFIG_RPS */ #endif /* CONFIG_SYSFS */ #ifdef CONFIG_HOTPLUG @@ -739,7 +740,7 @@ void netdev_unregister_kobject(struct net_device * net) if (!net_eq(dev_net(net), &init_net)) return; -#ifdef CONFIG_SYSFS +#ifdef CONFIG_RPS rx_queue_remove_kobjects(net); #endif @@ -782,7 +783,7 @@ int netdev_register_kobject(struct net_device *net) if (error) return error; -#ifdef CONFIG_SYSFS +#ifdef CONFIG_RPS error = rx_queue_register_kobjects(net); if (error) { device_del(dev); -- cgit v1.2.2 From f77f13e22df4a40d237697df496152c8c37f3f2b Mon Sep 17 00:00:00 2001 From: Gilles Espinasse Date: Mon, 29 Mar 2010 15:41:47 +0200 Subject: Fix comment and Kconfig typos for 'require' and 'fragment' Signed-off-by: Gilles Espinasse Signed-off-by: Jiri Kosina --- net/ipv4/ipmr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8582e12e4a62..b1373abe2489 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -22,7 +22,7 @@ * overflow. * Carlos Picoto : PIMv1 Support * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header - * Relax this requrement to work with older peers. + * Relax this requirement to work with older peers. * */ -- cgit v1.2.2 From 788e69e548cc8d127b90f0de1f7b7e983d1d587a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 29 Mar 2010 21:02:31 -0400 Subject: svcrpc: don't hold sv_lock over svc_xprt_put() svc_xprt_put() can call tcp_close(), which can sleep, so we shouldn't be holding this lock. In fact, only the xpt_list removal and the sv_tmpcnt decrement should need the sv_lock here. Reported-by: Mi Jinlong Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 8f0f1fb3dc52..c334f5413c60 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -892,12 +892,12 @@ void svc_delete_xprt(struct svc_xprt *xprt) */ if (test_bit(XPT_TEMP, &xprt->xpt_flags)) serv->sv_tmpcnt--; + spin_unlock_bh(&serv->sv_lock); while ((dr = svc_deferred_dequeue(xprt)) != NULL) kfree(dr); svc_xprt_put(xprt); - spin_unlock_bh(&serv->sv_lock); } void svc_close_xprt(struct svc_xprt *xprt) -- cgit v1.2.2 From 5a0e3ad6af8660be21ca98a971cd00f331318c05 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Mar 2010 17:04:11 +0900 Subject: include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. 
* When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. 
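For illustration only (not part of the patch itself), the kind of breakage this conversion heads off looks roughly like the hypothetical file below: it calls kzalloc()/kfree() but never includes slab.h, and today it builds only because percpu.h -- pulled in via sched.h or module.h -- drags slab.h in implicitly. Once that implicit inclusion goes away, the explicit include becomes mandatory. The struct and function names are made up for the sketch.

#include <linux/module.h>
#include <linux/slab.h>	/* kzalloc(), kfree() -- must now be explicit */

struct foo_ctx {
	int refs;
};

static struct foo_ctx *foo_ctx_alloc(void)
{
	/* GFP_KERNEL comes from gfp.h, which slab.h itself includes;
	 * a file that only needs gfp flags would include gfp.h instead. */
	return kzalloc(sizeof(struct foo_ctx), GFP_KERNEL);
}

static void foo_ctx_free(struct foo_ctx *ctx)
{
	kfree(ctx);
}
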
Signed-off-by: Tejun Heo Guess-its-ok-by: Christoph Lameter Cc: Ingo Molnar Cc: Lee Schermerhorn --- net/802/garp.c | 1 + net/802/p8022.c | 1 + net/802/p8023.c | 1 + net/802/psnap.c | 1 + net/802/stp.c | 1 + net/802/tr.c | 1 + net/8021q/vlan.c | 1 + net/8021q/vlan_dev.c | 1 + net/9p/client.c | 1 + net/9p/protocol.c | 1 + net/9p/trans_fd.c | 1 + net/9p/trans_rdma.c | 1 + net/9p/trans_virtio.c | 1 + net/9p/util.c | 1 + net/appletalk/aarp.c | 1 + net/appletalk/ddp.c | 1 + net/atm/addr.c | 1 + net/atm/atm_sysfs.c | 1 + net/atm/br2684.c | 1 + net/atm/clip.c | 1 + net/atm/common.c | 1 + net/atm/lec.c | 1 + net/atm/mpc.c | 1 + net/atm/mpoa_caches.c | 1 + net/atm/mpoa_proc.c | 1 + net/atm/pppoatm.c | 1 + net/atm/proc.c | 1 + net/atm/raw.c | 1 + net/atm/resources.c | 1 + net/atm/signaling.c | 1 + net/ax25/af_ax25.c | 1 + net/ax25/ax25_dev.c | 1 + net/ax25/ax25_ds_subr.c | 1 + net/ax25/ax25_iface.c | 1 + net/ax25/ax25_in.c | 1 + net/ax25/ax25_ip.c | 1 + net/ax25/ax25_out.c | 1 + net/ax25/ax25_route.c | 1 + net/ax25/ax25_subr.c | 1 + net/ax25/ax25_uid.c | 1 + net/ax25/sysctl_net_ax25.c | 1 + net/bluetooth/af_bluetooth.c | 1 - net/bluetooth/bnep/core.c | 1 + net/bluetooth/bnep/netdev.c | 1 + net/bluetooth/bnep/sock.c | 2 +- net/bluetooth/cmtp/sock.c | 2 +- net/bluetooth/hci_sysfs.c | 1 + net/bluetooth/hidp/sock.c | 2 +- net/bluetooth/rfcomm/core.c | 1 + net/bridge/br_fdb.c | 1 + net/bridge/br_forward.c | 1 + net/bridge/br_if.c | 1 + net/bridge/br_input.c | 1 + net/bridge/br_ioctl.c | 1 + net/bridge/br_netfilter.c | 1 + net/bridge/br_netlink.c | 1 + net/bridge/br_stp_bpdu.c | 1 + net/bridge/netfilter/ebt_ulog.c | 1 + net/bridge/netfilter/ebtables.c | 1 + net/can/bcm.c | 1 + net/can/raw.c | 1 + net/compat.c | 1 + net/core/datagram.c | 1 + net/core/dev.c | 1 + net/core/drop_monitor.c | 1 + net/core/dst.c | 1 + net/core/ethtool.c | 1 + net/core/fib_rules.c | 1 + net/core/filter.c | 1 + net/core/gen_estimator.c | 1 + net/core/iovec.c | 1 - net/core/link_watch.c | 1 - net/core/neighbour.c | 1 + net/core/net-sysfs.c | 1 + net/core/net-traces.c | 1 + net/core/netpoll.c | 1 + net/core/scm.c | 1 + net/core/sysctl_net_core.c | 1 + net/dcb/dcbnl.c | 1 + net/dccp/ccid.c | 2 ++ net/dccp/ccids/ccid2.c | 1 + net/dccp/feat.c | 1 + net/dccp/input.c | 1 + net/dccp/ipv4.c | 1 + net/dccp/ipv6.c | 1 + net/dccp/minisocks.c | 1 + net/dccp/output.c | 1 + net/dccp/probe.c | 1 + net/dccp/proto.c | 1 + net/decnet/dn_dev.c | 1 + net/decnet/dn_fib.c | 1 + net/decnet/dn_neigh.c | 1 + net/decnet/dn_nsp_in.c | 1 + net/decnet/dn_nsp_out.c | 1 + net/decnet/dn_route.c | 1 + net/decnet/dn_table.c | 1 + net/decnet/netfilter/dn_rtmsg.c | 1 + net/dsa/dsa.c | 1 + net/dsa/tag_dsa.c | 1 + net/dsa/tag_edsa.c | 1 + net/dsa/tag_trailer.c | 1 + net/econet/af_econet.c | 1 + net/ethernet/pe2.c | 1 + net/ieee802154/af_ieee802154.c | 1 + net/ieee802154/dgram.c | 1 + net/ieee802154/netlink.c | 1 + net/ieee802154/nl-mac.c | 1 + net/ieee802154/nl-phy.c | 1 + net/ieee802154/raw.c | 1 + net/ieee802154/wpan-class.c | 1 + net/ipv4/af_inet.c | 1 + net/ipv4/ah4.c | 1 + net/ipv4/arp.c | 1 + net/ipv4/cipso_ipv4.c | 1 + net/ipv4/devinet.c | 1 + net/ipv4/fib_frontend.c | 1 + net/ipv4/fib_hash.c | 1 + net/ipv4/fib_semantics.c | 1 + net/ipv4/fib_trie.c | 1 + net/ipv4/icmp.c | 1 + net/ipv4/igmp.c | 1 + net/ipv4/inet_diag.c | 1 + net/ipv4/inet_fragment.c | 1 + net/ipv4/inet_timewait_sock.c | 1 + net/ipv4/ip_forward.c | 1 + net/ipv4/ip_fragment.c | 1 + net/ipv4/ip_gre.c | 1 + net/ipv4/ip_input.c | 1 + net/ipv4/ip_options.c | 1 + net/ipv4/ip_output.c | 1 + 
net/ipv4/ip_sockglue.c | 1 + net/ipv4/ipconfig.c | 1 + net/ipv4/ipip.c | 1 + net/ipv4/ipmr.c | 1 + net/ipv4/netfilter.c | 1 + net/ipv4/netfilter/arptable_filter.c | 1 + net/ipv4/netfilter/ip_queue.c | 1 + net/ipv4/netfilter/ipt_CLUSTERIP.c | 1 + net/ipv4/netfilter/ipt_REJECT.c | 1 + net/ipv4/netfilter/ipt_ULOG.c | 1 + net/ipv4/netfilter/iptable_filter.c | 1 + net/ipv4/netfilter/iptable_mangle.c | 1 + net/ipv4/netfilter/iptable_raw.c | 1 + net/ipv4/netfilter/iptable_security.c | 1 + net/ipv4/netfilter/nf_nat_core.c | 1 + net/ipv4/netfilter/nf_nat_helper.c | 1 + net/ipv4/netfilter/nf_nat_rule.c | 1 + net/ipv4/netfilter/nf_nat_snmp_basic.c | 1 + net/ipv4/netfilter/nf_nat_standalone.c | 1 + net/ipv4/raw.c | 1 - net/ipv4/route.c | 1 + net/ipv4/sysctl_net_ipv4.c | 1 + net/ipv4/tcp.c | 1 + net/ipv4/tcp_cong.c | 1 + net/ipv4/tcp_input.c | 1 + net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_minisocks.c | 1 + net/ipv4/tcp_output.c | 1 + net/ipv4/tcp_probe.c | 1 + net/ipv4/tcp_timer.c | 1 + net/ipv4/tunnel4.c | 1 + net/ipv4/udp.c | 1 + net/ipv4/xfrm4_input.c | 1 + net/ipv4/xfrm4_mode_tunnel.c | 1 + net/ipv6/addrconf.c | 1 + net/ipv6/addrlabel.c | 1 + net/ipv6/af_inet6.c | 1 + net/ipv6/ah6.c | 1 + net/ipv6/anycast.c | 1 + net/ipv6/datagram.c | 1 + net/ipv6/exthdrs.c | 1 + net/ipv6/icmp.c | 1 + net/ipv6/inet6_connection_sock.c | 1 + net/ipv6/ip6_fib.c | 1 + net/ipv6/ip6_flowlabel.c | 1 + net/ipv6/ip6_input.c | 1 + net/ipv6/ip6_output.c | 1 + net/ipv6/ip6_tunnel.c | 1 + net/ipv6/ip6mr.c | 1 + net/ipv6/ipv6_sockglue.c | 1 + net/ipv6/mcast.c | 1 + net/ipv6/ndisc.c | 1 + net/ipv6/netfilter/ip6_queue.c | 1 + net/ipv6/netfilter/ip6t_REJECT.c | 1 + net/ipv6/netfilter/ip6table_filter.c | 1 + net/ipv6/netfilter/ip6table_mangle.c | 1 + net/ipv6/netfilter/ip6table_raw.c | 1 + net/ipv6/netfilter/ip6table_security.c | 1 + net/ipv6/netfilter/nf_conntrack_reasm.c | 1 + net/ipv6/raw.c | 1 + net/ipv6/reassembly.c | 1 + net/ipv6/route.c | 1 + net/ipv6/sit.c | 1 + net/ipv6/sysctl_net_ipv6.c | 1 + net/ipv6/tcp_ipv6.c | 1 + net/ipv6/tunnel6.c | 1 + net/ipv6/udp.c | 1 + net/ipv6/xfrm6_mode_tunnel.c | 1 + net/ipv6/xfrm6_tunnel.c | 1 + net/ipx/af_ipx.c | 1 + net/ipx/ipx_route.c | 1 + net/irda/af_irda.c | 1 + net/irda/discovery.c | 1 + net/irda/ircomm/ircomm_core.c | 1 + net/irda/ircomm/ircomm_lmp.c | 1 + net/irda/ircomm/ircomm_param.c | 1 + net/irda/ircomm/ircomm_tty.c | 1 + net/irda/irda_device.c | 1 + net/irda/iriap.c | 1 + net/irda/iriap_event.c | 2 ++ net/irda/irias_object.c | 1 + net/irda/irlan/irlan_client.c | 1 + net/irda/irlan/irlan_common.c | 1 + net/irda/irlan/irlan_provider.c | 1 + net/irda/irlap_event.c | 1 + net/irda/irlap_frame.c | 1 + net/irda/irnet/irnet_irda.c | 1 + net/irda/irnet/irnet_ppp.c | 1 + net/irda/irnetlink.c | 1 + net/irda/irqueue.c | 1 + net/irda/irttp.c | 1 + net/key/af_key.c | 1 + net/lapb/lapb_iface.c | 1 + net/lapb/lapb_in.c | 1 + net/lapb/lapb_out.c | 1 + net/lapb/lapb_subr.c | 1 + net/llc/af_llc.c | 1 + net/llc/llc_c_ac.c | 1 + net/llc/llc_conn.c | 1 + net/llc/llc_if.c | 1 + net/llc/llc_input.c | 1 + net/llc/llc_sap.c | 1 + net/llc/llc_station.c | 1 + net/mac80211/agg-rx.c | 1 + net/mac80211/agg-tx.c | 1 + net/mac80211/cfg.c | 1 + net/mac80211/debugfs_key.c | 1 + net/mac80211/debugfs_netdev.c | 1 + net/mac80211/ibss.c | 1 + net/mac80211/iface.c | 1 + net/mac80211/key.c | 1 + net/mac80211/led.c | 1 + net/mac80211/mesh.c | 1 + net/mac80211/mesh_hwmp.c | 1 + net/mac80211/mesh_pathtbl.c | 1 + net/mac80211/mesh_plink.c | 1 + net/mac80211/mlme.c | 1 + net/mac80211/rate.c | 1 + 
net/mac80211/rc80211_minstrel.c | 1 + net/mac80211/rc80211_minstrel_debugfs.c | 1 + net/mac80211/rc80211_pid_algo.c | 1 + net/mac80211/rc80211_pid_debugfs.c | 1 + net/mac80211/rx.c | 1 + net/mac80211/scan.c | 1 + net/mac80211/wep.c | 1 + net/mac80211/work.c | 1 + net/mac80211/wpa.c | 2 +- net/netfilter/core.c | 1 + net/netfilter/ipvs/ip_vs_app.c | 1 + net/netfilter/ipvs/ip_vs_conn.c | 1 + net/netfilter/ipvs/ip_vs_core.c | 1 + net/netfilter/ipvs/ip_vs_ctl.c | 1 + net/netfilter/ipvs/ip_vs_dh.c | 1 + net/netfilter/ipvs/ip_vs_est.c | 1 - net/netfilter/ipvs/ip_vs_ftp.c | 1 + net/netfilter/ipvs/ip_vs_lblc.c | 1 + net/netfilter/ipvs/ip_vs_lblcr.c | 1 + net/netfilter/ipvs/ip_vs_proto.c | 1 + net/netfilter/ipvs/ip_vs_sh.c | 1 + net/netfilter/ipvs/ip_vs_wrr.c | 1 + net/netfilter/ipvs/ip_vs_xmit.c | 1 + net/netfilter/nf_conntrack_acct.c | 1 + net/netfilter/nf_conntrack_amanda.c | 1 + net/netfilter/nf_conntrack_ecache.c | 1 + net/netfilter/nf_conntrack_ftp.c | 1 + net/netfilter/nf_conntrack_h323_main.c | 1 + net/netfilter/nf_conntrack_helper.c | 1 - net/netfilter/nf_conntrack_irc.c | 1 + net/netfilter/nf_conntrack_netlink.c | 1 + net/netfilter/nf_conntrack_proto.c | 1 + net/netfilter/nf_conntrack_proto_dccp.c | 1 + net/netfilter/nf_conntrack_proto_gre.c | 1 + net/netfilter/nf_conntrack_sane.c | 1 + net/netfilter/nf_conntrack_standalone.c | 1 + net/netfilter/nf_queue.c | 1 + net/netfilter/nfnetlink_log.c | 1 + net/netfilter/nfnetlink_queue.c | 1 + net/netfilter/x_tables.c | 1 + net/netfilter/xt_CT.c | 1 + net/netfilter/xt_LED.c | 1 + net/netfilter/xt_RATEEST.c | 1 + net/netfilter/xt_TCPMSS.c | 1 + net/netfilter/xt_connlimit.c | 1 + net/netfilter/xt_dccp.c | 1 + net/netfilter/xt_limit.c | 1 + net/netfilter/xt_quota.c | 1 + net/netfilter/xt_recent.c | 1 + net/netfilter/xt_statistic.c | 1 + net/netfilter/xt_string.c | 1 + net/netlabel/netlabel_cipso_v4.c | 1 + net/netlabel/netlabel_domainhash.c | 1 + net/netlabel/netlabel_kapi.c | 1 + net/netlabel/netlabel_mgmt.c | 1 + net/netlabel/netlabel_unlabeled.c | 1 + net/netlabel/netlabel_user.c | 1 + net/netlink/genetlink.c | 1 + net/netrom/af_netrom.c | 1 + net/netrom/nr_dev.c | 1 + net/netrom/nr_in.c | 1 + net/netrom/nr_loopback.c | 1 + net/netrom/nr_out.c | 1 + net/netrom/nr_route.c | 1 + net/netrom/nr_subr.c | 1 + net/packet/af_packet.c | 1 + net/phonet/af_phonet.c | 1 + net/phonet/datagram.c | 1 + net/phonet/pep.c | 1 + net/phonet/pn_dev.c | 1 + net/phonet/pn_netlink.c | 1 + net/phonet/socket.c | 1 + net/rds/af_rds.c | 1 + net/rds/cong.c | 1 + net/rds/connection.c | 1 + net/rds/ib.c | 1 + net/rds/ib_cm.c | 1 + net/rds/ib_rdma.c | 1 + net/rds/ib_recv.c | 1 + net/rds/info.c | 1 + net/rds/iw.c | 1 + net/rds/iw_cm.c | 1 + net/rds/iw_rdma.c | 1 + net/rds/iw_recv.c | 1 + net/rds/loop.c | 1 + net/rds/message.c | 1 + net/rds/page.c | 1 + net/rds/rdma.c | 1 + net/rds/recv.c | 1 + net/rds/send.c | 1 + net/rds/tcp.c | 1 + net/rds/tcp_listen.c | 1 + net/rds/tcp_recv.c | 1 + net/rfkill/core.c | 1 + net/rose/af_rose.c | 1 + net/rose/rose_dev.c | 1 + net/rose/rose_link.c | 1 + net/rose/rose_loopback.c | 1 + net/rose/rose_out.c | 1 + net/rose/rose_route.c | 1 + net/rose/rose_subr.c | 1 + net/rxrpc/af_rxrpc.c | 1 + net/rxrpc/ar-accept.c | 1 + net/rxrpc/ar-ack.c | 1 + net/rxrpc/ar-call.c | 1 + net/rxrpc/ar-connection.c | 1 + net/rxrpc/ar-input.c | 1 + net/rxrpc/ar-key.c | 1 + net/rxrpc/ar-local.c | 1 + net/rxrpc/ar-output.c | 1 + net/rxrpc/ar-peer.c | 1 + net/rxrpc/ar-transport.c | 1 + net/rxrpc/rxkad.c | 1 + net/sched/act_api.c | 1 + net/sched/act_ipt.c | 1 + 
net/sched/act_mirred.c | 1 + net/sched/act_pedit.c | 1 + net/sched/act_police.c | 1 + net/sched/act_simple.c | 1 + net/sched/cls_api.c | 1 + net/sched/cls_basic.c | 1 + net/sched/cls_cgroup.c | 1 + net/sched/cls_flow.c | 1 + net/sched/cls_fw.c | 1 + net/sched/cls_route.c | 1 + net/sched/cls_tcindex.c | 1 + net/sched/cls_u32.c | 1 + net/sched/em_meta.c | 1 + net/sched/em_nbyte.c | 1 + net/sched/em_text.c | 1 + net/sched/ematch.c | 1 + net/sched/sch_api.c | 1 + net/sched/sch_atm.c | 1 + net/sched/sch_cbq.c | 1 + net/sched/sch_drr.c | 1 + net/sched/sch_dsmark.c | 1 + net/sched/sch_fifo.c | 1 + net/sched/sch_generic.c | 1 + net/sched/sch_gred.c | 1 + net/sched/sch_htb.c | 1 + net/sched/sch_mq.c | 1 + net/sched/sch_multiq.c | 1 + net/sched/sch_netem.c | 1 + net/sched/sch_prio.c | 1 + net/sched/sch_sfq.c | 1 + net/sched/sch_teql.c | 1 + net/sctp/auth.c | 1 + net/sctp/bind_addr.c | 1 + net/sctp/chunk.c | 1 + net/sctp/input.c | 1 + net/sctp/inqueue.c | 1 + net/sctp/ipv6.c | 1 + net/sctp/output.c | 1 + net/sctp/outqueue.c | 1 + net/sctp/primitive.c | 1 + net/sctp/protocol.c | 1 + net/sctp/sm_make_chunk.c | 1 + net/sctp/sm_sideeffect.c | 1 + net/sctp/sm_statefuns.c | 1 + net/sctp/socket.c | 1 + net/sctp/ssnmap.c | 1 + net/sctp/transport.c | 1 + net/sctp/tsnmap.c | 1 + net/sctp/ulpevent.c | 1 + net/sctp/ulpqueue.c | 1 + net/socket.c | 1 + net/sunrpc/addr.c | 1 + net/sunrpc/auth_generic.c | 1 + net/sunrpc/auth_gss/gss_generic_token.c | 1 - net/sunrpc/auth_gss/gss_krb5_crypto.c | 1 - net/sunrpc/auth_gss/gss_krb5_seal.c | 1 - net/sunrpc/auth_gss/gss_krb5_seqnum.c | 1 - net/sunrpc/auth_gss/gss_krb5_unseal.c | 1 - net/sunrpc/auth_gss/gss_krb5_wrap.c | 1 - net/sunrpc/auth_gss/gss_spkm3_seal.c | 1 - net/sunrpc/auth_gss/svcauth_gss.c | 1 + net/sunrpc/auth_unix.c | 1 + net/sunrpc/backchannel_rqst.c | 1 + net/sunrpc/rpcb_clnt.c | 1 + net/sunrpc/socklib.c | 1 + net/sunrpc/stats.c | 1 + net/sunrpc/svc.c | 1 + net/sunrpc/svc_xprt.c | 1 + net/sunrpc/svcauth_unix.c | 1 + net/sunrpc/xdr.c | 1 + net/sunrpc/xprtrdma/svc_rdma.c | 1 + net/sunrpc/xprtrdma/svc_rdma_transport.c | 1 + net/sunrpc/xprtrdma/transport.c | 1 + net/sunrpc/xprtrdma/verbs.c | 1 + net/tipc/core.h | 1 + net/tipc/eth_media.c | 1 + net/tipc/socket.c | 2 +- net/unix/garbage.c | 1 - net/unix/sysctl_net_unix.c | 1 + net/wimax/op-msg.c | 1 + net/wimax/stack.c | 1 + net/wireless/core.c | 1 + net/wireless/debugfs.c | 1 + net/wireless/ibss.c | 1 + net/wireless/mlme.c | 1 + net/wireless/nl80211.c | 1 + net/wireless/reg.c | 1 + net/wireless/scan.c | 1 + net/wireless/sme.c | 1 + net/wireless/util.c | 1 + net/wireless/wext-compat.c | 1 + net/wireless/wext-core.c | 1 + net/wireless/wext-priv.c | 1 + net/wireless/wext-sme.c | 1 + net/x25/af_x25.c | 1 + net/x25/x25_dev.c | 1 + net/x25/x25_forward.c | 1 + net/x25/x25_in.c | 1 + net/x25/x25_link.c | 1 + net/x25/x25_out.c | 1 + net/x25/x25_route.c | 1 + net/x25/x25_subr.c | 1 + net/xfrm/xfrm_ipcomp.c | 2 +- net/xfrm/xfrm_output.c | 1 + net/xfrm/xfrm_state.c | 1 + net/xfrm/xfrm_sysctl.c | 1 + 469 files changed, 457 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/802/garp.c b/net/802/garp.c index 1dcb0660c49d..9ed7c0e7dc17 100644 --- a/net/802/garp.c +++ b/net/802/garp.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/net/802/p8022.c b/net/802/p8022.c index 2530f35241cd..7f353c4f437a 100644 --- a/net/802/p8022.c +++ b/net/802/p8022.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/net/802/p8023.c 
b/net/802/p8023.c index 6ab1835041a7..1256a40da43c 100644 --- a/net/802/p8023.c +++ b/net/802/p8023.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/net/802/psnap.c b/net/802/psnap.c index 6fea0750662b..21cde8fd5795 100644 --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/net/802/stp.c b/net/802/stp.c index 0b7a24452d11..53c8f77f0ccd 100644 --- a/net/802/stp.c +++ b/net/802/stp.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/net/802/tr.c b/net/802/tr.c index 44acce47fcdc..1c6e596074df 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index db783d7af5a3..97da977c2a23 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 2fd057c81bbf..29b6348c8d4d 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -21,6 +21,7 @@ */ #include +#include #include #include #include diff --git a/net/9p/client.c b/net/9p/client.c index e3e5bf4469ce..6e6b928a5972 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 94f5a8f65e9c..e7541d5b0118 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 31d0b05582a9..98ce9bcb0e15 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 2c95a89c0f46..041101ab4aa5 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index afde1a89fbb3..7eb78ecc1618 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include diff --git a/net/9p/util.c b/net/9p/util.c index dc4ec05ad93d..e048701a72d2 100644 --- a/net/9p/util.c +++ b/net/9p/util.c @@ -30,6 +30,7 @@ #include #include #include +#include #include /** diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index f2b3b56aa779..50dce7981321 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -30,6 +30,7 @@ */ #include +#include #include #include #include diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 9fc4da56fb1d..7b02967fbbe7 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -57,6 +57,7 @@ #include #include /* For TIOCOUTQ/INQ */ #include +#include #include #include #include diff --git a/net/atm/addr.c b/net/atm/addr.c index cf3ae8b47572..dcda35c66f15 100644 --- a/net/atm/addr.c +++ b/net/atm/addr.c @@ -4,6 +4,7 @@ #include #include +#include #include #include "signaling.h" diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c index f693b78eb467..799c631f0fed 100644 --- a/net/atm/atm_sysfs.c +++ b/net/atm/atm_sysfs.c @@ -1,6 +1,7 @@ /* ATM driver model support. 
*/ #include +#include #include #include #include diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 4d64d87e7578..d6c7ceaf13e9 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/net/atm/clip.c b/net/atm/clip.c index ebfa022008f7..313aba11316b 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -30,6 +30,7 @@ #include #include #include +#include #include /* for struct rtable and routing */ #include /* icmp_send */ #include /* for HZ */ diff --git a/net/atm/common.c b/net/atm/common.c index 74d095a081e3..97ed94aa0cbc 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -18,6 +18,7 @@ #include #include #include +#include #include /* struct sock */ #include #include diff --git a/net/atm/lec.c b/net/atm/lec.c index 5da5753157f9..feeaf5718472 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -6,6 +6,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ +#include #include #include #include diff --git a/net/atm/mpc.c b/net/atm/mpc.c index a6521c8aa88b..436f2e177657 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -2,6 +2,7 @@ #include #include +#include #include #include #include diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c index 4c141810eb6d..e773d8336918 100644 --- a/net/atm/mpoa_caches.c +++ b/net/atm/mpoa_caches.c @@ -1,5 +1,6 @@ #include #include +#include #include #include "mpoa_caches.h" diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c index b9bdb98427e4..53e500292271 100644 --- a/net/atm/mpoa_proc.c +++ b/net/atm/mpoa_proc.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "mpc.h" #include "mpoa_caches.h" diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c index 400839273c67..e49bb6d948a1 100644 --- a/net/atm/pppoatm.c +++ b/net/atm/pppoatm.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include diff --git a/net/atm/proc.c b/net/atm/proc.c index 7a96b2376bd7..696e218436e5 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -22,6 +22,7 @@ #include #include #include /* for __init */ +#include #include #include #include diff --git a/net/atm/raw.c b/net/atm/raw.c index d0c4bd047dc4..b4f7b9ff3c74 100644 --- a/net/atm/raw.c +++ b/net/atm/raw.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "common.h" #include "protocols.h" diff --git a/net/atm/resources.c b/net/atm/resources.c index 90082904f20d..d29e58261511 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -19,6 +19,7 @@ #include #include #include +#include #include /* for struct sock */ diff --git a/net/atm/signaling.c b/net/atm/signaling.c index ad1d28ae512b..6ba6e466ee54 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "resources.h" #include "signaling.h" diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index a5beedf43e2d..65c5801261f9 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index a7a0e0c9698b..c1cb982f6e86 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ax25/ax25_ds_subr.c b/net/ax25/ax25_ds_subr.c index b5e59787be2f..85816e612dc0 100644 --- a/net/ax25/ax25_ds_subr.c +++ b/net/ax25/ax25_ds_subr.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git 
a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c index 71338f112108..5a0dda8df492 100644 --- a/net/ax25/ax25_iface.c +++ b/net/ax25/ax25_iface.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index de56d3983de0..9bb776541203 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c index f047a57aa95c..cf0c47a26530 100644 --- a/net/ax25/ax25_ip.c +++ b/net/ax25/ax25_ip.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c index 14912600ec57..37507d806f65 100644 --- a/net/ax25/ax25_out.c +++ b/net/ax25/ax25_out.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index c833ba4c45a5..7805945a5fd6 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index 034aa10a5198..c6715ee4ab8f 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c index 9f13f6eefcba..d349be9578f5 100644 --- a/net/ax25/ax25_uid.c +++ b/net/ax25/ax25_uid.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c index 5159be6b2625..ebe0ef3f1d83 100644 --- a/net/ax25/sysctl_net_ax25.c +++ b/net/ax25/sysctl_net_ax25.c @@ -7,6 +7,7 @@ * Copyright (C) 1996 Mike Shaver (shaver@zeroknowledge.com) */ #include +#include #include #include #include diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 087cc51f5927..404a8500fd03 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index ef09c7b3a858..8062dad6d10d 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index b6234b73c4cf..5643a2391e76 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -26,6 +26,7 @@ */ #include +#include #include #include diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index 2ff6ac7b2ed4..2862f53b66b1 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include @@ -39,6 +38,7 @@ #include #include #include +#include #include #include diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index 978cc3a718ad..7ea1979a8e4f 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -34,6 +33,7 @@ #include #include #include +#include #include #include diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 05fd125f74fe..0e8e1a59856c 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -1,6 +1,7 @@ /* Bluetooth HCI driver model support. 
*/ #include +#include #include #include #include diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index 9cfef68b9fec..250dfd46237d 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -35,6 +34,7 @@ #include #include #include +#include #include #include "hidp.h" diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 13f114e8b0f9..7dca91bb8c57 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 3b8e038ab32c..9101a4e56201 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include "br_private.h" diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 8dbec83e50ca..7a241c396981 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -12,6 +12,7 @@ */ #include +#include #include #include #include diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index b6a3872f5681..0b6b1f2ff7ac 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "br_private.h" diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index d74d570fc848..a82dde2d2ead 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -11,6 +11,7 @@ * 2 of the License, or (at your option) any later version. */ +#include #include #include #include diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 2af6e4a90262..995afc4b04dc 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 268e2e725888..4c4977d12fd6 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -23,6 +23,7 @@ #include #include +#include #include #include #include diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index fcffb3fb1177..aa56ac2c8829 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 81ae40b3f655..d66cce11f3bf 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index c6ac657074a6..f9560f3dbdc7 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -29,6 +29,7 @@ */ #include +#include #include #include #include diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index dfb58056a89a..f0865fd1e3ec 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/net/can/bcm.c b/net/can/bcm.c index e32af52238a2..a2dee522b43e 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include diff --git a/net/can/raw.c b/net/can/raw.c index abca920440b5..3a7dffb6519c 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include diff --git a/net/compat.c b/net/compat.c 
index a1fb1b079a82..ec24d9edb025 100644 --- a/net/compat.c +++ b/net/compat.c @@ -12,6 +12,7 @@ */ #include +#include #include #include #include diff --git a/net/core/datagram.c b/net/core/datagram.c index 95c2e0840d0d..2dccd4ee591b 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include diff --git a/net/core/dev.c b/net/core/dev.c index 59d4394d2ce8..1c8a0ce473a8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -80,6 +80,7 @@ #include #include #include +#include #include #include #include diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index f8c874975350..cf208d8042b1 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/net/core/dst.c b/net/core/dst.c index cb1b3488b739..f307bc18f6a0 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/net/core/ethtool.c b/net/core/ethtool.c index f4cb6b6299d9..9d55c57f318a 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -18,6 +18,7 @@ #include #include #include +#include #include /* diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 9a24377146bf..d2c3e7dc2e5f 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/net/core/filter.c b/net/core/filter.c index d38ef7fd50f0..ff943bed21af 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 493775f4f2f1..cf8e70392fe0 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include diff --git a/net/core/iovec.c b/net/core/iovec.c index 16ad45d4882b..1e7f4e91a935 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 5910b555a54a..bdbce2f5875b 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 6cee6434da67..bff37908bd55 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -15,6 +15,7 @@ * Harald Welte Add neighbour cache statistics like rtstat */ +#include #include #include #include diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 099c753c4213..59cfc7d8fc45 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/net/core/net-traces.c b/net/core/net-traces.c index f1e982c508bb..afa6380ed88a 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 6f9206b36dc2..a58f59b97597 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/net/core/scm.c b/net/core/scm.c index 9b264634acfd..b88f6f9d0b97 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git 
a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 06124872af5b..b7b6b8208f75 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 813e399220a7..19ac2b985485 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index 49d27c556bec..36479ca61e03 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -11,6 +11,8 @@ * published by the Free Software Foundation. */ +#include + #include "ccid.h" #include "ccids/lib/tfrc.h" diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index a47a8c918ee8..9b3ae9922be1 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -23,6 +23,7 @@ /* * This implementation should follow RFC 4341 */ +#include #include "../feat.h" #include "../ccid.h" #include "../dccp.h" diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 972b8dc918d6..df7dd26cf07e 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -22,6 +22,7 @@ * 2 of the License, or (at your option) any later version. */ #include +#include #include "ccid.h" #include "feat.h" diff --git a/net/dccp/input.c b/net/dccp/input.c index 7648f316310f..9ec717426024 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -12,6 +12,7 @@ #include #include +#include #include diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 4071eaf2b361..52ffa1cde15a 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index af3394df63b7..3b11e41a2929 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -14,6 +14,7 @@ #include #include +#include #include #include diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 0d508c359fa9..128b089d3aef 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include diff --git a/net/dccp/output.c b/net/dccp/output.c index d6bb753bf6ad..fc3f436440b4 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include diff --git a/net/dccp/probe.c b/net/dccp/probe.c index f5b3464f1242..078e48d442fd 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include "dccp.h" diff --git a/net/dccp/proto.c b/net/dccp/proto.c index aa4cef374fd0..a0e38d8018f5 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 238af093495b..cead68eb254c 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index e9d48700e83a..4ab96c15166d 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 794b5bf95af1..deb723dba44b 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index 932408dca86d..25a37299bc65 100644 --- a/net/decnet/dn_nsp_in.c +++ 
b/net/decnet/dn_nsp_in.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include #include diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index a65e929ce76c..baeb1eaf011b 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index a7bf03ca0a36..70ebe74027d5 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -66,6 +66,7 @@ #include #include #include +#include #include #include #include diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index b9a33bb5e9cc..f2abd3755690 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 6d2bd3202048..64a7f39e069f 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -14,6 +14,7 @@ */ #include #include +#include #include #include #include diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 71489f69a42c..6112a12578b2 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "dsa_priv.h" diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index cdf2d28a0297..98dfe80b4538 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "dsa_priv.h" #define DSA_HLEN 4 diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index 8f53948cff4f..6f383322ad25 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "dsa_priv.h" #define DSA_HLEN 4 diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index a85c829853c0..d6d7d0add3cb 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "dsa_priv.h" netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev) diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 29b4931aae52..2a5a8053e000 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ethernet/pe2.c b/net/ethernet/pe2.c index d60e15d9365e..eb00796758c3 100644 --- a/net/ethernet/pe2.c +++ b/net/ethernet/pe2.c @@ -3,6 +3,7 @@ #include #include #include +#include #include diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c index bad1c49fd960..79886d546bab 100644 --- a/net/ieee802154/af_ieee802154.c +++ b/net/ieee802154/af_ieee802154.c @@ -28,6 +28,7 @@ #include #include /* For TIOCOUTQ/INQ */ #include +#include #include #include #include diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index 9aac5aee1575..1a3334c2609a 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 33137b99e471..c8097ae2482f 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -23,6 +23,7 @@ */ #include +#include #include #include diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 135c1678fb11..71ee1108d4f8 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -22,6 +22,7 @@ * Maxim Osipov */ +#include #include #include #include diff --git 
a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index 199a2d9d12f9..ed0eab39f531 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -23,6 +23,7 @@ */ #include +#include #include #include #include diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c index 9c9b85c00033..10970ca85748 100644 --- a/net/ieee802154/raw.c +++ b/net/ieee802154/raw.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include diff --git a/net/ieee802154/wpan-class.c b/net/ieee802154/wpan-class.c index 268691256a6d..3d803a1b9fb6 100644 --- a/net/ieee802154/wpan-class.c +++ b/net/ieee802154/wpan-class.c @@ -16,6 +16,7 @@ * */ +#include #include #include #include diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 33b7dffa7732..2ed85714540f 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -86,6 +86,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 987b47dc69ad..880a5ec6dce0 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index c4dd13542802..6e747065c202 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -98,6 +98,7 @@ #include #include #include +#include #ifdef CONFIG_SYSCTL #include #endif diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 1e029dc75455..c97cd9ff697e 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 3feb2b390308..90e3d6379a42 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -50,6 +50,7 @@ #include #include #include +#include #ifdef CONFIG_SYSCTL #include #endif diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 9b3e28ed5240..4f0ed458c883 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 14972017b9c2..4ed7e0dea1bc 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 1af0ea0fb6a2..20f09c5b31e8 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 01ef8ba9025c..59a838795e3e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -71,6 +71,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 4b4c2bcd15db..ac4dec132735 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -74,6 +74,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 63bf298ca109..15d3eeda92f5 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -71,6 +71,7 @@ */ #include +#include #include #include #include diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 1aaa8110d84b..e5fa2ddce320 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index eaf3e2c8646a..a2ca6aed763b 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -19,6 +19,7 @@ 
#include #include #include +#include #include diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index cc94cc2d8b2d..c5af909cf701 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index a2991bc8e32e..af10942b326c 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b59430bc041c..75347ea70ea0 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f78402d097b3..fe381d12ecdd 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index c29de9879fda..f8ab7a380d4a 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -119,6 +119,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 94bf105ef3c9..4c09a31fd140 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 3451799e3dbf..c65f18e0936e 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 644dc43a55de..1e64dabbd232 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 678909281648..067ce9e043dc 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 2f302d3ac9a3..0b27b14dcc9d 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -95,6 +95,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index d0a6092a67be..9d4f6d1340a4 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index c14623fc4d5e..82fb43c5c59e 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index bfe26f32b930..79ca5e70d497 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -8,6 +8,7 @@ #include #include #include +#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("David S. 
Miller "); diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 2855f1f38cbc..e2787048aa0a 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 0886f96c736b..ab828400ed71 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 5113b8f1a379..a0e8bcf04159 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 09a5d3f7cc41..0dbe697f164f 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index c8dc9800d620..55392466daa4 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -13,6 +13,7 @@ #include #include #include +#include #include MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index b9b83464cbf4..294a2a32f293 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 06fb9d11953c..07fb710cd722 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -5,6 +5,7 @@ */ #include #include +#include #include #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index cce2f64e6f21..be45bdc4c602 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -17,6 +17,7 @@ */ #include #include +#include #include MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 4595281c2863..4f8bddb760c9 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 4b6af4bb1f50..4a0c6b548eee 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -8,6 +8,7 @@ * published by the Free Software Foundation. 
*/ #include +#include #include #include #include diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index ab74cc0535e2..26de2c1f7fab 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 0b9c7ce3d6c5..4d85b6e55f29 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 5678e9562c15..c39c9cf6bee6 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -7,6 +7,7 @@ */ #include #include +#include #include #include #include diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index ce154b47f1da..cc6f097fbd5f 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -60,7 +60,6 @@ #include #include #include -#include #include #include #include diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d413b57be9b3..cb562fdd9b9a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -90,6 +90,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index c1bc074f61b7..1cd5c15174b8 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6afb6d8662b2..7a1f1d78893f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -265,6 +265,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 6428b342b164..0ec9bd0ae94f 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -10,6 +10,7 @@ #include #include #include +#include #include int sysctl_tcp_max_ssthresh = 0; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c096a4218b8f..f240f57b2199 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -62,6 +62,7 @@ */ #include +#include #include #include #include diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f4df5f931f36..3c23e70885f4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 4199bc6915c5..5fabff9ac6d6 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f181b78f2385..0dda86e72ad8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -37,6 +37,7 @@ #include #include +#include #include /* People can turn this off for buggy TCP's found in printers etc. 
*/ diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 9bc805df95d2..f8efada580e8 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index b2e6bbccaee1..8a0ab2977f1f 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -19,6 +19,7 @@ */ #include +#include #include int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES; diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c index 3959e0ca456a..3b3813cc80b9 100644 --- a/net/ipv4/tunnel4.c +++ b/net/ipv4/tunnel4.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7af756d0f931..954bbfb39dff 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -95,6 +95,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index f9f922a0ba88..c791bb63203f 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -9,6 +9,7 @@ * */ +#include #include #include #include diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 3444f3b34eca..6f368413eb0e 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -4,6 +4,7 @@ * Copyright (c) 2004-2006 Herbert Xu */ +#include #include #include #include diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 7e567ae5eaab..413054f02aab 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -53,6 +53,7 @@ #include #include #include +#include #ifdef CONFIG_SYSCTL #include #endif diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 6ff73c4c126a..ae404c9a746c 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 37d14e735c27..3192aa02ba5d 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 5ac89025f9de..ee82d4ef26ce 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -26,6 +26,7 @@ #include #include +#include #include #include #include diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index c4f6ca32fa74..b5b07054508a 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index e6f9cdf780fe..622dc7939a1b 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 074f2c084f9f..8a659f92d17a 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index eb9abe24bdf0..3330a4bd6157 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -40,6 +40,7 @@ #include #include #include +#include #ifdef CONFIG_SYSCTL #include diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 3516e6fe2e56..628db24bcf22 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 2f9847924fa5..6b82e02158c6 100644 --- 
a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -26,6 +26,7 @@ #include #include #include +#include #ifdef CONFIG_PROC_FS #include diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index e41eba8aacf1..14e23216eb28 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index e28f9203deca..6aa7ee1295c2 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index dabf108ad811..16c4391f952b 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 138980eec214..2599870747ec 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 27acfb58650a..3e333268db89 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 430454ee5ead..33f60fca7aa7 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index bcd971915969..c483ab9fd67b 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 8bcc4b7db3bf..da0a4d2adc69 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -59,6 +59,7 @@ #include #include #include +#include #ifdef CONFIG_SYSCTL #include #endif diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 7854052be60b..6a68a74d14a3 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index dd8afbaf00a8..39b50c3768e8 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -15,6 +15,7 @@ * 2 of the License, or (at your option) any later version. 
*/ +#include #include #include #include diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 36b72cafc227..d6fc9aff3163 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -12,6 +12,7 @@ #include #include #include +#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 7844e557c0ec..6a102b57f356 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -10,6 +10,7 @@ */ #include #include +#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index b9cf7cd61923..5b9926a011bd 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -5,6 +5,7 @@ */ #include #include +#include #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 0824d865aa9b..91aa2b4d83c9 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -17,6 +17,7 @@ */ #include #include +#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("James Morris redhat.com>"); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index f1171b744650..dd5b9bd61c62 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ed31c37c6e39..8763b1a0814a 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index a555156e9779..6d4292ff5854 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0d7713c5c206..c2438e8cb9d0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index b1eea811be48..5abae10cd884 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index f841d93bf987..fa1d8f4e0051 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 9b6dbba80d31..c92ebe8f80d5 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index e17bc1dfc1a4..fc3c86a47452 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3c0c9c755c92..c177aea88c0b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 3927832227b9..b809812c8d30 100644 --- 
a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -5,6 +5,7 @@ * Copyright (c) 2004-2006 Herbert Xu */ +#include #include #include #include diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index fa85a7d22dc4..2ce3a8278f26 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -23,6 +23,7 @@ */ #include #include +#include #include #include #include diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index f9759b54a6de..da3d21c41d90 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c index e16c11423527..30f4519b092f 100644 --- a/net/ipx/ipx_route.c +++ b/net/ipx/ipx_route.c @@ -9,6 +9,7 @@ #include #include +#include #include #include diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 10093aab6173..2a4efcea3423 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include diff --git a/net/irda/discovery.c b/net/irda/discovery.c index a6f99b5a1499..c1c8ae939126 100644 --- a/net/irda/discovery.c +++ b/net/irda/discovery.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include diff --git a/net/irda/ircomm/ircomm_core.c b/net/irda/ircomm/ircomm_core.c index 018c92941aba..e97082017f4f 100644 --- a/net/irda/ircomm/ircomm_core.c +++ b/net/irda/ircomm/ircomm_core.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include diff --git a/net/irda/ircomm/ircomm_lmp.c b/net/irda/ircomm/ircomm_lmp.c index 7ba96618660e..08fb54dc8c41 100644 --- a/net/irda/ircomm/ircomm_lmp.c +++ b/net/irda/ircomm/ircomm_lmp.c @@ -31,6 +31,7 @@ ********************************************************************/ #include +#include #include #include diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c index d57aefd9fe77..e2e893b474e9 100644 --- a/net/irda/ircomm/ircomm_param.c +++ b/net/irda/ircomm/ircomm_param.c @@ -28,6 +28,7 @@ * ********************************************************************/ +#include #include #include diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 8b85d774e47f..faa82ca2dfdc 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c index bf92e1473447..25cc2e695158 100644 --- a/net/irda/irda_device.c +++ b/net/irda/irda_device.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include diff --git a/net/irda/iriap.c b/net/irda/iriap.c index 294e34d3517c..79a1e5a23e10 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include diff --git a/net/irda/iriap_event.c b/net/irda/iriap_event.c index a301cbd93785..703774e29e32 100644 --- a/net/irda/iriap_event.c +++ b/net/irda/iriap_event.c @@ -24,6 +24,8 @@ * ********************************************************************/ +#include + #include #include #include diff --git a/net/irda/irias_object.c b/net/irda/irias_object.c index 99ebb96f1386..f07ed9fd5792 100644 --- a/net/irda/irias_object.c +++ b/net/irda/irias_object.c @@ -22,6 +22,7 @@ * ********************************************************************/ +#include #include #include #include diff --git a/net/irda/irlan/irlan_client.c b/net/irda/irlan/irlan_client.c index 42f7d960d055..7ed3af957935 100644 --- 
a/net/irda/irlan/irlan_client.c +++ b/net/irda/irlan/irlan_client.c @@ -28,6 +28,7 @@ #include #include +#include #include #include #include diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c index e486dc89ea59..a788f9e9427d 100644 --- a/net/irda/irlan/irlan_common.c +++ b/net/irda/irlan/irlan_common.c @@ -27,6 +27,7 @@ #include #include +#include #include #include #include diff --git a/net/irda/irlan/irlan_provider.c b/net/irda/irlan/irlan_provider.c index 3f81f81b2dfa..5cf5e6c872bb 100644 --- a/net/irda/irlan/irlan_provider.c +++ b/net/irda/irlan/irlan_provider.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c index 94a9884d7146..d434c8880745 100644 --- a/net/irda/irlap_event.c +++ b/net/irda/irlap_event.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c index 7af2e74deda8..688222cbf55b 100644 --- a/net/irda/irlap_frame.c +++ b/net/irda/irlap_frame.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include diff --git a/net/irda/irnet/irnet_irda.c b/net/irda/irnet/irnet_irda.c index b26dee784aba..df18ab4b6c5e 100644 --- a/net/irda/irnet/irnet_irda.c +++ b/net/irda/irnet/irnet_irda.c @@ -11,6 +11,7 @@ #include "irnet_irda.h" /* Private header */ #include #include +#include #include /* diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 6b3602de359a..6a1a202710c5 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -14,6 +14,7 @@ */ #include +#include #include #include "irnet_ppp.h" /* Private header */ /* Please put other headers in irnet.h - Thanks */ diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c index 69b5b75f5431..6c7c4b92e4f8 100644 --- a/net/irda/irnetlink.c +++ b/net/irda/irnetlink.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include diff --git a/net/irda/irqueue.c b/net/irda/irqueue.c index ba01938becb5..849aaf0dabb5 100644 --- a/net/irda/irqueue.c +++ b/net/irda/irqueue.c @@ -192,6 +192,7 @@ * Jean II */ #include +#include #include #include diff --git a/net/irda/irttp.c b/net/irda/irttp.c index 9cb79f95bf63..47db1d8a0d92 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include diff --git a/net/key/af_key.c b/net/key/af_key.c index 344145f23c34..ba9a3fcc2fed 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c index bda96d18fd98..d5d8d555c410 100644 --- a/net/lapb/lapb_iface.c +++ b/net/lapb/lapb_iface.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/net/lapb/lapb_in.c b/net/lapb/lapb_in.c index 6762e7c751eb..21904a002449 100644 --- a/net/lapb/lapb_in.c +++ b/net/lapb/lapb_in.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/net/lapb/lapb_out.c b/net/lapb/lapb_out.c index 339cc5f2684f..c75a79540f9f 100644 --- a/net/lapb/lapb_out.c +++ b/net/lapb/lapb_out.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/net/lapb/lapb_subr.c b/net/lapb/lapb_subr.c index b827f47ac133..43a2a7fb327b 100644 --- a/net/lapb/lapb_subr.c +++ b/net/lapb/lapb_subr.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/net/llc/af_llc.c 
b/net/llc/af_llc.c index e35d907fba2c..2db6a9f75913 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 86d6985b9d49..ea225bd2672c 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -18,6 +18,7 @@ * See the GNU General Public License for more details. */ #include +#include #include #include #include diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index a12144da7974..ba137a6a224d 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c index a89917130a7b..25c31c0a3fdb 100644 --- a/net/llc/llc_if.c +++ b/net/llc/llc_if.c @@ -11,6 +11,7 @@ * * See the GNU General Public License for more details. */ +#include #include #include #include diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index 57ad974e4d94..f99687439139 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -12,6 +12,7 @@ * See the GNU General Public License for more details. */ #include +#include #include #include #include diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index ad6e6e1cf22f..a432f0ec051c 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -23,6 +23,7 @@ #include #include #include +#include static int llc_mac_header_len(unsigned short devtype) { diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 83da13339490..e4dae0244d76 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -13,6 +13,7 @@ */ #include #include +#include #include #include #include diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index a978e666ed6f..f9516a27e233 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -14,6 +14,7 @@ */ #include +#include #include #include "ieee80211_i.h" #include "driver-ops.h" diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 5538e1b4a697..96d25348aa59 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -14,6 +14,7 @@ */ #include +#include #include #include "ieee80211_i.h" #include "driver-ops.h" diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index b7116ef84a3b..edc872e22c9b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index d12e743cb4e1..97c9e46e859e 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -9,6 +9,7 @@ */ #include +#include #include "ieee80211_i.h" #include "key.h" #include "debugfs.h" diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index b4ddb2f83914..83d4289d954b 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index f3e942486749..e2976da4e0d9 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 0793d7a8d743..e08fa8eda1b3 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -10,6 +10,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. 
*/ +#include #include #include #include diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 8160d9c5372e..e8f6e3b252d8 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "ieee80211_i.h" #include "driver-ops.h" diff --git a/net/mac80211/led.c b/net/mac80211/led.c index 162a643f16b6..063aad944246 100644 --- a/net/mac80211/led.c +++ b/net/mac80211/led.c @@ -8,6 +8,7 @@ /* just for IFNAMSIZ */ #include +#include #include "led.h" void ieee80211_led_rx(struct ieee80211_local *local) diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 61080c5fad50..58e3e3a61d99 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -8,6 +8,7 @@ * published by the Free Software Foundation. */ +#include #include #include "ieee80211_i.h" #include "mesh.h" diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index ce84237ebad3..122c11380ffe 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -7,6 +7,7 @@ * published by the Free Software Foundation. */ +#include #include "mesh.h" #ifdef CONFIG_MAC80211_VERBOSE_MHWMP_DEBUG diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 2312efe04c62..181ffd6efd81 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 1a29c4a8139e..7b7080e2b49f 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -6,6 +6,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#include #include #include #include "ieee80211_i.h" diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index be5f723d643a..c8cd169fc10e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 0b299d236fa1..6d0bd198af19 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -10,6 +10,7 @@ #include #include +#include #include "rate.h" #include "ieee80211_i.h" #include "debugfs.h" diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 6e5d68b4e427..818abfae9007 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include "rate.h" #include "rc80211_minstrel.h" diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c index a715d9454f64..0e1f12b1b6dd 100644 --- a/net/mac80211/rc80211_minstrel_debugfs.c +++ b/net/mac80211/rc80211_minstrel_debugfs.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include "rc80211_minstrel.h" diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c index 2652a374974e..aeda65466f3e 100644 --- a/net/mac80211/rc80211_pid_algo.c +++ b/net/mac80211/rc80211_pid_algo.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "rate.h" #include "mesh.h" diff --git a/net/mac80211/rc80211_pid_debugfs.c b/net/mac80211/rc80211_pid_debugfs.c index 45667054a5f3..47438b4a9af5 100644 --- a/net/mac80211/rc80211_pid_debugfs.c +++ b/net/mac80211/rc80211_pid_debugfs.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include "rate.h" diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index b5c48de81d8b..f0accf622cd7 100644 --- 
a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index b822dce97867..85507bd9e341 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -14,6 +14,7 @@ #include #include +#include #include #include "ieee80211_i.h" diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index 5d745f2d7236..5f3a4113bda1 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/net/mac80211/work.c b/net/mac80211/work.c index 1e1ea3007b06..15e1ba931b87 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index f4971cd45c64..0adbcc941ac9 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -9,10 +9,10 @@ #include #include -#include #include #include #include +#include #include #include diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 60ec4e4badaa..78b505d33bfb 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 3c7e42735b60..1cb0e834f8ff 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 60bb41a8d8d4..d8f7e8ef67b4 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -32,6 +32,7 @@ #include #include #include /* for proc_net_* */ +#include #include #include #include diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 44590887a92c..1cd6e3fd058b 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 7ee9c3426f44..36dc1d88c2fa 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index fe3e18834b91..95fd0d14200b 100644 --- a/net/netfilter/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -39,6 +39,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include +#include #include #include #include diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 702b53ca937c..ff28801962e0 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -17,7 +17,6 @@ #include #include -#include #include #include #include diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 73f38ea98f25..2c7f185dfae4 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 1b9370db2305..94a45213faa6 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -43,6 +43,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include +#include #include #include #include diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c 
b/net/netfilter/ipvs/ip_vs_lblcr.c index caa58fa1438a..535dc2b419d8 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -46,6 +46,7 @@ #include #include #include +#include /* for sysctl */ #include diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 0e584553819d..7fc49f4cf5ad 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 8e6cfd36e6f0..e6cc174fbc06 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -36,6 +36,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include +#include #include #include #include diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index 3c115fc19784..30db633f88f1 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -23,6 +23,7 @@ #include #include +#include #include #include diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 223b5018c7dc..e450cd6f4eb5 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -17,6 +17,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include +#include #include /* for tcphdr */ #include #include /* for csum_tcpudp_magic */ diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index 018f90db511c..ab81b380eae6 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -9,6 +9,7 @@ */ #include +#include #include #include diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index 07d9d8857e5d..372e80f07a81 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index d5a9bcd7d61b..f516961a83b4 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index f0732aa18e4f..2ae3169e7633 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index a1c8dd917e12..a487c8038044 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 4509fa6726f8..59e1a4cd4e8b 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 8bd98c84f77e..7673930ca342 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 569410a85953..afc52f2ee4ac 100644 --- 
a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 1a4568bf7ea5..a44fa75b5178 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 9a2815549375..5292560d6d4a 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index d899b1a69940..cf616e55ca41 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index dcfecbb81c46..d9e27734b2a2 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 24a42efe62ef..faa8eb3722b9 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index ba095fd014e5..c49ef219899e 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index d9b8fb8ab340..203643fb2c52 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 7ba4abc405c9..e70a6ef1f4f2 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 0a12cedfe9e3..665f5beef6ad 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 61c50fa84703..ee18b231b950 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c index 8ff7843bb921..3271c8e52153 100644 --- a/net/netfilter/xt_LED.c +++ b/net/netfilter/xt_LED.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 87ae97e5516f..d16d55df4f61 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 0e357ac9a2a8..c5f4b9919e9a 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -11,6 +11,7 @@ #include #include 
#include +#include #include #include #include diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 26997ce90e48..388ca4596098 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index 0989f29ade2e..395af5943ffd 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index a0ca5339af41..e5d7e1ffb1a4 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -6,6 +6,7 @@ * published by the Free Software Foundation. */ +#include #include #include #include diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index 390b7d09fe51..2d5562498c43 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -4,6 +4,7 @@ * Sam Johnston */ #include +#include #include #include diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 971d172afece..834b736857cb 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index d8c0f8f1a78e..937ce0633e99 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index b4d774111311..96801ffd8af8 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -7,6 +7,7 @@ * published by the Free Software Foundation. 
*/ +#include #include #include #include diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index e639298bc9c8..5f14c8462e30 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 0bfeaab88ef5..016ab9c75ebd 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 6ce00205f342..1b83e0009d8d 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -30,6 +30,7 @@ #include #include +#include #include #include #include diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 8203623e65ad..998e85e895d0 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 852d9d7976b9..d7ea2cf390b7 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c index 68706b4e3bf8..a3fd75ac3fa5 100644 --- a/net/netlabel/netlabel_user.c +++ b/net/netlabel/netlabel_user.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index a4b6e148c5de..06438fa2b1e5 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index a249127020a5..fa07f044b599 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 7aa11b01b2e2..64e6dde9749d 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -19,6 +19,7 @@ #include #include #include /* For the statistics structure. 
*/ +#include #include #include diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c index 68176483617f..6d4ef6d65b3d 100644 --- a/net/netrom/nr_in.c +++ b/net/netrom/nr_in.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c index f324d5df4186..94d4e922af53 100644 --- a/net/netrom/nr_loopback.c +++ b/net/netrom/nr_loopback.c @@ -7,6 +7,7 @@ * Copyright Tomi Manninen OH2BNS (oh2bns@sral.fi) */ #include +#include #include #include #include diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c index e3e6c44e1890..607fddb4fdbb 100644 --- a/net/netrom/nr_out.c +++ b/net/netrom/nr_out.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 5cc648012f50..44059d0c8dd1 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c index 04e7d0d2fd8f..6a947ae50dbd 100644 --- a/net/netrom/nr_subr.c +++ b/net/netrom/nr_subr.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 1612d417d10c..cc90363d7e7a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #include #include diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 526d0273991a..73aee7f2fcdc 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -25,6 +25,7 @@ #include #include +#include #include #include diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index 387197b579b1..1bd38db4fe1e 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -24,6 +24,7 @@ */ #include +#include #include #include #include diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 360cf377693e..e2a95762abd3 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -23,6 +23,7 @@ */ #include +#include #include #include #include diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 5c6ae0c701c0..9b4ced6e0968 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -25,6 +25,7 @@ #include #include +#include #include #include #include diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index fe2e7088ee07..58b3b1f991ed 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 69c8b826a0ce..c785bfd0744f 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -23,6 +23,7 @@ * 02110-1301 USA */ +#include #include #include #include diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 853c52be781f..f81862baf4d0 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/net/rds/cong.c b/net/rds/cong.c index 6d06cac2649c..f1da27ceb064 100644 --- a/net/rds/cong.c +++ b/net/rds/cong.c @@ -30,6 +30,7 @@ * SOFTWARE. 
* */ +#include #include #include diff --git a/net/rds/connection.c b/net/rds/connection.c index 278f607ab603..7619b671ca28 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -32,6 +32,7 @@ */ #include #include +#include #include #include "rds.h" diff --git a/net/rds/ib.c b/net/rds/ib.c index 3b8992361042..8f2d6dd7700a 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "rds.h" #include "ib.h" diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 647cb8ffc39b..88d0856cb797 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -32,6 +32,7 @@ */ #include #include +#include #include #include "rds.h" diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 4b0da865a72c..059989fdb7d7 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -31,6 +31,7 @@ * */ #include +#include #include "rds.h" #include "rdma.h" diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 04dc0d3f3c95..c7dd11b835f0 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -31,6 +31,7 @@ * */ #include +#include #include #include #include diff --git a/net/rds/info.c b/net/rds/info.c index 814a91a6f4a7..c45c4173a44d 100644 --- a/net/rds/info.c +++ b/net/rds/info.c @@ -32,6 +32,7 @@ */ #include #include +#include #include #include "rds.h" diff --git a/net/rds/iw.c b/net/rds/iw.c index b28fa8525b24..c8f3d3525cb9 100644 --- a/net/rds/iw.c +++ b/net/rds/iw.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "rds.h" #include "iw.h" diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index 394cf6b4d0aa..3e9460f935d8 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -32,6 +32,7 @@ */ #include #include +#include #include #include "rds.h" diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c index 9eda11cca956..13dc1862d862 100644 --- a/net/rds/iw_rdma.c +++ b/net/rds/iw_rdma.c @@ -31,6 +31,7 @@ * */ #include +#include #include "rds.h" #include "rdma.h" diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c index 54af7d6b92da..da43ee840ca3 100644 --- a/net/rds/iw_recv.c +++ b/net/rds/iw_recv.c @@ -31,6 +31,7 @@ * */ #include +#include #include #include #include diff --git a/net/rds/loop.c b/net/rds/loop.c index 4a61997f554d..0d7a159158b8 100644 --- a/net/rds/loop.c +++ b/net/rds/loop.c @@ -31,6 +31,7 @@ * */ #include +#include #include #include "rds.h" diff --git a/net/rds/message.c b/net/rds/message.c index 73e600ffd87f..9a1d67e001ba 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -31,6 +31,7 @@ * */ #include +#include #include "rds.h" #include "rdma.h" diff --git a/net/rds/page.c b/net/rds/page.c index 36790122dfd4..595a952d4b17 100644 --- a/net/rds/page.c +++ b/net/rds/page.c @@ -31,6 +31,7 @@ * */ #include +#include #include "rds.h" diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 4c64daa1f5d5..5ce9437cad67 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -31,6 +31,7 @@ * */ #include +#include #include #include /* for DMA_*_DEVICE */ diff --git a/net/rds/recv.c b/net/rds/recv.c index b426d67f760c..e2a2b9344f7b 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -31,6 +31,7 @@ * */ #include +#include #include #include diff --git a/net/rds/send.c b/net/rds/send.c index b2fccfc20769..f04b929ded92 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -31,6 +31,7 @@ * */ #include +#include #include #include #include diff --git a/net/rds/tcp.c b/net/rds/tcp.c index b5198aee45d3..babf4577ff7d 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -31,6 +31,7 @@ * */ #include +#include #include #include diff --git a/net/rds/tcp_listen.c 
b/net/rds/tcp_listen.c index 53cb1b54165d..975183fe6950 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -31,6 +31,7 @@ * */ #include +#include #include #include diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index c00dafffbb5a..e08ec912d8b0 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -31,6 +31,7 @@ * */ #include +#include #include #include "rds.h" diff --git a/net/rfkill/core.c b/net/rfkill/core.c index c218e07e5caf..a9fa86f65983 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "rfkill.h" diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index e90b9b6c16ae..4fb711a035f4 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c index 424b893d1450..178ff4f73c85 100644 --- a/net/rose/rose_dev.c +++ b/net/rose/rose_dev.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c index 5ef5f6988a2e..a750a28e0221 100644 --- a/net/rose/rose_link.c +++ b/net/rose/rose_link.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c index 968e8bac1b5d..ae4a9d99aec7 100644 --- a/net/rose/rose_loopback.c +++ b/net/rose/rose_loopback.c @@ -7,6 +7,7 @@ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) */ #include +#include #include #include #include diff --git a/net/rose/rose_out.c b/net/rose/rose_out.c index 69820f93414b..4ebf33afbe47 100644 --- a/net/rose/rose_out.c +++ b/net/rose/rose_out.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 70a0b3b4b4d2..cbc244a128bd 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c index b05108f382da..1734abba26a2 100644 --- a/net/rose/rose_subr.c +++ b/net/rose/rose_subr.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 287b1415cee9..c060095b27ce 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include diff --git a/net/rxrpc/ar-accept.c b/net/rxrpc/ar-accept.c index 2d744f22a9a1..6d79310fcaae 100644 --- a/net/rxrpc/ar-accept.c +++ b/net/rxrpc/ar-accept.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c index b4a220977031..2714da167fb8 100644 --- a/net/rxrpc/ar-ack.c +++ b/net/rxrpc/ar-ack.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/net/rxrpc/ar-call.c b/net/rxrpc/ar-call.c index bc0019f704fe..909d092de9f4 100644 --- a/net/rxrpc/ar-call.c +++ b/net/rxrpc/ar-call.c @@ -9,6 +9,7 @@ * 2 of the License, or (at your option) any later version. 
*/ +#include #include #include #include diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c index 9f1ce841a0bb..4106ca95ec86 100644 --- a/net/rxrpc/ar-connection.c +++ b/net/rxrpc/ar-connection.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c index f98c8027e5c1..89315009bab1 100644 --- a/net/rxrpc/ar-input.c +++ b/net/rxrpc/ar-input.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c index 74697b200496..5ee16f0353fe 100644 --- a/net/rxrpc/ar-key.c +++ b/net/rxrpc/ar-key.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/net/rxrpc/ar-local.c b/net/rxrpc/ar-local.c index 807535ff29b5..87f7135d238b 100644 --- a/net/rxrpc/ar-local.c +++ b/net/rxrpc/ar-local.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include "ar-internal.h" diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index cc9102c5b588..5f22e263eda7 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c index edc026c1eb76..f0f85b0123f7 100644 --- a/net/rxrpc/ar-peer.c +++ b/net/rxrpc/ar-peer.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/net/rxrpc/ar-transport.c b/net/rxrpc/ar-transport.c index 0936e1acc30e..5e0226fe587e 100644 --- a/net/rxrpc/ar-transport.c +++ b/net/rxrpc/ar-transport.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include "ar-internal.h" diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 713ac593e2e9..7635107726ce 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 64f5e328cee9..d8e0171d9a4b 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 082c520b0def..da27a170b6b7 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index d329170243cb..c046682054eb 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 6b0359a500e6..b7dcfedc802e 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 723964c3ee4f..654f73dff7c1 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 8daa1ebc7413..622ca809c15c 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 3725d8fa29db..f082b27ff46d 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff 
--git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 4e2bda854119..efd4f95fd050 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 7f27d2c15e08..221180384fd7 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index e054c62857e1..6ed61b10e002 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 6d6e87585fb1..93b0a7b6f9b4 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -19,6 +19,7 @@ */ #include +#include #include #include #include diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index dd872d5383ef..694dcd85dec8 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index e806f2314b5e..20ef330bb918 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 07372f60bee3..17c5dfc67320 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -31,6 +31,7 @@ */ #include +#include #include #include #include diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 24dce8b648a4..3bcac8aa333c 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -58,6 +58,7 @@ * only available if that subsystem is enabled in the kernel. 
*/ +#include #include #include #include diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c index 370a1b2ea317..1a4176aee6e5 100644 --- a/net/sched/em_nbyte.c +++ b/net/sched/em_nbyte.c @@ -9,6 +9,7 @@ * Authors: Thomas Graf */ +#include #include #include #include diff --git a/net/sched/em_text.c b/net/sched/em_text.c index 853c5ead87fd..763253257411 100644 --- a/net/sched/em_text.c +++ b/net/sched/em_text.c @@ -9,6 +9,7 @@ * Authors: Thomas Graf */ +#include #include #include #include diff --git a/net/sched/ematch.c b/net/sched/ematch.c index aab59409728b..e782bdeedc58 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -82,6 +82,7 @@ */ #include +#include #include #include #include diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 6cd491013b50..145268ca57cf 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index ab82f145f689..fcbb86a486a2 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -3,6 +3,7 @@ /* Written 1998-2000 by Werner Almesberger, EPFL ICA */ #include +#include #include #include #include diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 3846d65bc03e..28c01ef5abc8 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index a65604f8f2b8..b74046a95397 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index d303daa45d49..63d41f86679c 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 4b0a6cc44c77..5948bafa8ce2 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5173c1e1b19c..ff4dd53eeff0 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -24,6 +24,7 @@ #include #include #include +#include #include /* Main transmission queue. 
*/ diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 40408d595c08..51dcc2aa5c92 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -18,6 +18,7 @@ * For all the glorious comments look at include/net/red.h */ +#include #include #include #include diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 508cf5f3a6d5..0b52b8de562c 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index d1dea3d5dc92..b2aba3f5e6fa 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 7db2c88ce585..c50876cd8704 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -18,6 +18,7 @@ */ #include +#include #include #include #include diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index d8b10e054627..4714ff162bbd 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -14,6 +14,7 @@ */ #include +#include #include #include #include diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 93285cecb246..81672e0c1b25 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -12,6 +12,7 @@ */ #include +#include #include #include #include diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index cb21380c0605..c5a9ac566007 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index db69637069c4..3415b6ce1c0a 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 56935bbc1496..86366390038a 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -34,6 +34,7 @@ * be incorporated into the next SCTP release. */ +#include #include #include #include diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index bef133731683..faf71d179e46 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -43,6 +43,7 @@ */ #include +#include #include #include #include diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 8e4320040f05..3eab6db59a37 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sctp/input.c b/net/sctp/input.c index 3d74b264ea22..2a570184e5a9 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -53,6 +53,7 @@ #include #include #include /* For struct timeval */ +#include #include #include #include diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index bbf5dd2a97c4..ccb6dc48d15b 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -46,6 +46,7 @@ #include #include #include +#include /* Initialize an SCTP inqueue. 
*/ void sctp_inq_init(struct sctp_inq *queue) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 1d7ac70ba39f..9fb5d37c37ad 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include diff --git a/net/sctp/output.c b/net/sctp/output.c index 7c5589363433..fad261d41ec2 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 229690f02a1d..abfc0b8dee74 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -50,6 +50,7 @@ #include /* For struct list_head */ #include #include +#include #include /* For skb_set_owner_w */ #include diff --git a/net/sctp/primitive.c b/net/sctp/primitive.c index 8cb4f060bce6..534c7eae9d15 100644 --- a/net/sctp/primitive.c +++ b/net/sctp/primitive.c @@ -50,6 +50,7 @@ #include #include #include /* For struct timeval */ +#include #include #include #include diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index e771690f6d5d..a56f98e82f92 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 9e732916b671..17cb400ecd6a 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 500886bda9b4..4c5bed9af4e3 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 47bc20d3a85b..abf601a1b847 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sctp/socket.c b/net/sctp/socket.c index dfc5c127efd4..007e8baba089 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -67,6 +67,7 @@ #include #include #include +#include #include #include diff --git a/net/sctp/ssnmap.c b/net/sctp/ssnmap.c index 737d330e5ffc..442ad4ed6315 100644 --- a/net/sctp/ssnmap.c +++ b/net/sctp/ssnmap.c @@ -37,6 +37,7 @@ */ #include +#include #include #include diff --git a/net/sctp/transport.c b/net/sctp/transport.c index b827d21dbe54..be4d63d5a5cc 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -48,6 +48,7 @@ * be incorporated into the next SCTP release. */ +#include #include #include #include diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index 9bd64565021a..747d5412c463 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -42,6 +42,7 @@ * be incorporated into the next SCTP release. */ +#include #include #include #include diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 8b3560fd876d..aa72e89c3ee1 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -43,6 +43,7 @@ * be incorporated into the next SCTP release. */ +#include #include #include #include diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 7b23803343cc..3a448536f0b6 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -41,6 +41,7 @@ * be incorporated into the next SCTP release. 
*/ +#include #include #include #include diff --git a/net/socket.c b/net/socket.c index f55ffe9f8c87..5e8d0af3c0e7 100644 --- a/net/socket.c +++ b/net/socket.c @@ -87,6 +87,7 @@ #include #include #include +#include #include #include diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index f845d9d72f73..1419d0cdbbac 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -18,6 +18,7 @@ #include #include +#include #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index bf88bf8e9365..8f623b0f03dd 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -5,6 +5,7 @@ */ #include +#include #include #include #include diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c index c0ba39c4f5f2..310b78e99456 100644 --- a/net/sunrpc/auth_gss/gss_generic_token.c +++ b/net/sunrpc/auth_gss/gss_generic_token.c @@ -33,7 +33,6 @@ #include #include -#include #include #include #include diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index c93fca204558..e9b636176687 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index b8f42ef7178e..88fe6e75ed7e 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -59,7 +59,6 @@ */ #include -#include #include #include #include diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c index 17562b4c35f6..6331cd6866ec 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c +++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c @@ -32,7 +32,6 @@ */ #include -#include #include #include diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index 066ec73c84d6..ce6c247edad0 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -58,7 +58,6 @@ */ #include -#include #include #include #include diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index ae8e69b59c4c..a6e905637e03 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -1,5 +1,4 @@ #include -#include #include #include #include diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c index c832712f8d55..5a3a65a0e2b4 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_seal.c +++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c @@ -34,7 +34,6 @@ */ #include -#include #include #include #include diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index e34bc531fcb9..b81e790ef9f4 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -37,6 +37,7 @@ * */ +#include #include #include #include diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 46b2647c5bd2..aac2f8b4ee21 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -6,6 +6,7 @@ * Copyright (C) 1996, Olaf Kirch */ +#include #include #include #include diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 553621fb2c41..cf06af3b63c6 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -22,6 +22,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
******************************************************************************/ #include +#include #include #ifdef RPC_DEBUG diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 3e3772d8eb92..121105355f60 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index a661a3acb37e..10b4319ebbca 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 1b4e6791ecf3..5785d2037f45 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -13,6 +13,7 @@ */ #include +#include #include #include diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 8420a4205b76..d9017d64597e 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 8f0f1fb3dc52..061b2e0f9118 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index afdcb0459a83..207311610988 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 8bd690c48b69..2763fde88499 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index 5b8a8ff93a25..d718b8fa9525 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -40,6 +40,7 @@ */ #include #include +#include #include #include #include diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 3fa5751af0ec..fd90eb89842b 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index f96c2fe6137b..187257b1d880 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -49,6 +49,7 @@ #include #include +#include #include #include "xprt_rdma.h" diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 2209aa87d899..27015c6d8eb5 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -48,6 +48,7 @@ */ #include /* for Tavor hack below */ +#include #include "xprt_rdma.h" diff --git a/net/tipc/core.h b/net/tipc/core.h index a881f92a8537..c58a1d16563a 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -56,6 +56,7 @@ #include #include #include +#include #include /* diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 524ba5696d4d..6230d16020c4 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #define MAX_ETH_BEARERS 2 diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 4b235fc1c70f..cfb20b80b3a1 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -40,9 +40,9 @@ #include #include #include -#include #include #include +#include #include #include #include diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 
19c17e4a0c8b..14c22c3768da 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -74,7 +74,6 @@ #include #include #include -#include #include #include #include diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c index d095c7be10d0..397cffebb3b6 100644 --- a/net/unix/sysctl_net_unix.c +++ b/net/unix/sysctl_net_unix.c @@ -10,6 +10,7 @@ */ #include +#include #include #include diff --git a/net/wimax/op-msg.c b/net/wimax/op-msg.c index 7718657e93dc..d5b7c3779c43 100644 --- a/net/wimax/op-msg.c +++ b/net/wimax/op-msg.c @@ -72,6 +72,7 @@ * wimax_msg_send() */ #include +#include #include #include #include diff --git a/net/wimax/stack.c b/net/wimax/stack.c index 813e1eaea29b..1ed65dbdab03 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -51,6 +51,7 @@ * wimax_rfkill_rm() */ #include +#include #include #include #include diff --git a/net/wireless/core.c b/net/wireless/core.c index 7fdb9409ad2a..6ac70c101523 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c index 2e4895615037..a4991a3efec0 100644 --- a/net/wireless/debugfs.c +++ b/net/wireless/debugfs.c @@ -9,6 +9,7 @@ * published by the Free Software Foundation. */ +#include #include "core.h" #include "debugfs.h" diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 6ef5a491fb4b..6a5acf750174 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -6,6 +6,7 @@ #include #include +#include #include #include "wext-compat.h" #include "nl80211.h" diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 62bc8855e123..22139fa46115 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e447db04cf76..030cf153bea2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/net/wireless/reg.c b/net/wireless/reg.c index ed89c59bb431..b7604b823f46 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -33,6 +33,7 @@ * */ #include +#include #include #include #include diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 978cac3414b5..a026c6d56bd3 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -4,6 +4,7 @@ * Copyright 2008 Johannes Berg */ #include +#include #include #include #include diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 17fde0da1b08..f4dfd5f5f2ea 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include diff --git a/net/wireless/util.c b/net/wireless/util.c index be2ab8c59e3a..d3574a4eb3ba 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -5,6 +5,7 @@ */ #include #include +#include #include #include #include "core.h" diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 9ab51838849e..a60a2773b497 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include "wext-compat.h" diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 5e1656bdf23b..4f5a47091fde 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/net/wireless/wext-priv.c b/net/wireless/wext-priv.c 
index a3c2277de9e5..3feb28e41c53 100644 --- a/net/wireless/wext-priv.c +++ b/net/wireless/wext-priv.c @@ -7,6 +7,7 @@ * * (As all part of the Linux kernel, this file is GPL) */ +#include #include #include #include diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index 5615a8802536..d5c6140f4cb8 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -7,6 +7,7 @@ #include #include +#include #include #include "wext-compat.h" #include "nl80211.h" diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 9796f3ed1edb..e56f711baccc 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index 52e304212241..b9ef682230a0 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/net/x25/x25_forward.c b/net/x25/x25_forward.c index 056a55f3a871..25a810793968 100644 --- a/net/x25/x25_forward.c +++ b/net/x25/x25_forward.c @@ -10,6 +10,7 @@ */ #include #include +#include #include LIST_HEAD(x25_forward_list); diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index 96d922783547..a31b3b9e5966 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c @@ -23,6 +23,7 @@ * i-frames. */ +#include #include #include #include diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c index e4e1b6e49538..73e7b954ad28 100644 --- a/net/x25/x25_link.c +++ b/net/x25/x25_link.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/net/x25/x25_out.c b/net/x25/x25_out.c index 2b96b52114d6..52351a26b6fc 100644 --- a/net/x25/x25_out.c +++ b/net/x25/x25_out.c @@ -22,6 +22,7 @@ * needed cleaned seq-number fields. */ +#include #include #include #include diff --git a/net/x25/x25_route.c b/net/x25/x25_route.c index b95fae9ab393..97d77c532d8c 100644 --- a/net/x25/x25_route.c +++ b/net/x25/x25_route.c @@ -19,6 +19,7 @@ #include #include +#include #include LIST_HEAD(x25_route_list); diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c index 352b32d216fc..dc20cf12f39b 100644 --- a/net/x25/x25_subr.c +++ b/net/x25/x25_subr.c @@ -23,6 +23,7 @@ * restriction on response. */ +#include #include #include #include diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index 0fc5ff66d1fa..fc91ad7ee26e 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -17,11 +17,11 @@ #include #include -#include #include #include #include #include +#include #include #include #include diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index b9fe13138c07..6a329158bdfa 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 17d5b96f2fc8..add77ecb8ac4 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c index 2c4d6cdcba49..05640bc9594b 100644 --- a/net/xfrm/xfrm_sysctl.c +++ b/net/xfrm/xfrm_sysctl.c @@ -1,4 +1,5 @@ #include +#include #include #include -- cgit v1.2.2 From 368d06f5b0eefcbf37d677d3b65381310a251f03 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Tue, 16 Mar 2010 15:40:59 -0400 Subject: wireless: convert reg_regdb_search_lock to mutex Stanse discovered that kmalloc is being called with GFP_KERNEL while holding this spinlock. 
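The problematic pattern can be sketched minimally as follows (illustrative only, not the actual reg.c code; the lock names and allocation size here are made up): GFP_KERNEL allocations are allowed to sleep, sleeping is forbidden inside a spinlocked (atomic) section, but a mutex holder may sleep.

#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);
static DEFINE_MUTEX(example_mutex);

/* Broken: kmalloc(..., GFP_KERNEL) may sleep, but a spinlocked section must stay atomic. */
static void broken_alloc(void)
{
	void *p;

	spin_lock(&example_lock);
	p = kmalloc(128, GFP_KERNEL);	/* BUG: possible sleep in atomic context */
	spin_unlock(&example_lock);
	kfree(p);
}

/* Fixed: a mutex holder is allowed to sleep, so the GFP_KERNEL allocation is legal here. */
static void fixed_alloc(void)
{
	void *p;

	mutex_lock(&example_mutex);
	p = kmalloc(128, GFP_KERNEL);
	mutex_unlock(&example_mutex);
	kfree(p);
}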
The spinlock can be a mutex instead, which also enables the removal of the unlock/lock around the lock/unlock of cfg80211_mutex and the call to set_regdom. Reported-by: Jiri Slaby Cc: stable@kernel.org Signed-off-by: John W. Linville --- net/wireless/reg.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index ed89c59bb431..81fcafc60150 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -324,7 +324,7 @@ struct reg_regdb_search_request { }; static LIST_HEAD(reg_regdb_search_list); -static DEFINE_SPINLOCK(reg_regdb_search_lock); +static DEFINE_MUTEX(reg_regdb_search_mutex); static void reg_regdb_search(struct work_struct *work) { @@ -332,7 +332,7 @@ static void reg_regdb_search(struct work_struct *work) const struct ieee80211_regdomain *curdom, *regdom; int i, r; - spin_lock(&reg_regdb_search_lock); + mutex_lock(&reg_regdb_search_mutex); while (!list_empty(&reg_regdb_search_list)) { request = list_first_entry(&reg_regdb_search_list, struct reg_regdb_search_request, @@ -346,18 +346,16 @@ static void reg_regdb_search(struct work_struct *work) r = reg_copy_regd(&regdom, curdom); if (r) break; - spin_unlock(&reg_regdb_search_lock); mutex_lock(&cfg80211_mutex); set_regdom(regdom); mutex_unlock(&cfg80211_mutex); - spin_lock(&reg_regdb_search_lock); break; } } kfree(request); } - spin_unlock(&reg_regdb_search_lock); + mutex_unlock(&reg_regdb_search_mutex); } static DECLARE_WORK(reg_regdb_work, reg_regdb_search); @@ -375,9 +373,9 @@ static void reg_regdb_query(const char *alpha2) memcpy(request->alpha2, alpha2, 2); - spin_lock(&reg_regdb_search_lock); + mutex_lock(&reg_regdb_search_mutex); list_add_tail(&request->list, &reg_regdb_search_list); - spin_unlock(&reg_regdb_search_lock); + mutex_unlock(&reg_regdb_search_mutex); schedule_work(&reg_regdb_work); } -- cgit v1.2.2 From c7a00dc73b7185ab2ebd1aa7ce710c7b4edc77a4 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Wed, 17 Mar 2010 11:28:18 -0400 Subject: mac80211: correct typos in "unavailable upon resume" warning Signed-off-by: John W. Linville --- net/mac80211/util.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/util.c b/net/mac80211/util.c index c453226f06b2..e6b1fba980c6 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1097,9 +1097,9 @@ int ieee80211_reconfig(struct ieee80211_local *local) */ res = drv_start(local); if (res) { - WARN(local->suspended, "Harware became unavailable " - "upon resume. This is could be a software issue" - "prior to suspend or a hardware issue\n"); + WARN(local->suspended, "Hardware became unavailable " + "upon resume. This could be a software issue " + "prior to suspend or a hardware issue.\n"); return res; } -- cgit v1.2.2 From 533866b12cce484994163b1e201778cbac4c04c5 Mon Sep 17 00:00:00 2001 From: "Porsch, Marco" Date: Wed, 24 Feb 2010 09:53:13 +0100 Subject: mac80211: fix PREQ processing and one small bug 1st) a PREQ should only be processed if it has the same SN and a better metric (instead of better or equal). 2nd) next_hop[ETH_ALEN] is now actually used to buffer mpath->next_hop->sta.addr for use out of the lock. Signed-off-by: Marco Porsch Acked-by: Javier Cardona Cc: stable@kernel.org Signed-off-by: John W. 
Linville --- net/mac80211/mesh_hwmp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index ce84237ebad3..ccff6133e19a 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -391,7 +391,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, if (SN_GT(mpath->sn, orig_sn) || (mpath->sn == orig_sn && action == MPATH_PREQ && - new_metric > mpath->metric)) { + new_metric >= mpath->metric)) { process = false; fresh_info = false; } @@ -611,7 +611,7 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, mesh_path_sel_frame_tx(MPATH_PREP, flags, orig_addr, cpu_to_le32(orig_sn), 0, target_addr, - cpu_to_le32(target_sn), mpath->next_hop->sta.addr, hopcount, + cpu_to_le32(target_sn), next_hop, hopcount, ttl, cpu_to_le32(lifetime), cpu_to_le32(metric), 0, sdata); rcu_read_unlock(); -- cgit v1.2.2 From 7236fe29fd72d17074574ba312e7f1bb9d10abaa Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 22 Mar 2010 13:42:43 -0700 Subject: mac80211: move netdev queue enabling to correct spot "mac80211: fix skb buffering issue" still left a race between enabling the hardware queues and the virtual interface queues. In hindsight it's totally obvious that enabling the netdev queues for a hardware queue when the hardware queue is enabled is wrong, because it could well possible that we can fill the hw queue with packets we already have pending. Thus, we must only enable the netdev queues once all the pending packets have been processed and sent off to the device. In testing, I haven't been able to trigger this race condition, but it's clearly there, possibly only when aggregation is being enabled. Signed-off-by: Johannes Berg Cc: stable@kernel.org Signed-off-by: John W. 
Linville --- net/mac80211/tx.c | 6 ++++++ net/mac80211/util.c | 12 ++++++------ 2 files changed, 12 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index cbe53ed4fb0b..cfc473e1b050 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1991,6 +1991,7 @@ static bool ieee80211_tx_pending_skb(struct ieee80211_local *local, void ieee80211_tx_pending(unsigned long data) { struct ieee80211_local *local = (struct ieee80211_local *)data; + struct ieee80211_sub_if_data *sdata; unsigned long flags; int i; bool txok; @@ -2029,6 +2030,11 @@ void ieee80211_tx_pending(unsigned long data) if (!txok) break; } + + if (skb_queue_empty(&local->pending[i])) + list_for_each_entry_rcu(sdata, &local->interfaces, list) + netif_tx_wake_queue( + netdev_get_tx_queue(sdata->dev, i)); } spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index e6b1fba980c6..53af57047435 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -279,13 +279,13 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue, /* someone still has this queue stopped */ return; - if (!skb_queue_empty(&local->pending[queue])) + if (skb_queue_empty(&local->pending[queue])) { + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) + netif_tx_wake_queue(netdev_get_tx_queue(sdata->dev, queue)); + rcu_read_unlock(); + } else tasklet_schedule(&local->tx_pending_tasklet); - - rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) - netif_tx_wake_queue(netdev_get_tx_queue(sdata->dev, queue)); - rcu_read_unlock(); } void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue, -- cgit v1.2.2 From baff42ab1494528907bf4d5870359e31711746ae Mon Sep 17 00:00:00 2001 From: "Steven J. Magnani" Date: Tue, 30 Mar 2010 13:56:01 -0700 Subject: net: Fix oops from tcp_collapse() when using splice() tcp_read_sock() can eat skbs without immediately advancing copied_seq. This can cause a panic in tcp_collapse() if it is called as a result of the recv_actor dropping the socket lock. A userspace program that splices data from a socket to either another socket or to a file can trigger this bug. Signed-off-by: Steven J. Magnani Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6afb6d8662b2..2c75f891914e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1368,6 +1368,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, sk_eat_skb(sk, skb, 0); if (!desc->count) break; + tp->copied_seq = seq; } tp->copied_seq = seq; -- cgit v1.2.2 From b482cd2053e3b90a7b33a78c63cdb6badf2ec383 Mon Sep 17 00:00:00 2001 From: Sjur Braendeland Date: Tue, 30 Mar 2010 13:56:23 +0000 Subject: net-caif: add CAIF core protocol stack CAIF generic protocol implementation. This layer is somewhat generic in order to be able to use and test it outside the Linux Kernel. cfctrl.c - CAIF control protocol layer cfdbgl.c - CAIF debug protocol layer cfdgml.c - CAIF datagram protocol layer cffrml.c - CAIF framing protocol layer cfmuxl.c - CAIF mux protocol layer cfrfml.c - CAIF remote file manager protocol layer cfserl.c - CAIF serial (fragmentation) protocol layer cfsrvl.c - CAIF generic service layer functions cfutill.c - CAIF utility protocol layer cfveil.c - CAIF AT protocol layer cfvidl.c - CAIF video protocol layer Signed-off-by: Sjur Braendeland Signed-off-by: David S. 
Miller --- net/caif/cfctrl.c | 664 +++++++++++++++++++++++++++++++++++++++++++++++++++++ net/caif/cfdbgl.c | 40 ++++ net/caif/cfdgml.c | 108 +++++++++ net/caif/cffrml.c | 151 ++++++++++++ net/caif/cfmuxl.c | 246 ++++++++++++++++++++ net/caif/cfrfml.c | 108 +++++++++ net/caif/cfserl.c | 192 ++++++++++++++++ net/caif/cfsrvl.c | 185 +++++++++++++++ net/caif/cfutill.c | 115 ++++++++++ net/caif/cfveil.c | 107 +++++++++ net/caif/cfvidl.c | 65 ++++++ 11 files changed, 1981 insertions(+) create mode 100644 net/caif/cfctrl.c create mode 100644 net/caif/cfdbgl.c create mode 100644 net/caif/cfdgml.c create mode 100644 net/caif/cffrml.c create mode 100644 net/caif/cfmuxl.c create mode 100644 net/caif/cfrfml.c create mode 100644 net/caif/cfserl.c create mode 100644 net/caif/cfsrvl.c create mode 100644 net/caif/cfutill.c create mode 100644 net/caif/cfveil.c create mode 100644 net/caif/cfvidl.c (limited to 'net') diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c new file mode 100644 index 000000000000..11f80140f3cb --- /dev/null +++ b/net/caif/cfctrl.c @@ -0,0 +1,664 @@ +/* + * Copyright (C) ST-Ericsson AB 2010 + * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * License terms: GNU General Public License (GPL) version 2 + */ + +#include +#include +#include +#include +#include +#include + +#define container_obj(layr) container_of(layr, struct cfctrl, serv.layer) +#define UTILITY_NAME_LENGTH 16 +#define CFPKT_CTRL_PKT_LEN 20 + + +#ifdef CAIF_NO_LOOP +static int handle_loop(struct cfctrl *ctrl, + int cmd, struct cfpkt *pkt){ + return CAIF_FAILURE; +} +#else +static int handle_loop(struct cfctrl *ctrl, + int cmd, struct cfpkt *pkt); +#endif +static int cfctrl_recv(struct cflayer *layr, struct cfpkt *pkt); +static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, + int phyid); + + +struct cflayer *cfctrl_create(void) +{ + struct cfctrl *this = + kmalloc(sizeof(struct cfctrl), GFP_ATOMIC); + if (!this) { + pr_warning("CAIF: %s(): Out of memory\n", __func__); + return NULL; + } + caif_assert(offsetof(struct cfctrl, serv.layer) == 0); + memset(this, 0, sizeof(*this)); + spin_lock_init(&this->info_list_lock); + atomic_set(&this->req_seq_no, 1); + atomic_set(&this->rsp_seq_no, 1); + this->serv.dev_info.id = 0xff; + this->serv.layer.id = 0; + this->serv.layer.receive = cfctrl_recv; + sprintf(this->serv.layer.name, "ctrl"); + this->serv.layer.ctrlcmd = cfctrl_ctrlcmd; + spin_lock_init(&this->loop_linkid_lock); + this->loop_linkid = 1; + return &this->serv.layer; +} + +static bool param_eq(struct cfctrl_link_param *p1, struct cfctrl_link_param *p2) +{ + bool eq = + p1->linktype == p2->linktype && + p1->priority == p2->priority && + p1->phyid == p2->phyid && + p1->endpoint == p2->endpoint && p1->chtype == p2->chtype; + + if (!eq) + return false; + + switch (p1->linktype) { + case CFCTRL_SRV_VEI: + return true; + case CFCTRL_SRV_DATAGRAM: + return p1->u.datagram.connid == p2->u.datagram.connid; + case CFCTRL_SRV_RFM: + return + p1->u.rfm.connid == p2->u.rfm.connid && + strcmp(p1->u.rfm.volume, p2->u.rfm.volume) == 0; + case CFCTRL_SRV_UTIL: + return + p1->u.utility.fifosize_kb == p2->u.utility.fifosize_kb + && p1->u.utility.fifosize_bufs == + p2->u.utility.fifosize_bufs + && strcmp(p1->u.utility.name, p2->u.utility.name) == 0 + && p1->u.utility.paramlen == p2->u.utility.paramlen + && memcmp(p1->u.utility.params, p2->u.utility.params, + p1->u.utility.paramlen) == 0; + + case CFCTRL_SRV_VIDEO: + return p1->u.video.connid == p2->u.video.connid; + case CFCTRL_SRV_DBG: + return true; + case 
CFCTRL_SRV_DECM: + return false; + default: + return false; + } + return false; +} + +bool cfctrl_req_eq(struct cfctrl_request_info *r1, + struct cfctrl_request_info *r2) +{ + if (r1->cmd != r2->cmd) + return false; + if (r1->cmd == CFCTRL_CMD_LINK_SETUP) + return param_eq(&r1->param, &r2->param); + else + return r1->channel_id == r2->channel_id; +} + +/* Insert request at the end */ +void cfctrl_insert_req(struct cfctrl *ctrl, + struct cfctrl_request_info *req) +{ + struct cfctrl_request_info *p; + spin_lock(&ctrl->info_list_lock); + req->next = NULL; + atomic_inc(&ctrl->req_seq_no); + req->sequence_no = atomic_read(&ctrl->req_seq_no); + if (ctrl->first_req == NULL) { + ctrl->first_req = req; + spin_unlock(&ctrl->info_list_lock); + return; + } + p = ctrl->first_req; + while (p->next != NULL) + p = p->next; + p->next = req; + spin_unlock(&ctrl->info_list_lock); +} + +static void cfctrl_insert_req2(struct cfctrl *ctrl, enum cfctrl_cmd cmd, + u8 linkid, struct cflayer *user_layer) +{ + struct cfctrl_request_info *req = kmalloc(sizeof(*req), GFP_KERNEL); + if (!req) { + pr_warning("CAIF: %s(): Out of memory\n", __func__); + return; + } + req->client_layer = user_layer; + req->cmd = cmd; + req->channel_id = linkid; + cfctrl_insert_req(ctrl, req); +} + +/* Compare and remove request */ +struct cfctrl_request_info *cfctrl_remove_req(struct cfctrl *ctrl, + struct cfctrl_request_info *req) +{ + struct cfctrl_request_info *p; + struct cfctrl_request_info *ret; + + spin_lock(&ctrl->info_list_lock); + if (ctrl->first_req == NULL) { + spin_unlock(&ctrl->info_list_lock); + return NULL; + } + + if (cfctrl_req_eq(req, ctrl->first_req)) { + ret = ctrl->first_req; + caif_assert(ctrl->first_req); + atomic_set(&ctrl->rsp_seq_no, + ctrl->first_req->sequence_no); + ctrl->first_req = ctrl->first_req->next; + spin_unlock(&ctrl->info_list_lock); + return ret; + } + + p = ctrl->first_req; + + while (p->next != NULL) { + if (cfctrl_req_eq(req, p->next)) { + pr_warning("CAIF: %s(): Requests are not " + "received in order\n", + __func__); + ret = p->next; + atomic_set(&ctrl->rsp_seq_no, + p->next->sequence_no); + p->next = p->next->next; + spin_unlock(&ctrl->info_list_lock); + return ret; + } + p = p->next; + } + spin_unlock(&ctrl->info_list_lock); + + pr_warning("CAIF: %s(): Request does not match\n", + __func__); + return NULL; +} + +struct cfctrl_rsp *cfctrl_get_respfuncs(struct cflayer *layer) +{ + struct cfctrl *this = container_obj(layer); + return &this->res; +} + +void cfctrl_set_dnlayer(struct cflayer *this, struct cflayer *dn) +{ + this->dn = dn; +} + +void cfctrl_set_uplayer(struct cflayer *this, struct cflayer *up) +{ + this->up = up; +} + +static void init_info(struct caif_payload_info *info, struct cfctrl *cfctrl) +{ + info->hdr_len = 0; + info->channel_id = cfctrl->serv.layer.id; + info->dev_info = &cfctrl->serv.dev_info; +} + +void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid) +{ + struct cfctrl *cfctrl = container_obj(layer); + int ret; + struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); + if (!pkt) { + pr_warning("CAIF: %s(): Out of memory\n", __func__); + return; + } + caif_assert(offsetof(struct cfctrl, serv.layer) == 0); + init_info(cfpkt_info(pkt), cfctrl); + cfpkt_info(pkt)->dev_info->id = physlinkid; + cfctrl->serv.dev_info.id = physlinkid; + cfpkt_addbdy(pkt, CFCTRL_CMD_ENUM); + cfpkt_addbdy(pkt, physlinkid); + ret = + cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt); + if (ret < 0) { + pr_err("CAIF: %s(): Could not transmit enum message\n", + __func__); + 
cfpkt_destroy(pkt); + } +} + +void cfctrl_linkup_request(struct cflayer *layer, + struct cfctrl_link_param *param, + struct cflayer *user_layer) +{ + struct cfctrl *cfctrl = container_obj(layer); + u32 tmp32; + u16 tmp16; + u8 tmp8; + struct cfctrl_request_info *req; + int ret; + char utility_name[16]; + struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); + if (!pkt) { + pr_warning("CAIF: %s(): Out of memory\n", __func__); + return; + } + cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_SETUP); + cfpkt_addbdy(pkt, (param->chtype << 4) + param->linktype); + cfpkt_addbdy(pkt, (param->priority << 3) + param->phyid); + cfpkt_addbdy(pkt, param->endpoint & 0x03); + + switch (param->linktype) { + case CFCTRL_SRV_VEI: + break; + case CFCTRL_SRV_VIDEO: + cfpkt_addbdy(pkt, (u8) param->u.video.connid); + break; + case CFCTRL_SRV_DBG: + break; + case CFCTRL_SRV_DATAGRAM: + tmp32 = cpu_to_le32(param->u.datagram.connid); + cfpkt_add_body(pkt, &tmp32, 4); + break; + case CFCTRL_SRV_RFM: + /* Construct a frame, convert DatagramConnectionID to network + * format long and copy it out... + */ + tmp32 = cpu_to_le32(param->u.rfm.connid); + cfpkt_add_body(pkt, &tmp32, 4); + /* Add volume name, including zero termination... */ + cfpkt_add_body(pkt, param->u.rfm.volume, + strlen(param->u.rfm.volume) + 1); + break; + case CFCTRL_SRV_UTIL: + tmp16 = cpu_to_le16(param->u.utility.fifosize_kb); + cfpkt_add_body(pkt, &tmp16, 2); + tmp16 = cpu_to_le16(param->u.utility.fifosize_bufs); + cfpkt_add_body(pkt, &tmp16, 2); + memset(utility_name, 0, sizeof(utility_name)); + strncpy(utility_name, param->u.utility.name, + UTILITY_NAME_LENGTH - 1); + cfpkt_add_body(pkt, utility_name, UTILITY_NAME_LENGTH); + tmp8 = param->u.utility.paramlen; + cfpkt_add_body(pkt, &tmp8, 1); + cfpkt_add_body(pkt, param->u.utility.params, + param->u.utility.paramlen); + break; + default: + pr_warning("CAIF: %s():Request setup of bad link type = %d\n", + __func__, param->linktype); + } + req = kmalloc(sizeof(*req), GFP_KERNEL); + if (!req) { + pr_warning("CAIF: %s(): Out of memory\n", __func__); + return; + } + memset(req, 0, sizeof(*req)); + req->client_layer = user_layer; + req->cmd = CFCTRL_CMD_LINK_SETUP; + req->param = *param; + cfctrl_insert_req(cfctrl, req); + init_info(cfpkt_info(pkt), cfctrl); + cfpkt_info(pkt)->dev_info->id = param->phyid; + ret = + cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt); + if (ret < 0) { + pr_err("CAIF: %s(): Could not transmit linksetup request\n", + __func__); + cfpkt_destroy(pkt); + } +} + +int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid, + struct cflayer *client) +{ + int ret; + struct cfctrl *cfctrl = container_obj(layer); + struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); + if (!pkt) { + pr_warning("CAIF: %s(): Out of memory\n", __func__); + return -ENOMEM; + } + cfctrl_insert_req2(cfctrl, CFCTRL_CMD_LINK_DESTROY, channelid, client); + cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_DESTROY); + cfpkt_addbdy(pkt, channelid); + init_info(cfpkt_info(pkt), cfctrl); + ret = + cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt); + if (ret < 0) { + pr_err("CAIF: %s(): Could not transmit link-down request\n", + __func__); + cfpkt_destroy(pkt); + } + return ret; +} + +void cfctrl_sleep_req(struct cflayer *layer) +{ + int ret; + struct cfctrl *cfctrl = container_obj(layer); + struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); + if (!pkt) { + pr_warning("CAIF: %s(): Out of memory\n", __func__); + return; + } + cfpkt_addbdy(pkt, CFCTRL_CMD_SLEEP); + init_info(cfpkt_info(pkt), cfctrl); + ret = + 
cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt); + if (ret < 0) + cfpkt_destroy(pkt); +} + +void cfctrl_wake_req(struct cflayer *layer) +{ + int ret; + struct cfctrl *cfctrl = container_obj(layer); + struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); + if (!pkt) { + pr_warning("CAIF: %s(): Out of memory\n", __func__); + return; + } + cfpkt_addbdy(pkt, CFCTRL_CMD_WAKE); + init_info(cfpkt_info(pkt), cfctrl); + ret = + cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt); + if (ret < 0) + cfpkt_destroy(pkt); +} + +void cfctrl_getstartreason_req(struct cflayer *layer) +{ + int ret; + struct cfctrl *cfctrl = container_obj(layer); + struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); + if (!pkt) { + pr_warning("CAIF: %s(): Out of memory\n", __func__); + return; + } + cfpkt_addbdy(pkt, CFCTRL_CMD_START_REASON); + init_info(cfpkt_info(pkt), cfctrl); + ret = + cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt); + if (ret < 0) + cfpkt_destroy(pkt); +} + + +static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) +{ + u8 cmdrsp; + u8 cmd; + int ret = -1; + u16 tmp16; + u8 len; + u8 param[255]; + u8 linkid; + struct cfctrl *cfctrl = container_obj(layer); + struct cfctrl_request_info rsp, *req; + + + cfpkt_extr_head(pkt, &cmdrsp, 1); + cmd = cmdrsp & CFCTRL_CMD_MASK; + if (cmd != CFCTRL_CMD_LINK_ERR + && CFCTRL_RSP_BIT != (CFCTRL_RSP_BIT & cmdrsp)) { + if (handle_loop(cfctrl, cmd, pkt) == CAIF_FAILURE) { + pr_info("CAIF: %s() CAIF Protocol error:" + "Response bit not set\n", __func__); + goto error; + } + } + + switch (cmd) { + case CFCTRL_CMD_LINK_SETUP: + { + enum cfctrl_srv serv; + enum cfctrl_srv servtype; + u8 endpoint; + u8 physlinkid; + u8 prio; + u8 tmp; + u32 tmp32; + u8 *cp; + int i; + struct cfctrl_link_param linkparam; + memset(&linkparam, 0, sizeof(linkparam)); + + cfpkt_extr_head(pkt, &tmp, 1); + + serv = tmp & CFCTRL_SRV_MASK; + linkparam.linktype = serv; + + servtype = tmp >> 4; + linkparam.chtype = servtype; + + cfpkt_extr_head(pkt, &tmp, 1); + physlinkid = tmp & 0x07; + prio = tmp >> 3; + + linkparam.priority = prio; + linkparam.phyid = physlinkid; + cfpkt_extr_head(pkt, &endpoint, 1); + linkparam.endpoint = endpoint & 0x03; + + switch (serv) { + case CFCTRL_SRV_VEI: + case CFCTRL_SRV_DBG: + /* Link ID */ + cfpkt_extr_head(pkt, &linkid, 1); + break; + case CFCTRL_SRV_VIDEO: + cfpkt_extr_head(pkt, &tmp, 1); + linkparam.u.video.connid = tmp; + /* Link ID */ + cfpkt_extr_head(pkt, &linkid, 1); + break; + + case CFCTRL_SRV_DATAGRAM: + cfpkt_extr_head(pkt, &tmp32, 4); + linkparam.u.datagram.connid = + le32_to_cpu(tmp32); + /* Link ID */ + cfpkt_extr_head(pkt, &linkid, 1); + break; + case CFCTRL_SRV_RFM: + /* Construct a frame, convert + * DatagramConnectionID + * to network format long and copy it out... + */ + cfpkt_extr_head(pkt, &tmp32, 4); + linkparam.u.rfm.connid = + le32_to_cpu(tmp32); + cp = (u8 *) linkparam.u.rfm.volume; + for (cfpkt_extr_head(pkt, &tmp, 1); + cfpkt_more(pkt) && tmp != '\0'; + cfpkt_extr_head(pkt, &tmp, 1)) + *cp++ = tmp; + *cp = '\0'; + + /* Link ID */ + cfpkt_extr_head(pkt, &linkid, 1); + + break; + case CFCTRL_SRV_UTIL: + /* Construct a frame, convert + * DatagramConnectionID + * to network format long and copy it out... 
+ */ + /* Fifosize KB */ + cfpkt_extr_head(pkt, &tmp16, 2); + linkparam.u.utility.fifosize_kb = + le16_to_cpu(tmp16); + /* Fifosize bufs */ + cfpkt_extr_head(pkt, &tmp16, 2); + linkparam.u.utility.fifosize_bufs = + le16_to_cpu(tmp16); + /* name */ + cp = (u8 *) linkparam.u.utility.name; + caif_assert(sizeof(linkparam.u.utility.name) + >= UTILITY_NAME_LENGTH); + for (i = 0; + i < UTILITY_NAME_LENGTH + && cfpkt_more(pkt); i++) { + cfpkt_extr_head(pkt, &tmp, 1); + *cp++ = tmp; + } + /* Length */ + cfpkt_extr_head(pkt, &len, 1); + linkparam.u.utility.paramlen = len; + /* Param Data */ + cp = linkparam.u.utility.params; + while (cfpkt_more(pkt) && len--) { + cfpkt_extr_head(pkt, &tmp, 1); + *cp++ = tmp; + } + /* Link ID */ + cfpkt_extr_head(pkt, &linkid, 1); + /* Length */ + cfpkt_extr_head(pkt, &len, 1); + /* Param Data */ + cfpkt_extr_head(pkt, ¶m, len); + break; + default: + pr_warning("CAIF: %s(): Request setup " + "- invalid link type (%d)", + __func__, serv); + goto error; + } + + rsp.cmd = cmd; + rsp.param = linkparam; + req = cfctrl_remove_req(cfctrl, &rsp); + + if (CFCTRL_ERR_BIT == (CFCTRL_ERR_BIT & cmdrsp) || + cfpkt_erroneous(pkt)) { + pr_err("CAIF: %s(): Invalid O/E bit or parse " + "error on CAIF control channel", + __func__); + cfctrl->res.reject_rsp(cfctrl->serv.layer.up, + 0, + req ? req->client_layer + : NULL); + } else { + cfctrl->res.linksetup_rsp(cfctrl->serv. + layer.up, linkid, + serv, physlinkid, + req ? req-> + client_layer : NULL); + } + + if (req != NULL) + kfree(req); + } + break; + case CFCTRL_CMD_LINK_DESTROY: + cfpkt_extr_head(pkt, &linkid, 1); + rsp.cmd = cmd; + rsp.channel_id = linkid; + req = cfctrl_remove_req(cfctrl, &rsp); + cfctrl->res.linkdestroy_rsp(cfctrl->serv.layer.up, linkid, + req ? req->client_layer : NULL); + if (req != NULL) + kfree(req); + break; + case CFCTRL_CMD_LINK_ERR: + pr_err("CAIF: %s(): Frame Error Indication received\n", + __func__); + cfctrl->res.linkerror_ind(); + break; + case CFCTRL_CMD_ENUM: + cfctrl->res.enum_rsp(); + break; + case CFCTRL_CMD_SLEEP: + cfctrl->res.sleep_rsp(); + break; + case CFCTRL_CMD_WAKE: + cfctrl->res.wake_rsp(); + break; + case CFCTRL_CMD_LINK_RECONF: + cfctrl->res.restart_rsp(); + break; + case CFCTRL_CMD_RADIO_SET: + cfctrl->res.radioset_rsp(); + break; + default: + pr_err("CAIF: %s(): Unrecognized Control Frame\n", __func__); + goto error; + break; + } + ret = 0; +error: + cfpkt_destroy(pkt); + return ret; +} + +static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, + int phyid) +{ + struct cfctrl *this = container_obj(layr); + switch (ctrl) { + case _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND: + case CAIF_CTRLCMD_FLOW_OFF_IND: + spin_lock(&this->info_list_lock); + if (this->first_req != NULL) { + pr_warning("CAIF: %s(): Received flow off in " + "control layer", __func__); + } + spin_unlock(&this->info_list_lock); + break; + default: + break; + } +} + +#ifndef CAIF_NO_LOOP +static int handle_loop(struct cfctrl *ctrl, int cmd, struct cfpkt *pkt) +{ + static int last_linkid; + u8 linkid, linktype, tmp; + switch (cmd) { + case CFCTRL_CMD_LINK_SETUP: + spin_lock(&ctrl->loop_linkid_lock); + for (linkid = last_linkid + 1; linkid < 255; linkid++) + if (!ctrl->loop_linkused[linkid]) + goto found; + for (linkid = last_linkid - 1; linkid > 0; linkid--) + if (!ctrl->loop_linkused[linkid]) + goto found; + spin_unlock(&ctrl->loop_linkid_lock); + return -EINVAL; +found: + if (!ctrl->loop_linkused[linkid]) + ctrl->loop_linkused[linkid] = 1; + + last_linkid = linkid; + + cfpkt_add_trail(pkt, &linkid, 1); + 
spin_unlock(&ctrl->loop_linkid_lock); + cfpkt_peek_head(pkt, &linktype, 1); + if (linktype == CFCTRL_SRV_UTIL) { + tmp = 0x01; + cfpkt_add_trail(pkt, &tmp, 1); + cfpkt_add_trail(pkt, &tmp, 1); + } + break; + + case CFCTRL_CMD_LINK_DESTROY: + spin_lock(&ctrl->loop_linkid_lock); + cfpkt_peek_head(pkt, &linkid, 1); + ctrl->loop_linkused[linkid] = 0; + spin_unlock(&ctrl->loop_linkid_lock); + break; + default: + break; + } + return CAIF_SUCCESS; +} +#endif diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c new file mode 100644 index 000000000000..ab6b6dc34cf8 --- /dev/null +++ b/net/caif/cfdbgl.c @@ -0,0 +1,40 @@ +/* + * Copyright (C) ST-Ericsson AB 2010 + * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * License terms: GNU General Public License (GPL) version 2 + */ + +#include +#include +#include +#include +#include + +static int cfdbgl_receive(struct cflayer *layr, struct cfpkt *pkt); +static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt); + +struct cflayer *cfdbgl_create(u8 channel_id, struct dev_info *dev_info) +{ + struct cfsrvl *dbg = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); + if (!dbg) { + pr_warning("CAIF: %s(): Out of memory\n", __func__); + return NULL; + } + caif_assert(offsetof(struct cfsrvl, layer) == 0); + memset(dbg, 0, sizeof(struct cfsrvl)); + cfsrvl_init(dbg, channel_id, dev_info); + dbg->layer.receive = cfdbgl_receive; + dbg->layer.transmit = cfdbgl_transmit; + snprintf(dbg->layer.name, CAIF_LAYER_NAME_SZ - 1, "dbg%d", channel_id); + return &dbg->layer; +} + +static int cfdbgl_receive(struct cflayer *layr, struct cfpkt *pkt) +{ + return layr->up->receive(layr->up, pkt); +} + +static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt) +{ + return layr->dn->transmit(layr->dn, pkt); +} diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c new file mode 100644 index 000000000000..53194840ecb6 --- /dev/null +++ b/net/caif/cfdgml.c @@ -0,0 +1,108 @@ +/* + * Copyright (C) ST-Ericsson AB 2010 + * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * License terms: GNU General Public License (GPL) version 2 + */ + +#include +#include +#include +#include +#include +#include + +#define container_obj(layr) ((struct cfsrvl *) layr) + +#define DGM_CMD_BIT 0x80 +#define DGM_FLOW_OFF 0x81 +#define DGM_FLOW_ON 0x80 +#define DGM_CTRL_PKT_SIZE 1 + +static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt); +static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt); + +struct cflayer *cfdgml_create(u8 channel_id, struct dev_info *dev_info) +{ + struct cfsrvl *dgm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); + if (!dgm) { + pr_warning("CAIF: %s(): Out of memory\n", __func__); + return NULL; + } + caif_assert(offsetof(struct cfsrvl, layer) == 0); + memset(dgm, 0, sizeof(struct cfsrvl)); + cfsrvl_init(dgm, channel_id, dev_info); + dgm->layer.receive = cfdgml_receive; + dgm->layer.transmit = cfdgml_transmit; + snprintf(dgm->layer.name, CAIF_LAYER_NAME_SZ - 1, "dgm%d", channel_id); + dgm->layer.name[CAIF_LAYER_NAME_SZ - 1] = '\0'; + return &dgm->layer; +} + +static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt) +{ + u8 cmd = -1; + u8 dgmhdr[3]; + int ret; + caif_assert(layr->up != NULL); + caif_assert(layr->receive != NULL); + caif_assert(layr->ctrlcmd != NULL); + + if (cfpkt_extr_head(pkt, &cmd, 1) < 0) { + pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); + cfpkt_destroy(pkt); + return -EPROTO; + } + + if ((cmd & DGM_CMD_BIT) == 0) { + if (cfpkt_extr_head(pkt, &dgmhdr, 3) < 0) { + pr_err("CAIF: %s(): Packet is 
erroneous!\n", __func__); + cfpkt_destroy(pkt); + return -EPROTO; + } + ret = layr->up->receive(layr->up, pkt); + return ret; + } + + switch (cmd) { + case DGM_FLOW_OFF: /* FLOW OFF */ + layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_OFF_IND, 0); + cfpkt_destroy(pkt); + return 0; + case DGM_FLOW_ON: /* FLOW ON */ + layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_ON_IND, 0); + cfpkt_destroy(pkt); + return 0; + default: + cfpkt_destroy(pkt); + pr_info("CAIF: %s(): Unknown datagram control %d (0x%x)\n", + __func__, cmd, cmd); + return -EPROTO; + } +} + +static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt) +{ + u32 zero = 0; + struct caif_payload_info *info; + struct cfsrvl *service = container_obj(layr); + int ret; + if (!cfsrvl_ready(service, &ret)) + return ret; + + cfpkt_add_head(pkt, &zero, 4); + + /* Add info for MUX-layer to route the packet out. */ + info = cfpkt_info(pkt); + info->channel_id = service->layer.id; + /* To optimize alignment, we add up the size of CAIF header + * before payload. + */ + info->hdr_len = 4; + info->dev_info = &service->dev_info; + ret = layr->dn->transmit(layr->dn, pkt); + if (ret < 0) { + u32 tmp32; + cfpkt_extr_head(pkt, &tmp32, 4); + } + return ret; +} diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c new file mode 100644 index 000000000000..e86a4ca3b217 --- /dev/null +++ b/net/caif/cffrml.c @@ -0,0 +1,151 @@ +/* + * CAIF Framing Layer. + * + * Copyright (C) ST-Ericsson AB 2010 + * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * License terms: GNU General Public License (GPL) version 2 + */ + +#include +#include +#include +#include +#include +#include +#include + +#define container_obj(layr) container_of(layr, struct cffrml, layer) + +struct cffrml { + struct cflayer layer; + bool dofcs; /* !< FCS active */ +}; + +static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt); +static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt); +static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, + int phyid); + +static u32 cffrml_rcv_error; +static u32 cffrml_rcv_checsum_error; +struct cflayer *cffrml_create(u16 phyid, bool use_fcs) +{ + struct cffrml *this = kmalloc(sizeof(struct cffrml), GFP_ATOMIC); + if (!this) { + pr_warning("CAIF: %s(): Out of memory\n", __func__); + return NULL; + } + caif_assert(offsetof(struct cffrml, layer) == 0); + + memset(this, 0, sizeof(struct cflayer)); + this->layer.receive = cffrml_receive; + this->layer.transmit = cffrml_transmit; + this->layer.ctrlcmd = cffrml_ctrlcmd; + snprintf(this->layer.name, CAIF_LAYER_NAME_SZ, "frm%d", phyid); + this->dofcs = use_fcs; + this->layer.id = phyid; + return (struct cflayer *) this; +} + +void cffrml_set_uplayer(struct cflayer *this, struct cflayer *up) +{ + this->up = up; +} + +void cffrml_set_dnlayer(struct cflayer *this, struct cflayer *dn) +{ + this->dn = dn; +} + +static u16 cffrml_checksum(u16 chks, void *buf, u16 len) +{ + /* FIXME: FCS should be moved to glue in order to use OS-Specific + * solutions + */ + return crc_ccitt(chks, buf, len); +} + +static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt) +{ + u16 tmp; + u16 len; + u16 hdrchks; + u16 pktchks; + struct cffrml *this; + this = container_obj(layr); + + cfpkt_extr_head(pkt, &tmp, 2); + len = le16_to_cpu(tmp); + + /* Subtract for FCS on length if FCS is not used. 
*/ + if (!this->dofcs) + len -= 2; + + if (cfpkt_setlen(pkt, len) < 0) { + ++cffrml_rcv_error; + pr_err("CAIF: %s():Framing length error (%d)\n", __func__, len); + cfpkt_destroy(pkt); + return -EPROTO; + } + /* + * Don't do extract if FCS is false, rather do setlen - then we don't + * get a cache-miss. + */ + if (this->dofcs) { + cfpkt_extr_trail(pkt, &tmp, 2); + hdrchks = le16_to_cpu(tmp); + pktchks = cfpkt_iterate(pkt, cffrml_checksum, 0xffff); + if (pktchks != hdrchks) { + cfpkt_add_trail(pkt, &tmp, 2); + ++cffrml_rcv_error; + ++cffrml_rcv_checsum_error; + pr_info("CAIF: %s(): Frame checksum error " + "(0x%x != 0x%x)\n", __func__, hdrchks, pktchks); + return -EILSEQ; + } + } + if (cfpkt_erroneous(pkt)) { + ++cffrml_rcv_error; + pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); + cfpkt_destroy(pkt); + return -EPROTO; + } + return layr->up->receive(layr->up, pkt); +} + +static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt) +{ + int tmp; + u16 chks; + u16 len; + int ret; + struct cffrml *this = container_obj(layr); + if (this->dofcs) { + chks = cfpkt_iterate(pkt, cffrml_checksum, 0xffff); + tmp = cpu_to_le16(chks); + cfpkt_add_trail(pkt, &tmp, 2); + } else { + cfpkt_pad_trail(pkt, 2); + } + len = cfpkt_getlen(pkt); + tmp = cpu_to_le16(len); + cfpkt_add_head(pkt, &tmp, 2); + cfpkt_info(pkt)->hdr_len += 2; + if (cfpkt_erroneous(pkt)) { + pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); + return -EPROTO; + } + ret = layr->dn->transmit(layr->dn, pkt); + if (ret < 0) { + /* Remove header on faulty packet. */ + cfpkt_extr_head(pkt, &tmp, 2); + } + return ret; +} + +static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, + int phyid) +{ + if (layr->up->ctrlcmd) + layr->up->ctrlcmd(layr->up, ctrl, layr->id); +} diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c new file mode 100644 index 000000000000..6fb9f9e96cf8 --- /dev/null +++ b/net/caif/cfmuxl.c @@ -0,0 +1,246 @@ +/* + * Copyright (C) ST-Ericsson AB 2010 + * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * License terms: GNU General Public License (GPL) version 2 + */ +#include +#include +#include +#include +#include +#include +#include + +#define container_obj(layr) container_of(layr, struct cfmuxl, layer) + +#define CAIF_CTRL_CHANNEL 0 +#define UP_CACHE_SIZE 8 +#define DN_CACHE_SIZE 8 + +struct cfmuxl { + struct cflayer layer; + struct list_head srvl_list; + struct list_head frml_list; + struct cflayer *up_cache[UP_CACHE_SIZE]; + struct cflayer *dn_cache[DN_CACHE_SIZE]; + /* + * Set when inserting or removing downwards layers. + */ + spinlock_t transmit_lock; + + /* + * Set when inserting or removing upwards layers. 
+ */ + spinlock_t receive_lock; + +}; + +static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt); +static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt); +static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, + int phyid); +static struct cflayer *get_up(struct cfmuxl *muxl, u16 id); + +struct cflayer *cfmuxl_create(void) +{ + struct cfmuxl *this = kmalloc(sizeof(struct cfmuxl), GFP_ATOMIC); + if (!this) + return NULL; + memset(this, 0, sizeof(*this)); + this->layer.receive = cfmuxl_receive; + this->layer.transmit = cfmuxl_transmit; + this->layer.ctrlcmd = cfmuxl_ctrlcmd; + INIT_LIST_HEAD(&this->srvl_list); + INIT_LIST_HEAD(&this->frml_list); + spin_lock_init(&this->transmit_lock); + spin_lock_init(&this->receive_lock); + snprintf(this->layer.name, CAIF_LAYER_NAME_SZ, "mux"); + return &this->layer; +} + +int cfmuxl_set_uplayer(struct cflayer *layr, struct cflayer *up, u8 linkid) +{ + struct cfmuxl *muxl = container_obj(layr); + spin_lock(&muxl->receive_lock); + list_add(&up->node, &muxl->srvl_list); + spin_unlock(&muxl->receive_lock); + return 0; +} + +bool cfmuxl_is_phy_inuse(struct cflayer *layr, u8 phyid) +{ + struct list_head *node; + struct cflayer *layer; + struct cfmuxl *muxl = container_obj(layr); + bool match = false; + spin_lock(&muxl->receive_lock); + + list_for_each(node, &muxl->srvl_list) { + layer = list_entry(node, struct cflayer, node); + if (cfsrvl_phyid_match(layer, phyid)) { + match = true; + break; + } + + } + spin_unlock(&muxl->receive_lock); + return match; +} + +u8 cfmuxl_get_phyid(struct cflayer *layr, u8 channel_id) +{ + struct cflayer *up; + int phyid; + struct cfmuxl *muxl = container_obj(layr); + spin_lock(&muxl->receive_lock); + up = get_up(muxl, channel_id); + if (up != NULL) + phyid = cfsrvl_getphyid(up); + else + phyid = 0; + spin_unlock(&muxl->receive_lock); + return phyid; +} + +int cfmuxl_set_dnlayer(struct cflayer *layr, struct cflayer *dn, u8 phyid) +{ + struct cfmuxl *muxl = (struct cfmuxl *) layr; + spin_lock(&muxl->transmit_lock); + list_add(&dn->node, &muxl->frml_list); + spin_unlock(&muxl->transmit_lock); + return 0; +} + +static struct cflayer *get_from_id(struct list_head *list, u16 id) +{ + struct list_head *node; + struct cflayer *layer; + list_for_each(node, list) { + layer = list_entry(node, struct cflayer, node); + if (layer->id == id) + return layer; + } + return NULL; +} + +struct cflayer *cfmuxl_remove_dnlayer(struct cflayer *layr, u8 phyid) +{ + struct cfmuxl *muxl = container_obj(layr); + struct cflayer *dn; + spin_lock(&muxl->transmit_lock); + memset(muxl->dn_cache, 0, sizeof(muxl->dn_cache)); + dn = get_from_id(&muxl->frml_list, phyid); + if (dn == NULL) { + spin_unlock(&muxl->transmit_lock); + return NULL; + } + list_del(&dn->node); + caif_assert(dn != NULL); + spin_unlock(&muxl->transmit_lock); + return dn; +} + +/* Invariant: lock is taken */ +static struct cflayer *get_up(struct cfmuxl *muxl, u16 id) +{ + struct cflayer *up; + int idx = id % UP_CACHE_SIZE; + up = muxl->up_cache[idx]; + if (up == NULL || up->id != id) { + up = get_from_id(&muxl->srvl_list, id); + muxl->up_cache[idx] = up; + } + return up; +} + +/* Invariant: lock is taken */ +static struct cflayer *get_dn(struct cfmuxl *muxl, struct dev_info *dev_info) +{ + struct cflayer *dn; + int idx = dev_info->id % DN_CACHE_SIZE; + dn = muxl->dn_cache[idx]; + if (dn == NULL || dn->id != dev_info->id) { + dn = get_from_id(&muxl->frml_list, dev_info->id); + muxl->dn_cache[idx] = dn; + } + return dn; +} + +struct cflayer 
*cfmuxl_remove_uplayer(struct cflayer *layr, u8 id) +{ + struct cflayer *up; + struct cfmuxl *muxl = container_obj(layr); + spin_lock(&muxl->receive_lock); + up = get_up(muxl, id); + memset(muxl->up_cache, 0, sizeof(muxl->up_cache)); + list_del(&up->node); + spin_unlock(&muxl->receive_lock); + return up; +} + +static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt) +{ + int ret; + struct cfmuxl *muxl = container_obj(layr); + u8 id; + struct cflayer *up; + if (cfpkt_extr_head(pkt, &id, 1) < 0) { + pr_err("CAIF: %s(): erroneous Caif Packet\n", __func__); + cfpkt_destroy(pkt); + return -EPROTO; + } + + spin_lock(&muxl->receive_lock); + up = get_up(muxl, id); + spin_unlock(&muxl->receive_lock); + if (up == NULL) { + pr_info("CAIF: %s():Received data on unknown link ID = %d " + "(0x%x) up == NULL", __func__, id, id); + cfpkt_destroy(pkt); + /* + * Don't return ERROR, since modem misbehaves and sends out + * flow on before linksetup response. + */ + return /* CFGLU_EPROT; */ 0; + } + + ret = up->receive(up, pkt); + return ret; +} + +static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt) +{ + int ret; + struct cfmuxl *muxl = container_obj(layr); + u8 linkid; + struct cflayer *dn; + struct caif_payload_info *info = cfpkt_info(pkt); + dn = get_dn(muxl, cfpkt_info(pkt)->dev_info); + if (dn == NULL) { + pr_warning("CAIF: %s(): Send data on unknown phy " + "ID = %d (0x%x)\n", + __func__, info->dev_info->id, info->dev_info->id); + return -ENOTCONN; + } + info->hdr_len += 1; + linkid = info->channel_id; + cfpkt_add_head(pkt, &linkid, 1); + ret = dn->transmit(dn, pkt); + /* Remove MUX protocol header upon error. */ + if (ret < 0) + cfpkt_extr_head(pkt, &linkid, 1); + return ret; +} + +static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, + int phyid) +{ + struct cfmuxl *muxl = container_obj(layr); + struct list_head *node; + struct cflayer *layer; + list_for_each(node, &muxl->srvl_list) { + layer = list_entry(node, struct cflayer, node); + if (cfsrvl_phyid_match(layer, phyid)) + layer->ctrlcmd(layer, ctrl, phyid); + } +} diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c new file mode 100644 index 000000000000..cd2830fec935 --- /dev/null +++ b/net/caif/cfrfml.c @@ -0,0 +1,108 @@ +/* + * Copyright (C) ST-Ericsson AB 2010 + * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * License terms: GNU General Public License (GPL) version 2 + */ + +#include +#include +#include +#include +#include +#include + +#define container_obj(layr) container_of(layr, struct cfsrvl, layer) + +#define RFM_SEGMENTATION_BIT 0x01 +#define RFM_PAYLOAD 0x00 +#define RFM_CMD_BIT 0x80 +#define RFM_FLOW_OFF 0x81 +#define RFM_FLOW_ON 0x80 +#define RFM_SET_PIN 0x82 +#define RFM_CTRL_PKT_SIZE 1 + +static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt); +static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt); +static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl); + +struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info) +{ + struct cfsrvl *rfm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); + if (!rfm) { + pr_warning("CAIF: %s(): Out of memory\n", __func__); + return NULL; + } + caif_assert(offsetof(struct cfsrvl, layer) == 0); + memset(rfm, 0, sizeof(struct cfsrvl)); + cfsrvl_init(rfm, channel_id, dev_info); + rfm->layer.modemcmd = cfservl_modemcmd; + rfm->layer.receive = cfrfml_receive; + rfm->layer.transmit = cfrfml_transmit; + snprintf(rfm->layer.name, CAIF_LAYER_NAME_SZ, "rfm%d", channel_id); + return &rfm->layer; +} + 
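[Editor's illustrative sketch, not part of the patch: the service layers in this series — dbg, dgm, rfm, util, vei and vid — all build a struct cfsrvl the same way: allocate with GFP_ATOMIC, zero it, call cfsrvl_init(), hook up receive/transmit and set the layer name. The helper below only restates that shared pattern; cfsrvl_alloc_common() and its parameters are hypothetical names, not functions added by this patch, and the header names are assumed since the #include targets were lost in extraction.]

/* Assumed headers; the patch's own include lines were stripped. */
#include <linux/stddef.h>
#include <linux/slab.h>
#include <net/caif/caif_layer.h>
#include <net/caif/cfsrvl.h>
#include <net/caif/cfpkt.h>

/* Hypothetical helper capturing the construction pattern shared by the
 * cf*_create() functions above. Not part of the actual commit.
 */
static struct cflayer *cfsrvl_alloc_common(u8 channel_id,
					   struct dev_info *dev_info,
					   int (*receive)(struct cflayer *,
							  struct cfpkt *),
					   int (*transmit)(struct cflayer *,
							   struct cfpkt *),
					   const char *name_fmt)
{
	/* GFP_ATOMIC, matching the allocations used throughout the patch */
	struct cfsrvl *svc = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);

	if (!svc) {
		pr_warning("CAIF: %s(): Out of memory\n", __func__);
		return NULL;
	}
	/* struct cflayer must remain the first member of struct cfsrvl */
	caif_assert(offsetof(struct cfsrvl, layer) == 0);
	memset(svc, 0, sizeof(struct cfsrvl));
	/* cfsrvl_init() sets channel id, flow state and ctrlcmd/modemcmd */
	cfsrvl_init(svc, channel_id, dev_info);
	svc->layer.receive = receive;
	svc->layer.transmit = transmit;
	snprintf(svc->layer.name, CAIF_LAYER_NAME_SZ - 1, name_fmt,
		 channel_id);
	svc->layer.name[CAIF_LAYER_NAME_SZ - 1] = '\0';
	return &svc->layer;
}

[With such a helper, cfrfml_create() above would reduce to a call like
cfsrvl_alloc_common(channel_id, dev_info, cfrfml_receive, cfrfml_transmit,
"rfm%d") followed by assigning layer.modemcmd = cfservl_modemcmd; the sketch
is shown only to make the repeated pattern in the diffs easier to follow.]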
+static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl) +{ + return -EPROTO; +} + +static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt) +{ + u8 tmp; + bool segmented; + int ret; + caif_assert(layr->up != NULL); + caif_assert(layr->receive != NULL); + + /* + * RFM is taking care of segmentation and stripping of + * segmentation bit. + */ + if (cfpkt_extr_head(pkt, &tmp, 1) < 0) { + pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); + cfpkt_destroy(pkt); + return -EPROTO; + } + segmented = tmp & RFM_SEGMENTATION_BIT; + caif_assert(!segmented); + + ret = layr->up->receive(layr->up, pkt); + return ret; +} + +static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt) +{ + u8 tmp = 0; + int ret; + struct cfsrvl *service = container_obj(layr); + + caif_assert(layr->dn != NULL); + caif_assert(layr->dn->transmit != NULL); + + if (!cfsrvl_ready(service, &ret)) + return ret; + + if (!cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) { + pr_err("CAIF: %s():Packet too large - size=%d\n", + __func__, cfpkt_getlen(pkt)); + return -EOVERFLOW; + } + if (cfpkt_add_head(pkt, &tmp, 1) < 0) { + pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); + return -EPROTO; + } + + /* Add info for MUX-layer to route the packet out. */ + cfpkt_info(pkt)->channel_id = service->layer.id; + /* + * To optimize alignment, we add up the size of CAIF header before + * payload. + */ + cfpkt_info(pkt)->hdr_len = 1; + cfpkt_info(pkt)->dev_info = &service->dev_info; + ret = layr->dn->transmit(layr->dn, pkt); + if (ret < 0) + cfpkt_extr_head(pkt, &tmp, 1); + return ret; +} diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c new file mode 100644 index 000000000000..06029ea2da2f --- /dev/null +++ b/net/caif/cfserl.c @@ -0,0 +1,192 @@ +/* + * Copyright (C) ST-Ericsson AB 2010 + * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * License terms: GNU General Public License (GPL) version 2 + */ + +#include +#include +#include +#include +#include +#include + +#define container_obj(layr) ((struct cfserl *) layr) + +#define CFSERL_STX 0x02 +#define CAIF_MINIUM_PACKET_SIZE 4 +struct cfserl { + struct cflayer layer; + struct cfpkt *incomplete_frm; + /* Protects parallel processing of incoming packets */ + spinlock_t sync; + bool usestx; +}; +#define STXLEN(layr) (layr->usestx ? 
1 : 0) + +static int cfserl_receive(struct cflayer *layr, struct cfpkt *pkt); +static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt); +static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, + int phyid); + +struct cflayer *cfserl_create(int type, int instance, bool use_stx) +{ + struct cfserl *this = kmalloc(sizeof(struct cfserl), GFP_ATOMIC); + if (!this) { + pr_warning("CAIF: %s(): Out of memory\n", __func__); + return NULL; + } + caif_assert(offsetof(struct cfserl, layer) == 0); + memset(this, 0, sizeof(struct cfserl)); + this->layer.receive = cfserl_receive; + this->layer.transmit = cfserl_transmit; + this->layer.ctrlcmd = cfserl_ctrlcmd; + this->layer.type = type; + this->usestx = use_stx; + spin_lock_init(&this->sync); + snprintf(this->layer.name, CAIF_LAYER_NAME_SZ, "ser1"); + return &this->layer; +} + +static int cfserl_receive(struct cflayer *l, struct cfpkt *newpkt) +{ + struct cfserl *layr = container_obj(l); + u16 pkt_len; + struct cfpkt *pkt = NULL; + struct cfpkt *tail_pkt = NULL; + u8 tmp8; + u16 tmp; + u8 stx = CFSERL_STX; + int ret; + u16 expectlen = 0; + caif_assert(newpkt != NULL); + spin_lock(&layr->sync); + + if (layr->incomplete_frm != NULL) { + + layr->incomplete_frm = + cfpkt_append(layr->incomplete_frm, newpkt, expectlen); + pkt = layr->incomplete_frm; + } else { + pkt = newpkt; + } + layr->incomplete_frm = NULL; + + do { + /* Search for STX at start of pkt if STX is used */ + if (layr->usestx) { + cfpkt_extr_head(pkt, &tmp8, 1); + if (tmp8 != CFSERL_STX) { + while (cfpkt_more(pkt) + && tmp8 != CFSERL_STX) { + cfpkt_extr_head(pkt, &tmp8, 1); + } + if (!cfpkt_more(pkt)) { + cfpkt_destroy(pkt); + layr->incomplete_frm = NULL; + spin_unlock(&layr->sync); + return -EPROTO; + } + } + } + + pkt_len = cfpkt_getlen(pkt); + + /* + * pkt_len is the accumulated length of the packet data + * we have received so far. + * Exit if frame doesn't hold length. + */ + + if (pkt_len < 2) { + if (layr->usestx) + cfpkt_add_head(pkt, &stx, 1); + layr->incomplete_frm = pkt; + spin_unlock(&layr->sync); + return 0; + } + + /* + * Find length of frame. + * expectlen is the length we need for a full frame. + */ + cfpkt_peek_head(pkt, &tmp, 2); + expectlen = le16_to_cpu(tmp) + 2; + /* + * Frame error handling + */ + if (expectlen < CAIF_MINIUM_PACKET_SIZE + || expectlen > CAIF_MAX_FRAMESIZE) { + if (!layr->usestx) { + if (pkt != NULL) + cfpkt_destroy(pkt); + layr->incomplete_frm = NULL; + expectlen = 0; + spin_unlock(&layr->sync); + return -EPROTO; + } + continue; + } + + if (pkt_len < expectlen) { + /* Too little received data */ + if (layr->usestx) + cfpkt_add_head(pkt, &stx, 1); + layr->incomplete_frm = pkt; + spin_unlock(&layr->sync); + return 0; + } + + /* + * Enough data for at least one frame. 
+ * Split the frame, if too long + */ + if (pkt_len > expectlen) + tail_pkt = cfpkt_split(pkt, expectlen); + else + tail_pkt = NULL; + + /* Send the first part of packet upwards.*/ + spin_unlock(&layr->sync); + ret = layr->layer.up->receive(layr->layer.up, pkt); + spin_lock(&layr->sync); + if (ret == -EILSEQ) { + if (layr->usestx) { + if (tail_pkt != NULL) + pkt = cfpkt_append(pkt, tail_pkt, 0); + + /* Start search for next STX if frame failed */ + continue; + } else { + cfpkt_destroy(pkt); + pkt = NULL; + } + } + + pkt = tail_pkt; + + } while (pkt != NULL); + + spin_unlock(&layr->sync); + return 0; +} + +static int cfserl_transmit(struct cflayer *layer, struct cfpkt *newpkt) +{ + struct cfserl *layr = container_obj(layer); + int ret; + u8 tmp8 = CFSERL_STX; + if (layr->usestx) + cfpkt_add_head(newpkt, &tmp8, 1); + ret = layer->dn->transmit(layer->dn, newpkt); + if (ret < 0) + cfpkt_extr_head(newpkt, &tmp8, 1); + + return ret; +} + +static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, + int phyid) +{ + layr->up->ctrlcmd(layr->up, ctrl, phyid); +} diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c new file mode 100644 index 000000000000..d470c51c6431 --- /dev/null +++ b/net/caif/cfsrvl.c @@ -0,0 +1,185 @@ +/* + * Copyright (C) ST-Ericsson AB 2010 + * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * License terms: GNU General Public License (GPL) version 2 + */ + +#include +#include +#include +#include +#include +#include +#include + +#define SRVL_CTRL_PKT_SIZE 1 +#define SRVL_FLOW_OFF 0x81 +#define SRVL_FLOW_ON 0x80 +#define SRVL_SET_PIN 0x82 +#define SRVL_CTRL_PKT_SIZE 1 + +#define container_obj(layr) container_of(layr, struct cfsrvl, layer) + +static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, + int phyid) +{ + struct cfsrvl *service = container_obj(layr); + caif_assert(layr->up != NULL); + caif_assert(layr->up->ctrlcmd != NULL); + switch (ctrl) { + case CAIF_CTRLCMD_INIT_RSP: + service->open = true; + layr->up->ctrlcmd(layr->up, ctrl, phyid); + break; + case CAIF_CTRLCMD_DEINIT_RSP: + case CAIF_CTRLCMD_INIT_FAIL_RSP: + service->open = false; + layr->up->ctrlcmd(layr->up, ctrl, phyid); + break; + case _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND: + if (phyid != service->dev_info.id) + break; + if (service->modem_flow_on) + layr->up->ctrlcmd(layr->up, + CAIF_CTRLCMD_FLOW_OFF_IND, phyid); + service->phy_flow_on = false; + break; + case _CAIF_CTRLCMD_PHYIF_FLOW_ON_IND: + if (phyid != service->dev_info.id) + return; + if (service->modem_flow_on) { + layr->up->ctrlcmd(layr->up, + CAIF_CTRLCMD_FLOW_ON_IND, + phyid); + } + service->phy_flow_on = true; + break; + case CAIF_CTRLCMD_FLOW_OFF_IND: + if (service->phy_flow_on) { + layr->up->ctrlcmd(layr->up, + CAIF_CTRLCMD_FLOW_OFF_IND, phyid); + } + service->modem_flow_on = false; + break; + case CAIF_CTRLCMD_FLOW_ON_IND: + if (service->phy_flow_on) { + layr->up->ctrlcmd(layr->up, + CAIF_CTRLCMD_FLOW_ON_IND, phyid); + } + service->modem_flow_on = true; + break; + case _CAIF_CTRLCMD_PHYIF_DOWN_IND: + /* In case interface is down, let's fake a remove shutdown */ + layr->up->ctrlcmd(layr->up, + CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, phyid); + break; + case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND: + layr->up->ctrlcmd(layr->up, ctrl, phyid); + break; + default: + pr_warning("CAIF: %s(): " + "Unexpected ctrl in cfsrvl (%d)\n", __func__, ctrl); + /* We have both modem and phy flow on, send flow on */ + layr->up->ctrlcmd(layr->up, ctrl, phyid); + service->phy_flow_on = true; + break; + } +} + +static int cfservl_modemcmd(struct 
cflayer *layr, enum caif_modemcmd ctrl) +{ + struct cfsrvl *service = container_obj(layr); + caif_assert(layr != NULL); + caif_assert(layr->dn != NULL); + caif_assert(layr->dn->transmit != NULL); + switch (ctrl) { + case CAIF_MODEMCMD_FLOW_ON_REQ: + { + struct cfpkt *pkt; + struct caif_payload_info *info; + u8 flow_on = SRVL_FLOW_ON; + pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE); + if (!pkt) { + pr_warning("CAIF: %s(): Out of memory\n", + __func__); + return -ENOMEM; + } + + if (cfpkt_add_head(pkt, &flow_on, 1) < 0) { + pr_err("CAIF: %s(): Packet is erroneous!\n", + __func__); + cfpkt_destroy(pkt); + return -EPROTO; + } + info = cfpkt_info(pkt); + info->channel_id = service->layer.id; + info->hdr_len = 1; + info->dev_info = &service->dev_info; + return layr->dn->transmit(layr->dn, pkt); + } + case CAIF_MODEMCMD_FLOW_OFF_REQ: + { + struct cfpkt *pkt; + struct caif_payload_info *info; + u8 flow_off = SRVL_FLOW_OFF; + pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE); + if (cfpkt_add_head(pkt, &flow_off, 1) < 0) { + pr_err("CAIF: %s(): Packet is erroneous!\n", + __func__); + cfpkt_destroy(pkt); + return -EPROTO; + } + info = cfpkt_info(pkt); + info->channel_id = service->layer.id; + info->hdr_len = 1; + info->dev_info = &service->dev_info; + return layr->dn->transmit(layr->dn, pkt); + } + default: + break; + } + return -EINVAL; +} + +void cfservl_destroy(struct cflayer *layer) +{ + kfree(layer); +} + +void cfsrvl_init(struct cfsrvl *service, + u8 channel_id, + struct dev_info *dev_info) +{ + caif_assert(offsetof(struct cfsrvl, layer) == 0); + service->open = false; + service->modem_flow_on = true; + service->phy_flow_on = true; + service->layer.id = channel_id; + service->layer.ctrlcmd = cfservl_ctrlcmd; + service->layer.modemcmd = cfservl_modemcmd; + service->dev_info = *dev_info; +} + +bool cfsrvl_ready(struct cfsrvl *service, int *err) +{ + if (service->open && service->modem_flow_on && service->phy_flow_on) + return true; + if (!service->open) { + *err = -ENOTCONN; + return false; + } + caif_assert(!(service->modem_flow_on && service->phy_flow_on)); + *err = -EAGAIN; + return false; +} +u8 cfsrvl_getphyid(struct cflayer *layer) +{ + struct cfsrvl *servl = container_obj(layer); + return servl->dev_info.id; +} + +bool cfsrvl_phyid_match(struct cflayer *layer, int phyid) +{ + struct cfsrvl *servl = container_obj(layer); + return servl->dev_info.id == phyid; +} diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c new file mode 100644 index 000000000000..5fd2c9ea8b42 --- /dev/null +++ b/net/caif/cfutill.c @@ -0,0 +1,115 @@ +/* + * Copyright (C) ST-Ericsson AB 2010 + * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * License terms: GNU General Public License (GPL) version 2 + */ + +#include +#include +#include +#include +#include +#include +#include + +#define container_obj(layr) ((struct cfsrvl *) layr) +#define UTIL_PAYLOAD 0x00 +#define UTIL_CMD_BIT 0x80 +#define UTIL_REMOTE_SHUTDOWN 0x82 +#define UTIL_FLOW_OFF 0x81 +#define UTIL_FLOW_ON 0x80 +#define UTIL_CTRL_PKT_SIZE 1 +static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt); +static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt); + +struct cflayer *cfutill_create(u8 channel_id, struct dev_info *dev_info) +{ + struct cfsrvl *util = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); + if (!util) { + pr_warning("CAIF: %s(): Out of memory\n", __func__); + return NULL; + } + caif_assert(offsetof(struct cfsrvl, layer) == 0); + memset(util, 0, sizeof(struct cfsrvl)); + cfsrvl_init(util, channel_id, dev_info); + util->layer.receive 
= cfutill_receive; + util->layer.transmit = cfutill_transmit; + snprintf(util->layer.name, CAIF_LAYER_NAME_SZ - 1, "util1"); + return &util->layer; +} + +static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt) +{ + u8 cmd = -1; + struct cfsrvl *service = container_obj(layr); + caif_assert(layr != NULL); + caif_assert(layr->up != NULL); + caif_assert(layr->up->receive != NULL); + caif_assert(layr->up->ctrlcmd != NULL); + if (cfpkt_extr_head(pkt, &cmd, 1) < 0) { + pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); + cfpkt_destroy(pkt); + return -EPROTO; + } + + switch (cmd) { + case UTIL_PAYLOAD: + return layr->up->receive(layr->up, pkt); + case UTIL_FLOW_OFF: + layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_OFF_IND, 0); + cfpkt_destroy(pkt); + return 0; + case UTIL_FLOW_ON: + layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_ON_IND, 0); + cfpkt_destroy(pkt); + return 0; + case UTIL_REMOTE_SHUTDOWN: /* Remote Shutdown Request */ + pr_err("CAIF: %s(): REMOTE SHUTDOWN REQUEST RECEIVED\n", + __func__); + layr->ctrlcmd(layr, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, 0); + service->open = false; + cfpkt_destroy(pkt); + return 0; + default: + cfpkt_destroy(pkt); + pr_warning("CAIF: %s(): Unknown service control %d (0x%x)\n", + __func__, cmd, cmd); + return -EPROTO; + } +} + +static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt) +{ + u8 zero = 0; + struct caif_payload_info *info; + int ret; + struct cfsrvl *service = container_obj(layr); + caif_assert(layr != NULL); + caif_assert(layr->dn != NULL); + caif_assert(layr->dn->transmit != NULL); + if (!cfsrvl_ready(service, &ret)) + return ret; + + if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) { + pr_err("CAIF: %s(): packet too large size=%d\n", + __func__, cfpkt_getlen(pkt)); + return -EOVERFLOW; + } + + cfpkt_add_head(pkt, &zero, 1); + /* Add info for MUX-layer to route the packet out. */ + info = cfpkt_info(pkt); + info->channel_id = service->layer.id; + /* + * To optimize alignment, we add up the size of CAIF header before + * payload. 
+ */ + info->hdr_len = 1; + info->dev_info = &service->dev_info; + ret = layr->dn->transmit(layr->dn, pkt); + if (ret < 0) { + u32 tmp32; + cfpkt_extr_head(pkt, &tmp32, 4); + } + return ret; +} diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c new file mode 100644 index 000000000000..0fd827f49491 --- /dev/null +++ b/net/caif/cfveil.c @@ -0,0 +1,107 @@ +/* + * Copyright (C) ST-Ericsson AB 2010 + * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * License terms: GNU General Public License (GPL) version 2 + */ + +#include +#include +#include +#include +#include + +#define VEI_PAYLOAD 0x00 +#define VEI_CMD_BIT 0x80 +#define VEI_FLOW_OFF 0x81 +#define VEI_FLOW_ON 0x80 +#define VEI_SET_PIN 0x82 +#define VEI_CTRL_PKT_SIZE 1 +#define container_obj(layr) container_of(layr, struct cfsrvl, layer) + +static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt); +static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt); + +struct cflayer *cfvei_create(u8 channel_id, struct dev_info *dev_info) +{ + struct cfsrvl *vei = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); + if (!vei) { + pr_warning("CAIF: %s(): Out of memory\n", __func__); + return NULL; + } + caif_assert(offsetof(struct cfsrvl, layer) == 0); + memset(vei, 0, sizeof(struct cfsrvl)); + cfsrvl_init(vei, channel_id, dev_info); + vei->layer.receive = cfvei_receive; + vei->layer.transmit = cfvei_transmit; + snprintf(vei->layer.name, CAIF_LAYER_NAME_SZ - 1, "vei%d", channel_id); + return &vei->layer; +} + +static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt) +{ + u8 cmd; + int ret; + caif_assert(layr->up != NULL); + caif_assert(layr->receive != NULL); + caif_assert(layr->ctrlcmd != NULL); + + + if (cfpkt_extr_head(pkt, &cmd, 1) < 0) { + pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); + cfpkt_destroy(pkt); + return -EPROTO; + } + switch (cmd) { + case VEI_PAYLOAD: + ret = layr->up->receive(layr->up, pkt); + return ret; + case VEI_FLOW_OFF: + layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_OFF_IND, 0); + cfpkt_destroy(pkt); + return 0; + case VEI_FLOW_ON: + layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_ON_IND, 0); + cfpkt_destroy(pkt); + return 0; + case VEI_SET_PIN: /* SET RS232 PIN */ + cfpkt_destroy(pkt); + return 0; + default: /* SET RS232 PIN */ + pr_warning("CAIF: %s():Unknown VEI control packet %d (0x%x)!\n", + __func__, cmd, cmd); + cfpkt_destroy(pkt); + return -EPROTO; + } +} + +static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt) +{ + u8 tmp = 0; + struct caif_payload_info *info; + int ret; + struct cfsrvl *service = container_obj(layr); + if (!cfsrvl_ready(service, &ret)) + return ret; + caif_assert(layr->dn != NULL); + caif_assert(layr->dn->transmit != NULL); + if (!cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) { + pr_warning("CAIF: %s(): Packet too large - size=%d\n", + __func__, cfpkt_getlen(pkt)); + return -EOVERFLOW; + } + + if (cfpkt_add_head(pkt, &tmp, 1) < 0) { + pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); + return -EPROTO; + } + + /* Add info-> for MUX-layer to route the packet out. 
*/ + info = cfpkt_info(pkt); + info->channel_id = service->layer.id; + info->hdr_len = 1; + info->dev_info = &service->dev_info; + ret = layr->dn->transmit(layr->dn, pkt); + if (ret < 0) + cfpkt_extr_head(pkt, &tmp, 1); + return ret; +} diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c new file mode 100644 index 000000000000..89ad4ea239f1 --- /dev/null +++ b/net/caif/cfvidl.c @@ -0,0 +1,65 @@ +/* + * Copyright (C) ST-Ericsson AB 2010 + * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * License terms: GNU General Public License (GPL) version 2 + */ + +#include +#include +#include +#include +#include +#include +#include + +#define container_obj(layr) ((struct cfsrvl *) layr) + +static int cfvidl_receive(struct cflayer *layr, struct cfpkt *pkt); +static int cfvidl_transmit(struct cflayer *layr, struct cfpkt *pkt); + +struct cflayer *cfvidl_create(u8 channel_id, struct dev_info *dev_info) +{ + struct cfsrvl *vid = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); + if (!vid) { + pr_warning("CAIF: %s(): Out of memory\n", __func__); + return NULL; + } + caif_assert(offsetof(struct cfsrvl, layer) == 0); + + memset(vid, 0, sizeof(struct cfsrvl)); + cfsrvl_init(vid, channel_id, dev_info); + vid->layer.receive = cfvidl_receive; + vid->layer.transmit = cfvidl_transmit; + snprintf(vid->layer.name, CAIF_LAYER_NAME_SZ - 1, "vid1"); + return &vid->layer; +} + +static int cfvidl_receive(struct cflayer *layr, struct cfpkt *pkt) +{ + u32 videoheader; + if (cfpkt_extr_head(pkt, &videoheader, 4) < 0) { + pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); + cfpkt_destroy(pkt); + return -EPROTO; + } + return layr->up->receive(layr->up, pkt); +} + +static int cfvidl_transmit(struct cflayer *layr, struct cfpkt *pkt) +{ + struct cfsrvl *service = container_obj(layr); + struct caif_payload_info *info; + u32 videoheader = 0; + int ret; + if (!cfsrvl_ready(service, &ret)) + return ret; + cfpkt_add_head(pkt, &videoheader, 4); + /* Add info for MUX-layer to route the packet out */ + info = cfpkt_info(pkt); + info->channel_id = service->layer.id; + info->dev_info = &service->dev_info; + ret = layr->dn->transmit(layr->dn, pkt); + if (ret < 0) + cfpkt_extr_head(pkt, &videoheader, 4); + return ret; +} -- cgit v1.2.2 From 15c9ac0c80e390df09ce5730a7b08b13e07a8dd5 Mon Sep 17 00:00:00 2001 From: Sjur Braendeland Date: Tue, 30 Mar 2010 13:56:24 +0000 Subject: net-caif: add CAIF generic caif support functions Support functions for the caif protocol stack: cfcnfg.c - CAIF Configuration Module used for adding and removing drivers and connection cfpkt_skbuff.c - CAIF Packet layer (SKB helper functions) Signed-off-by: Sjur Braendeland Signed-off-by: David S. 
Miller --- net/caif/cfcnfg.c | 529 ++++++++++++++++++++++++++++++++++++++++++++ net/caif/cfpkt_skbuff.c | 571 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1100 insertions(+) create mode 100644 net/caif/cfcnfg.c create mode 100644 net/caif/cfpkt_skbuff.c (limited to 'net') diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c new file mode 100644 index 000000000000..70a733d3d3da --- /dev/null +++ b/net/caif/cfcnfg.c @@ -0,0 +1,529 @@ +/* + * Copyright (C) ST-Ericsson AB 2010 + * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * License terms: GNU General Public License (GPL) version 2 + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define MAX_PHY_LAYERS 7 +#define PHY_NAME_LEN 20 + +#define container_obj(layr) container_of(layr, struct cfcnfg, layer) + +/* Information about CAIF physical interfaces held by Config Module in order + * to manage physical interfaces + */ +struct cfcnfg_phyinfo { + /* Pointer to the layer below the MUX (framing layer) */ + struct cflayer *frm_layer; + /* Pointer to the lowest actual physical layer */ + struct cflayer *phy_layer; + /* Unique identifier of the physical interface */ + unsigned int id; + /* Preference of the physical in interface */ + enum cfcnfg_phy_preference pref; + + /* Reference count, number of channels using the device */ + int phy_ref_count; + + /* Information about the physical device */ + struct dev_info dev_info; +}; + +struct cfcnfg { + struct cflayer layer; + struct cflayer *ctrl; + struct cflayer *mux; + u8 last_phyid; + struct cfcnfg_phyinfo phy_layers[MAX_PHY_LAYERS]; +}; + +static void cncfg_linkup_rsp(struct cflayer *layer, u8 linkid, + enum cfctrl_srv serv, u8 phyid, + struct cflayer *adapt_layer); +static void cncfg_linkdestroy_rsp(struct cflayer *layer, u8 linkid, + struct cflayer *client_layer); +static void cncfg_reject_rsp(struct cflayer *layer, u8 linkid, + struct cflayer *adapt_layer); +static void cfctrl_resp_func(void); +static void cfctrl_enum_resp(void); + +struct cfcnfg *cfcnfg_create(void) +{ + struct cfcnfg *this; + struct cfctrl_rsp *resp; + /* Initiate this layer */ + this = kmalloc(sizeof(struct cfcnfg), GFP_ATOMIC); + if (!this) { + pr_warning("CAIF: %s(): Out of memory\n", __func__); + return NULL; + } + memset(this, 0, sizeof(struct cfcnfg)); + this->mux = cfmuxl_create(); + if (!this->mux) + goto out_of_mem; + this->ctrl = cfctrl_create(); + if (!this->ctrl) + goto out_of_mem; + /* Initiate response functions */ + resp = cfctrl_get_respfuncs(this->ctrl); + resp->enum_rsp = cfctrl_enum_resp; + resp->linkerror_ind = cfctrl_resp_func; + resp->linkdestroy_rsp = cncfg_linkdestroy_rsp; + resp->sleep_rsp = cfctrl_resp_func; + resp->wake_rsp = cfctrl_resp_func; + resp->restart_rsp = cfctrl_resp_func; + resp->radioset_rsp = cfctrl_resp_func; + resp->linksetup_rsp = cncfg_linkup_rsp; + resp->reject_rsp = cncfg_reject_rsp; + + this->last_phyid = 1; + + cfmuxl_set_uplayer(this->mux, this->ctrl, 0); + layer_set_dn(this->ctrl, this->mux); + layer_set_up(this->ctrl, this); + return this; +out_of_mem: + pr_warning("CAIF: %s(): Out of memory\n", __func__); + kfree(this->mux); + kfree(this->ctrl); + kfree(this); + return NULL; +} +EXPORT_SYMBOL(cfcnfg_create); + +void cfcnfg_remove(struct cfcnfg *cfg) +{ + if (cfg) { + kfree(cfg->mux); + kfree(cfg->ctrl); + kfree(cfg); + } +} + +static void cfctrl_resp_func(void) +{ +} + +static void cfctrl_enum_resp(void) +{ +} + +struct dev_info *cfcnfg_get_phyid(struct cfcnfg *cnfg, 
+ enum cfcnfg_phy_preference phy_pref) +{ + u16 i; + + /* Try to match with specified preference */ + for (i = 1; i < MAX_PHY_LAYERS; i++) { + if (cnfg->phy_layers[i].id == i && + cnfg->phy_layers[i].pref == phy_pref && + cnfg->phy_layers[i].frm_layer != NULL) { + caif_assert(cnfg->phy_layers != NULL); + caif_assert(cnfg->phy_layers[i].id == i); + return &cnfg->phy_layers[i].dev_info; + } + } + /* Otherwise just return something */ + for (i = 1; i < MAX_PHY_LAYERS; i++) { + if (cnfg->phy_layers[i].id == i) { + caif_assert(cnfg->phy_layers != NULL); + caif_assert(cnfg->phy_layers[i].id == i); + return &cnfg->phy_layers[i].dev_info; + } + } + + return NULL; +} + +static struct cfcnfg_phyinfo *cfcnfg_get_phyinfo(struct cfcnfg *cnfg, + u8 phyid) +{ + int i; + /* Try to match with specified preference */ + for (i = 0; i < MAX_PHY_LAYERS; i++) + if (cnfg->phy_layers[i].frm_layer != NULL && + cnfg->phy_layers[i].id == phyid) + return &cnfg->phy_layers[i]; + return NULL; +} + +int cfcnfg_get_named(struct cfcnfg *cnfg, char *name) +{ + int i; + + /* Try to match with specified name */ + for (i = 0; i < MAX_PHY_LAYERS; i++) { + if (cnfg->phy_layers[i].frm_layer != NULL + && strcmp(cnfg->phy_layers[i].phy_layer->name, + name) == 0) + return cnfg->phy_layers[i].frm_layer->id; + } + return 0; +} + +/* + * NOTE: What happens on destroy failure: + * 1a) No response - Too early + * This will not happen because enumerate has already + * completed. + * 1b) No response - FATAL + * Not handled, but this should be a CAIF PROTOCOL ERROR + * Modem error, response is really expected - this + * case is not really handled. + * 2) O/E-bit indicate error + * Ignored - this link is destroyed anyway. + * 3) Not able to match on request + * Not handled, but this should be a CAIF PROTOCOL ERROR + * 4) Link-Error - (no response) + * Not handled, but this should be a CAIF PROTOCOL ERROR + */ + +int cfcnfg_del_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer) +{ + u8 channel_id = 0; + int ret = 0; + struct cfcnfg_phyinfo *phyinfo = NULL; + u8 phyid = 0; + + caif_assert(adap_layer != NULL); + channel_id = adap_layer->id; + if (channel_id == 0) { + pr_err("CAIF: %s():adap_layer->id is 0\n", __func__); + ret = -ENOTCONN; + goto end; + } + + if (adap_layer->dn == NULL) { + pr_err("CAIF: %s():adap_layer->dn is NULL\n", __func__); + ret = -ENODEV; + goto end; + } + + if (adap_layer->dn != NULL) + phyid = cfsrvl_getphyid(adap_layer->dn); + + phyinfo = cfcnfg_get_phyinfo(cnfg, phyid); + if (phyinfo == NULL) { + pr_warning("CAIF: %s(): No interface to send disconnect to\n", + __func__); + ret = -ENODEV; + goto end; + } + + if (phyinfo->id != phyid + || phyinfo->phy_layer->id != phyid + || phyinfo->frm_layer->id != phyid) { + + pr_err("CAIF: %s(): Inconsistency in phy registration\n", + __func__); + ret = -EINVAL; + goto end; + } + + ret = cfctrl_linkdown_req(cnfg->ctrl, channel_id, adap_layer); + +end: + if (phyinfo != NULL && --phyinfo->phy_ref_count == 0 && + phyinfo->phy_layer != NULL && + phyinfo->phy_layer->modemcmd != NULL) { + phyinfo->phy_layer->modemcmd(phyinfo->phy_layer, + _CAIF_MODEMCMD_PHYIF_USELESS); + } + return ret; + +} +EXPORT_SYMBOL(cfcnfg_del_adapt_layer); + +static void cncfg_linkdestroy_rsp(struct cflayer *layer, u8 linkid, + struct cflayer *client_layer) +{ + struct cfcnfg *cnfg = container_obj(layer); + struct cflayer *servl; + + /* + * 1) Remove service from the MUX layer. 
The MUX must + * guarante that no more payload sent "upwards" (receive) + */ + servl = cfmuxl_remove_uplayer(cnfg->mux, linkid); + + if (servl == NULL) { + pr_err("CAIF: %s(): PROTOCOL ERROR " + "- Error removing service_layer Linkid(%d)", + __func__, linkid); + return; + } + caif_assert(linkid == servl->id); + + if (servl != client_layer && servl->up != client_layer) { + pr_err("CAIF: %s(): Error removing service_layer " + "Linkid(%d) %p %p", + __func__, linkid, (void *) servl, + (void *) client_layer); + return; + } + + /* + * 2) DEINIT_RSP must guarantee that no more packets are transmitted + * from client (adap_layer) when it returns. + */ + + if (servl->ctrlcmd == NULL) { + pr_err("CAIF: %s(): Error servl->ctrlcmd == NULL", __func__); + return; + } + + servl->ctrlcmd(servl, CAIF_CTRLCMD_DEINIT_RSP, 0); + + /* 3) It is now safe to destroy the service layer. */ + cfservl_destroy(servl); +} + +/* + * NOTE: What happens on linksetup failure: + * 1a) No response - Too early + * This will not happen because enumerate is secured + * before using interface. + * 1b) No response - FATAL + * Not handled, but this should be a CAIF PROTOCOL ERROR + * Modem error, response is really expected - this case is + * not really handled. + * 2) O/E-bit indicate error + * Handled in cnfg_reject_rsp + * 3) Not able to match on request + * Not handled, but this should be a CAIF PROTOCOL ERROR + * 4) Link-Error - (no response) + * Not handled, but this should be a CAIF PROTOCOL ERROR + */ + +int +cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg, + struct cfctrl_link_param *param, + struct cflayer *adap_layer) +{ + struct cflayer *frml; + if (adap_layer == NULL) { + pr_err("CAIF: %s(): adap_layer is zero", __func__); + return -EINVAL; + } + if (adap_layer->receive == NULL) { + pr_err("CAIF: %s(): adap_layer->receive is NULL", __func__); + return -EINVAL; + } + if (adap_layer->ctrlcmd == NULL) { + pr_err("CAIF: %s(): adap_layer->ctrlcmd == NULL", __func__); + return -EINVAL; + } + frml = cnfg->phy_layers[param->phyid].frm_layer; + if (frml == NULL) { + pr_err("CAIF: %s(): Specified PHY type does not exist!", + __func__); + return -ENODEV; + } + caif_assert(param->phyid == cnfg->phy_layers[param->phyid].id); + caif_assert(cnfg->phy_layers[param->phyid].frm_layer->id == + param->phyid); + caif_assert(cnfg->phy_layers[param->phyid].phy_layer->id == + param->phyid); + /* FIXME: ENUMERATE INITIALLY WHEN ACTIVATING PHYSICAL INTERFACE */ + cfctrl_enum_req(cnfg->ctrl, param->phyid); + cfctrl_linkup_request(cnfg->ctrl, param, adap_layer); + return 0; +} +EXPORT_SYMBOL(cfcnfg_add_adaptation_layer); + +static void cncfg_reject_rsp(struct cflayer *layer, u8 linkid, + struct cflayer *adapt_layer) +{ + if (adapt_layer != NULL && adapt_layer->ctrlcmd != NULL) + adapt_layer->ctrlcmd(adapt_layer, + CAIF_CTRLCMD_INIT_FAIL_RSP, 0); +} + +static void +cncfg_linkup_rsp(struct cflayer *layer, u8 linkid, enum cfctrl_srv serv, + u8 phyid, struct cflayer *adapt_layer) +{ + struct cfcnfg *cnfg = container_obj(layer); + struct cflayer *servicel = NULL; + struct cfcnfg_phyinfo *phyinfo; + if (adapt_layer == NULL) { + pr_err("CAIF: %s(): PROTOCOL ERROR " + "- LinkUp Request/Response did not match\n", __func__); + return; + } + + caif_assert(cnfg != NULL); + caif_assert(phyid != 0); + phyinfo = &cnfg->phy_layers[phyid]; + caif_assert(phyinfo != NULL); + caif_assert(phyinfo->id == phyid); + caif_assert(phyinfo->phy_layer != NULL); + caif_assert(phyinfo->phy_layer->id == phyid); + + if (phyinfo != NULL && + phyinfo->phy_ref_count++ == 0 && + 
phyinfo->phy_layer != NULL && + phyinfo->phy_layer->modemcmd != NULL) { + caif_assert(phyinfo->phy_layer->id == phyid); + phyinfo->phy_layer->modemcmd(phyinfo->phy_layer, + _CAIF_MODEMCMD_PHYIF_USEFULL); + + } + adapt_layer->id = linkid; + + switch (serv) { + case CFCTRL_SRV_VEI: + servicel = cfvei_create(linkid, &phyinfo->dev_info); + break; + case CFCTRL_SRV_DATAGRAM: + servicel = cfdgml_create(linkid, &phyinfo->dev_info); + break; + case CFCTRL_SRV_RFM: + servicel = cfrfml_create(linkid, &phyinfo->dev_info); + break; + case CFCTRL_SRV_UTIL: + servicel = cfutill_create(linkid, &phyinfo->dev_info); + break; + case CFCTRL_SRV_VIDEO: + servicel = cfvidl_create(linkid, &phyinfo->dev_info); + break; + case CFCTRL_SRV_DBG: + servicel = cfdbgl_create(linkid, &phyinfo->dev_info); + break; + default: + pr_err("CAIF: %s(): Protocol error. " + "Link setup response - unknown channel type\n", + __func__); + return; + } + if (!servicel) { + pr_warning("CAIF: %s(): Out of memory\n", __func__); + return; + } + layer_set_dn(servicel, cnfg->mux); + cfmuxl_set_uplayer(cnfg->mux, servicel, linkid); + layer_set_up(servicel, adapt_layer); + layer_set_dn(adapt_layer, servicel); + servicel->ctrlcmd(servicel, CAIF_CTRLCMD_INIT_RSP, 0); +} + +void +cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type, + void *dev, struct cflayer *phy_layer, u16 *phyid, + enum cfcnfg_phy_preference pref, + bool fcs, bool stx) +{ + struct cflayer *frml; + struct cflayer *phy_driver = NULL; + int i; + + + if (cnfg->phy_layers[cnfg->last_phyid].frm_layer == NULL) { + *phyid = cnfg->last_phyid; + + /* range: * 1..(MAX_PHY_LAYERS-1) */ + cnfg->last_phyid = + (cnfg->last_phyid % (MAX_PHY_LAYERS - 1)) + 1; + } else { + *phyid = 0; + for (i = 1; i < MAX_PHY_LAYERS; i++) { + if (cnfg->phy_layers[i].frm_layer == NULL) { + *phyid = i; + break; + } + } + } + if (*phyid == 0) { + pr_err("CAIF: %s(): No Available PHY ID\n", __func__); + return; + } + + switch (phy_type) { + case CFPHYTYPE_FRAG: + phy_driver = + cfserl_create(CFPHYTYPE_FRAG, *phyid, stx); + if (!phy_driver) { + pr_warning("CAIF: %s(): Out of memory\n", __func__); + return; + } + + break; + case CFPHYTYPE_CAIF: + phy_driver = NULL; + break; + default: + pr_err("CAIF: %s(): %d", __func__, phy_type); + return; + break; + } + + phy_layer->id = *phyid; + cnfg->phy_layers[*phyid].pref = pref; + cnfg->phy_layers[*phyid].id = *phyid; + cnfg->phy_layers[*phyid].dev_info.id = *phyid; + cnfg->phy_layers[*phyid].dev_info.dev = dev; + cnfg->phy_layers[*phyid].phy_layer = phy_layer; + cnfg->phy_layers[*phyid].phy_ref_count = 0; + phy_layer->type = phy_type; + frml = cffrml_create(*phyid, fcs); + if (!frml) { + pr_warning("CAIF: %s(): Out of memory\n", __func__); + return; + } + cnfg->phy_layers[*phyid].frm_layer = frml; + cfmuxl_set_dnlayer(cnfg->mux, frml, *phyid); + layer_set_up(frml, cnfg->mux); + + if (phy_driver != NULL) { + phy_driver->id = *phyid; + layer_set_dn(frml, phy_driver); + layer_set_up(phy_driver, frml); + layer_set_dn(phy_driver, phy_layer); + layer_set_up(phy_layer, phy_driver); + } else { + layer_set_dn(frml, phy_layer); + layer_set_up(phy_layer, frml); + } +} +EXPORT_SYMBOL(cfcnfg_add_phy_layer); + +int cfcnfg_del_phy_layer(struct cfcnfg *cnfg, struct cflayer *phy_layer) +{ + struct cflayer *frml, *frml_dn; + u16 phyid; + phyid = phy_layer->id; + caif_assert(phyid == cnfg->phy_layers[phyid].id); + caif_assert(phy_layer == cnfg->phy_layers[phyid].phy_layer); + caif_assert(phy_layer->id == phyid); + caif_assert(cnfg->phy_layers[phyid].frm_layer->id == 
phyid); + + memset(&cnfg->phy_layers[phy_layer->id], 0, + sizeof(struct cfcnfg_phyinfo)); + frml = cfmuxl_remove_dnlayer(cnfg->mux, phy_layer->id); + frml_dn = frml->dn; + cffrml_set_uplayer(frml, NULL); + cffrml_set_dnlayer(frml, NULL); + kfree(frml); + + if (phy_layer != frml_dn) { + layer_set_up(frml_dn, NULL); + layer_set_dn(frml_dn, NULL); + kfree(frml_dn); + } + layer_set_up(phy_layer, NULL); + return 0; +} +EXPORT_SYMBOL(cfcnfg_del_phy_layer); diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c new file mode 100644 index 000000000000..83fff2ff6658 --- /dev/null +++ b/net/caif/cfpkt_skbuff.c @@ -0,0 +1,571 @@ +/* + * Copyright (C) ST-Ericsson AB 2010 + * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * License terms: GNU General Public License (GPL) version 2 + */ + +#include +#include +#include +#include + +#define PKT_PREFIX CAIF_NEEDED_HEADROOM +#define PKT_POSTFIX CAIF_NEEDED_TAILROOM +#define PKT_LEN_WHEN_EXTENDING 128 +#define PKT_ERROR(pkt, errmsg) do { \ + cfpkt_priv(pkt)->erronous = true; \ + skb_reset_tail_pointer(&pkt->skb); \ + pr_warning("CAIF: " errmsg);\ + } while (0) + +struct cfpktq { + struct sk_buff_head head; + atomic_t count; + /* Lock protects count updates */ + spinlock_t lock; +}; + +/* + * net/caif/ is generic and does not + * understand SKB, so we do this typecast + */ +struct cfpkt { + struct sk_buff skb; +}; + +/* Private data inside SKB */ +struct cfpkt_priv_data { + struct dev_info dev_info; + bool erronous; +}; + +inline struct cfpkt_priv_data *cfpkt_priv(struct cfpkt *pkt) +{ + return (struct cfpkt_priv_data *) pkt->skb.cb; +} + +inline bool is_erronous(struct cfpkt *pkt) +{ + return cfpkt_priv(pkt)->erronous; +} + +inline struct sk_buff *pkt_to_skb(struct cfpkt *pkt) +{ + return &pkt->skb; +} + +inline struct cfpkt *skb_to_pkt(struct sk_buff *skb) +{ + return (struct cfpkt *) skb; +} + + +struct cfpkt *cfpkt_fromnative(enum caif_direction dir, void *nativepkt) +{ + struct cfpkt *pkt = skb_to_pkt(nativepkt); + cfpkt_priv(pkt)->erronous = false; + return pkt; +} +EXPORT_SYMBOL(cfpkt_fromnative); + +void *cfpkt_tonative(struct cfpkt *pkt) +{ + return (void *) pkt; +} +EXPORT_SYMBOL(cfpkt_tonative); + +static struct cfpkt *cfpkt_create_pfx(u16 len, u16 pfx) +{ + struct sk_buff *skb; + + if (likely(in_interrupt())) + skb = alloc_skb(len + pfx, GFP_ATOMIC); + else + skb = alloc_skb(len + pfx, GFP_KERNEL); + + if (unlikely(skb == NULL)) + return NULL; + + skb_reserve(skb, pfx); + return skb_to_pkt(skb); +} + +inline struct cfpkt *cfpkt_create(u16 len) +{ + return cfpkt_create_pfx(len + PKT_POSTFIX, PKT_PREFIX); +} +EXPORT_SYMBOL(cfpkt_create); + +void cfpkt_destroy(struct cfpkt *pkt) +{ + struct sk_buff *skb = pkt_to_skb(pkt); + kfree_skb(skb); +} +EXPORT_SYMBOL(cfpkt_destroy); + +inline bool cfpkt_more(struct cfpkt *pkt) +{ + struct sk_buff *skb = pkt_to_skb(pkt); + return skb->len > 0; +} +EXPORT_SYMBOL(cfpkt_more); + +int cfpkt_peek_head(struct cfpkt *pkt, void *data, u16 len) +{ + struct sk_buff *skb = pkt_to_skb(pkt); + if (skb_headlen(skb) >= len) { + memcpy(data, skb->data, len); + return 0; + } + return !cfpkt_extr_head(pkt, data, len) && + !cfpkt_add_head(pkt, data, len); +} +EXPORT_SYMBOL(cfpkt_peek_head); + +int cfpkt_extr_head(struct cfpkt *pkt, void *data, u16 len) +{ + struct sk_buff *skb = pkt_to_skb(pkt); + u8 *from; + if (unlikely(is_erronous(pkt))) + return -EPROTO; + + if (unlikely(len > skb->len)) { + PKT_ERROR(pkt, "cfpkt_extr_head read beyond end of packet\n"); + return -EPROTO; + } + + if (unlikely(len > 
skb_headlen(skb))) { + if (unlikely(skb_linearize(skb) != 0)) { + PKT_ERROR(pkt, "cfpkt_extr_head linearize failed\n"); + return -EPROTO; + } + } + from = skb_pull(skb, len); + from -= len; + memcpy(data, from, len); + return 0; +} +EXPORT_SYMBOL(cfpkt_extr_head); + +int cfpkt_extr_trail(struct cfpkt *pkt, void *dta, u16 len) +{ + struct sk_buff *skb = pkt_to_skb(pkt); + u8 *data = dta; + u8 *from; + if (unlikely(is_erronous(pkt))) + return -EPROTO; + + if (unlikely(skb_linearize(skb) != 0)) { + PKT_ERROR(pkt, "cfpkt_extr_trail linearize failed\n"); + return -EPROTO; + } + if (unlikely(skb->data + len > skb_tail_pointer(skb))) { + PKT_ERROR(pkt, "cfpkt_extr_trail read beyond end of packet\n"); + return -EPROTO; + } + from = skb_tail_pointer(skb) - len; + skb_trim(skb, skb->len - len); + memcpy(data, from, len); + return 0; +} +EXPORT_SYMBOL(cfpkt_extr_trail); + +int cfpkt_pad_trail(struct cfpkt *pkt, u16 len) +{ + return cfpkt_add_body(pkt, NULL, len); +} +EXPORT_SYMBOL(cfpkt_pad_trail); + +int cfpkt_add_body(struct cfpkt *pkt, const void *data, u16 len) +{ + struct sk_buff *skb = pkt_to_skb(pkt); + struct sk_buff *lastskb; + u8 *to; + u16 addlen = 0; + + + if (unlikely(is_erronous(pkt))) + return -EPROTO; + + lastskb = skb; + + /* Check whether we need to add space at the tail */ + if (unlikely(skb_tailroom(skb) < len)) { + if (likely(len < PKT_LEN_WHEN_EXTENDING)) + addlen = PKT_LEN_WHEN_EXTENDING; + else + addlen = len; + } + + /* Check whether we need to change the SKB before writing to the tail */ + if (unlikely((addlen > 0) || skb_cloned(skb) || skb_shared(skb))) { + + /* Make sure data is writable */ + if (unlikely(skb_cow_data(skb, addlen, &lastskb) < 0)) { + PKT_ERROR(pkt, "cfpkt_add_body: cow failed\n"); + return -EPROTO; + } + /* + * Is the SKB non-linear after skb_cow_data()? If so, we are + * going to add data to the last SKB, so we need to adjust + * lengths of the top SKB. + */ + if (lastskb != skb) { + pr_warning("CAIF: %s(): Packet is non-linear\n", + __func__); + skb->len += len; + skb->data_len += len; + } + } + + /* All set to put the last SKB and optionally write data there. */ + to = skb_put(lastskb, len); + if (likely(data)) + memcpy(to, data, len); + return 0; +} +EXPORT_SYMBOL(cfpkt_add_body); + +inline int cfpkt_addbdy(struct cfpkt *pkt, u8 data) +{ + return cfpkt_add_body(pkt, &data, 1); +} +EXPORT_SYMBOL(cfpkt_addbdy); + +int cfpkt_add_head(struct cfpkt *pkt, const void *data2, u16 len) +{ + struct sk_buff *skb = pkt_to_skb(pkt); + struct sk_buff *lastskb; + u8 *to; + const u8 *data = data2; + if (unlikely(is_erronous(pkt))) + return -EPROTO; + if (unlikely(skb_headroom(skb) < len)) { + PKT_ERROR(pkt, "cfpkt_add_head: no headroom\n"); + return -EPROTO; + } + + /* Make sure data is writable */ + if (unlikely(skb_cow_data(skb, 0, &lastskb) < 0)) { + PKT_ERROR(pkt, "cfpkt_add_head: cow failed\n"); + return -EPROTO; + } + + to = skb_push(skb, len); + memcpy(to, data, len); + return 0; +} +EXPORT_SYMBOL(cfpkt_add_head); + +inline int cfpkt_add_trail(struct cfpkt *pkt, const void *data, u16 len) +{ + return cfpkt_add_body(pkt, data, len); +} +EXPORT_SYMBOL(cfpkt_add_trail); + +inline u16 cfpkt_getlen(struct cfpkt *pkt) +{ + struct sk_buff *skb = pkt_to_skb(pkt); + return skb->len; +} +EXPORT_SYMBOL(cfpkt_getlen); + +inline u16 cfpkt_iterate(struct cfpkt *pkt, + u16 (*iter_func)(u16, void *, u16), + u16 data) +{ + /* + * Don't care about the performance hit of linearizing, + * Checksum should not be used on high-speed interfaces anyway. 
+ */ + if (unlikely(is_erronous(pkt))) + return -EPROTO; + if (unlikely(skb_linearize(&pkt->skb) != 0)) { + PKT_ERROR(pkt, "cfpkt_iterate: linearize failed\n"); + return -EPROTO; + } + return iter_func(data, pkt->skb.data, cfpkt_getlen(pkt)); +} +EXPORT_SYMBOL(cfpkt_iterate); + +int cfpkt_setlen(struct cfpkt *pkt, u16 len) +{ + struct sk_buff *skb = pkt_to_skb(pkt); + + + if (unlikely(is_erronous(pkt))) + return -EPROTO; + + if (likely(len <= skb->len)) { + if (unlikely(skb->data_len)) + ___pskb_trim(skb, len); + else + skb_trim(skb, len); + + return cfpkt_getlen(pkt); + } + + /* Need to expand SKB */ + if (unlikely(!cfpkt_pad_trail(pkt, len - skb->len))) + PKT_ERROR(pkt, "cfpkt_setlen: skb_pad_trail failed\n"); + + return cfpkt_getlen(pkt); +} +EXPORT_SYMBOL(cfpkt_setlen); + +struct cfpkt *cfpkt_create_uplink(const unsigned char *data, unsigned int len) +{ + struct cfpkt *pkt = cfpkt_create_pfx(len + PKT_POSTFIX, PKT_PREFIX); + if (unlikely(data != NULL)) + cfpkt_add_body(pkt, data, len); + return pkt; +} +EXPORT_SYMBOL(cfpkt_create_uplink); + +struct cfpkt *cfpkt_append(struct cfpkt *dstpkt, + struct cfpkt *addpkt, + u16 expectlen) +{ + struct sk_buff *dst = pkt_to_skb(dstpkt); + struct sk_buff *add = pkt_to_skb(addpkt); + u16 addlen = skb_headlen(add); + u16 neededtailspace; + struct sk_buff *tmp; + u16 dstlen; + u16 createlen; + if (unlikely(is_erronous(dstpkt) || is_erronous(addpkt))) { + cfpkt_destroy(addpkt); + return dstpkt; + } + if (expectlen > addlen) + neededtailspace = expectlen; + else + neededtailspace = addlen; + + if (dst->tail + neededtailspace > dst->end) { + /* Create a dumplicate of 'dst' with more tail space */ + dstlen = skb_headlen(dst); + createlen = dstlen + neededtailspace; + tmp = pkt_to_skb( + cfpkt_create(createlen + PKT_PREFIX + PKT_POSTFIX)); + if (!tmp) + return NULL; + skb_set_tail_pointer(tmp, dstlen); + tmp->len = dstlen; + memcpy(tmp->data, dst->data, dstlen); + cfpkt_destroy(dstpkt); + dst = tmp; + } + memcpy(skb_tail_pointer(dst), add->data, skb_headlen(add)); + cfpkt_destroy(addpkt); + dst->tail += addlen; + dst->len += addlen; + return skb_to_pkt(dst); +} +EXPORT_SYMBOL(cfpkt_append); + +struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos) +{ + struct sk_buff *skb2; + struct sk_buff *skb = pkt_to_skb(pkt); + u8 *split = skb->data + pos; + u16 len2nd = skb_tail_pointer(skb) - split; + + if (unlikely(is_erronous(pkt))) + return NULL; + + if (skb->data + pos > skb_tail_pointer(skb)) { + PKT_ERROR(pkt, + "cfpkt_split: trying to split beyond end of packet"); + return NULL; + } + + /* Create a new packet for the second part of the data */ + skb2 = pkt_to_skb( + cfpkt_create_pfx(len2nd + PKT_PREFIX + PKT_POSTFIX, + PKT_PREFIX)); + + if (skb2 == NULL) + return NULL; + + /* Reduce the length of the original packet */ + skb_set_tail_pointer(skb, pos); + skb->len = pos; + + memcpy(skb2->data, split, len2nd); + skb2->tail += len2nd; + skb2->len += len2nd; + return skb_to_pkt(skb2); +} +EXPORT_SYMBOL(cfpkt_split); + +char *cfpkt_log_pkt(struct cfpkt *pkt, char *buf, int buflen) +{ + struct sk_buff *skb = pkt_to_skb(pkt); + char *p = buf; + int i; + + /* + * Sanity check buffer length, it needs to be at least as large as + * the header info: ~=50+ bytes + */ + if (buflen < 50) + return NULL; + + snprintf(buf, buflen, "%s: pkt:%p len:%ld(%ld+%ld) {%ld,%ld} data: [", + is_erronous(pkt) ? "ERRONOUS-SKB" : + (skb->data_len != 0 ? 
"COMPLEX-SKB" : "SKB"), + skb, + (long) skb->len, + (long) (skb_tail_pointer(skb) - skb->data), + (long) skb->data_len, + (long) (skb->data - skb->head), + (long) (skb_tail_pointer(skb) - skb->head)); + p = buf + strlen(buf); + + for (i = 0; i < skb_tail_pointer(skb) - skb->data && i < 300; i++) { + if (p > buf + buflen - 10) { + sprintf(p, "..."); + p = buf + strlen(buf); + break; + } + sprintf(p, "%02x,", skb->data[i]); + p = buf + strlen(buf); + } + sprintf(p, "]\n"); + return buf; +} +EXPORT_SYMBOL(cfpkt_log_pkt); + +int cfpkt_raw_append(struct cfpkt *pkt, void **buf, unsigned int buflen) +{ + struct sk_buff *skb = pkt_to_skb(pkt); + struct sk_buff *lastskb; + + caif_assert(buf != NULL); + if (unlikely(is_erronous(pkt))) + return -EPROTO; + /* Make sure SKB is writable */ + if (unlikely(skb_cow_data(skb, 0, &lastskb) < 0)) { + PKT_ERROR(pkt, "cfpkt_raw_append: skb_cow_data failed\n"); + return -EPROTO; + } + + if (unlikely(skb_linearize(skb) != 0)) { + PKT_ERROR(pkt, "cfpkt_raw_append: linearize failed\n"); + return -EPROTO; + } + + if (unlikely(skb_tailroom(skb) < buflen)) { + PKT_ERROR(pkt, "cfpkt_raw_append: buffer too short - failed\n"); + return -EPROTO; + } + + *buf = skb_put(skb, buflen); + return 1; +} +EXPORT_SYMBOL(cfpkt_raw_append); + +int cfpkt_raw_extract(struct cfpkt *pkt, void **buf, unsigned int buflen) +{ + struct sk_buff *skb = pkt_to_skb(pkt); + + caif_assert(buf != NULL); + if (unlikely(is_erronous(pkt))) + return -EPROTO; + + if (unlikely(buflen > skb->len)) { + PKT_ERROR(pkt, "cfpkt_raw_extract: buflen too large " + "- failed\n"); + return -EPROTO; + } + + if (unlikely(buflen > skb_headlen(skb))) { + if (unlikely(skb_linearize(skb) != 0)) { + PKT_ERROR(pkt, "cfpkt_raw_extract: linearize failed\n"); + return -EPROTO; + } + } + + *buf = skb->data; + skb_pull(skb, buflen); + + return 1; +} +EXPORT_SYMBOL(cfpkt_raw_extract); + +inline bool cfpkt_erroneous(struct cfpkt *pkt) +{ + return cfpkt_priv(pkt)->erronous; +} +EXPORT_SYMBOL(cfpkt_erroneous); + +struct cfpktq *cfpktq_create(void) +{ + struct cfpktq *q = kmalloc(sizeof(struct cfpktq), GFP_ATOMIC); + if (!q) + return NULL; + skb_queue_head_init(&q->head); + atomic_set(&q->count, 0); + spin_lock_init(&q->lock); + return q; +} +EXPORT_SYMBOL(cfpktq_create); + +void cfpkt_queue(struct cfpktq *pktq, struct cfpkt *pkt, unsigned short prio) +{ + atomic_inc(&pktq->count); + spin_lock(&pktq->lock); + skb_queue_tail(&pktq->head, pkt_to_skb(pkt)); + spin_unlock(&pktq->lock); + +} +EXPORT_SYMBOL(cfpkt_queue); + +struct cfpkt *cfpkt_qpeek(struct cfpktq *pktq) +{ + struct cfpkt *tmp; + spin_lock(&pktq->lock); + tmp = skb_to_pkt(skb_peek(&pktq->head)); + spin_unlock(&pktq->lock); + return tmp; +} +EXPORT_SYMBOL(cfpkt_qpeek); + +struct cfpkt *cfpkt_dequeue(struct cfpktq *pktq) +{ + struct cfpkt *pkt; + spin_lock(&pktq->lock); + pkt = skb_to_pkt(skb_dequeue(&pktq->head)); + if (pkt) { + atomic_dec(&pktq->count); + caif_assert(atomic_read(&pktq->count) >= 0); + } + spin_unlock(&pktq->lock); + return pkt; +} +EXPORT_SYMBOL(cfpkt_dequeue); + +int cfpkt_qcount(struct cfpktq *pktq) +{ + return atomic_read(&pktq->count); +} +EXPORT_SYMBOL(cfpkt_qcount); + +struct cfpkt *cfpkt_clone_release(struct cfpkt *pkt) +{ + struct cfpkt *clone; + clone = skb_to_pkt(skb_clone(pkt_to_skb(pkt), GFP_ATOMIC)); + /* Free original packet. 
*/ + cfpkt_destroy(pkt); + if (!clone) + return NULL; + return clone; +} +EXPORT_SYMBOL(cfpkt_clone_release); + +struct caif_payload_info *cfpkt_info(struct cfpkt *pkt) +{ + return (struct caif_payload_info *)&pkt_to_skb(pkt)->cb; +} +EXPORT_SYMBOL(cfpkt_info); -- cgit v1.2.2 From c72dfae2f77620e5b3fcee1beeee7e536a42b2ad Mon Sep 17 00:00:00 2001 From: Sjur Braendeland Date: Tue, 30 Mar 2010 13:56:25 +0000 Subject: net-caif: add CAIF device registration functionality Registration and deregistration of CAIF Link Layer. Signed-off-by: Sjur Braendeland Signed-off-by: David S. Miller --- net/caif/caif_config_util.c | 87 ++++++++++ net/caif/caif_dev.c | 413 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 500 insertions(+) create mode 100644 net/caif/caif_config_util.c create mode 100644 net/caif/caif_dev.c (limited to 'net') diff --git a/net/caif/caif_config_util.c b/net/caif/caif_config_util.c new file mode 100644 index 000000000000..6f36580366f0 --- /dev/null +++ b/net/caif/caif_config_util.c @@ -0,0 +1,87 @@ +/* + * Copyright (C) ST-Ericsson AB 2010 + * Author: Sjur Brendeland sjur.brandeland@stericsson.com + * License terms: GNU General Public License (GPL) version 2 + */ + +#include +#include +#include +#include +#include + +int connect_req_to_link_param(struct cfcnfg *cnfg, + struct caif_connect_request *s, + struct cfctrl_link_param *l) +{ + struct dev_info *dev_info; + enum cfcnfg_phy_preference pref; + memset(l, 0, sizeof(*l)); + l->priority = s->priority; + + if (s->link_name[0] != '\0') + l->phyid = cfcnfg_get_named(cnfg, s->link_name); + else { + switch (s->link_selector) { + case CAIF_LINK_HIGH_BANDW: + pref = CFPHYPREF_HIGH_BW; + break; + case CAIF_LINK_LOW_LATENCY: + pref = CFPHYPREF_LOW_LAT; + break; + default: + return -EINVAL; + } + dev_info = cfcnfg_get_phyid(cnfg, pref); + if (dev_info == NULL) + return -ENODEV; + l->phyid = dev_info->id; + } + switch (s->protocol) { + case CAIFPROTO_AT: + l->linktype = CFCTRL_SRV_VEI; + if (s->sockaddr.u.at.type == CAIF_ATTYPE_PLAIN) + l->chtype = 0x02; + else + l->chtype = s->sockaddr.u.at.type; + l->endpoint = 0x00; + break; + case CAIFPROTO_DATAGRAM: + l->linktype = CFCTRL_SRV_DATAGRAM; + l->chtype = 0x00; + l->u.datagram.connid = s->sockaddr.u.dgm.connection_id; + break; + case CAIFPROTO_DATAGRAM_LOOP: + l->linktype = CFCTRL_SRV_DATAGRAM; + l->chtype = 0x03; + l->endpoint = 0x00; + l->u.datagram.connid = s->sockaddr.u.dgm.connection_id; + break; + case CAIFPROTO_RFM: + l->linktype = CFCTRL_SRV_RFM; + l->u.datagram.connid = s->sockaddr.u.rfm.connection_id; + strncpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume, + sizeof(l->u.rfm.volume)-1); + l->u.rfm.volume[sizeof(l->u.rfm.volume)-1] = 0; + break; + case CAIFPROTO_UTIL: + l->linktype = CFCTRL_SRV_UTIL; + l->endpoint = 0x00; + l->chtype = 0x00; + strncpy(l->u.utility.name, s->sockaddr.u.util.service, + sizeof(l->u.utility.name)-1); + l->u.utility.name[sizeof(l->u.utility.name)-1] = 0; + caif_assert(sizeof(l->u.utility.name) > 10); + l->u.utility.paramlen = s->param.size; + if (l->u.utility.paramlen > sizeof(l->u.utility.params)) + l->u.utility.paramlen = sizeof(l->u.utility.params); + + memcpy(l->u.utility.params, s->param.data, + l->u.utility.paramlen); + + break; + default: + return -EINVAL; + } + return 0; +} diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c new file mode 100644 index 000000000000..e84837e1bc86 --- /dev/null +++ b/net/caif/caif_dev.c @@ -0,0 +1,413 @@ +/* + * CAIF Interface registration. 
+ * Copyright (C) ST-Ericsson AB 2010 + * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * License terms: GNU General Public License (GPL) version 2 + * + * Borrowed heavily from file: pn_dev.c. Thanks to + * Remi Denis-Courmont + * and Sakari Ailus + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +#define TIMEOUT (HZ*5) + +/* Used for local tracking of the CAIF net devices */ +struct caif_device_entry { + struct cflayer layer; + struct list_head list; + atomic_t in_use; + atomic_t state; + u16 phyid; + struct net_device *netdev; + wait_queue_head_t event; +}; + +struct caif_device_entry_list { + struct list_head list; + /* Protects simulanous deletes in list */ + spinlock_t lock; +}; + +struct caif_net { + struct caif_device_entry_list caifdevs; +}; + +static int caif_net_id; +static struct cfcnfg *cfg; + +static struct caif_device_entry_list *caif_device_list(struct net *net) +{ + struct caif_net *caifn; + BUG_ON(!net); + caifn = net_generic(net, caif_net_id); + BUG_ON(!caifn); + return &caifn->caifdevs; +} + +/* Allocate new CAIF device. */ +static struct caif_device_entry *caif_device_alloc(struct net_device *dev) +{ + struct caif_device_entry_list *caifdevs; + struct caif_device_entry *caifd; + caifdevs = caif_device_list(dev_net(dev)); + BUG_ON(!caifdevs); + caifd = kzalloc(sizeof(*caifd), GFP_ATOMIC); + if (!caifd) + return NULL; + caifd->netdev = dev; + list_add(&caifd->list, &caifdevs->list); + init_waitqueue_head(&caifd->event); + return caifd; +} + +static struct caif_device_entry *caif_get(struct net_device *dev) +{ + struct caif_device_entry_list *caifdevs = + caif_device_list(dev_net(dev)); + struct caif_device_entry *caifd; + BUG_ON(!caifdevs); + list_for_each_entry(caifd, &caifdevs->list, list) { + if (caifd->netdev == dev) + return caifd; + } + return NULL; +} + +static void caif_device_destroy(struct net_device *dev) +{ + struct caif_device_entry_list *caifdevs = + caif_device_list(dev_net(dev)); + struct caif_device_entry *caifd; + ASSERT_RTNL(); + if (dev->type != ARPHRD_CAIF) + return; + + spin_lock_bh(&caifdevs->lock); + caifd = caif_get(dev); + if (caifd == NULL) { + spin_unlock_bh(&caifdevs->lock); + return; + } + + list_del(&caifd->list); + spin_unlock_bh(&caifdevs->lock); + + kfree(caifd); + return; +} + +static int transmit(struct cflayer *layer, struct cfpkt *pkt) +{ + struct caif_device_entry *caifd = + container_of(layer, struct caif_device_entry, layer); + struct sk_buff *skb, *skb2; + int ret = -EINVAL; + skb = cfpkt_tonative(pkt); + skb->dev = caifd->netdev; + /* + * Don't allow SKB to be destroyed upon error, but signal resend + * notification to clients. We can't rely on the return value as + * congestion (NET_XMIT_CN) sometimes drops the packet, sometimes don't. 
+ */ + if (netif_queue_stopped(caifd->netdev)) + return -EAGAIN; + skb2 = skb_get(skb); + + ret = dev_queue_xmit(skb2); + + if (!ret) + kfree_skb(skb); + else + return -EAGAIN; + + return 0; +} + +static int modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl) +{ + struct caif_device_entry *caifd; + struct caif_dev_common *caifdev; + caifd = container_of(layr, struct caif_device_entry, layer); + caifdev = netdev_priv(caifd->netdev); + if (ctrl == _CAIF_MODEMCMD_PHYIF_USEFULL) { + atomic_set(&caifd->in_use, 1); + wake_up_interruptible(&caifd->event); + + } else if (ctrl == _CAIF_MODEMCMD_PHYIF_USELESS) { + atomic_set(&caifd->in_use, 0); + wake_up_interruptible(&caifd->event); + } + return 0; +} + +/* + * Stuff received packets to associated sockets. + * On error, returns non-zero and releases the skb. + */ +static int receive(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pkttype, struct net_device *orig_dev) +{ + struct net *net; + struct cfpkt *pkt; + struct caif_device_entry *caifd; + net = dev_net(dev); + pkt = cfpkt_fromnative(CAIF_DIR_IN, skb); + caifd = caif_get(dev); + if (!caifd || !caifd->layer.up || !caifd->layer.up->ctrlcmd) + return NET_RX_DROP; + + if (caifd->layer.up->receive(caifd->layer.up, pkt)) + return NET_RX_DROP; + + return 0; +} + +static struct packet_type caif_packet_type __read_mostly = { + .type = cpu_to_be16(ETH_P_CAIF), + .func = receive, +}; + +static void dev_flowctrl(struct net_device *dev, int on) +{ + struct caif_device_entry *caifd = caif_get(dev); + if (!caifd || !caifd->layer.up || !caifd->layer.up->ctrlcmd) + return; + + caifd->layer.up->ctrlcmd(caifd->layer.up, + on ? + _CAIF_CTRLCMD_PHYIF_FLOW_ON_IND : + _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND, + caifd->layer.id); +} + +/* notify Caif of device events */ +static int caif_device_notify(struct notifier_block *me, unsigned long what, + void *arg) +{ + struct net_device *dev = arg; + struct caif_device_entry *caifd = NULL; + struct caif_dev_common *caifdev; + enum cfcnfg_phy_preference pref; + int res = -EINVAL; + enum cfcnfg_phy_type phy_type; + + if (dev->type != ARPHRD_CAIF) + return 0; + + switch (what) { + case NETDEV_REGISTER: + pr_info("CAIF: %s():register %s\n", __func__, dev->name); + caifd = caif_device_alloc(dev); + if (caifd == NULL) + break; + caifdev = netdev_priv(dev); + caifdev->flowctrl = dev_flowctrl; + atomic_set(&caifd->state, what); + res = 0; + break; + + case NETDEV_UP: + pr_info("CAIF: %s(): up %s\n", __func__, dev->name); + caifd = caif_get(dev); + if (caifd == NULL) + break; + caifdev = netdev_priv(dev); + if (atomic_read(&caifd->state) == NETDEV_UP) { + pr_info("CAIF: %s():%s already up\n", + __func__, dev->name); + break; + } + atomic_set(&caifd->state, what); + caifd->layer.transmit = transmit; + caifd->layer.modemcmd = modemcmd; + + if (caifdev->use_frag) + phy_type = CFPHYTYPE_FRAG; + else + phy_type = CFPHYTYPE_CAIF; + + switch (caifdev->link_select) { + case CAIF_LINK_HIGH_BANDW: + pref = CFPHYPREF_LOW_LAT; + break; + case CAIF_LINK_LOW_LATENCY: + pref = CFPHYPREF_HIGH_BW; + break; + default: + pref = CFPHYPREF_HIGH_BW; + break; + } + + cfcnfg_add_phy_layer(get_caif_conf(), + phy_type, + dev, + &caifd->layer, + &caifd->phyid, + pref, + caifdev->use_fcs, + caifdev->use_stx); + strncpy(caifd->layer.name, dev->name, + sizeof(caifd->layer.name) - 1); + caifd->layer.name[sizeof(caifd->layer.name) - 1] = 0; + break; + + case NETDEV_GOING_DOWN: + caifd = caif_get(dev); + if (caifd == NULL) + break; + pr_info("CAIF: %s():going down %s\n", __func__, dev->name); + + 
if (atomic_read(&caifd->state) == NETDEV_GOING_DOWN || + atomic_read(&caifd->state) == NETDEV_DOWN) + break; + + atomic_set(&caifd->state, what); + if (!caifd || !caifd->layer.up || !caifd->layer.up->ctrlcmd) + return -EINVAL; + caifd->layer.up->ctrlcmd(caifd->layer.up, + _CAIF_CTRLCMD_PHYIF_DOWN_IND, + caifd->layer.id); + res = wait_event_interruptible_timeout(caifd->event, + atomic_read(&caifd->in_use) == 0, + TIMEOUT); + break; + + case NETDEV_DOWN: + caifd = caif_get(dev); + if (caifd == NULL) + break; + pr_info("CAIF: %s(): down %s\n", __func__, dev->name); + if (atomic_read(&caifd->in_use)) + pr_warning("CAIF: %s(): " + "Unregistering an active CAIF device: %s\n", + __func__, dev->name); + cfcnfg_del_phy_layer(get_caif_conf(), &caifd->layer); + atomic_set(&caifd->state, what); + break; + + case NETDEV_UNREGISTER: + caifd = caif_get(dev); + pr_info("CAIF: %s(): unregister %s\n", __func__, dev->name); + atomic_set(&caifd->state, what); + caif_device_destroy(dev); + break; + } + return 0; +} + +static struct notifier_block caif_device_notifier = { + .notifier_call = caif_device_notify, + .priority = 0, +}; + + +struct cfcnfg *get_caif_conf(void) +{ + return cfg; +} +EXPORT_SYMBOL(get_caif_conf); + +int caif_connect_client(struct caif_connect_request *conn_req, + struct cflayer *client_layer) +{ + struct cfctrl_link_param param; + if (connect_req_to_link_param(get_caif_conf(), conn_req, ¶m) == 0) + /* Hook up the adaptation layer. */ + return cfcnfg_add_adaptation_layer(get_caif_conf(), + ¶m, client_layer); + + return -EINVAL; + + caif_assert(0); +} +EXPORT_SYMBOL(caif_connect_client); + +int caif_disconnect_client(struct cflayer *adap_layer) +{ + return cfcnfg_del_adapt_layer(get_caif_conf(), adap_layer); +} +EXPORT_SYMBOL(caif_disconnect_client); + +/* Per-namespace Caif devices handling */ +static int caif_init_net(struct net *net) +{ + struct caif_net *caifn = net_generic(net, caif_net_id); + INIT_LIST_HEAD(&caifn->caifdevs.list); + spin_lock_init(&caifn->caifdevs.lock); + return 0; +} + +static void caif_exit_net(struct net *net) +{ + struct net_device *dev; + int res; + rtnl_lock(); + for_each_netdev(net, dev) { + if (dev->type != ARPHRD_CAIF) + continue; + res = dev_close(dev); + caif_device_destroy(dev); + } + rtnl_unlock(); +} + +static struct pernet_operations caif_net_ops = { + .init = caif_init_net, + .exit = caif_exit_net, + .id = &caif_net_id, + .size = sizeof(struct caif_net), +}; + +/* Initialize Caif devices list */ +static int __init caif_device_init(void) +{ + int result; + cfg = cfcnfg_create(); + if (!cfg) { + pr_warning("CAIF: %s(): can't create cfcnfg.\n", __func__); + goto err_cfcnfg_create_failed; + } + result = register_pernet_device(&caif_net_ops); + + if (result) { + kfree(cfg); + cfg = NULL; + return result; + } + dev_add_pack(&caif_packet_type); + register_netdevice_notifier(&caif_device_notifier); + + return result; +err_cfcnfg_create_failed: + return -ENODEV; +} + +static void __exit caif_device_exit(void) +{ + dev_remove_pack(&caif_packet_type); + unregister_pernet_device(&caif_net_ops); + unregister_netdevice_notifier(&caif_device_notifier); + cfcnfg_remove(cfg); +} + +module_init(caif_device_init); +module_exit(caif_device_exit); -- cgit v1.2.2 From e6f95ec8db312491235b4f06343fbd991a82ce20 Mon Sep 17 00:00:00 2001 From: Sjur Braendeland Date: Tue, 30 Mar 2010 13:56:26 +0000 Subject: net-caif: add CAIF socket implementation Implementation of CAIF sockets for protocol and address family PF_CAIF and AF_CAIF. 
CAIF socket is connection oriented implementing SOCK_SEQPACKET and SOCK_STREAM interface with supporting blocking and non-blocking mode. Signed-off-by: Sjur Braendeland Signed-off-by: David S. Miller --- net/caif/caif_socket.c | 1391 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1391 insertions(+) create mode 100644 net/caif/caif_socket.c (limited to 'net') diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c new file mode 100644 index 000000000000..cdf62b9fefac --- /dev/null +++ b/net/caif/caif_socket.c @@ -0,0 +1,1391 @@ +/* + * Copyright (C) ST-Ericsson AB 2010 + * Author: Sjur Brendeland sjur.brandeland@stericsson.com + * Per Sigmond per.sigmond@stericsson.com + * License terms: GNU General Public License (GPL) version 2 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); + +#define CHNL_SKT_READ_QUEUE_HIGH 200 +#define CHNL_SKT_READ_QUEUE_LOW 100 + +static int caif_sockbuf_size = 40000; +static atomic_t caif_nr_socks = ATOMIC_INIT(0); + +#define CONN_STATE_OPEN_BIT 1 +#define CONN_STATE_PENDING_BIT 2 +#define CONN_STATE_PEND_DESTROY_BIT 3 +#define CONN_REMOTE_SHUTDOWN_BIT 4 + +#define TX_FLOW_ON_BIT 1 +#define RX_FLOW_ON_BIT 2 + +#define STATE_IS_OPEN(cf_sk) test_bit(CONN_STATE_OPEN_BIT,\ + (void *) &(cf_sk)->conn_state) +#define STATE_IS_REMOTE_SHUTDOWN(cf_sk) test_bit(CONN_REMOTE_SHUTDOWN_BIT,\ + (void *) &(cf_sk)->conn_state) +#define STATE_IS_PENDING(cf_sk) test_bit(CONN_STATE_PENDING_BIT,\ + (void *) &(cf_sk)->conn_state) +#define STATE_IS_PENDING_DESTROY(cf_sk) test_bit(CONN_STATE_PEND_DESTROY_BIT,\ + (void *) &(cf_sk)->conn_state) + +#define SET_STATE_PENDING_DESTROY(cf_sk) set_bit(CONN_STATE_PEND_DESTROY_BIT,\ + (void *) &(cf_sk)->conn_state) +#define SET_STATE_OPEN(cf_sk) set_bit(CONN_STATE_OPEN_BIT,\ + (void *) &(cf_sk)->conn_state) +#define SET_STATE_CLOSED(cf_sk) clear_bit(CONN_STATE_OPEN_BIT,\ + (void *) &(cf_sk)->conn_state) +#define SET_PENDING_ON(cf_sk) set_bit(CONN_STATE_PENDING_BIT,\ + (void *) &(cf_sk)->conn_state) +#define SET_PENDING_OFF(cf_sk) clear_bit(CONN_STATE_PENDING_BIT,\ + (void *) &(cf_sk)->conn_state) +#define SET_REMOTE_SHUTDOWN(cf_sk) set_bit(CONN_REMOTE_SHUTDOWN_BIT,\ + (void *) &(cf_sk)->conn_state) + +#define SET_REMOTE_SHUTDOWN_OFF(dev) clear_bit(CONN_REMOTE_SHUTDOWN_BIT,\ + (void *) &(dev)->conn_state) +#define RX_FLOW_IS_ON(cf_sk) test_bit(RX_FLOW_ON_BIT,\ + (void *) &(cf_sk)->flow_state) +#define TX_FLOW_IS_ON(cf_sk) test_bit(TX_FLOW_ON_BIT,\ + (void *) &(cf_sk)->flow_state) + +#define SET_RX_FLOW_OFF(cf_sk) clear_bit(RX_FLOW_ON_BIT,\ + (void *) &(cf_sk)->flow_state) +#define SET_RX_FLOW_ON(cf_sk) set_bit(RX_FLOW_ON_BIT,\ + (void *) &(cf_sk)->flow_state) +#define SET_TX_FLOW_OFF(cf_sk) clear_bit(TX_FLOW_ON_BIT,\ + (void *) &(cf_sk)->flow_state) +#define SET_TX_FLOW_ON(cf_sk) set_bit(TX_FLOW_ON_BIT,\ + (void *) &(cf_sk)->flow_state) + +#define SKT_READ_FLAG 0x01 +#define SKT_WRITE_FLAG 0x02 +static struct dentry *debugfsdir; +#include + +#ifdef CONFIG_DEBUG_FS +struct debug_fs_counter { + atomic_t num_open; + atomic_t num_close; + atomic_t num_init; + atomic_t num_init_resp; + atomic_t num_init_fail_resp; + atomic_t num_deinit; + atomic_t num_deinit_resp; + atomic_t num_remote_shutdown_ind; + atomic_t num_tx_flow_off_ind; + atomic_t num_tx_flow_on_ind; + atomic_t num_rx_flow_off; + atomic_t num_rx_flow_on; + atomic_t skb_in_use; + atomic_t skb_alloc; + atomic_t skb_free; +}; 
+static struct debug_fs_counter cnt; +#define dbfs_atomic_inc(v) atomic_inc(v) +#define dbfs_atomic_dec(v) atomic_dec(v) +#else +#define dbfs_atomic_inc(v) +#define dbfs_atomic_dec(v) +#endif + +/* The AF_CAIF socket */ +struct caifsock { + /* NOTE: sk has to be the first member */ + struct sock sk; + struct cflayer layer; + char name[CAIF_LAYER_NAME_SZ]; + u32 conn_state; + u32 flow_state; + struct cfpktq *pktq; + int file_mode; + struct caif_connect_request conn_req; + int read_queue_len; + /* protect updates of read_queue_len */ + spinlock_t read_queue_len_lock; + struct dentry *debugfs_socket_dir; +}; + +static void drain_queue(struct caifsock *cf_sk); + +/* Packet Receive Callback function called from CAIF Stack */ +static int caif_sktrecv_cb(struct cflayer *layr, struct cfpkt *pkt) +{ + struct caifsock *cf_sk; + int read_queue_high; + cf_sk = container_of(layr, struct caifsock, layer); + + if (!STATE_IS_OPEN(cf_sk)) { + /*FIXME: This should be allowed finally!*/ + pr_debug("CAIF: %s(): called after close request\n", __func__); + cfpkt_destroy(pkt); + return 0; + } + /* NOTE: This function may be called in Tasklet context! */ + + /* The queue has its own lock */ + cfpkt_queue(cf_sk->pktq, pkt, 0); + + spin_lock(&cf_sk->read_queue_len_lock); + cf_sk->read_queue_len++; + + read_queue_high = (cf_sk->read_queue_len > CHNL_SKT_READ_QUEUE_HIGH); + spin_unlock(&cf_sk->read_queue_len_lock); + + if (RX_FLOW_IS_ON(cf_sk) && read_queue_high) { + dbfs_atomic_inc(&cnt.num_rx_flow_off); + SET_RX_FLOW_OFF(cf_sk); + + /* Send flow off (NOTE: must not sleep) */ + pr_debug("CAIF: %s():" + " sending flow OFF (queue len = %d)\n", + __func__, + cf_sk->read_queue_len); + caif_assert(cf_sk->layer.dn); + caif_assert(cf_sk->layer.dn->ctrlcmd); + + (void) cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, + CAIF_MODEMCMD_FLOW_OFF_REQ); + } + + /* Signal reader that data is available. */ + + wake_up_interruptible(cf_sk->sk.sk_sleep); + + return 0; +} + +/* Packet Flow Control Callback function called from CAIF */ +static void caif_sktflowctrl_cb(struct cflayer *layr, + enum caif_ctrlcmd flow, + int phyid) +{ + struct caifsock *cf_sk; + + /* NOTE: This function may be called in Tasklet context! */ + pr_debug("CAIF: %s(): flowctrl func called: %s.\n", + __func__, + flow == CAIF_CTRLCMD_FLOW_ON_IND ? "ON" : + flow == CAIF_CTRLCMD_FLOW_OFF_IND ? "OFF" : + flow == CAIF_CTRLCMD_INIT_RSP ? "INIT_RSP" : + flow == CAIF_CTRLCMD_DEINIT_RSP ? "DEINIT_RSP" : + flow == CAIF_CTRLCMD_INIT_FAIL_RSP ? "INIT_FAIL_RSP" : + flow == + CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND ? "REMOTE_SHUTDOWN" : + "UKNOWN CTRL COMMAND"); + + if (layr == NULL) + return; + + cf_sk = container_of(layr, struct caifsock, layer); + + switch (flow) { + case CAIF_CTRLCMD_FLOW_ON_IND: + dbfs_atomic_inc(&cnt.num_tx_flow_on_ind); + /* Signal reader that data is available. */ + SET_TX_FLOW_ON(cf_sk); + wake_up_interruptible(cf_sk->sk.sk_sleep); + break; + + case CAIF_CTRLCMD_FLOW_OFF_IND: + dbfs_atomic_inc(&cnt.num_tx_flow_off_ind); + SET_TX_FLOW_OFF(cf_sk); + break; + + case CAIF_CTRLCMD_INIT_RSP: + dbfs_atomic_inc(&cnt.num_init_resp); + /* Signal reader that data is available. 
*/ + caif_assert(STATE_IS_OPEN(cf_sk)); + SET_PENDING_OFF(cf_sk); + SET_TX_FLOW_ON(cf_sk); + wake_up_interruptible(cf_sk->sk.sk_sleep); + break; + + case CAIF_CTRLCMD_DEINIT_RSP: + dbfs_atomic_inc(&cnt.num_deinit_resp); + caif_assert(!STATE_IS_OPEN(cf_sk)); + SET_PENDING_OFF(cf_sk); + if (!STATE_IS_PENDING_DESTROY(cf_sk)) { + if (cf_sk->sk.sk_sleep != NULL) + wake_up_interruptible(cf_sk->sk.sk_sleep); + } + dbfs_atomic_inc(&cnt.num_deinit); + sock_put(&cf_sk->sk); + break; + + case CAIF_CTRLCMD_INIT_FAIL_RSP: + dbfs_atomic_inc(&cnt.num_init_fail_resp); + caif_assert(STATE_IS_OPEN(cf_sk)); + SET_STATE_CLOSED(cf_sk); + SET_PENDING_OFF(cf_sk); + SET_TX_FLOW_OFF(cf_sk); + wake_up_interruptible(cf_sk->sk.sk_sleep); + break; + + case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND: + dbfs_atomic_inc(&cnt.num_remote_shutdown_ind); + SET_REMOTE_SHUTDOWN(cf_sk); + /* Use sk_shutdown to indicate remote shutdown indication */ + cf_sk->sk.sk_shutdown |= RCV_SHUTDOWN; + cf_sk->file_mode = 0; + wake_up_interruptible(cf_sk->sk.sk_sleep); + break; + + default: + pr_debug("CAIF: %s(): Unexpected flow command %d\n", + __func__, flow); + } +} + +static void skb_destructor(struct sk_buff *skb) +{ + dbfs_atomic_inc(&cnt.skb_free); + dbfs_atomic_dec(&cnt.skb_in_use); +} + + +static int caif_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t buf_len, int flags) + +{ + struct sock *sk = sock->sk; + struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); + struct cfpkt *pkt = NULL; + size_t len; + int result; + struct sk_buff *skb; + ssize_t ret = -EIO; + int read_queue_low; + + if (cf_sk == NULL) { + pr_debug("CAIF: %s(): private_data not set!\n", + __func__); + ret = -EBADFD; + goto read_error; + } + + /* Don't do multiple iovec entries yet */ + if (m->msg_iovlen != 1) + return -EOPNOTSUPP; + + if (unlikely(!buf_len)) + return -EINVAL; + + lock_sock(&(cf_sk->sk)); + + caif_assert(cf_sk->pktq); + + if (!STATE_IS_OPEN(cf_sk)) { + /* Socket is closed or closing. */ + if (!STATE_IS_PENDING(cf_sk)) { + pr_debug("CAIF: %s(): socket is closed (by remote)\n", + __func__); + ret = -EPIPE; + } else { + pr_debug("CAIF: %s(): socket is closing..\n", __func__); + ret = -EBADF; + } + goto read_error; + } + /* Socket is open or opening. */ + if (STATE_IS_PENDING(cf_sk)) { + pr_debug("CAIF: %s(): socket is opening...\n", __func__); + + if (flags & MSG_DONTWAIT) { + /* We can't block. */ + pr_debug("CAIF: %s():state pending and MSG_DONTWAIT\n", + __func__); + ret = -EAGAIN; + goto read_error; + } + + /* + * Blocking mode; state is pending and we need to wait + * for its conclusion. + */ + release_sock(&cf_sk->sk); + + result = + wait_event_interruptible(*cf_sk->sk.sk_sleep, + !STATE_IS_PENDING(cf_sk)); + + lock_sock(&(cf_sk->sk)); + + if (result == -ERESTARTSYS) { + pr_debug("CAIF: %s(): wait_event_interruptible" + " woken by a signal (1)", __func__); + ret = -ERESTARTSYS; + goto read_error; + } + } + + if (STATE_IS_REMOTE_SHUTDOWN(cf_sk) || + !STATE_IS_OPEN(cf_sk) || + STATE_IS_PENDING(cf_sk)) { + + pr_debug("CAIF: %s(): socket closed\n", + __func__); + ret = -ESHUTDOWN; + goto read_error; + } + + /* + * Block if we don't have any received buffers. + * The queue has its own lock. + */ + while ((pkt = cfpkt_qpeek(cf_sk->pktq)) == NULL) { + + if (flags & MSG_DONTWAIT) { + pr_debug("CAIF: %s(): MSG_DONTWAIT\n", __func__); + ret = -EAGAIN; + goto read_error; + } + trace_printk("CAIF: %s() wait_event\n", __func__); + + /* Let writers in. 
*/ + release_sock(&cf_sk->sk); + + /* Block reader until data arrives or socket is closed. */ + if (wait_event_interruptible(*cf_sk->sk.sk_sleep, + cfpkt_qpeek(cf_sk->pktq) + || STATE_IS_REMOTE_SHUTDOWN(cf_sk) + || !STATE_IS_OPEN(cf_sk)) == + -ERESTARTSYS) { + pr_debug("CAIF: %s():" + " wait_event_interruptible woken by " + "a signal, signal_pending(current) = %d\n", + __func__, + signal_pending(current)); + return -ERESTARTSYS; + } + + trace_printk("CAIF: %s() awake\n", __func__); + if (STATE_IS_REMOTE_SHUTDOWN(cf_sk)) { + pr_debug("CAIF: %s(): " + "received remote_shutdown indication\n", + __func__); + ret = -ESHUTDOWN; + goto read_error_no_unlock; + } + + /* I want to be alone on cf_sk (except status and queue). */ + lock_sock(&(cf_sk->sk)); + + if (!STATE_IS_OPEN(cf_sk)) { + /* Someone closed the link, report error. */ + pr_debug("CAIF: %s(): remote end shutdown!\n", + __func__); + ret = -EPIPE; + goto read_error; + } + } + + /* The queue has its own lock. */ + len = cfpkt_getlen(pkt); + + /* Check max length that can be copied. */ + if (len <= buf_len) + pkt = cfpkt_dequeue(cf_sk->pktq); + else { + pr_debug("CAIF: %s(): user buffer too small (%ld,%ld)\n", + __func__, (long) len, (long) buf_len); + if (sock->type == SOCK_SEQPACKET) { + ret = -EMSGSIZE; + goto read_error; + } + len = buf_len; + } + + + spin_lock(&cf_sk->read_queue_len_lock); + cf_sk->read_queue_len--; + read_queue_low = (cf_sk->read_queue_len < CHNL_SKT_READ_QUEUE_LOW); + spin_unlock(&cf_sk->read_queue_len_lock); + + if (!RX_FLOW_IS_ON(cf_sk) && read_queue_low) { + dbfs_atomic_inc(&cnt.num_rx_flow_on); + SET_RX_FLOW_ON(cf_sk); + + /* Send flow on. */ + pr_debug("CAIF: %s(): sending flow ON (queue len = %d)\n", + __func__, cf_sk->read_queue_len); + caif_assert(cf_sk->layer.dn); + caif_assert(cf_sk->layer.dn->ctrlcmd); + (void) cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, + CAIF_MODEMCMD_FLOW_ON_REQ); + + caif_assert(cf_sk->read_queue_len >= 0); + } + + skb = cfpkt_tonative(pkt); + result = skb_copy_datagram_iovec(skb, 0, m->msg_iov, len); + skb_pull(skb, len); + + if (result) { + pr_debug("CAIF: %s(): copy to_iovec failed\n", __func__); + cfpkt_destroy(pkt); + ret = -EFAULT; + goto read_error; + } + + /* Free packet and remove from queue */ + if (skb->len == 0) + skb_free_datagram(sk, skb); + + /* Let the others in. */ + release_sock(&cf_sk->sk); + return len; + +read_error: + release_sock(&cf_sk->sk); +read_error_no_unlock: + return ret; +} + +/* Send a signal as a consequence of sendmsg, sendto or caif_sendmsg. 
*/ +static int caif_sendmsg(struct kiocb *kiocb, struct socket *sock, + struct msghdr *msg, size_t len) +{ + + struct sock *sk = sock->sk; + struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); + size_t payload_size = msg->msg_iov->iov_len; + struct cfpkt *pkt = NULL; + struct caif_payload_info info; + unsigned char *txbuf; + ssize_t ret = -EIO; + int result; + struct sk_buff *skb; + caif_assert(msg->msg_iovlen == 1); + + if (cf_sk == NULL) { + pr_debug("CAIF: %s(): private_data not set!\n", + __func__); + ret = -EBADFD; + goto write_error_no_unlock; + } + + if (unlikely(msg->msg_iov->iov_base == NULL)) { + pr_warning("CAIF: %s(): Buffer is NULL.\n", __func__); + ret = -EINVAL; + goto write_error_no_unlock; + } + + if (payload_size > CAIF_MAX_PAYLOAD_SIZE) { + pr_debug("CAIF: %s(): buffer too long\n", __func__); + if (sock->type == SOCK_SEQPACKET) { + ret = -EINVAL; + goto write_error_no_unlock; + } + payload_size = CAIF_MAX_PAYLOAD_SIZE; + } + + /* I want to be alone on cf_sk (except status and queue) */ + lock_sock(&(cf_sk->sk)); + + caif_assert(cf_sk->pktq); + + if (!STATE_IS_OPEN(cf_sk)) { + /* Socket is closed or closing */ + if (!STATE_IS_PENDING(cf_sk)) { + pr_debug("CAIF: %s(): socket is closed (by remote)\n", + __func__); + ret = -EPIPE; + } else { + pr_debug("CAIF: %s(): socket is closing...\n", + __func__); + ret = -EBADF; + } + goto write_error; + } + + /* Socket is open or opening */ + if (STATE_IS_PENDING(cf_sk)) { + pr_debug("CAIF: %s(): socket is opening...\n", __func__); + + if (msg->msg_flags & MSG_DONTWAIT) { + /* We can't block */ + trace_printk("CAIF: %s():state pending:" + "state=MSG_DONTWAIT\n", __func__); + ret = -EAGAIN; + goto write_error; + } + /* Let readers in */ + release_sock(&cf_sk->sk); + + /* + * Blocking mode; state is pending and we need to wait + * for its conclusion. + */ + result = + wait_event_interruptible(*cf_sk->sk.sk_sleep, + !STATE_IS_PENDING(cf_sk)); + /* I want to be alone on cf_sk (except status and queue) */ + lock_sock(&(cf_sk->sk)); + + if (result == -ERESTARTSYS) { + pr_debug("CAIF: %s(): wait_event_interruptible" + " woken by a signal (1)", __func__); + ret = -ERESTARTSYS; + goto write_error; + } + } + if (STATE_IS_REMOTE_SHUTDOWN(cf_sk) || + !STATE_IS_OPEN(cf_sk) || + STATE_IS_PENDING(cf_sk)) { + + pr_debug("CAIF: %s(): socket closed\n", + __func__); + ret = -ESHUTDOWN; + goto write_error; + } + + if (!TX_FLOW_IS_ON(cf_sk)) { + + /* Flow is off. 
Check non-block flag */ + if (msg->msg_flags & MSG_DONTWAIT) { + trace_printk("CAIF: %s(): MSG_DONTWAIT and tx flow off", + __func__); + ret = -EAGAIN; + goto write_error; + } + + /* release lock before waiting */ + release_sock(&cf_sk->sk); + + /* Wait until flow is on or socket is closed */ + if (wait_event_interruptible(*cf_sk->sk.sk_sleep, + TX_FLOW_IS_ON(cf_sk) + || !STATE_IS_OPEN(cf_sk) + || STATE_IS_REMOTE_SHUTDOWN(cf_sk) + ) == -ERESTARTSYS) { + pr_debug("CAIF: %s():" + " wait_event_interruptible woken by a signal", + __func__); + ret = -ERESTARTSYS; + goto write_error_no_unlock; + } + + /* I want to be alone on cf_sk (except status and queue) */ + lock_sock(&(cf_sk->sk)); + + if (!STATE_IS_OPEN(cf_sk)) { + /* someone closed the link, report error */ + pr_debug("CAIF: %s(): remote end shutdown!\n", + __func__); + ret = -EPIPE; + goto write_error; + } + + if (STATE_IS_REMOTE_SHUTDOWN(cf_sk)) { + pr_debug("CAIF: %s(): " + "received remote_shutdown indication\n", + __func__); + ret = -ESHUTDOWN; + goto write_error; + } + } + + pkt = cfpkt_create(payload_size); + skb = (struct sk_buff *)pkt; + skb->destructor = skb_destructor; + skb->sk = sk; + dbfs_atomic_inc(&cnt.skb_alloc); + dbfs_atomic_inc(&cnt.skb_in_use); + if (cfpkt_raw_append(pkt, (void **) &txbuf, payload_size) < 0) { + pr_debug("CAIF: %s(): cfpkt_raw_append failed\n", __func__); + cfpkt_destroy(pkt); + ret = -EINVAL; + goto write_error; + } + + /* Copy data into buffer. */ + if (copy_from_user(txbuf, msg->msg_iov->iov_base, payload_size)) { + pr_debug("CAIF: %s(): copy_from_user returned non zero.\n", + __func__); + cfpkt_destroy(pkt); + ret = -EINVAL; + goto write_error; + } + memset(&info, 0, sizeof(info)); + + /* Send the packet down the stack. */ + caif_assert(cf_sk->layer.dn); + caif_assert(cf_sk->layer.dn->transmit); + + do { + ret = cf_sk->layer.dn->transmit(cf_sk->layer.dn, pkt); + + if (likely((ret >= 0) || (ret != -EAGAIN))) + break; + + /* EAGAIN - retry */ + if (msg->msg_flags & MSG_DONTWAIT) { + pr_debug("CAIF: %s(): NONBLOCK and transmit failed," + " error = %ld\n", __func__, (long) ret); + ret = -EAGAIN; + goto write_error; + } + + /* Let readers in */ + release_sock(&cf_sk->sk); + + /* Wait until flow is on or socket is closed */ + if (wait_event_interruptible(*cf_sk->sk.sk_sleep, + TX_FLOW_IS_ON(cf_sk) + || !STATE_IS_OPEN(cf_sk) + || STATE_IS_REMOTE_SHUTDOWN(cf_sk) + ) == -ERESTARTSYS) { + pr_debug("CAIF: %s(): wait_event_interruptible" + " woken by a signal", __func__); + ret = -ERESTARTSYS; + goto write_error_no_unlock; + } + + /* I want to be alone on cf_sk (except status and queue) */ + lock_sock(&(cf_sk->sk)); + + } while (ret == -EAGAIN); + + if (ret < 0) { + cfpkt_destroy(pkt); + pr_debug("CAIF: %s(): transmit failed, error = %ld\n", + __func__, (long) ret); + + goto write_error; + } + + release_sock(&cf_sk->sk); + return payload_size; + +write_error: + release_sock(&cf_sk->sk); +write_error_no_unlock: + return ret; +} + +static unsigned int caif_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + struct sock *sk = sock->sk; + struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); + u32 mask = 0; + poll_wait(file, sk->sk_sleep, wait); + lock_sock(&(cf_sk->sk)); + if (!STATE_IS_OPEN(cf_sk)) { + if (!STATE_IS_PENDING(cf_sk)) + mask |= POLLHUP; + } else { + if (cfpkt_qpeek(cf_sk->pktq) != NULL) + mask |= (POLLIN | POLLRDNORM); + if (TX_FLOW_IS_ON(cf_sk)) + mask |= (POLLOUT | POLLWRNORM); + } + release_sock(&cf_sk->sk); + trace_printk("CAIF: %s(): poll mask=0x%04x\n", + 
__func__, mask); + return mask; +} + +static void drain_queue(struct caifsock *cf_sk) +{ + struct cfpkt *pkt = NULL; + + /* Empty the queue */ + do { + /* The queue has its own lock */ + if (!cf_sk->pktq) + break; + + pkt = cfpkt_dequeue(cf_sk->pktq); + if (!pkt) + break; + pr_debug("CAIF: %s(): freeing packet from read queue\n", + __func__); + cfpkt_destroy(pkt); + + } while (1); + + cf_sk->read_queue_len = 0; +} + +static int setsockopt(struct socket *sock, + int lvl, int opt, char __user *ov, unsigned int ol) +{ + struct sock *sk = sock->sk; + struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); + int prio, linksel; + struct ifreq ifreq; + + if (STATE_IS_OPEN(cf_sk)) { + pr_debug("CAIF: %s(): setsockopt " + "cannot be done on a connected socket\n", + __func__); + return -ENOPROTOOPT; + } + switch (opt) { + case CAIFSO_LINK_SELECT: + if (ol < sizeof(int)) { + pr_debug("CAIF: %s(): setsockopt" + " CAIFSO_CHANNEL_CONFIG bad size\n", __func__); + return -EINVAL; + } + if (lvl != SOL_CAIF) + goto bad_sol; + if (copy_from_user(&linksel, ov, sizeof(int))) + return -EINVAL; + lock_sock(&(cf_sk->sk)); + cf_sk->conn_req.link_selector = linksel; + release_sock(&cf_sk->sk); + return 0; + + case SO_PRIORITY: + if (lvl != SOL_SOCKET) + goto bad_sol; + if (ol < sizeof(int)) { + pr_debug("CAIF: %s(): setsockopt" + " SO_PRIORITY bad size\n", __func__); + return -EINVAL; + } + if (copy_from_user(&prio, ov, sizeof(int))) + return -EINVAL; + lock_sock(&(cf_sk->sk)); + cf_sk->conn_req.priority = prio; + pr_debug("CAIF: %s(): Setting sockopt priority=%d\n", __func__, + cf_sk->conn_req.priority); + release_sock(&cf_sk->sk); + return 0; + + case SO_BINDTODEVICE: + if (lvl != SOL_SOCKET) + goto bad_sol; + if (ol < sizeof(struct ifreq)) { + pr_debug("CAIF: %s(): setsockopt" + " SO_PRIORITY bad size\n", __func__); + return -EINVAL; + } + if (copy_from_user(&ifreq, ov, sizeof(ifreq))) + return -EFAULT; + lock_sock(&(cf_sk->sk)); + strncpy(cf_sk->conn_req.link_name, ifreq.ifr_name, + sizeof(cf_sk->conn_req.link_name)); + cf_sk->conn_req.link_name + [sizeof(cf_sk->conn_req.link_name)-1] = 0; + release_sock(&cf_sk->sk); + return 0; + + case CAIFSO_REQ_PARAM: + if (lvl != SOL_CAIF) + goto bad_sol; + if (cf_sk->sk.sk_protocol != CAIFPROTO_UTIL) + return -ENOPROTOOPT; + if (ol > sizeof(cf_sk->conn_req.param.data)) + goto req_param_bad_size; + + lock_sock(&(cf_sk->sk)); + cf_sk->conn_req.param.size = ol; + if (copy_from_user(&cf_sk->conn_req.param.data, ov, ol)) { + release_sock(&cf_sk->sk); +req_param_bad_size: + pr_debug("CAIF: %s(): setsockopt" + " CAIFSO_CHANNEL_CONFIG bad size\n", __func__); + return -EINVAL; + } + + release_sock(&cf_sk->sk); + return 0; + + default: + pr_debug("CAIF: %s(): unhandled option %d\n", __func__, opt); + return -EINVAL; + } + + return 0; +bad_sol: + pr_debug("CAIF: %s(): setsockopt bad level\n", __func__); + return -ENOPROTOOPT; + +} + +static int caif_connect(struct socket *sock, struct sockaddr *uservaddr, + int sockaddr_len, int flags) +{ + struct caifsock *cf_sk = NULL; + int result = -1; + int mode = 0; + int ret = -EIO; + struct sock *sk = sock->sk; + BUG_ON(sk == NULL); + + cf_sk = container_of(sk, struct caifsock, sk); + + trace_printk("CAIF: %s(): cf_sk=%p OPEN=%d, TX_FLOW=%d, RX_FLOW=%d\n", + __func__, cf_sk, + STATE_IS_OPEN(cf_sk), + TX_FLOW_IS_ON(cf_sk), RX_FLOW_IS_ON(cf_sk)); + + + if (sock->type == SOCK_SEQPACKET || sock->type == SOCK_STREAM) + sock->state = SS_CONNECTING; + else + goto out; + + /* I want to be alone on cf_sk (except status and queue) */ + 
lock_sock(&(cf_sk->sk)); + + if (sockaddr_len != sizeof(struct sockaddr_caif)) { + pr_debug("CAIF: %s(): Bad address len (%ld,%lu)\n", + __func__, (long) sockaddr_len, + (long unsigned) sizeof(struct sockaddr_caif)); + ret = -EINVAL; + goto open_error; + } + + if (uservaddr->sa_family != AF_CAIF) { + pr_debug("CAIF: %s(): Bad address family (%d)\n", + __func__, uservaddr->sa_family); + ret = -EAFNOSUPPORT; + goto open_error; + } + + memcpy(&cf_sk->conn_req.sockaddr, uservaddr, + sizeof(struct sockaddr_caif)); + + dbfs_atomic_inc(&cnt.num_open); + mode = SKT_READ_FLAG | SKT_WRITE_FLAG; + + /* If socket is not open, make sure socket is in fully closed state */ + if (!STATE_IS_OPEN(cf_sk)) { + /* Has link close response been received (if we ever sent it)?*/ + if (STATE_IS_PENDING(cf_sk)) { + /* + * Still waiting for close response from remote. + * If opened non-blocking, report "would block" + */ + if (flags & O_NONBLOCK) { + pr_debug("CAIF: %s(): O_NONBLOCK" + " && close pending\n", __func__); + ret = -EAGAIN; + goto open_error; + } + + pr_debug("CAIF: %s(): Wait for close response" + " from remote...\n", __func__); + + release_sock(&cf_sk->sk); + + /* + * Blocking mode; close is pending and we need to wait + * for its conclusion. + */ + result = + wait_event_interruptible(*cf_sk->sk.sk_sleep, + !STATE_IS_PENDING(cf_sk)); + + lock_sock(&(cf_sk->sk)); + if (result == -ERESTARTSYS) { + pr_debug("CAIF: %s(): wait_event_interruptible" + "woken by a signal (1)", __func__); + ret = -ERESTARTSYS; + goto open_error; + } + } + } + + /* socket is now either closed, pending open or open */ + if (STATE_IS_OPEN(cf_sk) && !STATE_IS_PENDING(cf_sk)) { + /* Open */ + pr_debug("CAIF: %s(): Socket is already opened (cf_sk=%p)" + " check access f_flags = 0x%x file_mode = 0x%x\n", + __func__, cf_sk, mode, cf_sk->file_mode); + + } else { + /* We are closed or pending open. + * If closed: send link setup + * If pending open: link setup already sent (we could have been + * interrupted by a signal last time) + */ + if (!STATE_IS_OPEN(cf_sk)) { + /* First opening of file; connect lower layers: */ + /* Drain queue (very unlikely) */ + drain_queue(cf_sk); + + cf_sk->layer.receive = caif_sktrecv_cb; + SET_STATE_OPEN(cf_sk); + SET_PENDING_ON(cf_sk); + + /* Register this channel. */ + result = + caif_connect_client(&cf_sk->conn_req, + &cf_sk->layer); + if (result < 0) { + pr_debug("CAIF: %s(): can't register channel\n", + __func__); + ret = -EIO; + SET_STATE_CLOSED(cf_sk); + SET_PENDING_OFF(cf_sk); + goto open_error; + } + dbfs_atomic_inc(&cnt.num_init); + } + + /* If opened non-blocking, report "success". 
+ */ + if (flags & O_NONBLOCK) { + pr_debug("CAIF: %s(): O_NONBLOCK success\n", + __func__); + ret = -EINPROGRESS; + cf_sk->sk.sk_err = -EINPROGRESS; + goto open_error; + } + + trace_printk("CAIF: %s(): Wait for connect response\n", + __func__); + + /* release lock before waiting */ + release_sock(&cf_sk->sk); + + result = + wait_event_interruptible(*cf_sk->sk.sk_sleep, + !STATE_IS_PENDING(cf_sk)); + + lock_sock(&(cf_sk->sk)); + + if (result == -ERESTARTSYS) { + pr_debug("CAIF: %s(): wait_event_interruptible" + "woken by a signal (2)", __func__); + ret = -ERESTARTSYS; + goto open_error; + } + + if (!STATE_IS_OPEN(cf_sk)) { + /* Lower layers said "no" */ + pr_debug("CAIF: %s(): Closed received\n", __func__); + ret = -EPIPE; + goto open_error; + } + + trace_printk("CAIF: %s(): Connect received\n", __func__); + } + /* Open is ok */ + cf_sk->file_mode |= mode; + + trace_printk("CAIF: %s(): Connected - file mode = %x\n", + __func__, cf_sk->file_mode); + + release_sock(&cf_sk->sk); + return 0; +open_error: + sock->state = SS_UNCONNECTED; + release_sock(&cf_sk->sk); +out: + return ret; +} + +static int caif_shutdown(struct socket *sock, int how) +{ + struct caifsock *cf_sk = NULL; + int result = 0; + int tx_flow_state_was_on; + struct sock *sk = sock->sk; + + trace_printk("CAIF: %s(): enter\n", __func__); + pr_debug("f_flags=%x\n", sock->file->f_flags); + + if (how != SHUT_RDWR) + return -EOPNOTSUPP; + + cf_sk = container_of(sk, struct caifsock, sk); + if (cf_sk == NULL) { + pr_debug("CAIF: %s(): COULD NOT FIND SOCKET\n", __func__); + return -EBADF; + } + + /* I want to be alone on cf_sk (except status queue) */ + lock_sock(&(cf_sk->sk)); + sock_hold(&cf_sk->sk); + + /* IS_CLOSED have double meaning: + * 1) Spontanous Remote Shutdown Request. + * 2) Ack on a channel teardown(disconnect) + * Must clear bit in case we previously received + * remote shudown request. + */ + if (STATE_IS_OPEN(cf_sk) && !STATE_IS_PENDING(cf_sk)) { + SET_STATE_CLOSED(cf_sk); + SET_PENDING_ON(cf_sk); + tx_flow_state_was_on = TX_FLOW_IS_ON(cf_sk); + SET_TX_FLOW_OFF(cf_sk); + + /* Hold the socket until DEINIT_RSP is received */ + sock_hold(&cf_sk->sk); + result = caif_disconnect_client(&cf_sk->layer); + + if (result < 0) { + pr_debug("CAIF: %s(): " + "caif_disconnect_client() failed\n", + __func__); + SET_STATE_CLOSED(cf_sk); + SET_PENDING_OFF(cf_sk); + SET_TX_FLOW_OFF(cf_sk); + release_sock(&cf_sk->sk); + sock_put(&cf_sk->sk); + return -EIO; + } + + } + if (STATE_IS_REMOTE_SHUTDOWN(cf_sk)) { + SET_PENDING_OFF(cf_sk); + SET_REMOTE_SHUTDOWN_OFF(cf_sk); + } + + /* + * Socket is no longer in state pending close, + * and we can release the reference. + */ + + dbfs_atomic_inc(&cnt.num_close); + drain_queue(cf_sk); + SET_RX_FLOW_ON(cf_sk); + cf_sk->file_mode = 0; + sock_put(&cf_sk->sk); + release_sock(&cf_sk->sk); + if (!result && (sock->file->f_flags & O_NONBLOCK)) { + pr_debug("nonblocking shutdown returing -EAGAIN\n"); + return -EAGAIN; + } else + return result; +} + +static ssize_t caif_sock_no_sendpage(struct socket *sock, + struct page *page, + int offset, size_t size, int flags) +{ + return -EOPNOTSUPP; +} + +/* This function is called as part of close. 
*/ +static int caif_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + struct caifsock *cf_sk = NULL; + int res; + caif_assert(sk != NULL); + cf_sk = container_of(sk, struct caifsock, sk); + + if (cf_sk->debugfs_socket_dir != NULL) + debugfs_remove_recursive(cf_sk->debugfs_socket_dir); + + res = caif_shutdown(sock, SHUT_RDWR); + if (res && res != -EINPROGRESS) + return res; + + /* + * FIXME: Shutdown should probably be possible to do async + * without flushing queues, allowing reception of frames while + * waiting for DEINIT_IND. + * Release should always block, to allow secure decoupling of + * CAIF stack. + */ + if (!(sock->file->f_flags & O_NONBLOCK)) { + res = wait_event_interruptible(*cf_sk->sk.sk_sleep, + !STATE_IS_PENDING(cf_sk)); + + if (res == -ERESTARTSYS) { + pr_debug("CAIF: %s(): wait_event_interruptible" + "woken by a signal (1)", __func__); + } + } + lock_sock(&(cf_sk->sk)); + + sock->sk = NULL; + + /* Detach the socket from its process context by making it orphan. */ + sock_orphan(sk); + + /* + * Setting SHUTDOWN_MASK means that both send and receive are shutdown + * for the socket. + */ + sk->sk_shutdown = SHUTDOWN_MASK; + + /* + * Set the socket state to closed, the TCP_CLOSE macro is used when + * closing any socket. + */ + + /* Flush out this sockets receive queue. */ + drain_queue(cf_sk); + + /* Finally release the socket. */ + SET_STATE_PENDING_DESTROY(cf_sk); + + release_sock(&cf_sk->sk); + + sock_put(sk); + + /* + * The rest of the cleanup will be handled from the + * caif_sock_destructor + */ + return res; +} + +static const struct proto_ops caif_ops = { + .family = PF_CAIF, + .owner = THIS_MODULE, + .release = caif_release, + .bind = sock_no_bind, + .connect = caif_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = sock_no_getname, + .poll = caif_poll, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = caif_shutdown, + .setsockopt = setsockopt, + .getsockopt = sock_no_getsockopt, + .sendmsg = caif_sendmsg, + .recvmsg = caif_recvmsg, + .mmap = sock_no_mmap, + .sendpage = caif_sock_no_sendpage, +}; + +/* This function is called when a socket is finally destroyed. */ +static void caif_sock_destructor(struct sock *sk) +{ + struct caifsock *cf_sk = NULL; + cf_sk = container_of(sk, struct caifsock, sk); + /* Error checks. */ + caif_assert(!atomic_read(&sk->sk_wmem_alloc)); + caif_assert(sk_unhashed(sk)); + caif_assert(!sk->sk_socket); + if (!sock_flag(sk, SOCK_DEAD)) { + pr_debug("CAIF: %s(): 0x%p", __func__, sk); + return; + } + + if (STATE_IS_OPEN(cf_sk)) { + pr_debug("CAIF: %s(): socket is opened (cf_sk=%p)" + " file_mode = 0x%x\n", __func__, + cf_sk, cf_sk->file_mode); + return; + } + drain_queue(cf_sk); + kfree(cf_sk->pktq); + + trace_printk("CAIF: %s(): caif_sock_destructor: Removing socket %s\n", + __func__, cf_sk->name); + atomic_dec(&caif_nr_socks); +} + +static int caif_create(struct net *net, struct socket *sock, int protocol, + int kern) +{ + struct sock *sk = NULL; + struct caifsock *cf_sk = NULL; + int result = 0; + static struct proto prot = {.name = "PF_CAIF", + .owner = THIS_MODULE, + .obj_size = sizeof(struct caifsock), + }; + + /* + * The sock->type specifies the socket type to use. + * in SEQPACKET mode packet boundaries are enforced. 
+ */ + if (sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM) + return -ESOCKTNOSUPPORT; + + if (net != &init_net) + return -EAFNOSUPPORT; + + if (protocol < 0 || protocol >= CAIFPROTO_MAX) + return -EPROTONOSUPPORT; + /* + * Set the socket state to unconnected. The socket state is really + * not used at all in the net/core or socket.c but the + * initialization makes sure that sock->state is not uninitialized. + */ + sock->state = SS_UNCONNECTED; + + sk = sk_alloc(net, PF_CAIF, GFP_KERNEL, &prot); + if (!sk) + return -ENOMEM; + + cf_sk = container_of(sk, struct caifsock, sk); + + /* Store the protocol */ + sk->sk_protocol = (unsigned char) protocol; + + spin_lock_init(&cf_sk->read_queue_len_lock); + + /* Fill in some information concerning the misc socket. */ + snprintf(cf_sk->name, sizeof(cf_sk->name), "cf_sk%d", + atomic_read(&caif_nr_socks)); + + /* + * Lock in order to try to stop someone from opening the socket + * too early. + */ + lock_sock(&(cf_sk->sk)); + + /* Initialize the nozero default sock structure data. */ + sock_init_data(sock, sk); + sock->ops = &caif_ops; + sk->sk_destruct = caif_sock_destructor; + sk->sk_sndbuf = caif_sockbuf_size; + sk->sk_rcvbuf = caif_sockbuf_size; + + cf_sk->pktq = cfpktq_create(); + + if (!cf_sk->pktq) { + pr_err("CAIF: %s(): queue create failed.\n", __func__); + result = -ENOMEM; + release_sock(&cf_sk->sk); + goto err_failed; + } + cf_sk->layer.ctrlcmd = caif_sktflowctrl_cb; + SET_STATE_CLOSED(cf_sk); + SET_PENDING_OFF(cf_sk); + SET_TX_FLOW_OFF(cf_sk); + SET_RX_FLOW_ON(cf_sk); + + /* Set default options on configuration */ + cf_sk->conn_req.priority = CAIF_PRIO_NORMAL; + cf_sk->conn_req.link_selector = CAIF_LINK_HIGH_BANDW; + cf_sk->conn_req.protocol = protocol; + /* Increase the number of sockets created. 
*/ + atomic_inc(&caif_nr_socks); + if (!IS_ERR(debugfsdir)) { + cf_sk->debugfs_socket_dir = + debugfs_create_dir(cf_sk->name, debugfsdir); + debugfs_create_u32("conn_state", S_IRUSR | S_IWUSR, + cf_sk->debugfs_socket_dir, &cf_sk->conn_state); + debugfs_create_u32("flow_state", S_IRUSR | S_IWUSR, + cf_sk->debugfs_socket_dir, &cf_sk->flow_state); + debugfs_create_u32("read_queue_len", S_IRUSR | S_IWUSR, + cf_sk->debugfs_socket_dir, + (u32 *) &cf_sk->read_queue_len); + debugfs_create_u32("identity", S_IRUSR | S_IWUSR, + cf_sk->debugfs_socket_dir, + (u32 *) &cf_sk->layer.id); + } + release_sock(&cf_sk->sk); + return 0; +err_failed: + sk_free(sk); + return result; +} + +static struct net_proto_family caif_family_ops = { + .family = PF_CAIF, + .create = caif_create, + .owner = THIS_MODULE, +}; + +static int af_caif_init(void) +{ + int err; + err = sock_register(&caif_family_ops); + + if (!err) + return err; + + return 0; +} + +static int __init caif_sktinit_module(void) +{ + int stat; +#ifdef CONFIG_DEBUG_FS + debugfsdir = debugfs_create_dir("chnl_skt", NULL); + if (!IS_ERR(debugfsdir)) { + debugfs_create_u32("skb_inuse", S_IRUSR | S_IWUSR, + debugfsdir, + (u32 *) &cnt.skb_in_use); + debugfs_create_u32("skb_alloc", S_IRUSR | S_IWUSR, + debugfsdir, + (u32 *) &cnt.skb_alloc); + debugfs_create_u32("skb_free", S_IRUSR | S_IWUSR, + debugfsdir, + (u32 *) &cnt.skb_free); + debugfs_create_u32("num_sockets", S_IRUSR | S_IWUSR, + debugfsdir, + (u32 *) &caif_nr_socks); + debugfs_create_u32("num_open", S_IRUSR | S_IWUSR, + debugfsdir, + (u32 *) &cnt.num_open); + debugfs_create_u32("num_close", S_IRUSR | S_IWUSR, + debugfsdir, + (u32 *) &cnt.num_close); + debugfs_create_u32("num_init", S_IRUSR | S_IWUSR, + debugfsdir, + (u32 *) &cnt.num_init); + debugfs_create_u32("num_init_resp", S_IRUSR | S_IWUSR, + debugfsdir, + (u32 *) &cnt.num_init_resp); + debugfs_create_u32("num_init_fail_resp", S_IRUSR | S_IWUSR, + debugfsdir, + (u32 *) &cnt.num_init_fail_resp); + debugfs_create_u32("num_deinit", S_IRUSR | S_IWUSR, + debugfsdir, + (u32 *) &cnt.num_deinit); + debugfs_create_u32("num_deinit_resp", S_IRUSR | S_IWUSR, + debugfsdir, + (u32 *) &cnt.num_deinit_resp); + debugfs_create_u32("num_remote_shutdown_ind", + S_IRUSR | S_IWUSR, debugfsdir, + (u32 *) &cnt.num_remote_shutdown_ind); + debugfs_create_u32("num_tx_flow_off_ind", S_IRUSR | S_IWUSR, + debugfsdir, + (u32 *) &cnt.num_tx_flow_off_ind); + debugfs_create_u32("num_tx_flow_on_ind", S_IRUSR | S_IWUSR, + debugfsdir, + (u32 *) &cnt.num_tx_flow_on_ind); + debugfs_create_u32("num_rx_flow_off", S_IRUSR | S_IWUSR, + debugfsdir, + (u32 *) &cnt.num_rx_flow_off); + debugfs_create_u32("num_rx_flow_on", S_IRUSR | S_IWUSR, + debugfsdir, + (u32 *) &cnt.num_rx_flow_on); + } +#endif + stat = af_caif_init(); + if (stat) { + pr_err("CAIF: %s(): Failed to initialize CAIF socket layer.", + __func__); + return stat; + } + return 0; +} + +static void __exit caif_sktexit_module(void) +{ + sock_unregister(PF_CAIF); + if (debugfsdir != NULL) + debugfs_remove_recursive(debugfsdir); +} + +module_init(caif_sktinit_module); +module_exit(caif_sktexit_module); -- cgit v1.2.2 From cc36a070b5901cd54386348b4d79d2daac91ce75 Mon Sep 17 00:00:00 2001 From: Sjur Braendeland Date: Tue, 30 Mar 2010 13:56:27 +0000 Subject: net-caif: add CAIF netdevice Adding GPRS Net Device for PDP Contexts. The device can be managed by RTNL as defined in if_caif.h. Signed-off-by: Sjur Braendeland Signed-off-by: David S. 
Miller --- net/caif/chnl_net.c | 451 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 451 insertions(+) create mode 100644 net/caif/chnl_net.c (limited to 'net') diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c new file mode 100644 index 000000000000..f622ff1d39ba --- /dev/null +++ b/net/caif/chnl_net.c @@ -0,0 +1,451 @@ +/* + * Copyright (C) ST-Ericsson AB 2010 + * Authors: Sjur Brendeland/sjur.brandeland@stericsson.com + * Daniel Martensson / Daniel.Martensson@stericsson.com + * License terms: GNU General Public License (GPL) version 2 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CAIF_CONNECT_TIMEOUT 30 +#define SIZE_MTU 1500 +#define SIZE_MTU_MAX 4080 +#define SIZE_MTU_MIN 68 +#define CAIF_NET_DEFAULT_QUEUE_LEN 500 + +#undef pr_debug +#define pr_debug pr_warning + +/*This list is protected by the rtnl lock. */ +static LIST_HEAD(chnl_net_list); + +MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("caif"); + +struct chnl_net { + struct cflayer chnl; + struct net_device_stats stats; + struct caif_connect_request conn_req; + struct list_head list_field; + struct net_device *netdev; + char name[256]; + wait_queue_head_t netmgmt_wq; + /* Flow status to remember and control the transmission. */ + bool flowenabled; + bool pending_close; +}; + +static void robust_list_del(struct list_head *delete_node) +{ + struct list_head *list_node; + struct list_head *n; + ASSERT_RTNL(); + list_for_each_safe(list_node, n, &chnl_net_list) { + if (list_node == delete_node) { + list_del(list_node); + break; + } + } +} + +static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt) +{ + struct sk_buff *skb; + struct chnl_net *priv = NULL; + int pktlen; + int err = 0; + + priv = container_of(layr, struct chnl_net, chnl); + + if (!priv) + return -EINVAL; + + /* Get length of CAIF packet. */ + pktlen = cfpkt_getlen(pkt); + + skb = (struct sk_buff *) cfpkt_tonative(pkt); + /* Pass some minimum information and + * send the packet to the net stack. + */ + skb->dev = priv->netdev; + skb->protocol = htons(ETH_P_IP); + + /* If we change the header in loop mode, the checksum is corrupted. */ + if (priv->conn_req.protocol == CAIFPROTO_DATAGRAM_LOOP) + skb->ip_summed = CHECKSUM_UNNECESSARY; + else + skb->ip_summed = CHECKSUM_NONE; + + /* FIXME: Drivers should call this in tasklet context. */ + if (in_interrupt()) + netif_rx(skb); + else + netif_rx_ni(skb); + + /* Update statistics. */ + priv->netdev->stats.rx_packets++; + priv->netdev->stats.rx_bytes += pktlen; + + return err; +} + +static int delete_device(struct chnl_net *dev) +{ + ASSERT_RTNL(); + if (dev->netdev) + unregister_netdevice(dev->netdev); + return 0; +} + +static void close_work(struct work_struct *work) +{ + struct chnl_net *dev = NULL; + struct list_head *list_node; + struct list_head *_tmp; + rtnl_lock(); + list_for_each_safe(list_node, _tmp, &chnl_net_list) { + dev = list_entry(list_node, struct chnl_net, list_field); + if (!dev->pending_close) + continue; + list_del(list_node); + delete_device(dev); + } + rtnl_unlock(); +} +static DECLARE_WORK(close_worker, close_work); + +static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow, + int phyid) +{ + struct chnl_net *priv; + pr_debug("CAIF: %s(): NET flowctrl func called flow: %s.\n", + __func__, + flow == CAIF_CTRLCMD_FLOW_ON_IND ? "ON" : + flow == CAIF_CTRLCMD_INIT_RSP ? "INIT" : + flow == CAIF_CTRLCMD_FLOW_OFF_IND ? 
"OFF" : + flow == CAIF_CTRLCMD_DEINIT_RSP ? "CLOSE/DEINIT" : + flow == CAIF_CTRLCMD_INIT_FAIL_RSP ? "OPEN_FAIL" : + flow == CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND ? + "REMOTE_SHUTDOWN" : "UKNOWN CTRL COMMAND"); + + priv = container_of(layr, struct chnl_net, chnl); + + switch (flow) { + case CAIF_CTRLCMD_FLOW_OFF_IND: + case CAIF_CTRLCMD_DEINIT_RSP: + case CAIF_CTRLCMD_INIT_FAIL_RSP: + case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND: + priv->flowenabled = false; + netif_tx_disable(priv->netdev); + pr_warning("CAIF: %s(): done\n", __func__); + priv->pending_close = 1; + schedule_work(&close_worker); + break; + case CAIF_CTRLCMD_FLOW_ON_IND: + case CAIF_CTRLCMD_INIT_RSP: + priv->flowenabled = true; + netif_wake_queue(priv->netdev); + wake_up_interruptible(&priv->netmgmt_wq); + break; + default: + break; + } +} + +static int chnl_net_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct chnl_net *priv; + struct cfpkt *pkt = NULL; + int len; + int result = -1; + /* Get our private data. */ + priv = netdev_priv(dev); + + if (skb->len > priv->netdev->mtu) { + pr_warning("CAIF: %s(): Size of skb exceeded MTU\n", __func__); + return -ENOSPC; + } + + if (!priv->flowenabled) { + pr_debug("CAIF: %s(): dropping packets flow off\n", __func__); + return NETDEV_TX_BUSY; + } + + if (priv->conn_req.protocol == CAIFPROTO_DATAGRAM_LOOP) + swap(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); + + /* Store original SKB length. */ + len = skb->len; + + pkt = cfpkt_fromnative(CAIF_DIR_OUT, (void *) skb); + + pr_debug("CAIF: %s(): transmit inst %s %d,%p\n", + __func__, dev->name, priv->chnl.dn->id, &priv->chnl.dn); + + /* Send the packet down the stack. */ + result = priv->chnl.dn->transmit(priv->chnl.dn, pkt); + if (result) { + if (result == -EAGAIN) + result = NETDEV_TX_BUSY; + return result; + } + + /* Update statistics. 
*/ + dev->stats.tx_packets++; + dev->stats.tx_bytes += len; + + return NETDEV_TX_OK; +} + +static int chnl_net_open(struct net_device *dev) +{ + struct chnl_net *priv = NULL; + int result = -1; + ASSERT_RTNL(); + + priv = netdev_priv(dev); + pr_debug("CAIF: %s(): dev name: %s\n", __func__, priv->name); + + if (!priv) { + pr_debug("CAIF: %s(): chnl_net_open: no priv\n", __func__); + return -ENODEV; + } + result = caif_connect_client(&priv->conn_req, &priv->chnl); + if (result != 0) { + pr_debug("CAIF: %s(): err: " + "Unable to register and open device, Err:%d\n", + __func__, + result); + return -ENODEV; + } + result = wait_event_interruptible(priv->netmgmt_wq, priv->flowenabled); + + if (result == -ERESTARTSYS) { + pr_debug("CAIF: %s(): wait_event_interruptible" + " woken by a signal\n", __func__); + return -ERESTARTSYS; + } else + pr_debug("CAIF: %s(): Flow on recieved\n", __func__); + + return 0; +} + +static int chnl_net_stop(struct net_device *dev) +{ + struct chnl_net *priv; + int result = -1; + ASSERT_RTNL(); + priv = netdev_priv(dev); + + result = caif_disconnect_client(&priv->chnl); + if (result != 0) { + pr_debug("CAIF: %s(): chnl_net_stop: err: " + "Unable to STOP device, Err:%d\n", + __func__, result); + return -EBUSY; + } + result = wait_event_interruptible(priv->netmgmt_wq, + !priv->flowenabled); + + if (result == -ERESTARTSYS) { + pr_debug("CAIF: %s(): wait_event_interruptible woken by" + " signal, signal_pending(current) = %d\n", + __func__, + signal_pending(current)); + } else { + pr_debug("CAIF: %s(): disconnect received\n", __func__); + + } + + return 0; +} + +static int chnl_net_init(struct net_device *dev) +{ + struct chnl_net *priv; + ASSERT_RTNL(); + priv = netdev_priv(dev); + strncpy(priv->name, dev->name, sizeof(priv->name)); + return 0; +} + +static void chnl_net_uninit(struct net_device *dev) +{ + struct chnl_net *priv; + ASSERT_RTNL(); + priv = netdev_priv(dev); + robust_list_del(&priv->list_field); +} + +static const struct net_device_ops netdev_ops = { + .ndo_open = chnl_net_open, + .ndo_stop = chnl_net_stop, + .ndo_init = chnl_net_init, + .ndo_uninit = chnl_net_uninit, + .ndo_start_xmit = chnl_net_start_xmit, +}; + +static void ipcaif_net_setup(struct net_device *dev) +{ + struct chnl_net *priv; + dev->netdev_ops = &netdev_ops; + dev->destructor = free_netdev; + dev->flags |= IFF_NOARP; + dev->flags |= IFF_POINTOPOINT; + dev->needed_headroom = CAIF_NEEDED_HEADROOM; + dev->needed_tailroom = CAIF_NEEDED_TAILROOM; + dev->mtu = SIZE_MTU; + dev->tx_queue_len = CAIF_NET_DEFAULT_QUEUE_LEN; + + priv = netdev_priv(dev); + priv->chnl.receive = chnl_recv_cb; + priv->chnl.ctrlcmd = chnl_flowctrl_cb; + priv->netdev = dev; + priv->conn_req.protocol = CAIFPROTO_DATAGRAM; + priv->conn_req.link_selector = CAIF_LINK_HIGH_BANDW; + priv->conn_req.priority = CAIF_PRIO_LOW; + /* Insert illegal value */ + priv->conn_req.sockaddr.u.dgm.connection_id = -1; + priv->flowenabled = false; + + ASSERT_RTNL(); + init_waitqueue_head(&priv->netmgmt_wq); + list_add(&priv->list_field, &chnl_net_list); +} + + +static int ipcaif_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct chnl_net *priv; + u8 loop; + priv = netdev_priv(dev); + NLA_PUT_U32(skb, IFLA_CAIF_IPV4_CONNID, + priv->conn_req.sockaddr.u.dgm.connection_id); + NLA_PUT_U32(skb, IFLA_CAIF_IPV6_CONNID, + priv->conn_req.sockaddr.u.dgm.connection_id); + loop = priv->conn_req.protocol == CAIFPROTO_DATAGRAM_LOOP; + NLA_PUT_U8(skb, IFLA_CAIF_LOOPBACK, loop); + + + return 0; +nla_put_failure: + return -EMSGSIZE; + +} + 
+static void caif_netlink_parms(struct nlattr *data[], + struct caif_connect_request *conn_req) +{ + if (!data) { + pr_warning("CAIF: %s: no params data found\n", __func__); + return; + } + if (data[IFLA_CAIF_IPV4_CONNID]) + conn_req->sockaddr.u.dgm.connection_id = + nla_get_u32(data[IFLA_CAIF_IPV4_CONNID]); + if (data[IFLA_CAIF_IPV6_CONNID]) + conn_req->sockaddr.u.dgm.connection_id = + nla_get_u32(data[IFLA_CAIF_IPV6_CONNID]); + if (data[IFLA_CAIF_LOOPBACK]) { + if (nla_get_u8(data[IFLA_CAIF_LOOPBACK])) + conn_req->protocol = CAIFPROTO_DATAGRAM_LOOP; + else + conn_req->protocol = CAIFPROTO_DATAGRAM; + } +} + +static int ipcaif_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + int ret; + struct chnl_net *caifdev; + ASSERT_RTNL(); + caifdev = netdev_priv(dev); + caif_netlink_parms(data, &caifdev->conn_req); + ret = register_netdevice(dev); + if (ret) + pr_warning("CAIF: %s(): device rtml registration failed\n", + __func__); + return ret; +} + +static int ipcaif_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[]) +{ + struct chnl_net *caifdev; + ASSERT_RTNL(); + caifdev = netdev_priv(dev); + caif_netlink_parms(data, &caifdev->conn_req); + netdev_state_change(dev); + return 0; +} + +static size_t ipcaif_get_size(const struct net_device *dev) +{ + return + /* IFLA_CAIF_IPV4_CONNID */ + nla_total_size(4) + + /* IFLA_CAIF_IPV6_CONNID */ + nla_total_size(4) + + /* IFLA_CAIF_LOOPBACK */ + nla_total_size(2) + + 0; +} + +static const struct nla_policy ipcaif_policy[IFLA_CAIF_MAX + 1] = { + [IFLA_CAIF_IPV4_CONNID] = { .type = NLA_U32 }, + [IFLA_CAIF_IPV6_CONNID] = { .type = NLA_U32 }, + [IFLA_CAIF_LOOPBACK] = { .type = NLA_U8 } +}; + + +static struct rtnl_link_ops ipcaif_link_ops __read_mostly = { + .kind = "caif", + .priv_size = sizeof(struct chnl_net), + .setup = ipcaif_net_setup, + .maxtype = IFLA_CAIF_MAX, + .policy = ipcaif_policy, + .newlink = ipcaif_newlink, + .changelink = ipcaif_changelink, + .get_size = ipcaif_get_size, + .fill_info = ipcaif_fill_info, + +}; + +static int __init chnl_init_module(void) +{ + return rtnl_link_register(&ipcaif_link_ops); +} + +static void __exit chnl_exit_module(void) +{ + struct chnl_net *dev = NULL; + struct list_head *list_node; + struct list_head *_tmp; + rtnl_link_unregister(&ipcaif_link_ops); + rtnl_lock(); + list_for_each_safe(list_node, _tmp, &chnl_net_list) { + dev = list_entry(list_node, struct chnl_net, list_field); + list_del(list_node); + delete_device(dev); + } + rtnl_unlock(); +} + +module_init(chnl_init_module); +module_exit(chnl_exit_module); -- cgit v1.2.2 From 3908c6902372206cc582ecf459af889b09a150c9 Mon Sep 17 00:00:00 2001 From: Sjur Braendeland Date: Tue, 30 Mar 2010 13:56:28 +0000 Subject: net-caif: add CAIF Kconfig and Makefiles Kconfig and Makefiles with options for: CAIF: Including caif CAIF_DEBUG: CAIF Debug CAIF_NETDEV: CAIF Network Device for GPRS Contexts Signed-off-by: Sjur Braendeland Signed-off-by: David S. 
Miller --- net/Kconfig | 2 ++ net/Makefile | 1 + net/caif/Kconfig | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ net/caif/Makefile | 26 ++++++++++++++++++++++++++ 4 files changed, 77 insertions(+) create mode 100644 net/caif/Kconfig create mode 100644 net/caif/Makefile (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index 68514644ce91..e10d55c8ee5c 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -280,5 +280,7 @@ source "net/wimax/Kconfig" source "net/rfkill/Kconfig" source "net/9p/Kconfig" +source "net/caif/Kconfig" + endif # if NET diff --git a/net/Makefile b/net/Makefile index 1542e7268a7b..a5eae27aa42d 100644 --- a/net/Makefile +++ b/net/Makefile @@ -56,6 +56,7 @@ obj-$(CONFIG_NETLABEL) += netlabel/ obj-$(CONFIG_IUCV) += iucv/ obj-$(CONFIG_RFKILL) += rfkill/ obj-$(CONFIG_NET_9P) += 9p/ +obj-$(CONFIG_CAIF) += caif/ ifneq ($(CONFIG_DCB),) obj-y += dcb/ endif diff --git a/net/caif/Kconfig b/net/caif/Kconfig new file mode 100644 index 000000000000..cd1daf6008bd --- /dev/null +++ b/net/caif/Kconfig @@ -0,0 +1,48 @@ +# +# CAIF net configurations +# + +#menu "CAIF Support" +comment "CAIF Support" +menuconfig CAIF + tristate "Enable CAIF support" + select CRC_CCITT + default n + ---help--- + The "Communication CPU to Application CPU Interface" (CAIF) is a packet + based connection-oriented MUX protocol developed by ST-Ericsson for use + with its modems. It is accessed from user space as sockets (PF_CAIF). + + Say Y (or M) here if you build for a phone product (e.g. Android or + MeeGo ) that uses CAIF as transport, if unsure say N. + + If you select to build it as module then CAIF_NETDEV also needs to be + built as modules. You will also need to say yes to any CAIF physical + devices that your platform requires. + + See Documentation/networking/caif for a further explanation on how to + use and configure CAIF. + +if CAIF + +config CAIF_DEBUG + bool "Enable Debug" + default n + --- help --- + Enable the inclusion of debug code in the CAIF stack. + Be aware that doing this will impact performance. + If unsure say N. + + +config CAIF_NETDEV + tristate "CAIF GPRS Network device" + default CAIF + ---help--- + Say Y if you will be using a CAIF based GPRS network device. + This can be either built-in or a loadable module, + If you select to build it as a built-in then the main CAIF device must + also be a built-in. + If unsure say Y. + +endif +#endmenu diff --git a/net/caif/Makefile b/net/caif/Makefile new file mode 100644 index 000000000000..34852af2595e --- /dev/null +++ b/net/caif/Makefile @@ -0,0 +1,26 @@ +ifeq ($(CONFIG_CAIF_DEBUG),1) +CAIF_DBG_FLAGS := -DDEBUG +endif + +ccflags-y := $(CAIF_FLAGS) $(CAIF_DBG_FLAGS) + +caif-objs := caif_dev.o \ + cfcnfg.o cfmuxl.o cfctrl.o \ + cffrml.o cfveil.o cfdbgl.o\ + cfserl.o cfdgml.o \ + cfrfml.o cfvidl.o cfutill.o \ + cfsrvl.o cfpkt_skbuff.o caif_config_util.o +clean-dirs:= .tmp_versions + +clean-files:= \ + Module.symvers \ + modules.order \ + *.cmd \ + *.o \ + *~ + +obj-$(CONFIG_CAIF) += caif.o +obj-$(CONFIG_CAIF_NETDEV) += chnl_net.o +obj-$(CONFIG_CAIF) += caif_socket.o + +export-objs := caif.o -- cgit v1.2.2 From 7e5ab157813993356f021757d0b0dcbdca7c55a1 Mon Sep 17 00:00:00 2001 From: Tom Goff Date: Tue, 30 Mar 2010 19:44:56 -0700 Subject: net_sched: minor netns related cleanup These changes were suggested by Alexey Dobriyan : - psched_show() does not use any private data so just pass NULL to psched_open() - remove unnecessary return statement Signed-off-by: Tom Goff Signed-off-by: David S. 
Miller --- net/sched/sch_api.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 6d6fe16289f3..c65866da17bc 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1683,7 +1683,7 @@ static int psched_show(struct seq_file *seq, void *v) static int psched_open(struct inode *inode, struct file *file) { - return single_open(file, psched_show, PDE(inode)->data); + return single_open(file, psched_show, NULL); } static const struct file_operations psched_fops = { @@ -1708,8 +1708,6 @@ static int __net_init psched_net_init(struct net *net) static void __net_exit psched_net_exit(struct net *net) { proc_net_remove(net, "psched"); - - return; } #else static int __net_init psched_net_init(struct net *net) -- cgit v1.2.2 From de7737e056d65ad6b0f135f7bb24d86458af0d47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?YOSHIFUJI=20Hideaki=20/=20=E5=90=89=E8=97=A4=E8=8B=B1?= =?UTF-8?q?=E6=98=8E?= Date: Fri, 26 Mar 2010 08:34:30 +0000 Subject: sctp: Use ipv6_addr_diff() in sctp_v6_addr_match_len(). Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'net') diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 240dceba06e5..216d88f27236 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -276,20 +276,7 @@ static struct dst_entry *sctp_v6_get_dst(struct sctp_association *asoc, static inline int sctp_v6_addr_match_len(union sctp_addr *s1, union sctp_addr *s2) { - struct in6_addr *a1 = &s1->v6.sin6_addr; - struct in6_addr *a2 = &s2->v6.sin6_addr; - int i, j; - - for (i = 0; i < 4 ; i++) { - __be32 a1xora2; - - a1xora2 = a1->s6_addr32[i] ^ a2->s6_addr32[i]; - - if ((j = fls(ntohl(a1xora2)))) - return (i * 32 + 32 - j); - } - - return (i*32); + return ipv6_addr_diff(&s1->v6.sin6_addr, &s2->v6.sin6_addr); } /* Fills in the source address(saddr) based on the destination address(daddr) -- cgit v1.2.2 From 02cdce53f3d0d3eee8188944c96150ee8c97100d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?YOSHIFUJI=20Hideaki=20/=20=E5=90=89=E8=97=A4=E8=8B=B1?= =?UTF-8?q?=E6=98=8E?= Date: Sat, 27 Mar 2010 01:24:16 +0000 Subject: ipv6 fib: Use "Sweezle" to optimize addr_bit_test(). addr_bit_test() is used in various places in IPv6 routing table subsystem. It checks if the given fn_bit is set, where fn_bit counts bits from MSB in words in network-order. fn_bit : 0 .... 31 32 .... 64 65 .... 95 96 ....127 fn_bit >> 5 gives offset of word, and (~fn_bit & 0x1f) gives count from LSB in the network-endian word in question. fn_bit >> 5 : 0 1 2 3 ~fn_bit & 0x1f: 31 .... 0 31 .... 0 31 .... 0 31 .... 0 Thus, the mask was generated as htonl(1 << (~fn_bit & 0x1f)). This can be optimized by "sweezle" (See include/asm-generic/bitops/le.h). In little-endian, htonl(1 << bit) = 1 << (bit ^ BITOP_BE32_SWIZZLE) where BITOP_BE32_SWIZZLE is (0x1f & ~7) So, htonl(1 << (~fn_bit & 0x1f)) = 1 << ((~fn_bit & 0x1f) ^ (0x1f & ~7)) = 1 << ((~fn_bit ^ ~7) & 0x1f) = 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f) In big-endian, BITOP_BE32_SWIZZLE is equal to 0. 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f) = 1 << ((~fn_bit) & 0x1f) = htonl(1 << (~fn_bit & 0x1f)) Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- net/ipv6/ip6_fib.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 2f9847924fa5..68119ef62869 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -127,12 +127,23 @@ static __inline__ u32 fib6_new_sernum(void) /* * test bit */ +#if defined(__LITTLE_ENDIAN) +# define BITOP_BE32_SWIZZLE (0x1F & ~7) +#else +# define BITOP_BE32_SWIZZLE 0 +#endif static __inline__ __be32 addr_bit_set(void *token, int fn_bit) { __be32 *addr = token; - - return htonl(1 << ((~fn_bit)&0x1F)) & addr[fn_bit>>5]; + /* + * Here, + * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f) + * is optimized version of + * htonl(1 << ((~fn_bit)&0x1F)) + * See include/asm-generic/bitops/le.h. + */ + return (1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) & addr[fn_bit >> 5]; } static __inline__ struct fib6_node * node_alloc(void) -- cgit v1.2.2 From b00fabb4020d17bda4bea59507e09fadf573088d Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 29 Mar 2010 14:47:27 +0000 Subject: netdev: ethtool RXHASH flag This adds ethtool and device feature flag to allow control of receive hashing offload. Signed-off-by: Stephen Hemminger Acked-by: Jeff Garzik Signed-off-by: David S. Miller --- net/core/ethtool.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index f4cb6b6299d9..73c81edde8d9 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -121,7 +121,7 @@ int ethtool_op_set_ufo(struct net_device *dev, u32 data) * NETIF_F_xxx values in include/linux/netdevice.h */ static const u32 flags_dup_features = - (ETH_FLAG_LRO | ETH_FLAG_NTUPLE); + (ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH); u32 ethtool_op_get_flags(struct net_device *dev) { @@ -152,6 +152,11 @@ int ethtool_op_set_flags(struct net_device *dev, u32 data) features &= ~NETIF_F_NTUPLE; } + if (data & ETH_FLAG_RXHASH) + features |= NETIF_F_RXHASH; + else + features &= ~NETIF_F_RXHASH; + dev->features = features; return 0; } -- cgit v1.2.2 From 598ed9367a36ee1fd4ae3271a54a3547a33975a5 Mon Sep 17 00:00:00 2001 From: laurent chavey Date: Mon, 29 Mar 2010 10:41:36 +0000 Subject: fix net/core/dst.c coding style error and warnings Fix coding style errors and warnings output while running checkpatch.pl on the file net/core/dst.c. Signed-off-by: chavey Signed-off-by: David S. 
Miller --- net/core/dst.c | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/core/dst.c b/net/core/dst.c index cb1b3488b739..2076d84203d1 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -43,7 +43,7 @@ static atomic_t dst_total = ATOMIC_INIT(0); */ static struct { spinlock_t lock; - struct dst_entry *list; + struct dst_entry *list; unsigned long timer_inc; unsigned long timer_expires; } dst_garbage = { @@ -51,7 +51,7 @@ static struct { .timer_inc = DST_GC_MAX, }; static void dst_gc_task(struct work_struct *work); -static void ___dst_free(struct dst_entry * dst); +static void ___dst_free(struct dst_entry *dst); static DECLARE_DELAYED_WORK(dst_gc_work, dst_gc_task); @@ -135,8 +135,8 @@ loop: } expires = dst_garbage.timer_expires; /* - * if the next desired timer is more than 4 seconds in the future - * then round the timer to whole seconds + * if the next desired timer is more than 4 seconds in the + * future then round the timer to whole seconds */ if (expires > 4*HZ) expires = round_jiffies_relative(expires); @@ -151,7 +151,8 @@ loop: " expires: %lu elapsed: %lu us\n", atomic_read(&dst_total), delayed, work_performed, expires, - elapsed.tv_sec * USEC_PER_SEC + elapsed.tv_nsec / NSEC_PER_USEC); + elapsed.tv_sec * USEC_PER_SEC + + elapsed.tv_nsec / NSEC_PER_USEC); #endif } @@ -162,9 +163,9 @@ int dst_discard(struct sk_buff *skb) } EXPORT_SYMBOL(dst_discard); -void * dst_alloc(struct dst_ops * ops) +void *dst_alloc(struct dst_ops *ops) { - struct dst_entry * dst; + struct dst_entry *dst; if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) { if (ops->gc(ops)) @@ -184,19 +185,20 @@ void * dst_alloc(struct dst_ops * ops) atomic_inc(&ops->entries); return dst; } +EXPORT_SYMBOL(dst_alloc); -static void ___dst_free(struct dst_entry * dst) +static void ___dst_free(struct dst_entry *dst) { /* The first case (dev==NULL) is required, when protocol module is unloaded. 
*/ - if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) { + if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) dst->input = dst->output = dst_discard; - } dst->obsolete = 2; } +EXPORT_SYMBOL(__dst_free); -void __dst_free(struct dst_entry * dst) +void __dst_free(struct dst_entry *dst) { spin_lock_bh(&dst_garbage.lock); ___dst_free(dst); @@ -261,15 +263,16 @@ again: } return NULL; } +EXPORT_SYMBOL(dst_destroy); void dst_release(struct dst_entry *dst) { if (dst) { - int newrefcnt; + int newrefcnt; smp_mb__before_atomic_dec(); - newrefcnt = atomic_dec_return(&dst->__refcnt); - WARN_ON(newrefcnt < 0); + newrefcnt = atomic_dec_return(&dst->__refcnt); + WARN_ON(newrefcnt < 0); } } EXPORT_SYMBOL(dst_release); @@ -305,7 +308,8 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev, } } -static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr) +static int dst_dev_event(struct notifier_block *this, unsigned long event, + void *ptr) { struct net_device *dev = ptr; struct dst_entry *dst, *last = NULL; @@ -328,9 +332,8 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, void last->next = dst; else dst_busy_list = dst; - for (; dst; dst = dst->next) { + for (; dst; dst = dst->next) dst_ifdown(dst, dev, event != NETDEV_DOWN); - } mutex_unlock(&dst_gc_mutex); break; } @@ -345,7 +348,3 @@ void __init dst_init(void) { register_netdevice_notifier(&dst_dev_notifier); } - -EXPORT_SYMBOL(__dst_free); -EXPORT_SYMBOL(dst_alloc); -EXPORT_SYMBOL(dst_destroy); -- cgit v1.2.2 From 8379d07031e59a5d72bc73a6060c4d63aac956ce Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Tue, 30 Mar 2010 14:24:12 +0000 Subject: tipc: define needless global scoped variable static struct _zone *tipc_zones has local scope level and should defined with the correct scoping. CC: Per Liden Signed-off-by: Hagen Paul Pfeifer Signed-off-by: David S. Miller --- net/tipc/net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/tipc/net.c b/net/tipc/net.c index 79ce8fa2b77a..d7cd1e064a80 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -116,7 +116,7 @@ */ DEFINE_RWLOCK(tipc_net_lock); -struct _zone *tipc_zones[256] = { NULL, }; +static struct _zone *tipc_zones[256] = { NULL, }; struct network tipc_net = { tipc_zones }; struct tipc_node *tipc_net_select_remote_node(u32 addr, u32 ref) -- cgit v1.2.2 From b68c92460d380c59891ba97531edbe5b01f5ea0b Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Tue, 30 Mar 2010 14:24:57 +0000 Subject: sctp: eliminate useless code Remove duplicate declaration of symbol: struct hlist_node *node was already declared, the seconds declaration shadows the first one. CC: Vlad Yasevich Signed-off-by: Hagen Paul Pfeifer Signed-off-by: David S. Miller --- net/sctp/socket.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index dfc5c127efd4..d80ee3a2f110 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5481,7 +5481,6 @@ pp_found: */ int reuse = sk->sk_reuse; struct sock *sk2; - struct hlist_node *node; SCTP_DEBUG_PRINTK("sctp_get_port() found a possible match\n"); if (pp->fastreuse && sk->sk_reuse && -- cgit v1.2.2 From 55f98938b5cea8949077c79813c4f86ef0018858 Mon Sep 17 00:00:00 2001 From: Frans Pop Date: Wed, 24 Mar 2010 19:46:29 +0100 Subject: wireless: remove trailing space in messages Also correct indentation in net/wireless/reg.c. Signed-off-by: Frans Pop Signed-off-by: John W. 
Linville --- net/mac80211/agg-tx.c | 4 ++-- net/wireless/reg.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 5538e1b4a697..bb4ac70fc97a 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -414,7 +414,7 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, struct sta_info *sta, u16 tid) { #ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Aggregation is on for tid %d \n", tid); + printk(KERN_DEBUG "Aggregation is on for tid %d\n", tid); #endif spin_lock(&local->ampdu_lock); @@ -674,7 +674,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, del_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); #ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "switched off addBA timer for tid %d \n", tid); + printk(KERN_DEBUG "switched off addBA timer for tid %d\n", tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ if (le16_to_cpu(mgmt->u.action.u.addba_resp.status) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index ed89c59bb431..e857d72c7e8c 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2357,10 +2357,10 @@ static void print_regdomain(const struct ieee80211_regdomain *rd) rdev->country_ie_alpha2[1]); } else printk(KERN_INFO "cfg80211: Current regulatory " - "domain intersected: \n"); + "domain intersected:\n"); } else - printk(KERN_INFO "cfg80211: Current regulatory " - "domain intersected: \n"); + printk(KERN_INFO "cfg80211: Current regulatory " + "domain intersected:\n"); } else if (is_world_regdom(rd->alpha2)) printk(KERN_INFO "cfg80211: World regulatory " "domain updated:\n"); -- cgit v1.2.2 From e3cf8b3f7b9eefbe1d39b160726d6e5c2cbb4c5d Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Mon, 29 Mar 2010 17:35:07 +0800 Subject: mac80211: support paged rx SKBs Mac80211 drivers can now pass paged SKBs to mac80211 via ieee80211_rx{_irqsafe}. The implementation currently use skb_linearize() in a few places i.e. management frame handling, software decryption, defragmentation and A-MSDU process. We will optimize them one by one later. Signed-off-by: Zhu Yi Cc: Kalle Valo Cc: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/rx.c | 36 ++++++++++++++++++++++++++++++++---- net/wireless/util.c | 24 ++++++++++++++++++------ 2 files changed, 50 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 1da57c8e849a..11ed5aa90f83 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -38,7 +38,7 @@ static struct sk_buff *remove_monitor_info(struct ieee80211_local *local, { if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) { if (likely(skb->len > FCS_LEN)) - skb_trim(skb, skb->len - FCS_LEN); + __pskb_trim(skb, skb->len - FCS_LEN); else { /* driver bug */ WARN_ON(1); @@ -227,6 +227,12 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) present_fcs_len = FCS_LEN; + /* make sure hdr->frame_control is on the linear part */ + if (!pskb_may_pull(origskb, 2)) { + dev_kfree_skb(origskb); + return NULL; + } + if (!local->monitors) { if (should_drop_frame(origskb, present_fcs_len)) { dev_kfree_skb(origskb); @@ -931,6 +937,9 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) return RX_DROP_MONITOR; } + if (skb_linearize(rx->skb)) + return RX_DROP_UNUSABLE; + /* Check for weak IVs if possible */ if (rx->sta && rx->key->conf.alg == ALG_WEP && ieee80211_is_data(hdr->frame_control) && @@ -1231,6 +1240,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) } I802_DEBUG_INC(rx->local->rx_handlers_fragments); + if (skb_linearize(rx->skb)) + return RX_DROP_UNUSABLE; + seq = (sc & IEEE80211_SCTL_SEQ) >> 4; if (frag == 0) { @@ -1588,6 +1600,9 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) skb->dev = dev; __skb_queue_head_init(&frame_list); + if (skb_linearize(skb)) + return RX_DROP_UNUSABLE; + ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr, rx->sdata->vif.type, rx->local->hw.extra_tx_headroom); @@ -2357,29 +2372,42 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_sub_if_data *sdata; struct ieee80211_hdr *hdr; + __le16 fc; struct ieee80211_rx_data rx; int prepares; struct ieee80211_sub_if_data *prev = NULL; struct sk_buff *skb_new; struct sta_info *sta, *tmp; bool found_sta = false; + int err = 0; - hdr = (struct ieee80211_hdr *)skb->data; + fc = ((struct ieee80211_hdr *)skb->data)->frame_control; memset(&rx, 0, sizeof(rx)); rx.skb = skb; rx.local = local; - if (ieee80211_is_data(hdr->frame_control) || ieee80211_is_mgmt(hdr->frame_control)) + if (ieee80211_is_data(fc) || ieee80211_is_mgmt(fc)) local->dot11ReceivedFragmentCount++; if (unlikely(test_bit(SCAN_HW_SCANNING, &local->scanning) || test_bit(SCAN_OFF_CHANNEL, &local->scanning))) rx.flags |= IEEE80211_RX_IN_SCAN; + if (ieee80211_is_mgmt(fc)) + err = skb_linearize(skb); + else + err = !pskb_may_pull(skb, ieee80211_hdrlen(fc)); + + if (err) { + dev_kfree_skb(skb); + return; + } + + hdr = (struct ieee80211_hdr *)skb->data; ieee80211_parse_qos(&rx); ieee80211_verify_alignment(&rx); - if (ieee80211_is_data(hdr->frame_control)) { + if (ieee80211_is_data(fc)) { for_each_sta_info(local, hdr->addr2, sta, tmp) { rx.sta = sta; found_sta = true; diff --git a/net/wireless/util.c b/net/wireless/util.c index be2ab8c59e3a..7acb81b9675d 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -330,11 +330,18 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, if (iftype == NL80211_IFTYPE_MESH_POINT) { struct ieee80211s_hdr *meshdr = (struct ieee80211s_hdr *) (skb->data + hdrlen); - hdrlen += 
ieee80211_get_mesh_hdrlen(meshdr); + /* make sure meshdr->flags is on the linear part */ + if (!pskb_may_pull(skb, hdrlen + 1)) + return -1; if (meshdr->flags & MESH_FLAGS_AE_A5_A6) { - memcpy(dst, meshdr->eaddr1, ETH_ALEN); - memcpy(src, meshdr->eaddr2, ETH_ALEN); + skb_copy_bits(skb, hdrlen + + offsetof(struct ieee80211s_hdr, eaddr1), + dst, ETH_ALEN); + skb_copy_bits(skb, hdrlen + + offsetof(struct ieee80211s_hdr, eaddr2), + src, ETH_ALEN); } + hdrlen += ieee80211_get_mesh_hdrlen(meshdr); } break; case cpu_to_le16(IEEE80211_FCTL_FROMDS): @@ -346,9 +353,14 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, if (iftype == NL80211_IFTYPE_MESH_POINT) { struct ieee80211s_hdr *meshdr = (struct ieee80211s_hdr *) (skb->data + hdrlen); - hdrlen += ieee80211_get_mesh_hdrlen(meshdr); + /* make sure meshdr->flags is on the linear part */ + if (!pskb_may_pull(skb, hdrlen + 1)) + return -1; if (meshdr->flags & MESH_FLAGS_AE_A4) - memcpy(src, meshdr->eaddr1, ETH_ALEN); + skb_copy_bits(skb, hdrlen + + offsetof(struct ieee80211s_hdr, eaddr1), + src, ETH_ALEN); + hdrlen += ieee80211_get_mesh_hdrlen(meshdr); } break; case cpu_to_le16(0): @@ -357,7 +369,7 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, break; } - if (unlikely(skb->len - hdrlen < 8)) + if (!pskb_may_pull(skb, hdrlen + 8)) return -1; payload = skb->data + hdrlen; -- cgit v1.2.2 From e1b3ec1a2a336c328c336cfa5485a5f0484cc90d Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Mon, 29 Mar 2010 12:18:34 +0200 Subject: mac80211: explicitly disable/enable QoS Add interface to disable/enable QoS (aka WMM or WME). Currently drivers enable it explicitly when ->conf_tx method is called, and newer disable. Disabling is needed for some APs, which do not support QoS, such we should send QoS frames to them. Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. 
Linville --- net/mac80211/mlme.c | 9 ++++++++- net/mac80211/util.c | 5 +++++ 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 65eafda5738a..c686d1b90f9f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -586,6 +586,9 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, int count; u8 *pos, uapsd_queues = 0; + if (!local->ops->conf_tx) + return; + if (local->hw.queues < 4) return; @@ -660,11 +663,15 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, params.aifs, params.cw_min, params.cw_max, params.txop, params.uapsd); #endif - if (drv_conf_tx(local, queue, ¶ms) && local->ops->conf_tx) + if (drv_conf_tx(local, queue, ¶ms)) printk(KERN_DEBUG "%s: failed to set TX queue " "parameters for queue %d\n", wiphy_name(local->hw.wiphy), queue); } + + /* enable WMM or activate new settings */ + local->hw.conf.flags |= IEEE80211_CONF_QOS; + drv_config(local, IEEE80211_CONF_CHANGE_QOS); } static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/util.c b/net/mac80211/util.c index c453226f06b2..7b2c170af71c 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -796,6 +796,11 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata) drv_conf_tx(local, queue, &qparam); } + + /* after reinitialize QoS TX queues setting to default, + * disable QoS at all */ + local->hw.conf.flags &= ~IEEE80211_CONF_QOS; + drv_config(local, IEEE80211_CONF_CHANGE_QOS); } void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, -- cgit v1.2.2 From 0af26b278bc1d747370b451595b7586cb7b3455c Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Mon, 29 Mar 2010 12:18:36 +0200 Subject: mac80211: enable QoS explicitly in AP mode Enable QoS explicitly, when user space AP program will setup a QoS queues. Currently this is not needed as iwlwifi not work in AP mode and no other driver implement enable/disable QoS. Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index c8f520529eec..a4ca425e4f3f 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1136,6 +1136,10 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy, return -EINVAL; } + /* enable WMM or activate new settings */ + local->hw.conf.flags |= IEEE80211_CONF_QOS; + drv_config(local, IEEE80211_CONF_CHANGE_QOS); + return 0; } -- cgit v1.2.2 From 17e4ec147f4939ca8c81b41b4261ec7974531381 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 29 Mar 2010 23:28:30 -0700 Subject: mac80211: Track Beacon signal strength and implement cqm events Calculate a running average of the signal strength reported for Beacon frames and indicate cqm events if the average value moves below or above the configured threshold value (and filter out repetitive events with by using the configured hysteresis). Signed-off-by: Jouni Malinen Signed-off-by: John W. 
Linville --- net/mac80211/cfg.c | 9 ++++++--- net/mac80211/debugfs_netdev.c | 12 ++++++++++++ net/mac80211/ieee80211_i.h | 19 ++++++++++++++++++ net/mac80211/mlme.c | 45 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 82 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a4ca425e4f3f..4edd73cbf052 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1415,9 +1415,6 @@ static int ieee80211_set_cqm_rssi_config(struct wiphy *wiphy, struct ieee80211_vif *vif = &sdata->vif; struct ieee80211_bss_conf *bss_conf = &vif->bss_conf; - if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI)) - return -EOPNOTSUPP; - if (rssi_thold == bss_conf->cqm_rssi_thold && rssi_hyst == bss_conf->cqm_rssi_hyst) return 0; @@ -1425,6 +1422,12 @@ static int ieee80211_set_cqm_rssi_config(struct wiphy *wiphy, bss_conf->cqm_rssi_thold = rssi_thold; bss_conf->cqm_rssi_hyst = rssi_hyst; + if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI)) { + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return -EOPNOTSUPP; + return 0; + } + /* tell the driver upon association, unless already associated */ if (sdata->u.mgd.associated) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_CQM); diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 9affe2cd185f..ee61a9f6fabc 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -95,6 +95,14 @@ static ssize_t ieee80211_if_fmt_##name( \ return scnprintf(buf, buflen, "%pM\n", sdata->field); \ } +#define IEEE80211_IF_FMT_DEC_DIV_16(name, field) \ +static ssize_t ieee80211_if_fmt_##name( \ + const struct ieee80211_sub_if_data *sdata, \ + char *buf, int buflen) \ +{ \ + return scnprintf(buf, buflen, "%d\n", sdata->field / 16); \ +} + #define __IEEE80211_IF_FILE(name, _write) \ static ssize_t ieee80211_if_read_##name(struct file *file, \ char __user *userbuf, \ @@ -135,6 +143,8 @@ IEEE80211_IF_FILE(rc_rateidx_mask_5ghz, rc_rateidx_mask[IEEE80211_BAND_5GHZ], /* STA attributes */ IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC); IEEE80211_IF_FILE(aid, u.mgd.aid, DEC); +IEEE80211_IF_FILE(last_beacon, u.mgd.last_beacon_signal, DEC); +IEEE80211_IF_FILE(ave_beacon, u.mgd.ave_beacon_signal, DEC_DIV_16); static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps_mode) @@ -271,6 +281,8 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(bssid); DEBUGFS_ADD(aid); + DEBUGFS_ADD(last_beacon); + DEBUGFS_ADD(ave_beacon); DEBUGFS_ADD_MODE(smps, 0600); } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index ab369e2a5282..741fb8bbc4a0 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -317,6 +317,7 @@ enum ieee80211_sta_flags { IEEE80211_STA_MFP_ENABLED = BIT(6), IEEE80211_STA_UAPSD_ENABLED = BIT(7), IEEE80211_STA_NULLFUNC_ACKED = BIT(8), + IEEE80211_STA_RESET_SIGNAL_AVE = BIT(9), }; struct ieee80211_if_managed { @@ -359,6 +360,24 @@ struct ieee80211_if_managed { int wmm_last_param_set; u8 use_4addr; + + /* Signal strength from the last Beacon frame in the current BSS. */ + int last_beacon_signal; + + /* + * Weighted average of the signal strength from Beacon frames in the + * current BSS. This is in units of 1/16 of the signal unit to maintain + * accuracy and to speed up calculations, i.e., the value need to be + * divided by 16 to get the actual value. 
+ */ + int ave_beacon_signal; + + /* + * Last Beacon frame signal strength average (ave_beacon_signal / 16) + * that triggered a cqm event. 0 indicates that no event has been + * generated for the current association. + */ + int last_cqm_event_signal; }; enum ieee80211_ibss_request { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c686d1b90f9f..de7519eb2b5d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -46,6 +46,13 @@ */ #define IEEE80211_PROBE_WAIT (HZ / 2) +/* + * Weight given to the latest Beacon frame when calculating average signal + * strength for Beacon frames received in the current BSS. This must be + * between 1 and 15. + */ +#define IEEE80211_SIGNAL_AVE_WEIGHT 3 + #define TMR_RUNNING_TIMER 0 #define TMR_RUNNING_CHANSW 1 @@ -732,6 +739,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, sdata->u.mgd.associated = cbss; memcpy(sdata->u.mgd.bssid, cbss->bssid, ETH_ALEN); + sdata->u.mgd.flags |= IEEE80211_STA_RESET_SIGNAL_AVE; + /* just to be sure */ sdata->u.mgd.flags &= ~(IEEE80211_STA_CONNECTION_POLL | IEEE80211_STA_BEACON_POLL); @@ -1347,6 +1356,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, struct ieee80211_rx_status *rx_status) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; size_t baselen; struct ieee802_11_elems elems; struct ieee80211_local *local = sdata->local; @@ -1382,6 +1392,41 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (memcmp(bssid, mgmt->bssid, ETH_ALEN) != 0) return; + /* Track average RSSI from the Beacon frames of the current AP */ + ifmgd->last_beacon_signal = rx_status->signal; + if (ifmgd->flags & IEEE80211_STA_RESET_SIGNAL_AVE) { + ifmgd->flags &= ~IEEE80211_STA_RESET_SIGNAL_AVE; + ifmgd->ave_beacon_signal = rx_status->signal; + ifmgd->last_cqm_event_signal = 0; + } else { + ifmgd->ave_beacon_signal = + (IEEE80211_SIGNAL_AVE_WEIGHT * rx_status->signal * 16 + + (16 - IEEE80211_SIGNAL_AVE_WEIGHT) * + ifmgd->ave_beacon_signal) / 16; + } + if (bss_conf->cqm_rssi_thold && + !(local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI)) { + int sig = ifmgd->ave_beacon_signal / 16; + int last_event = ifmgd->last_cqm_event_signal; + int thold = bss_conf->cqm_rssi_thold; + int hyst = bss_conf->cqm_rssi_hyst; + if (sig < thold && + (last_event == 0 || sig < last_event - hyst)) { + ifmgd->last_cqm_event_signal = sig; + ieee80211_cqm_rssi_notify( + &sdata->vif, + NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW, + GFP_KERNEL); + } else if (sig > thold && + (last_event == 0 || sig > last_event + hyst)) { + ifmgd->last_cqm_event_signal = sig; + ieee80211_cqm_rssi_notify( + &sdata->vif, + NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH, + GFP_KERNEL); + } + } + if (ifmgd->flags & IEEE80211_STA_BEACON_POLL) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG if (net_ratelimit()) { -- cgit v1.2.2 From e69e95dbecfb73f76765cdd16dadc6219a9068e3 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 29 Mar 2010 23:29:31 -0700 Subject: mac80211: Send deauth/disassoc prior to dropping STA entry When management frame protection (IEEE 802.11w) is used, the deauthentication and disassociation frames must be protected whenever the encryption keys are configured. We were removing the STA entry and with it, the keys, just before actually sending out these frames which meant that the frames went out unprotected. The AP will drop them in such a case. 
Fix this by reordering the operations a bit so that sta_info_destroy_addr() gets called only after ieee80211_send_deauth_disassoc(). Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index de7519eb2b5d..57a3c62139e2 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -782,7 +782,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, netif_carrier_on(sdata->dev); } -static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata) +static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, + bool remove_sta) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; @@ -855,7 +856,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata) changed |= BSS_CHANGED_BSSID; ieee80211_bss_info_change_notify(sdata, changed); - sta_info_destroy_addr(sdata, bssid); + if (remove_sta) + sta_info_destroy_addr(sdata, bssid); } void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, @@ -968,7 +970,7 @@ static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata) printk(KERN_DEBUG "Connection to AP %pM lost.\n", bssid); - ieee80211_set_disassoc(sdata); + ieee80211_set_disassoc(sdata, true); ieee80211_recalc_idle(local); mutex_unlock(&ifmgd->mtx); /* @@ -1034,7 +1036,7 @@ ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, printk(KERN_DEBUG "%s: deauthenticated from %pM (Reason: %u)\n", sdata->name, bssid, reason_code); - ieee80211_set_disassoc(sdata); + ieee80211_set_disassoc(sdata, true); ieee80211_recalc_idle(sdata->local); return RX_MGMT_CFG80211_DEAUTH; @@ -1064,7 +1066,7 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, printk(KERN_DEBUG "%s: disassociated from %pM (Reason: %u)\n", sdata->name, mgmt->sa, reason_code); - ieee80211_set_disassoc(sdata); + ieee80211_set_disassoc(sdata, true); ieee80211_recalc_idle(sdata->local); return RX_MGMT_CFG80211_DISASSOC; } @@ -1712,7 +1714,7 @@ static void ieee80211_sta_work(struct work_struct *work) printk(KERN_DEBUG "No probe response from AP %pM" " after %dms, disconnecting.\n", bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ); - ieee80211_set_disassoc(sdata); + ieee80211_set_disassoc(sdata, true); ieee80211_recalc_idle(local); mutex_unlock(&ifmgd->mtx); /* @@ -2014,7 +2016,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, } /* Trying to reassociate - clear previous association state */ - ieee80211_set_disassoc(sdata); + ieee80211_set_disassoc(sdata, true); } mutex_unlock(&ifmgd->mtx); @@ -2118,7 +2120,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, if (ifmgd->associated == req->bss) { bssid = req->bss->bssid; - ieee80211_set_disassoc(sdata); + ieee80211_set_disassoc(sdata, true); mutex_unlock(&ifmgd->mtx); } else { bool not_auth_yet = false; @@ -2175,6 +2177,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, void *cookie) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + u8 bssid[ETH_ALEN]; mutex_lock(&ifmgd->mtx); @@ -2192,13 +2195,15 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, printk(KERN_DEBUG "%s: disassociating from %pM by local choice (reason=%d)\n", sdata->name, req->bss->bssid, req->reason_code); - ieee80211_set_disassoc(sdata); + memcpy(bssid, req->bss->bssid, ETH_ALEN); + ieee80211_set_disassoc(sdata, false); 
mutex_unlock(&ifmgd->mtx); ieee80211_send_deauth_disassoc(sdata, req->bss->bssid, IEEE80211_STYPE_DISASSOC, req->reason_code, cookie); + sta_info_destroy_addr(sdata, bssid); ieee80211_recalc_idle(sdata->local); -- cgit v1.2.2 From ecbcd3243651ae8ac2b73a96c320992a4cf01c5b Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 29 Mar 2010 23:35:23 -0700 Subject: mac80211: Fix BIP to be used only with group-addressed frames BIP (part of IEEE 802.11w) is only supposed to be used with group-addressed frames. We ended up picking it as a default mechanism for every management whenever we did not have a STA entry for the destination (e.g., for Probe Response to a STA that is not associated). While the extra MMIE in the end of management frames should not break frames completed in most cases, there is no point in doing this. Fix key selection to pick the default management key only if the frame is sent to multicast/broadcast address and the frame is a robust management frame. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- net/mac80211/tx.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 08e1f17a4226..350096afe79a 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -513,6 +513,8 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) else if (tx->sta && (key = rcu_dereference(tx->sta->key))) tx->key = key; else if (ieee80211_is_mgmt(hdr->frame_control) && + is_multicast_ether_addr(hdr->addr1) && + ieee80211_is_robust_mgmt_frame(hdr) && (key = rcu_dereference(tx->sdata->default_mgmt_key))) tx->key = key; else if ((key = rcu_dereference(tx->sdata->default_key))) -- cgit v1.2.2 From fa83a2189870cdcd6fb4deeed391e0b988dc9a19 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 29 Mar 2010 23:36:15 -0700 Subject: mac80211: Fix dropping of unprotected robust multicast frames When selecting the RX key for group-addressed robust management frames, we do not actually select any BIP key if the frame is unprotected (since we cannot find the key index from MMIE). This results in the drop_unencrypted check in failing to drop the frame. It is enough to verify that we have a STA entry for the transmitter and that MFP is enabled for that STA; we do not need to check rx->key here. This fixes BIP processing for unprotected, group-addressed, robust management frames. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- net/mac80211/rx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 11ed5aa90f83..ea71e1abc4c8 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1421,8 +1421,7 @@ ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) return -EACCES; /* BIP does not use Protected field, so need to check MMIE */ if (unlikely(ieee80211_is_multicast_robust_mgmt_frame(rx->skb) && - ieee80211_get_mmie_keyidx(rx->skb) < 0 && - rx->key)) + ieee80211_get_mmie_keyidx(rx->skb) < 0)) return -EACCES; /* * When using MFP, Action frames are not allowed prior to -- cgit v1.2.2 From d5d9de024c157a3dfbab191241c5c51e4d4c069a Mon Sep 17 00:00:00 2001 From: Marco Porsch Date: Tue, 30 Mar 2010 10:00:16 +0200 Subject: nl80211: reenable station del for mesh iw dev station del is quiet useful in mesh mode and should be possible. Signed-off-by: Marco Porsch Signed-off-by: John W. 
Linville --- net/wireless/nl80211.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a7fc3d83f5f6..95149f303409 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2096,7 +2096,8 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) goto out_rtnl; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN) { + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) { err = -EINVAL; goto out; } -- cgit v1.2.2 From e3efca0a63b4ac4d8849d37d082a95cf1a75162d Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sun, 28 Mar 2010 22:31:15 -0700 Subject: mac80211: Fix drop_unencrypted for MFP with hwaccel Commit bef5d1c70d132145c0fc75b3586a19841a9a82e4 split ieee80211_drop_unencrypted() into separate functions that are used for Data and Management frames. However, it did not handle the RX_FLAG_DECRYPTED correctly for Management frames: ieee80211_drop_unencrypted() can only return 0 for Management frames, so there is no point in calling it here. Instead, just check the status->flag directly. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- net/mac80211/rx.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index ea71e1abc4c8..14366d4afbed 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1408,12 +1408,15 @@ static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); __le16 fc = hdr->frame_control; - int res; - res = ieee80211_drop_unencrypted(rx, fc); - if (unlikely(res)) - return res; + /* + * Pass through unencrypted frames if the hardware has + * decrypted them already. + */ + if (status->flag & RX_FLAG_DECRYPTED) + return 0; if (rx->sta && test_sta_flags(rx->sta, WLAN_STA_MFP)) { if (unlikely(ieee80211_is_unicast_robust_mgmt_frame(rx->skb) && -- cgit v1.2.2 From 6c57990696a16ae43ea9fddb131b2784292068ba Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 1 Apr 2010 00:28:49 -0700 Subject: net-caif: using kmalloc/kfree requires the include of slab.h Signed-off-by: Stephen Rothwell Signed-off-by: David S. Miller --- net/caif/cfcnfg.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index 70a733d3d3da..c873e3d4387c 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -5,6 +5,7 @@ */ #include #include +#include #include #include #include -- cgit v1.2.2 From d26e6a02835affa8bafe09a51e37f9fbc339e415 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 1 Apr 2010 12:39:19 +0200 Subject: netfilter: ctnetlink: compute message size properly Message size should be dependent on the presence of an accounting extension, not on CONFIG_NF_CT_ACCT definition. 
Signed-off-by: Jiri Pirko Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_netlink.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 2b2af631d2b8..9a0c0d99dbfd 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -425,6 +425,17 @@ ctnetlink_proto_size(const struct nf_conn *ct) return len; } +static inline size_t +ctnetlink_counters_size(const struct nf_conn *ct) +{ + if (!nf_ct_ext_exist(ct, NF_CT_EXT_ACCT)) + return 0; + return 2 * nla_total_size(0) /* CTA_COUNTERS_ORIG|REPL */ + + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_PACKETS */ + + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_BYTES */ + ; +} + static inline size_t ctnetlink_nlmsg_size(const struct nf_conn *ct) { @@ -435,11 +446,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) + 3 * nla_total_size(sizeof(u_int8_t)) /* CTA_PROTO_NUM */ + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */ + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */ -#ifdef CONFIG_NF_CT_ACCT - + 2 * nla_total_size(0) /* CTA_COUNTERS_ORIG|REPL */ - + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_PACKETS */ - + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_BYTES */ -#endif + + ctnetlink_counters_size(ct) + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */ + nla_total_size(0) /* CTA_PROTOINFO */ + nla_total_size(0) /* CTA_HELP */ -- cgit v1.2.2 From 902a3dd5e6b19048604ec533203d7d38a39505a2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 1 Apr 2010 12:54:09 +0200 Subject: netfilter: CLUSTERIP: clusterip_seq_stop() fix If clusterip_seq_start() memory allocation fails, we crash later in clusterip_seq_start(), trying to kfree(ERR_PTR(-ENOMEM)) Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 5d70c43302bb..c6be74e57264 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -599,7 +599,8 @@ static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) static void clusterip_seq_stop(struct seq_file *s, void *v) { - kfree(v); + if (!IS_ERR(v)) + kfree(v); } static int clusterip_seq_show(struct seq_file *s, void *v) -- cgit v1.2.2 From 02e4eb75912a5c8babccc1acdc9cc913989be04e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 1 Apr 2010 14:35:56 +0200 Subject: netfilter: xt_hashlimit: RCU conversion xt_hashlimit uses a central lock per hash table and suffers from contention on some workloads. (Multiqueue NIC or if RPS is enabled) After RCU conversion, central lock is only used when a writer wants to add or delete an entry. For 'readers', updating an existing entry, they use an individual lock per entry. 
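The sizing rule above (reserve room for the counter attributes only when the accounting extension is actually attached to the entry, rather than keying on a build-time option) is easy to see in isolation. Below is a minimal standalone sketch of that pattern in plain C; the entry struct, its has_acct flag and attr_total_size() are made-up stand-ins for the conntrack extension check and nla_total_size(), not the kernel interfaces themselves.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for nla_total_size(): attribute header plus padded payload. */
static size_t attr_total_size(size_t payload)
{
	return 4 /* header */ + ((payload + 3) & ~(size_t)3);
}

/* Hypothetical entry: accounting data may or may not be attached at runtime. */
struct entry {
	bool has_acct;	/* true if the accounting extension is present */
};

/* Contribute the counter attributes only when the extension exists. */
static size_t counters_size(const struct entry *e)
{
	if (!e->has_acct)
		return 0;
	return 2 * attr_total_size(0)			/* nested orig/reply */
	     + 2 * attr_total_size(sizeof(uint64_t))	/* packets */
	     + 2 * attr_total_size(sizeof(uint64_t));	/* bytes */
}

static size_t msg_size(const struct entry *e)
{
	size_t base = 64;		/* fixed attributes, arbitrary number here */

	return base + counters_size(e);	/* runtime state, not an #ifdef, adds the extra room */
}

int main(void)
{
	struct entry plain = { .has_acct = false }, counted = { .has_acct = true };

	printf("without accounting: %zu bytes\n", msg_size(&plain));
	printf("with accounting:    %zu bytes\n", msg_size(&counted));
	return 0;
}

Either way the caller allocates exactly what the attributes will need, instead of whatever the kernel happened to be compiled with.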
Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/netfilter/xt_hashlimit.c | 70 +++++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 5470bb097c48..453178d25cba 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -81,12 +81,14 @@ struct dsthash_ent { struct dsthash_dst dst; /* modified structure members in the end */ + spinlock_t lock; unsigned long expires; /* precalculated expiry time */ struct { unsigned long prev; /* last modification */ u_int32_t credit; u_int32_t credit_cap, cost; } rateinfo; + struct rcu_head rcu; }; struct xt_hashlimit_htable { @@ -143,9 +145,11 @@ dsthash_find(const struct xt_hashlimit_htable *ht, u_int32_t hash = hash_dst(ht, dst); if (!hlist_empty(&ht->hash[hash])) { - hlist_for_each_entry(ent, pos, &ht->hash[hash], node) - if (dst_cmp(ent, dst)) + hlist_for_each_entry_rcu(ent, pos, &ht->hash[hash], node) + if (dst_cmp(ent, dst)) { + spin_lock(&ent->lock); return ent; + } } return NULL; } @@ -157,9 +161,10 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht, { struct dsthash_ent *ent; + spin_lock(&ht->lock); /* initialize hash with random val at the time we allocate * the first hashtable entry */ - if (!ht->rnd_initialized) { + if (unlikely(!ht->rnd_initialized)) { get_random_bytes(&ht->rnd, sizeof(ht->rnd)); ht->rnd_initialized = true; } @@ -168,27 +173,36 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht, /* FIXME: do something. question is what.. */ if (net_ratelimit()) pr_err("max count of %u reached\n", ht->cfg.max); - return NULL; - } - - ent = kmem_cache_alloc(hashlimit_cachep, GFP_ATOMIC); + ent = NULL; + } else + ent = kmem_cache_alloc(hashlimit_cachep, GFP_ATOMIC); if (!ent) { if (net_ratelimit()) pr_err("cannot allocate dsthash_ent\n"); - return NULL; - } - memcpy(&ent->dst, dst, sizeof(ent->dst)); + } else { + memcpy(&ent->dst, dst, sizeof(ent->dst)); + spin_lock_init(&ent->lock); - hlist_add_head(&ent->node, &ht->hash[hash_dst(ht, dst)]); - ht->count++; + spin_lock(&ent->lock); + hlist_add_head_rcu(&ent->node, &ht->hash[hash_dst(ht, dst)]); + ht->count++; + } + spin_unlock(&ht->lock); return ent; } +static void dsthash_free_rcu(struct rcu_head *head) +{ + struct dsthash_ent *ent = container_of(head, struct dsthash_ent, rcu); + + kmem_cache_free(hashlimit_cachep, ent); +} + static inline void dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent) { - hlist_del(&ent->node); - kmem_cache_free(hashlimit_cachep, ent); + hlist_del_rcu(&ent->node); + call_rcu_bh(&ent->rcu, dsthash_free_rcu); ht->count--; } static void htable_gc(unsigned long htlong); @@ -512,15 +526,14 @@ hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0) goto hotdrop; - spin_lock_bh(&hinfo->lock); + rcu_read_lock_bh(); dh = dsthash_find(hinfo, &dst); if (dh == NULL) { dh = dsthash_alloc_init(hinfo, &dst); if (dh == NULL) { - spin_unlock_bh(&hinfo->lock); + rcu_read_unlock_bh(); goto hotdrop; } - dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire); dh->rateinfo.prev = jiffies; dh->rateinfo.credit = user2credits(hinfo->cfg.avg * @@ -537,11 +550,13 @@ hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) if (dh->rateinfo.credit >= dh->rateinfo.cost) { /* below the limit */ dh->rateinfo.credit -= dh->rateinfo.cost; - spin_unlock_bh(&hinfo->lock); + spin_unlock(&dh->lock); + 
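The locking split described above (one coarse table lock that writers take for add and remove, plus a small per-entry lock for the frequent credit updates) can be sketched in userspace roughly as follows. This is only an illustration of the contention-reduction idea under stated simplifications: a reader-writer lock stands in for RCU, the bucket and entry types are invented, and the expiry and garbage-collection side is omitted, so none of this is the actual xt_hashlimit code.

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define NBUCKETS 256

struct ent {
	struct ent *next;
	uint32_t key;
	pthread_mutex_t lock;	/* guards only this entry's credit */
	uint32_t credit;
};

struct table {
	pthread_rwlock_t lock;	/* structural changes vs. lookups (RCU stand-in) */
	struct ent *bucket[NBUCKETS];
};

static struct table tbl = { .lock = PTHREAD_RWLOCK_INITIALIZER };

/* Writers (add/remove) still serialize on the table-wide lock. */
static struct ent *add_entry(uint32_t key, uint32_t credit)
{
	struct ent *e = calloc(1, sizeof(*e));

	if (!e)
		return NULL;
	e->key = key;
	e->credit = credit;
	pthread_mutex_init(&e->lock, NULL);

	pthread_rwlock_wrlock(&tbl.lock);
	e->next = tbl.bucket[key % NBUCKETS];
	tbl.bucket[key % NBUCKETS] = e;
	pthread_rwlock_unlock(&tbl.lock);
	return e;
}

/* Readers walk the chain without blocking each other; only the matching
 * entry's own lock is taken while its credit is updated. */
static int charge(uint32_t key, uint32_t cost)
{
	struct ent *e;
	int ok = 0;

	pthread_rwlock_rdlock(&tbl.lock);	/* plays the role of rcu_read_lock_bh() */
	for (e = tbl.bucket[key % NBUCKETS]; e; e = e->next)
		if (e->key == key)
			break;
	if (e) {
		pthread_mutex_lock(&e->lock);
		if (e->credit >= cost) {	/* below the limit: spend credit */
			e->credit -= cost;
			ok = 1;
		}
		pthread_mutex_unlock(&e->lock);
	}
	pthread_rwlock_unlock(&tbl.lock);	/* cf. rcu_read_unlock_bh() after the update */
	return ok;
}

int main(void)
{
	add_entry(42, 10);
	printf("first packet passes:  %d\n", charge(42, 4));
	printf("second packet passes: %d\n", charge(42, 4));
	printf("third packet passes:  %d\n", charge(42, 4));
	return 0;
}

Two packets that hash to different entries now share only the read side of the table lock, which is exactly the contention the patch sets out to remove.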
rcu_read_unlock_bh(); return !(info->cfg.mode & XT_HASHLIMIT_INVERT); } - spin_unlock_bh(&hinfo->lock); + spin_unlock(&dh->lock); + rcu_read_unlock_bh(); /* default match is underlimit - so over the limit, we need to invert */ return info->cfg.mode & XT_HASHLIMIT_INVERT; @@ -666,12 +681,15 @@ static void dl_seq_stop(struct seq_file *s, void *v) static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, struct seq_file *s) { + int res; + + spin_lock(&ent->lock); /* recalculate to show accurate numbers */ rateinfo_recalc(ent, jiffies); switch (family) { case NFPROTO_IPV4: - return seq_printf(s, "%ld %pI4:%u->%pI4:%u %u %u %u\n", + res = seq_printf(s, "%ld %pI4:%u->%pI4:%u %u %u %u\n", (long)(ent->expires - jiffies)/HZ, &ent->dst.ip.src, ntohs(ent->dst.src_port), @@ -679,9 +697,10 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, ntohs(ent->dst.dst_port), ent->rateinfo.credit, ent->rateinfo.credit_cap, ent->rateinfo.cost); + break; #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) case NFPROTO_IPV6: - return seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n", + res = seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n", (long)(ent->expires - jiffies)/HZ, &ent->dst.ip6.src, ntohs(ent->dst.src_port), @@ -689,11 +708,14 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, ntohs(ent->dst.dst_port), ent->rateinfo.credit, ent->rateinfo.credit_cap, ent->rateinfo.cost); + break; #endif default: BUG(); - return 0; + res = 0; } + spin_unlock(&ent->lock); + return res; } static int dl_seq_show(struct seq_file *s, void *v) @@ -817,9 +839,11 @@ err1: static void __exit hashlimit_mt_exit(void) { - kmem_cache_destroy(hashlimit_cachep); xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg)); unregister_pernet_subsys(&hashlimit_net_ops); + + rcu_barrier_bh(); + kmem_cache_destroy(hashlimit_cachep); } module_init(hashlimit_mt_init); -- cgit v1.2.2 From 6503d96168f891ffa3b70ae6c9698a1a722025a0 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Wed, 31 Mar 2010 22:58:26 +0000 Subject: net: check the length of the socket address passed to connect(2) check the length of the socket address passed to connect(2). Check the length of the socket address passed to connect(2). If the length is invalid, -EINVAL will be returned. Signed-off-by: Changli Gao ---- net/bluetooth/l2cap.c | 3 ++- net/bluetooth/rfcomm/sock.c | 3 ++- net/bluetooth/sco.c | 3 ++- net/can/bcm.c | 3 +++ net/ieee802154/af_ieee802154.c | 3 +++ net/ipv4/af_inet.c | 5 +++++ net/netlink/af_netlink.c | 3 +++ 7 files changed, 20 insertions(+), 3 deletions(-) Signed-off-by: David S. 
Miller --- net/bluetooth/l2cap.c | 3 ++- net/bluetooth/rfcomm/sock.c | 3 ++- net/bluetooth/sco.c | 3 ++- net/can/bcm.c | 3 +++ net/ieee802154/af_ieee802154.c | 3 +++ net/ipv4/af_inet.c | 5 +++++ net/netlink/af_netlink.c | 3 +++ 7 files changed, 20 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 7794a2e2adce..99d68c34e4f1 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1002,7 +1002,8 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int al BT_DBG("sk %p", sk); - if (!addr || addr->sa_family != AF_BLUETOOTH) + if (!addr || alen < sizeof(addr->sa_family) || + addr->sa_family != AF_BLUETOOTH) return -EINVAL; memset(&la, 0, sizeof(la)); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 7f439765403d..8ed3c37684fa 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -397,7 +397,8 @@ static int rfcomm_sock_connect(struct socket *sock, struct sockaddr *addr, int a BT_DBG("sk %p", sk); - if (addr->sa_family != AF_BLUETOOTH || alen < sizeof(struct sockaddr_rc)) + if (alen < sizeof(struct sockaddr_rc) || + addr->sa_family != AF_BLUETOOTH) return -EINVAL; lock_sock(sk); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index e5b16b76b22e..ca6b2ad1c3fc 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -499,7 +499,8 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen BT_DBG("sk %p", sk); - if (addr->sa_family != AF_BLUETOOTH || alen < sizeof(struct sockaddr_sco)) + if (alen < sizeof(struct sockaddr_sco) || + addr->sa_family != AF_BLUETOOTH) return -EINVAL; if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) diff --git a/net/can/bcm.c b/net/can/bcm.c index e32af52238a2..629ad1debe81 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1478,6 +1478,9 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, struct sock *sk = sock->sk; struct bcm_sock *bo = bcm_sk(sk); + if (len < sizeof(*addr)) + return -EINVAL; + if (bo->bound) return -EISCONN; diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c index bad1c49fd960..01beb6c11205 100644 --- a/net/ieee802154/af_ieee802154.c +++ b/net/ieee802154/af_ieee802154.c @@ -126,6 +126,9 @@ static int ieee802154_sock_connect(struct socket *sock, struct sockaddr *uaddr, { struct sock *sk = sock->sk; + if (addr_len < sizeof(uaddr->sa_family)) + return -EINVAL; + if (uaddr->sa_family == AF_UNSPEC) return sk->sk_prot->disconnect(sk, flags); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 33b7dffa7732..a366861bf4cd 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -530,6 +530,8 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr, { struct sock *sk = sock->sk; + if (addr_len < sizeof(uaddr->sa_family)) + return -EINVAL; if (uaddr->sa_family == AF_UNSPEC) return sk->sk_prot->disconnect(sk, flags); @@ -573,6 +575,9 @@ int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, int err; long timeo; + if (addr_len < sizeof(uaddr->sa_family)) + return -EINVAL; + lock_sock(sk); if (uaddr->sa_family == AF_UNSPEC) { diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index acbbae1e89b5..795424396aff 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -683,6 +683,9 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; + if (alen < 
sizeof(addr->sa_family)) + return -EINVAL; + if (addr->sa_family == AF_UNSPEC) { sk->sk_state = NETLINK_UNCONNECTED; nlk->dst_pid = 0; -- cgit v1.2.2 From b914f3a2a35812545f773645f340d7c075e5b64d Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 1 Apr 2010 10:43:57 +0000 Subject: netlabel: Fix several rcu_dereference() calls used without RCU read locks The recent changes to add RCU lock verification to rcu_dereference() calls caught out a problem with netlbl_unlhsh_hash(), see below. =================================================== [ INFO: suspicious rcu_dereference_check() usage. ] --------------------------------------------------- net/netlabel/netlabel_unlabeled.c:246 invoked rcu_dereference_check() without protection! This patch fixes this problem as well as others like it in the NetLabel code. Also included in this patch is the identification of future work to eliminate the RCU read lock in netlbl_domhsh_add(), but in the interest of getting this patch out quickly that work will happen in another patch to be finished later. Thanks to Eric Dumazet and Paul McKenney for their help in understanding the recent RCU changes. Signed-off-by: Paul Moore Reported-by: David Howells CC: Eric Dumazet CC: Paul E. McKenney Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/netlabel/netlabel_domainhash.c | 28 ++++++++++------ net/netlabel/netlabel_unlabeled.c | 66 +++++++++++--------------------------- 2 files changed, 37 insertions(+), 57 deletions(-) (limited to 'net') diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 0bfeaab88ef5..06ab41b6b57a 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -50,9 +50,12 @@ struct netlbl_domhsh_tbl { }; /* Domain hash table */ -/* XXX - updates should be so rare that having one spinlock for the entire - * hash table should be okay */ +/* updates should be so rare that having one spinlock for the entire hash table + * should be okay */ static DEFINE_SPINLOCK(netlbl_domhsh_lock); +#define netlbl_domhsh_rcu_deref(p) \ + rcu_dereference_check(p, rcu_read_lock_held() || \ + lockdep_is_held(&netlbl_domhsh_lock)) static struct netlbl_domhsh_tbl *netlbl_domhsh = NULL; static struct netlbl_dom_map *netlbl_domhsh_def = NULL; @@ -106,7 +109,8 @@ static void netlbl_domhsh_free_entry(struct rcu_head *entry) * Description: * This is the hashing function for the domain hash table, it returns the * correct bucket number for the domain. The caller is responsibile for - * calling the rcu_read_[un]lock() functions. + * ensuring that the hash table is protected with either a RCU read lock or the + * hash table lock. * */ static u32 netlbl_domhsh_hash(const char *key) @@ -120,7 +124,7 @@ static u32 netlbl_domhsh_hash(const char *key) for (iter = 0, val = 0, len = strlen(key); iter < len; iter++) val = (val << 4 | (val >> (8 * sizeof(u32) - 4))) ^ key[iter]; - return val & (rcu_dereference(netlbl_domhsh)->size - 1); + return val & (netlbl_domhsh_rcu_deref(netlbl_domhsh)->size - 1); } /** @@ -130,7 +134,8 @@ static u32 netlbl_domhsh_hash(const char *key) * Description: * Searches the domain hash table and returns a pointer to the hash table * entry if found, otherwise NULL is returned. The caller is responsibile for - * the rcu hash table locks (i.e. the caller much call rcu_read_[un]lock()). + * ensuring that the hash table is protected with either a RCU read lock or the + * hash table lock. 
* */ static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain) @@ -141,7 +146,7 @@ static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain) if (domain != NULL) { bkt = netlbl_domhsh_hash(domain); - bkt_list = &rcu_dereference(netlbl_domhsh)->tbl[bkt]; + bkt_list = &netlbl_domhsh_rcu_deref(netlbl_domhsh)->tbl[bkt]; list_for_each_entry_rcu(iter, bkt_list, list) if (iter->valid && strcmp(iter->domain, domain) == 0) return iter; @@ -159,8 +164,8 @@ static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain) * Searches the domain hash table and returns a pointer to the hash table * entry if an exact match is found, if an exact match is not present in the * hash table then the default entry is returned if valid otherwise NULL is - * returned. The caller is responsibile for the rcu hash table locks - * (i.e. the caller much call rcu_read_[un]lock()). + * returned. The caller is responsibile ensuring that the hash table is + * protected with either a RCU read lock or the hash table lock. * */ static struct netlbl_dom_map *netlbl_domhsh_search_def(const char *domain) @@ -169,7 +174,7 @@ static struct netlbl_dom_map *netlbl_domhsh_search_def(const char *domain) entry = netlbl_domhsh_search(domain); if (entry == NULL) { - entry = rcu_dereference(netlbl_domhsh_def); + entry = netlbl_domhsh_rcu_deref(netlbl_domhsh_def); if (entry != NULL && !entry->valid) entry = NULL; } @@ -306,8 +311,11 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, struct netlbl_af6list *tmp6; #endif /* IPv6 */ + /* XXX - we can remove this RCU read lock as the spinlock protects the + * entire function, but before we do we need to fixup the + * netlbl_af[4,6]list RCU functions to do "the right thing" with + * respect to rcu_dereference() when only a spinlock is held. */ rcu_read_lock(); - spin_lock(&netlbl_domhsh_lock); if (entry->domain != NULL) entry_old = netlbl_domhsh_search(entry->domain); diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 852d9d7976b9..3b4fde7622a3 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -114,6 +114,9 @@ struct netlbl_unlhsh_walk_arg { /* updates should be so rare that having one spinlock for the entire * hash table should be okay */ static DEFINE_SPINLOCK(netlbl_unlhsh_lock); +#define netlbl_unlhsh_rcu_deref(p) \ + rcu_dereference_check(p, rcu_read_lock_held() || \ + lockdep_is_held(&netlbl_unlhsh_lock)) static struct netlbl_unlhsh_tbl *netlbl_unlhsh = NULL; static struct netlbl_unlhsh_iface *netlbl_unlhsh_def = NULL; @@ -235,15 +238,13 @@ static void netlbl_unlhsh_free_iface(struct rcu_head *entry) * Description: * This is the hashing function for the unlabeled hash table, it returns the * bucket number for the given device/interface. The caller is responsible for - * calling the rcu_read_[un]lock() functions. + * ensuring that the hash table is protected with either a RCU read lock or + * the hash table lock. * */ static u32 netlbl_unlhsh_hash(int ifindex) { - /* this is taken _almost_ directly from - * security/selinux/netif.c:sel_netif_hasfn() as they do pretty much - * the same thing */ - return ifindex & (rcu_dereference(netlbl_unlhsh)->size - 1); + return ifindex & (netlbl_unlhsh_rcu_deref(netlbl_unlhsh)->size - 1); } /** @@ -253,7 +254,8 @@ static u32 netlbl_unlhsh_hash(int ifindex) * Description: * Searches the unlabeled connection hash table and returns a pointer to the * interface entry which matches @ifindex, otherwise NULL is returned. 
The - * caller is responsible for calling the rcu_read_[un]lock() functions. + * caller is responsible for ensuring that the hash table is protected with + * either a RCU read lock or the hash table lock. * */ static struct netlbl_unlhsh_iface *netlbl_unlhsh_search_iface(int ifindex) @@ -263,7 +265,7 @@ static struct netlbl_unlhsh_iface *netlbl_unlhsh_search_iface(int ifindex) struct netlbl_unlhsh_iface *iter; bkt = netlbl_unlhsh_hash(ifindex); - bkt_list = &rcu_dereference(netlbl_unlhsh)->tbl[bkt]; + bkt_list = &netlbl_unlhsh_rcu_deref(netlbl_unlhsh)->tbl[bkt]; list_for_each_entry_rcu(iter, bkt_list, list) if (iter->valid && iter->ifindex == ifindex) return iter; @@ -271,33 +273,6 @@ static struct netlbl_unlhsh_iface *netlbl_unlhsh_search_iface(int ifindex) return NULL; } -/** - * netlbl_unlhsh_search_iface_def - Search for a matching interface entry - * @ifindex: the network interface - * - * Description: - * Searches the unlabeled connection hash table and returns a pointer to the - * interface entry which matches @ifindex. If an exact match can not be found - * and there is a valid default entry, the default entry is returned, otherwise - * NULL is returned. The caller is responsible for calling the - * rcu_read_[un]lock() functions. - * - */ -static struct netlbl_unlhsh_iface *netlbl_unlhsh_search_iface_def(int ifindex) -{ - struct netlbl_unlhsh_iface *entry; - - entry = netlbl_unlhsh_search_iface(ifindex); - if (entry != NULL) - return entry; - - entry = rcu_dereference(netlbl_unlhsh_def); - if (entry != NULL && entry->valid) - return entry; - - return NULL; -} - /** * netlbl_unlhsh_add_addr4 - Add a new IPv4 address entry to the hash table * @iface: the associated interface entry @@ -308,8 +283,7 @@ static struct netlbl_unlhsh_iface *netlbl_unlhsh_search_iface_def(int ifindex) * Description: * Add a new address entry into the unlabeled connection hash table using the * interface entry specified by @iface. On success zero is returned, otherwise - * a negative value is returned. The caller is responsible for calling the - * rcu_read_[un]lock() functions. + * a negative value is returned. * */ static int netlbl_unlhsh_add_addr4(struct netlbl_unlhsh_iface *iface, @@ -349,8 +323,7 @@ static int netlbl_unlhsh_add_addr4(struct netlbl_unlhsh_iface *iface, * Description: * Add a new address entry into the unlabeled connection hash table using the * interface entry specified by @iface. On success zero is returned, otherwise - * a negative value is returned. The caller is responsible for calling the - * rcu_read_[un]lock() functions. + * a negative value is returned. * */ static int netlbl_unlhsh_add_addr6(struct netlbl_unlhsh_iface *iface, @@ -391,8 +364,7 @@ static int netlbl_unlhsh_add_addr6(struct netlbl_unlhsh_iface *iface, * Description: * Add a new, empty, interface entry into the unlabeled connection hash table. * On success a pointer to the new interface entry is returned, on failure NULL - * is returned. The caller is responsible for calling the rcu_read_[un]lock() - * functions. + * is returned. 
* */ static struct netlbl_unlhsh_iface *netlbl_unlhsh_add_iface(int ifindex) @@ -415,10 +387,10 @@ static struct netlbl_unlhsh_iface *netlbl_unlhsh_add_iface(int ifindex) if (netlbl_unlhsh_search_iface(ifindex) != NULL) goto add_iface_failure; list_add_tail_rcu(&iface->list, - &rcu_dereference(netlbl_unlhsh)->tbl[bkt]); + &netlbl_unlhsh_rcu_deref(netlbl_unlhsh)->tbl[bkt]); } else { INIT_LIST_HEAD(&iface->list); - if (rcu_dereference(netlbl_unlhsh_def) != NULL) + if (netlbl_unlhsh_rcu_deref(netlbl_unlhsh_def) != NULL) goto add_iface_failure; rcu_assign_pointer(netlbl_unlhsh_def, iface); } @@ -548,8 +520,7 @@ unlhsh_add_return: * * Description: * Remove an IP address entry from the unlabeled connection hash table. - * Returns zero on success, negative values on failure. The caller is - * responsible for calling the rcu_read_[un]lock() functions. + * Returns zero on success, negative values on failure. * */ static int netlbl_unlhsh_remove_addr4(struct net *net, @@ -611,8 +582,7 @@ static int netlbl_unlhsh_remove_addr4(struct net *net, * * Description: * Remove an IP address entry from the unlabeled connection hash table. - * Returns zero on success, negative values on failure. The caller is - * responsible for calling the rcu_read_[un]lock() functions. + * Returns zero on success, negative values on failure. * */ static int netlbl_unlhsh_remove_addr6(struct net *net, @@ -1547,8 +1517,10 @@ int netlbl_unlabel_getattr(const struct sk_buff *skb, struct netlbl_unlhsh_iface *iface; rcu_read_lock(); - iface = netlbl_unlhsh_search_iface_def(skb->skb_iif); + iface = netlbl_unlhsh_search_iface(skb->skb_iif); if (iface == NULL) + iface = rcu_dereference(netlbl_unlhsh_def); + if (iface == NULL || !iface->valid) goto unlabel_getattr_nolabel; switch (family) { case PF_INET: { -- cgit v1.2.2 From d4fc6dbb5ae51430e35b2005f6d68938861f8d8b Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Wed, 31 Mar 2010 14:54:46 +0000 Subject: ipv4: remove redundant verification code The check if error signaling is wanted (inet->recverr != 0) is done by the caller: raw.c:raw_err() and udp.c:__udp4_lib_err(), so there is no need to check this condition again. Signed-off-by: Hagen Paul Pfeifer Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 644dc43a55de..f4b47acbf7b6 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -286,12 +286,8 @@ int ip_ra_control(struct sock *sk, unsigned char on, void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload) { - struct inet_sock *inet = inet_sk(sk); struct sock_exterr_skb *serr; - if (!inet->recverr) - return; - skb = skb_clone(skb, GFP_ATOMIC); if (!skb) return; -- cgit v1.2.2 From 5d944c640b4ae5f37c537acf491c2f0eb89fa0d6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 31 Mar 2010 07:06:04 +0000 Subject: gen_estimator: deadlock fix One of my test machine got a deadlock during "tc" sessions, adding/deleting classes & filters, using traffic estimators. 
After some analysis, I believe we have a potential use after free case in est_timer() : spin_lock(e->stats_lock); << HERE >> read_lock(&est_lock); if (e->bstats == NULL) << TEST >> goto skip; Test is done a bit late, because after estimator is killed, and before rcu grace period elapsed, we might already have freed/reuse memory where e->stats_locks points to (some qdisc->q.lock) A possible fix is to respect a rcu grace period at Qdisc dismantle time. On 64bit, sizeof(struct Qdisc) is exactly 192 bytes. Adding 16 bytes to it (for struct rcu_head) is a problem because it might change performance, given QDISC_ALIGNTO is 32 bytes. This is why I also change QDISC_ALIGNTO to 64 bytes, to satisfy most current alignment requirements. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5173c1e1b19c..17513252e83f 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -528,7 +528,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, unsigned int size; int err = -ENOBUFS; - /* ensure that the Qdisc and the private data are 32-byte aligned */ + /* ensure that the Qdisc and the private data are 64-byte aligned */ size = QDISC_ALIGN(sizeof(*sch)); size += ops->priv_size + (QDISC_ALIGNTO - 1); @@ -590,6 +590,13 @@ void qdisc_reset(struct Qdisc *qdisc) } EXPORT_SYMBOL(qdisc_reset); +static void qdisc_rcu_free(struct rcu_head *head) +{ + struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head); + + kfree((char *) qdisc - qdisc->padded); +} + void qdisc_destroy(struct Qdisc *qdisc) { const struct Qdisc_ops *ops = qdisc->ops; @@ -613,7 +620,11 @@ void qdisc_destroy(struct Qdisc *qdisc) dev_put(qdisc_dev(qdisc)); kfree_skb(qdisc->gso_skb); - kfree((char *) qdisc - qdisc->padded); + /* + * gen_estimator est_timer() might access qdisc->q.lock, + * wait a RCU grace period before freeing qdisc. + */ + call_rcu(&qdisc->rcu_head, qdisc_rcu_free); } EXPORT_SYMBOL(qdisc_destroy); -- cgit v1.2.2 From 152102c7f2bf191690f1069bae292ea3925adf14 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Tue, 30 Mar 2010 20:16:22 +0000 Subject: rps: keep the old behavior on SMP without rps keep the old behavior on SMP without rps RPS introduces a lock operation to per cpu variable input_pkt_queue on SMP whenever rps is enabled or not. On SMP without RPS, this lock isn't needed at all. Signed-off-by: Changli Gao ---- net/core/dev.c | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) Signed-off-by: David S. 
Miller --- net/core/dev.c | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 887aa84fcd46..427cd53c118d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -206,6 +206,20 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)]; } +static inline void rps_lock(struct softnet_data *queue) +{ +#ifdef CONFIG_RPS + spin_lock(&queue->input_pkt_queue.lock); +#endif +} + +static inline void rps_unlock(struct softnet_data *queue) +{ +#ifdef CONFIG_RPS + spin_unlock(&queue->input_pkt_queue.lock); +#endif +} + /* Device list insertion */ static int list_netdevice(struct net_device *dev) { @@ -2313,13 +2327,13 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu) local_irq_save(flags); __get_cpu_var(netdev_rx_stat).total++; - spin_lock(&queue->input_pkt_queue.lock); + rps_lock(queue); if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { if (queue->input_pkt_queue.qlen) { enqueue: __skb_queue_tail(&queue->input_pkt_queue, skb); - spin_unlock_irqrestore(&queue->input_pkt_queue.lock, - flags); + rps_unlock(queue); + local_irq_restore(flags); return NET_RX_SUCCESS; } @@ -2341,7 +2355,7 @@ enqueue: goto enqueue; } - spin_unlock(&queue->input_pkt_queue.lock); + rps_unlock(queue); __get_cpu_var(netdev_rx_stat).dropped++; local_irq_restore(flags); @@ -2766,19 +2780,19 @@ int netif_receive_skb(struct sk_buff *skb) EXPORT_SYMBOL(netif_receive_skb); /* Network device is going away, flush any packets still pending */ -static void flush_backlog(struct net_device *dev, int cpu) +static void flush_backlog(void *arg) { - struct softnet_data *queue = &per_cpu(softnet_data, cpu); + struct net_device *dev = arg; + struct softnet_data *queue = &__get_cpu_var(softnet_data); struct sk_buff *skb, *tmp; - unsigned long flags; - spin_lock_irqsave(&queue->input_pkt_queue.lock, flags); + rps_lock(queue); skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp) if (skb->dev == dev) { __skb_unlink(skb, &queue->input_pkt_queue); kfree_skb(skb); } - spin_unlock_irqrestore(&queue->input_pkt_queue.lock, flags); + rps_unlock(queue); } static int napi_gro_complete(struct sk_buff *skb) @@ -3091,14 +3105,16 @@ static int process_backlog(struct napi_struct *napi, int quota) do { struct sk_buff *skb; - spin_lock_irq(&queue->input_pkt_queue.lock); + local_irq_disable(); + rps_lock(queue); skb = __skb_dequeue(&queue->input_pkt_queue); if (!skb) { __napi_complete(napi); spin_unlock_irq(&queue->input_pkt_queue.lock); break; } - spin_unlock_irq(&queue->input_pkt_queue.lock); + rps_unlock(queue); + local_irq_enable(); __netif_receive_skb(skb); } while (++work < quota && jiffies == start_time); @@ -5548,7 +5564,6 @@ void netdev_run_todo(void) while (!list_empty(&list)) { struct net_device *dev = list_first_entry(&list, struct net_device, todo_list); - int i; list_del(&dev->todo_list); if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { @@ -5560,8 +5575,7 @@ void netdev_run_todo(void) dev->reg_state = NETREG_UNREGISTERED; - for_each_online_cpu(i) - flush_backlog(dev, i); + on_each_cpu(flush_backlog, dev, 1); netdev_wait_allrefs(dev); -- cgit v1.2.2 From 34996cb91dd72f0b0456d8fd3fef4aaee62232f2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 31 Mar 2010 01:19:49 +0000 Subject: xfrm: Remove xfrm_state_genid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The xfrm state genid only needs 
to be matched against the copy saved in xfrm_dst. So we don't need a global genid at all. In fact, we don't even need to initialise it. Based on observation by Timo Teräs. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 17d5b96f2fc8..71f8f33d637b 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -37,7 +37,6 @@ static DEFINE_SPINLOCK(xfrm_state_lock); static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; -static unsigned int xfrm_state_genid; static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); @@ -923,8 +922,6 @@ static void __xfrm_state_insert(struct xfrm_state *x) struct net *net = xs_net(x); unsigned int h; - x->genid = ++xfrm_state_genid; - list_add(&x->km.all, &net->xfrm.state_all); h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr, @@ -970,7 +967,7 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew) (mark & x->mark.m) == x->mark.v && !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) && !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family)) - x->genid = xfrm_state_genid; + x->genid++; } } -- cgit v1.2.2 From c8bf4d04f970fafb3430d332533e1cf103f2a018 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Wed, 31 Mar 2010 00:17:04 +0000 Subject: xfrm_user: verify policy direction at XFRM_MSG_POLEXPIRE handler Add missing check for policy direction verification. This is especially important since without this xfrm_user may end up deleting per-socket policy which is not allowed. Signed-off-by: Timo Teras Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 6106b72826d3..da5ba86181de 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1741,6 +1741,10 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; + err = verify_policy_dir(p->dir); + if (err) + return err; + if (p->index) xp = xfrm_policy_byid(net, mark, type, p->dir, p->index, 0, &err); else { -- cgit v1.2.2 From ea2dea9dacc256fe927857feb423872051642ae7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Wed, 31 Mar 2010 00:17:05 +0000 Subject: xfrm: remove policy lock when accessing policy->walk.dead All of the code considers ->dead as a hint that the cached policy needs to get refreshed. The read side can just drop the read lock without any side effects. The write side needs to make sure that it's written only exactly once. Only possible race is at xfrm_policy_kill(). This is fixed by checking result of __xfrm_policy_unlink() when needed. It will always succeed if the policy object is looked up from the hash list (so some checks are removed), but it needs to be checked if we are trying to unlink policy via a reference (appropriate checks added). Since policy->walk.dead is written exactly once, it no longer needs to be protected with a write lock. Signed-off-by: Timo Teras Acked-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_policy.c | 31 +++++++++---------------------- net/xfrm/xfrm_user.c | 6 +----- 2 files changed, 10 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 843e066649cb..82789cf1c632 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -156,7 +156,7 @@ static void xfrm_policy_timer(unsigned long data) read_lock(&xp->lock); - if (xp->walk.dead) + if (unlikely(xp->walk.dead)) goto out; dir = xfrm_policy_id2dir(xp->index); @@ -297,17 +297,7 @@ static DECLARE_WORK(xfrm_policy_gc_work, xfrm_policy_gc_task); static void xfrm_policy_kill(struct xfrm_policy *policy) { - int dead; - - write_lock_bh(&policy->lock); - dead = policy->walk.dead; policy->walk.dead = 1; - write_unlock_bh(&policy->lock); - - if (unlikely(dead)) { - WARN_ON(1); - return; - } spin_lock_bh(&xfrm_policy_gc_lock); hlist_add_head(&policy->bydst, &xfrm_policy_gc_list); @@ -776,7 +766,6 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) { int dir, err = 0, cnt = 0; - struct xfrm_policy *dp; write_lock_bh(&xfrm_policy_lock); @@ -794,10 +783,9 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) &net->xfrm.policy_inexact[dir], bydst) { if (pol->type != type) continue; - dp = __xfrm_policy_unlink(pol, dir); + __xfrm_policy_unlink(pol, dir); write_unlock_bh(&xfrm_policy_lock); - if (dp) - cnt++; + cnt++; xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, audit_info->sessionid, @@ -816,10 +804,9 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) bydst) { if (pol->type != type) continue; - dp = __xfrm_policy_unlink(pol, dir); + __xfrm_policy_unlink(pol, dir); write_unlock_bh(&xfrm_policy_lock); - if (dp) - cnt++; + cnt++; xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, @@ -1132,6 +1119,9 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); } if (old_pol) + /* Unlinking succeeds always. This is the only function + * allowed to delete or replace socket policy. + */ __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir); write_unlock_bh(&xfrm_policy_lock); @@ -1737,11 +1727,8 @@ restart: goto error; } - for (pi = 0; pi < npols; pi++) { - read_lock_bh(&pols[pi]->lock); + for (pi = 0; pi < npols; pi++) pol_dead |= pols[pi]->walk.dead; - read_unlock_bh(&pols[pi]->lock); - } write_lock_bh(&policy->lock); if (unlikely(pol_dead || stale_bundle(dst))) { diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index da5ba86181de..a267fbdda525 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1770,13 +1770,9 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, if (xp == NULL) return -ENOENT; - read_lock(&xp->lock); - if (xp->walk.dead) { - read_unlock(&xp->lock); + if (unlikely(xp->walk.dead)) goto out; - } - read_unlock(&xp->lock); err = 0; if (up->hard) { uid_t loginuid = NETLINK_CB(skb).loginuid; -- cgit v1.2.2 From d7997fe1f4584da12e9c29fb682c18e9bdc13b73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Wed, 31 Mar 2010 00:17:06 +0000 Subject: flow: structurize flow cache Group all per-cpu data to one structure instead of having many globals. Also prepare the internals so that we can have multiple instances of the flow cache if needed. 
Only the kmem_cache is left as a global as all flow caches share the same element size, and benefit from using a common cache. Signed-off-by: Timo Teras Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/flow.c | 223 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 119 insertions(+), 104 deletions(-) (limited to 'net') diff --git a/net/core/flow.c b/net/core/flow.c index 96015871ecea..1d27ca6b421d 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -35,104 +35,105 @@ struct flow_cache_entry { atomic_t *object_ref; }; -atomic_t flow_cache_genid = ATOMIC_INIT(0); - -static u32 flow_hash_shift; -#define flow_hash_size (1 << flow_hash_shift) -static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL }; - -#define flow_table(cpu) (per_cpu(flow_tables, cpu)) - -static struct kmem_cache *flow_cachep __read_mostly; - -static int flow_lwm, flow_hwm; - -struct flow_percpu_info { - int hash_rnd_recalc; - u32 hash_rnd; - int count; +struct flow_cache_percpu { + struct flow_cache_entry ** hash_table; + int hash_count; + u32 hash_rnd; + int hash_rnd_recalc; + struct tasklet_struct flush_tasklet; }; -static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info) = { 0 }; - -#define flow_hash_rnd_recalc(cpu) \ - (per_cpu(flow_hash_info, cpu).hash_rnd_recalc) -#define flow_hash_rnd(cpu) \ - (per_cpu(flow_hash_info, cpu).hash_rnd) -#define flow_count(cpu) \ - (per_cpu(flow_hash_info, cpu).count) - -static struct timer_list flow_hash_rnd_timer; - -#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ) struct flow_flush_info { - atomic_t cpuleft; - struct completion completion; + struct flow_cache * cache; + atomic_t cpuleft; + struct completion completion; }; -static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets) = { NULL }; -#define flow_flush_tasklet(cpu) (&per_cpu(flow_flush_tasklets, cpu)) +struct flow_cache { + u32 hash_shift; + unsigned long order; + struct flow_cache_percpu * percpu; + struct notifier_block hotcpu_notifier; + int low_watermark; + int high_watermark; + struct timer_list rnd_timer; +}; + +atomic_t flow_cache_genid = ATOMIC_INIT(0); +static struct flow_cache flow_cache_global; +static struct kmem_cache *flow_cachep; + +#define flow_cache_hash_size(cache) (1 << (cache)->hash_shift) +#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ) static void flow_cache_new_hashrnd(unsigned long arg) { + struct flow_cache *fc = (void *) arg; int i; for_each_possible_cpu(i) - flow_hash_rnd_recalc(i) = 1; + per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1; - flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; - add_timer(&flow_hash_rnd_timer); + fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; + add_timer(&fc->rnd_timer); } -static void flow_entry_kill(int cpu, struct flow_cache_entry *fle) +static void flow_entry_kill(struct flow_cache *fc, + struct flow_cache_percpu *fcp, + struct flow_cache_entry *fle) { if (fle->object) atomic_dec(fle->object_ref); kmem_cache_free(flow_cachep, fle); - flow_count(cpu)--; + fcp->hash_count--; } -static void __flow_cache_shrink(int cpu, int shrink_to) +static void __flow_cache_shrink(struct flow_cache *fc, + struct flow_cache_percpu *fcp, + int shrink_to) { struct flow_cache_entry *fle, **flp; int i; - for (i = 0; i < flow_hash_size; i++) { + for (i = 0; i < flow_cache_hash_size(fc); i++) { int k = 0; - flp = &flow_table(cpu)[i]; + flp = &fcp->hash_table[i]; while ((fle = *flp) != NULL && k < shrink_to) { k++; flp = &fle->next; } while ((fle = *flp) != NULL) { *flp = fle->next; - flow_entry_kill(cpu, fle); + 
flow_entry_kill(fc, fcp, fle); } } } -static void flow_cache_shrink(int cpu) +static void flow_cache_shrink(struct flow_cache *fc, + struct flow_cache_percpu *fcp) { - int shrink_to = flow_lwm / flow_hash_size; + int shrink_to = fc->low_watermark / flow_cache_hash_size(fc); - __flow_cache_shrink(cpu, shrink_to); + __flow_cache_shrink(fc, fcp, shrink_to); } -static void flow_new_hash_rnd(int cpu) +static void flow_new_hash_rnd(struct flow_cache *fc, + struct flow_cache_percpu *fcp) { - get_random_bytes(&flow_hash_rnd(cpu), sizeof(u32)); - flow_hash_rnd_recalc(cpu) = 0; - - __flow_cache_shrink(cpu, 0); + get_random_bytes(&fcp->hash_rnd, sizeof(u32)); + fcp->hash_rnd_recalc = 0; + __flow_cache_shrink(fc, fcp, 0); } -static u32 flow_hash_code(struct flowi *key, int cpu) +static u32 flow_hash_code(struct flow_cache *fc, + struct flow_cache_percpu *fcp, + struct flowi *key) { u32 *k = (u32 *) key; - return (jhash2(k, (sizeof(*key) / sizeof(u32)), flow_hash_rnd(cpu)) & - (flow_hash_size - 1)); + return (jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd) + & (flow_cache_hash_size(fc) - 1)); } #if (BITS_PER_LONG == 64) @@ -168,24 +169,25 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, flow_resolve_t resolver) { + struct flow_cache *fc = &flow_cache_global; + struct flow_cache_percpu *fcp; struct flow_cache_entry *fle, **head; unsigned int hash; - int cpu; local_bh_disable(); - cpu = smp_processor_id(); + fcp = per_cpu_ptr(fc->percpu, smp_processor_id()); fle = NULL; /* Packet really early in init? Making flow_cache_init a * pre-smp initcall would solve this. --RR */ - if (!flow_table(cpu)) + if (!fcp->hash_table) goto nocache; - if (flow_hash_rnd_recalc(cpu)) - flow_new_hash_rnd(cpu); - hash = flow_hash_code(key, cpu); + if (fcp->hash_rnd_recalc) + flow_new_hash_rnd(fc, fcp); + hash = flow_hash_code(fc, fcp, key); - head = &flow_table(cpu)[hash]; + head = &fcp->hash_table[hash]; for (fle = *head; fle; fle = fle->next) { if (fle->family == family && fle->dir == dir && @@ -204,8 +206,8 @@ void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, } if (!fle) { - if (flow_count(cpu) > flow_hwm) - flow_cache_shrink(cpu); + if (fcp->hash_count > fc->high_watermark) + flow_cache_shrink(fc, fcp); fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); if (fle) { @@ -215,7 +217,7 @@ void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, fle->dir = dir; memcpy(&fle->key, key, sizeof(*key)); fle->object = NULL; - flow_count(cpu)++; + fcp->hash_count++; } } @@ -249,14 +251,15 @@ nocache: static void flow_cache_flush_tasklet(unsigned long data) { struct flow_flush_info *info = (void *)data; + struct flow_cache *fc = info->cache; + struct flow_cache_percpu *fcp; int i; - int cpu; - cpu = smp_processor_id(); - for (i = 0; i < flow_hash_size; i++) { + fcp = per_cpu_ptr(fc->percpu, smp_processor_id()); + for (i = 0; i < flow_cache_hash_size(fc); i++) { struct flow_cache_entry *fle; - fle = flow_table(cpu)[i]; + fle = fcp->hash_table[i]; for (; fle; fle = fle->next) { unsigned genid = atomic_read(&flow_cache_genid); @@ -272,7 +275,6 @@ static void flow_cache_flush_tasklet(unsigned long data) complete(&info->completion); } -static void flow_cache_flush_per_cpu(void *) __attribute__((__unused__)); static void flow_cache_flush_per_cpu(void *data) { struct flow_flush_info *info = data; @@ -280,8 +282,7 @@ static void flow_cache_flush_per_cpu(void *data) struct tasklet_struct 
*tasklet; cpu = smp_processor_id(); - - tasklet = flow_flush_tasklet(cpu); + tasklet = &per_cpu_ptr(info->cache->percpu, cpu)->flush_tasklet; tasklet->data = (unsigned long)info; tasklet_schedule(tasklet); } @@ -294,6 +295,7 @@ void flow_cache_flush(void) /* Don't want cpus going down or up during this. */ get_online_cpus(); mutex_lock(&flow_flush_sem); + info.cache = &flow_cache_global; atomic_set(&info.cpuleft, num_online_cpus()); init_completion(&info.completion); @@ -307,62 +309,75 @@ void flow_cache_flush(void) put_online_cpus(); } -static void __init flow_cache_cpu_prepare(int cpu) +static void __init flow_cache_cpu_prepare(struct flow_cache *fc, + struct flow_cache_percpu *fcp) { - struct tasklet_struct *tasklet; - unsigned long order; - - for (order = 0; - (PAGE_SIZE << order) < - (sizeof(struct flow_cache_entry *)*flow_hash_size); - order++) - /* NOTHING */; - - flow_table(cpu) = (struct flow_cache_entry **) - __get_free_pages(GFP_KERNEL|__GFP_ZERO, order); - if (!flow_table(cpu)) - panic("NET: failed to allocate flow cache order %lu\n", order); - - flow_hash_rnd_recalc(cpu) = 1; - flow_count(cpu) = 0; - - tasklet = flow_flush_tasklet(cpu); - tasklet_init(tasklet, flow_cache_flush_tasklet, 0); + fcp->hash_table = (struct flow_cache_entry **) + __get_free_pages(GFP_KERNEL|__GFP_ZERO, fc->order); + if (!fcp->hash_table) + panic("NET: failed to allocate flow cache order %lu\n", fc->order); + + fcp->hash_rnd_recalc = 1; + fcp->hash_count = 0; + tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0); } static int flow_cache_cpu(struct notifier_block *nfb, unsigned long action, void *hcpu) { + struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier); + int cpu = (unsigned long) hcpu; + struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); + if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) - __flow_cache_shrink((unsigned long)hcpu, 0); + __flow_cache_shrink(fc, fcp, 0); return NOTIFY_OK; } -static int __init flow_cache_init(void) +static int flow_cache_init(struct flow_cache *fc) { + unsigned long order; int i; - flow_cachep = kmem_cache_create("flow_cache", - sizeof(struct flow_cache_entry), - 0, SLAB_PANIC, - NULL); - flow_hash_shift = 10; - flow_lwm = 2 * flow_hash_size; - flow_hwm = 4 * flow_hash_size; + fc->hash_shift = 10; + fc->low_watermark = 2 * flow_cache_hash_size(fc); + fc->high_watermark = 4 * flow_cache_hash_size(fc); + + for (order = 0; + (PAGE_SIZE << order) < + (sizeof(struct flow_cache_entry *)*flow_cache_hash_size(fc)); + order++) + /* NOTHING */; + fc->order = order; + fc->percpu = alloc_percpu(struct flow_cache_percpu); - setup_timer(&flow_hash_rnd_timer, flow_cache_new_hashrnd, 0); - flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; - add_timer(&flow_hash_rnd_timer); + setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd, + (unsigned long) fc); + fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; + add_timer(&fc->rnd_timer); for_each_possible_cpu(i) - flow_cache_cpu_prepare(i); + flow_cache_cpu_prepare(fc, per_cpu_ptr(fc->percpu, i)); + + fc->hotcpu_notifier = (struct notifier_block){ + .notifier_call = flow_cache_cpu, + }; + register_hotcpu_notifier(&fc->hotcpu_notifier); - hotcpu_notifier(flow_cache_cpu, 0); return 0; } -module_init(flow_cache_init); +static int __init flow_cache_init_global(void) +{ + flow_cachep = kmem_cache_create("flow_cache", + sizeof(struct flow_cache_entry), + 0, SLAB_PANIC, NULL); + + return flow_cache_init(&flow_cache_global); +} + +module_init(flow_cache_init_global); 
EXPORT_SYMBOL(flow_cache_genid); EXPORT_SYMBOL(flow_cache_lookup); -- cgit v1.2.2 From 5acbbd428db47b12f137a8a2aa96b3c0a96b744e Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 30 Mar 2010 22:35:50 +0000 Subject: net: change illegal_highdma to use dma_mask Robert Hancock pointed out two problems about NETIF_F_HIGHDMA: -Many drivers only set the flag when they detect they can use 64-bit DMA, since otherwise they could receive DMA addresses that they can't handle (which on platforms without IOMMU/SWIOTLB support is fatal). This means that if 64-bit support isn't available, even buffers located below 4GB will get copied unnecessarily. -Some drivers set the flag even though they can't actually handle 64-bit DMA, which would mean that on platforms without IOMMU/SWIOTLB they would get a DMA mapping error if the memory they received happened to be located above 4GB. http://lkml.org/lkml/2010/3/3/530 We can use the dma_mask if we need bouncing or not here. Then we can safely fix drivers that misuse NETIF_F_HIGHDMA. Signed-off-by: FUJITA Tomonori Signed-off-by: David S. Miller --- net/core/dev.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 427cd53c118d..e19cdae49fef 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -129,6 +129,7 @@ #include #include #include +#include #include "net-sysfs.h" @@ -1804,14 +1805,21 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) { #ifdef CONFIG_HIGHMEM int i; + if (!(dev->features & NETIF_F_HIGHDMA)) { + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) + if (PageHighMem(skb_shinfo(skb)->frags[i].page)) + return 1; + } - if (dev->features & NETIF_F_HIGHDMA) - return 0; - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - if (PageHighMem(skb_shinfo(skb)->frags[i].page)) - return 1; + if (PCI_DMA_BUS_IS_PHYS) { + struct device *pdev = dev->dev.parent; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page); + if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask) + return 1; + } + } #endif return 0; } -- cgit v1.2.2 From 9092c658bab215b2752fa59d2a36c05b74d1e9e9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 Apr 2010 13:34:49 -0700 Subject: net: illegal_highdma() fix Followup to commit 5acbbd428db47b12f137a8a2aa96b3c0a96b744e (net: change illegal_highdma to use dma_mask) If dev->dev.parent is NULL, we should not try to dereference it. Dont force inline illegal_highdma() as its pretty big now. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index e19cdae49fef..c6b52068d5ec 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1801,7 +1801,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault); * 2. No high memory really exists on this machine. 
*/ -static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) +static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) { #ifdef CONFIG_HIGHMEM int i; @@ -1814,6 +1814,8 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) if (PCI_DMA_BUS_IS_PHYS) { struct device *pdev = dev->dev.parent; + if (!pdev) + return 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page); if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask) -- cgit v1.2.2 From a748ee2426817a95b1f03012d8f339c45c722ae1 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 1 Apr 2010 21:22:09 +0000 Subject: net: move address list functions to a separate file +little renaming of unicast functions to be smooth with multicast ones Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/8021q/vlan.c | 4 +- net/8021q/vlan_dev.c | 14 +- net/core/Makefile | 3 +- net/core/dev.c | 430 +---------------------------------------- net/core/dev_addr_lists.c | 478 ++++++++++++++++++++++++++++++++++++++++++++++ net/dsa/slave.c | 14 +- net/packet/af_packet.c | 4 +- 7 files changed, 501 insertions(+), 446 deletions(-) create mode 100644 net/core/dev_addr_lists.c (limited to 'net') diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index c39a5f41169c..bd33f02013ec 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -356,13 +356,13 @@ static void vlan_sync_address(struct net_device *dev, * the new address */ if (compare_ether_addr(vlandev->dev_addr, vlan->real_dev_addr) && !compare_ether_addr(vlandev->dev_addr, dev->dev_addr)) - dev_unicast_delete(dev, vlandev->dev_addr); + dev_uc_del(dev, vlandev->dev_addr); /* vlan address was equal to the old address and is different from * the new address */ if (!compare_ether_addr(vlandev->dev_addr, vlan->real_dev_addr) && compare_ether_addr(vlandev->dev_addr, dev->dev_addr)) - dev_unicast_add(dev, vlandev->dev_addr); + dev_uc_add(dev, vlandev->dev_addr); memcpy(vlan->real_dev_addr, dev->dev_addr, ETH_ALEN); } diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 9e83272fc5b0..7f4d247237e4 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -461,7 +461,7 @@ static int vlan_dev_open(struct net_device *dev) return -ENETDOWN; if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) { - err = dev_unicast_add(real_dev, dev->dev_addr); + err = dev_uc_add(real_dev, dev->dev_addr); if (err < 0) goto out; } @@ -490,7 +490,7 @@ clear_allmulti: dev_set_allmulti(real_dev, -1); del_unicast: if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) - dev_unicast_delete(real_dev, dev->dev_addr); + dev_uc_del(real_dev, dev->dev_addr); out: netif_carrier_off(dev); return err; @@ -505,14 +505,14 @@ static int vlan_dev_stop(struct net_device *dev) vlan_gvrp_request_leave(dev); dev_mc_unsync(real_dev, dev); - dev_unicast_unsync(real_dev, dev); + dev_uc_unsync(real_dev, dev); if (dev->flags & IFF_ALLMULTI) dev_set_allmulti(real_dev, -1); if (dev->flags & IFF_PROMISC) dev_set_promiscuity(real_dev, -1); if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) - dev_unicast_delete(real_dev, dev->dev_addr); + dev_uc_del(real_dev, dev->dev_addr); netif_carrier_off(dev); return 0; @@ -531,13 +531,13 @@ static int vlan_dev_set_mac_address(struct net_device *dev, void *p) goto out; if (compare_ether_addr(addr->sa_data, real_dev->dev_addr)) { - err = dev_unicast_add(real_dev, addr->sa_data); + err = dev_uc_add(real_dev, addr->sa_data); if (err < 0) return err; } if 
(compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) - dev_unicast_delete(real_dev, dev->dev_addr); + dev_uc_del(real_dev, dev->dev_addr); out: memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); @@ -654,7 +654,7 @@ static void vlan_dev_change_rx_flags(struct net_device *dev, int change) static void vlan_dev_set_rx_mode(struct net_device *vlan_dev) { dev_mc_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev); - dev_unicast_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev); + dev_uc_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev); } /* diff --git a/net/core/Makefile b/net/core/Makefile index 08791ac3e05a..0a899f1aadb9 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -8,7 +8,8 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \ - neighbour.o rtnetlink.o utils.o link_watch.o filter.o + neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ + dev_addr_lists.o obj-$(CONFIG_XFRM) += flow.o obj-y += net-sysfs.o diff --git a/net/core/dev.c b/net/core/dev.c index c6b52068d5ec..949c62dba719 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3968,314 +3968,6 @@ void dev_set_rx_mode(struct net_device *dev) netif_addr_unlock_bh(dev); } -/* hw addresses list handling functions */ - -static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr, - int addr_len, unsigned char addr_type) -{ - struct netdev_hw_addr *ha; - int alloc_size; - - if (addr_len > MAX_ADDR_LEN) - return -EINVAL; - - list_for_each_entry(ha, &list->list, list) { - if (!memcmp(ha->addr, addr, addr_len) && - ha->type == addr_type) { - ha->refcount++; - return 0; - } - } - - - alloc_size = sizeof(*ha); - if (alloc_size < L1_CACHE_BYTES) - alloc_size = L1_CACHE_BYTES; - ha = kmalloc(alloc_size, GFP_ATOMIC); - if (!ha) - return -ENOMEM; - memcpy(ha->addr, addr, addr_len); - ha->type = addr_type; - ha->refcount = 1; - ha->synced = false; - list_add_tail_rcu(&ha->list, &list->list); - list->count++; - return 0; -} - -static void ha_rcu_free(struct rcu_head *head) -{ - struct netdev_hw_addr *ha; - - ha = container_of(head, struct netdev_hw_addr, rcu_head); - kfree(ha); -} - -static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr, - int addr_len, unsigned char addr_type) -{ - struct netdev_hw_addr *ha; - - list_for_each_entry(ha, &list->list, list) { - if (!memcmp(ha->addr, addr, addr_len) && - (ha->type == addr_type || !addr_type)) { - if (--ha->refcount) - return 0; - list_del_rcu(&ha->list); - call_rcu(&ha->rcu_head, ha_rcu_free); - list->count--; - return 0; - } - } - return -ENOENT; -} - -static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, - struct netdev_hw_addr_list *from_list, - int addr_len, - unsigned char addr_type) -{ - int err; - struct netdev_hw_addr *ha, *ha2; - unsigned char type; - - list_for_each_entry(ha, &from_list->list, list) { - type = addr_type ? addr_type : ha->type; - err = __hw_addr_add(to_list, ha->addr, addr_len, type); - if (err) - goto unroll; - } - return 0; - -unroll: - list_for_each_entry(ha2, &from_list->list, list) { - if (ha2 == ha) - break; - type = addr_type ? 
addr_type : ha2->type; - __hw_addr_del(to_list, ha2->addr, addr_len, type); - } - return err; -} - -static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, - struct netdev_hw_addr_list *from_list, - int addr_len, - unsigned char addr_type) -{ - struct netdev_hw_addr *ha; - unsigned char type; - - list_for_each_entry(ha, &from_list->list, list) { - type = addr_type ? addr_type : ha->type; - __hw_addr_del(to_list, ha->addr, addr_len, addr_type); - } -} - -static int __hw_addr_sync(struct netdev_hw_addr_list *to_list, - struct netdev_hw_addr_list *from_list, - int addr_len) -{ - int err = 0; - struct netdev_hw_addr *ha, *tmp; - - list_for_each_entry_safe(ha, tmp, &from_list->list, list) { - if (!ha->synced) { - err = __hw_addr_add(to_list, ha->addr, - addr_len, ha->type); - if (err) - break; - ha->synced = true; - ha->refcount++; - } else if (ha->refcount == 1) { - __hw_addr_del(to_list, ha->addr, addr_len, ha->type); - __hw_addr_del(from_list, ha->addr, addr_len, ha->type); - } - } - return err; -} - -static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, - struct netdev_hw_addr_list *from_list, - int addr_len) -{ - struct netdev_hw_addr *ha, *tmp; - - list_for_each_entry_safe(ha, tmp, &from_list->list, list) { - if (ha->synced) { - __hw_addr_del(to_list, ha->addr, - addr_len, ha->type); - ha->synced = false; - __hw_addr_del(from_list, ha->addr, - addr_len, ha->type); - } - } -} - -static void __hw_addr_flush(struct netdev_hw_addr_list *list) -{ - struct netdev_hw_addr *ha, *tmp; - - list_for_each_entry_safe(ha, tmp, &list->list, list) { - list_del_rcu(&ha->list); - call_rcu(&ha->rcu_head, ha_rcu_free); - } - list->count = 0; -} - -static void __hw_addr_init(struct netdev_hw_addr_list *list) -{ - INIT_LIST_HEAD(&list->list); - list->count = 0; -} - -/* Device addresses handling functions */ - -static void dev_addr_flush(struct net_device *dev) -{ - /* rtnl_mutex must be held here */ - - __hw_addr_flush(&dev->dev_addrs); - dev->dev_addr = NULL; -} - -static int dev_addr_init(struct net_device *dev) -{ - unsigned char addr[MAX_ADDR_LEN]; - struct netdev_hw_addr *ha; - int err; - - /* rtnl_mutex must be held here */ - - __hw_addr_init(&dev->dev_addrs); - memset(addr, 0, sizeof(addr)); - err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr), - NETDEV_HW_ADDR_T_LAN); - if (!err) { - /* - * Get the first (previously created) address from the list - * and set dev_addr pointer to this location. - */ - ha = list_first_entry(&dev->dev_addrs.list, - struct netdev_hw_addr, list); - dev->dev_addr = ha->addr; - } - return err; -} - -/** - * dev_addr_add - Add a device address - * @dev: device - * @addr: address to add - * @addr_type: address type - * - * Add a device address to the device or increase the reference count if - * it already exists. - * - * The caller must hold the rtnl_mutex. - */ -int dev_addr_add(struct net_device *dev, unsigned char *addr, - unsigned char addr_type) -{ - int err; - - ASSERT_RTNL(); - - err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type); - if (!err) - call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); - return err; -} -EXPORT_SYMBOL(dev_addr_add); - -/** - * dev_addr_del - Release a device address. - * @dev: device - * @addr: address to delete - * @addr_type: address type - * - * Release reference to a device address and remove it from the device - * if the reference count drops to zero. - * - * The caller must hold the rtnl_mutex. 
- */ -int dev_addr_del(struct net_device *dev, unsigned char *addr, - unsigned char addr_type) -{ - int err; - struct netdev_hw_addr *ha; - - ASSERT_RTNL(); - - /* - * We can not remove the first address from the list because - * dev->dev_addr points to that. - */ - ha = list_first_entry(&dev->dev_addrs.list, - struct netdev_hw_addr, list); - if (ha->addr == dev->dev_addr && ha->refcount == 1) - return -ENOENT; - - err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len, - addr_type); - if (!err) - call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); - return err; -} -EXPORT_SYMBOL(dev_addr_del); - -/** - * dev_addr_add_multiple - Add device addresses from another device - * @to_dev: device to which addresses will be added - * @from_dev: device from which addresses will be added - * @addr_type: address type - 0 means type will be used from from_dev - * - * Add device addresses of the one device to another. - ** - * The caller must hold the rtnl_mutex. - */ -int dev_addr_add_multiple(struct net_device *to_dev, - struct net_device *from_dev, - unsigned char addr_type) -{ - int err; - - ASSERT_RTNL(); - - if (from_dev->addr_len != to_dev->addr_len) - return -EINVAL; - err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, - to_dev->addr_len, addr_type); - if (!err) - call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); - return err; -} -EXPORT_SYMBOL(dev_addr_add_multiple); - -/** - * dev_addr_del_multiple - Delete device addresses by another device - * @to_dev: device where the addresses will be deleted - * @from_dev: device by which addresses the addresses will be deleted - * @addr_type: address type - 0 means type will used from from_dev - * - * Deletes addresses in to device by the list of addresses in from device. - * - * The caller must hold the rtnl_mutex. - */ -int dev_addr_del_multiple(struct net_device *to_dev, - struct net_device *from_dev, - unsigned char addr_type) -{ - ASSERT_RTNL(); - - if (from_dev->addr_len != to_dev->addr_len) - return -EINVAL; - __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, - to_dev->addr_len, addr_type); - call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); - return 0; -} -EXPORT_SYMBOL(dev_addr_del_multiple); - /* multicast addresses handling functions */ int __dev_addr_delete(struct dev_addr_list **list, int *count, @@ -4336,57 +4028,6 @@ int __dev_addr_add(struct dev_addr_list **list, int *count, return 0; } -/** - * dev_unicast_delete - Release secondary unicast address. - * @dev: device - * @addr: address to delete - * - * Release reference to a secondary unicast address and remove it - * from the device if the reference count drops to zero. - * - * The caller must hold the rtnl_mutex. - */ -int dev_unicast_delete(struct net_device *dev, void *addr) -{ - int err; - - ASSERT_RTNL(); - - netif_addr_lock_bh(dev); - err = __hw_addr_del(&dev->uc, addr, dev->addr_len, - NETDEV_HW_ADDR_T_UNICAST); - if (!err) - __dev_set_rx_mode(dev); - netif_addr_unlock_bh(dev); - return err; -} -EXPORT_SYMBOL(dev_unicast_delete); - -/** - * dev_unicast_add - add a secondary unicast address - * @dev: device - * @addr: address to add - * - * Add a secondary unicast address to the device or increase - * the reference count if it already exists. - * - * The caller must hold the rtnl_mutex. 
- */ -int dev_unicast_add(struct net_device *dev, void *addr) -{ - int err; - - ASSERT_RTNL(); - - netif_addr_lock_bh(dev); - err = __hw_addr_add(&dev->uc, addr, dev->addr_len, - NETDEV_HW_ADDR_T_UNICAST); - if (!err) - __dev_set_rx_mode(dev); - netif_addr_unlock_bh(dev); - return err; -} -EXPORT_SYMBOL(dev_unicast_add); int __dev_addr_sync(struct dev_addr_list **to, int *to_count, struct dev_addr_list **from, int *from_count) @@ -4436,71 +4077,6 @@ void __dev_addr_unsync(struct dev_addr_list **to, int *to_count, } EXPORT_SYMBOL_GPL(__dev_addr_unsync); -/** - * dev_unicast_sync - Synchronize device's unicast list to another device - * @to: destination device - * @from: source device - * - * Add newly added addresses to the destination device and release - * addresses that have no users left. The source device must be - * locked by netif_tx_lock_bh. - * - * This function is intended to be called from the dev->set_rx_mode - * function of layered software devices. - */ -int dev_unicast_sync(struct net_device *to, struct net_device *from) -{ - int err = 0; - - if (to->addr_len != from->addr_len) - return -EINVAL; - - netif_addr_lock_bh(to); - err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); - if (!err) - __dev_set_rx_mode(to); - netif_addr_unlock_bh(to); - return err; -} -EXPORT_SYMBOL(dev_unicast_sync); - -/** - * dev_unicast_unsync - Remove synchronized addresses from the destination device - * @to: destination device - * @from: source device - * - * Remove all addresses that were added to the destination device by - * dev_unicast_sync(). This function is intended to be called from the - * dev->stop function of layered software devices. - */ -void dev_unicast_unsync(struct net_device *to, struct net_device *from) -{ - if (to->addr_len != from->addr_len) - return; - - netif_addr_lock_bh(from); - netif_addr_lock(to); - __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); - __dev_set_rx_mode(to); - netif_addr_unlock(to); - netif_addr_unlock_bh(from); -} -EXPORT_SYMBOL(dev_unicast_unsync); - -void dev_unicast_flush(struct net_device *dev) -{ - netif_addr_lock_bh(dev); - __hw_addr_flush(&dev->uc); - netif_addr_unlock_bh(dev); -} -EXPORT_SYMBOL(dev_unicast_flush); - -static void dev_unicast_init(struct net_device *dev) -{ - __hw_addr_init(&dev->uc); -} - - static void __dev_addr_discard(struct dev_addr_list **list) { struct dev_addr_list *tmp; @@ -5153,7 +4729,7 @@ static void rollback_registered_many(struct list_head *head) /* * Flush the unicast and multicast chains */ - dev_unicast_flush(dev); + dev_uc_flush(dev); dev_addr_discard(dev); if (dev->netdev_ops->ndo_uninit) @@ -5734,7 +5310,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, if (dev_addr_init(dev)) goto free_rx; - dev_unicast_init(dev); + dev_uc_init(dev); dev_net_set(dev, &init_net); @@ -5968,7 +5544,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* * Flush the unicast and multicast chains */ - dev_unicast_flush(dev); + dev_uc_flush(dev); dev_addr_discard(dev); netdev_unregister_kobject(dev); diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c new file mode 100644 index 000000000000..7e52b6d18add --- /dev/null +++ b/net/core/dev_addr_lists.c @@ -0,0 +1,478 @@ +/* + * net/core/dev_addr_lists.c - Functions for handling net device lists + * Copyright (c) 2010 Jiri Pirko + * + * This file contains functions for working with unicast, multicast and device + * addresses lists. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include + +/* + * General list handling functions + */ + +static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr, + int addr_len, unsigned char addr_type) +{ + struct netdev_hw_addr *ha; + int alloc_size; + + if (addr_len > MAX_ADDR_LEN) + return -EINVAL; + + list_for_each_entry(ha, &list->list, list) { + if (!memcmp(ha->addr, addr, addr_len) && + ha->type == addr_type) { + ha->refcount++; + return 0; + } + } + + + alloc_size = sizeof(*ha); + if (alloc_size < L1_CACHE_BYTES) + alloc_size = L1_CACHE_BYTES; + ha = kmalloc(alloc_size, GFP_ATOMIC); + if (!ha) + return -ENOMEM; + memcpy(ha->addr, addr, addr_len); + ha->type = addr_type; + ha->refcount = 1; + ha->synced = false; + list_add_tail_rcu(&ha->list, &list->list); + list->count++; + return 0; +} + +static void ha_rcu_free(struct rcu_head *head) +{ + struct netdev_hw_addr *ha; + + ha = container_of(head, struct netdev_hw_addr, rcu_head); + kfree(ha); +} + +static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr, + int addr_len, unsigned char addr_type) +{ + struct netdev_hw_addr *ha; + + list_for_each_entry(ha, &list->list, list) { + if (!memcmp(ha->addr, addr, addr_len) && + (ha->type == addr_type || !addr_type)) { + if (--ha->refcount) + return 0; + list_del_rcu(&ha->list); + call_rcu(&ha->rcu_head, ha_rcu_free); + list->count--; + return 0; + } + } + return -ENOENT; +} + +static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, + int addr_len, + unsigned char addr_type) +{ + int err; + struct netdev_hw_addr *ha, *ha2; + unsigned char type; + + list_for_each_entry(ha, &from_list->list, list) { + type = addr_type ? addr_type : ha->type; + err = __hw_addr_add(to_list, ha->addr, addr_len, type); + if (err) + goto unroll; + } + return 0; + +unroll: + list_for_each_entry(ha2, &from_list->list, list) { + if (ha2 == ha) + break; + type = addr_type ? addr_type : ha2->type; + __hw_addr_del(to_list, ha2->addr, addr_len, type); + } + return err; +} + +static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, + int addr_len, + unsigned char addr_type) +{ + struct netdev_hw_addr *ha; + unsigned char type; + + list_for_each_entry(ha, &from_list->list, list) { + type = addr_type ? 
addr_type : ha->type; + __hw_addr_del(to_list, ha->addr, addr_len, addr_type); + } +} + +static int __hw_addr_sync(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, + int addr_len) +{ + int err = 0; + struct netdev_hw_addr *ha, *tmp; + + list_for_each_entry_safe(ha, tmp, &from_list->list, list) { + if (!ha->synced) { + err = __hw_addr_add(to_list, ha->addr, + addr_len, ha->type); + if (err) + break; + ha->synced = true; + ha->refcount++; + } else if (ha->refcount == 1) { + __hw_addr_del(to_list, ha->addr, addr_len, ha->type); + __hw_addr_del(from_list, ha->addr, addr_len, ha->type); + } + } + return err; +} + +static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, + int addr_len) +{ + struct netdev_hw_addr *ha, *tmp; + + list_for_each_entry_safe(ha, tmp, &from_list->list, list) { + if (ha->synced) { + __hw_addr_del(to_list, ha->addr, + addr_len, ha->type); + ha->synced = false; + __hw_addr_del(from_list, ha->addr, + addr_len, ha->type); + } + } +} + +static void __hw_addr_flush(struct netdev_hw_addr_list *list) +{ + struct netdev_hw_addr *ha, *tmp; + + list_for_each_entry_safe(ha, tmp, &list->list, list) { + list_del_rcu(&ha->list); + call_rcu(&ha->rcu_head, ha_rcu_free); + } + list->count = 0; +} + +static void __hw_addr_init(struct netdev_hw_addr_list *list) +{ + INIT_LIST_HEAD(&list->list); + list->count = 0; +} + +/* + * Device addresses handling functions + */ + +/** + * dev_addr_flush - Flush device address list + * @dev: device + * + * Flush device address list and reset ->dev_addr. + * + * The caller must hold the rtnl_mutex. + */ +void dev_addr_flush(struct net_device *dev) +{ + /* rtnl_mutex must be held here */ + + __hw_addr_flush(&dev->dev_addrs); + dev->dev_addr = NULL; +} +EXPORT_SYMBOL(dev_addr_flush); + +/** + * dev_addr_init - Init device address list + * @dev: device + * + * Init device address list and create the first element, + * used by ->dev_addr. + * + * The caller must hold the rtnl_mutex. + */ +int dev_addr_init(struct net_device *dev) +{ + unsigned char addr[MAX_ADDR_LEN]; + struct netdev_hw_addr *ha; + int err; + + /* rtnl_mutex must be held here */ + + __hw_addr_init(&dev->dev_addrs); + memset(addr, 0, sizeof(addr)); + err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr), + NETDEV_HW_ADDR_T_LAN); + if (!err) { + /* + * Get the first (previously created) address from the list + * and set dev_addr pointer to this location. + */ + ha = list_first_entry(&dev->dev_addrs.list, + struct netdev_hw_addr, list); + dev->dev_addr = ha->addr; + } + return err; +} +EXPORT_SYMBOL(dev_addr_init); + +/** + * dev_addr_add - Add a device address + * @dev: device + * @addr: address to add + * @addr_type: address type + * + * Add a device address to the device or increase the reference count if + * it already exists. + * + * The caller must hold the rtnl_mutex. + */ +int dev_addr_add(struct net_device *dev, unsigned char *addr, + unsigned char addr_type) +{ + int err; + + ASSERT_RTNL(); + + err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type); + if (!err) + call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + return err; +} +EXPORT_SYMBOL(dev_addr_add); + +/** + * dev_addr_del - Release a device address. + * @dev: device + * @addr: address to delete + * @addr_type: address type + * + * Release reference to a device address and remove it from the device + * if the reference count drops to zero. + * + * The caller must hold the rtnl_mutex. 
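For orientation, here is a minimal sketch of how a hypothetical in-kernel caller could use the dev_addr_add()/dev_addr_del() helpers documented above. The function name and the secondary address value are invented for illustration; only the helper signatures and the rtnl_mutex requirement come from this patch.

#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/rtnetlink.h>

/* Illustrative locally administered secondary address. */
static unsigned char example_secondary[ETH_ALEN] = {
	0x02, 0x00, 0x00, 0x00, 0x00, 0x01
};

static int example_add_secondary(struct net_device *dev)
{
	int err;

	/* dev_addr_add()/dev_addr_del() assert that rtnl_mutex is held. */
	rtnl_lock();
	err = dev_addr_add(dev, example_secondary, NETDEV_HW_ADDR_T_LAN);
	rtnl_unlock();
	return err;
}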
+ */ +int dev_addr_del(struct net_device *dev, unsigned char *addr, + unsigned char addr_type) +{ + int err; + struct netdev_hw_addr *ha; + + ASSERT_RTNL(); + + /* + * We can not remove the first address from the list because + * dev->dev_addr points to that. + */ + ha = list_first_entry(&dev->dev_addrs.list, + struct netdev_hw_addr, list); + if (ha->addr == dev->dev_addr && ha->refcount == 1) + return -ENOENT; + + err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len, + addr_type); + if (!err) + call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + return err; +} +EXPORT_SYMBOL(dev_addr_del); + +/** + * dev_addr_add_multiple - Add device addresses from another device + * @to_dev: device to which addresses will be added + * @from_dev: device from which addresses will be added + * @addr_type: address type - 0 means type will be used from from_dev + * + * Add device addresses of the one device to another. + ** + * The caller must hold the rtnl_mutex. + */ +int dev_addr_add_multiple(struct net_device *to_dev, + struct net_device *from_dev, + unsigned char addr_type) +{ + int err; + + ASSERT_RTNL(); + + if (from_dev->addr_len != to_dev->addr_len) + return -EINVAL; + err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, + to_dev->addr_len, addr_type); + if (!err) + call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); + return err; +} +EXPORT_SYMBOL(dev_addr_add_multiple); + +/** + * dev_addr_del_multiple - Delete device addresses by another device + * @to_dev: device where the addresses will be deleted + * @from_dev: device by which addresses the addresses will be deleted + * @addr_type: address type - 0 means type will used from from_dev + * + * Deletes addresses in to device by the list of addresses in from device. + * + * The caller must hold the rtnl_mutex. + */ +int dev_addr_del_multiple(struct net_device *to_dev, + struct net_device *from_dev, + unsigned char addr_type) +{ + ASSERT_RTNL(); + + if (from_dev->addr_len != to_dev->addr_len) + return -EINVAL; + __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, + to_dev->addr_len, addr_type); + call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); + return 0; +} +EXPORT_SYMBOL(dev_addr_del_multiple); + +/* + * Unicast list handling functions + */ + +/** + * dev_uc_add - Add a secondary unicast address + * @dev: device + * @addr: address to add + * + * Add a secondary unicast address to the device or increase + * the reference count if it already exists. + */ +int dev_uc_add(struct net_device *dev, unsigned char *addr) +{ + int err; + + netif_addr_lock_bh(dev); + err = __hw_addr_add(&dev->uc, addr, dev->addr_len, + NETDEV_HW_ADDR_T_UNICAST); + if (!err) + __dev_set_rx_mode(dev); + netif_addr_unlock_bh(dev); + return err; +} +EXPORT_SYMBOL(dev_uc_add); + +/** + * dev_uc_del - Release secondary unicast address. + * @dev: device + * @addr: address to delete + * + * Release reference to a secondary unicast address and remove it + * from the device if the reference count drops to zero. + */ +int dev_uc_del(struct net_device *dev, unsigned char *addr) +{ + int err; + + netif_addr_lock_bh(dev); + err = __hw_addr_del(&dev->uc, addr, dev->addr_len, + NETDEV_HW_ADDR_T_UNICAST); + if (!err) + __dev_set_rx_mode(dev); + netif_addr_unlock_bh(dev); + return err; +} +EXPORT_SYMBOL(dev_uc_del); + +/** + * dev_uc_sync - Synchronize device's unicast list to another device + * @to: destination device + * @from: source device + * + * Add newly added addresses to the destination device and release + * addresses that have no users left. 
The source device must be + * locked by netif_tx_lock_bh. + * + * This function is intended to be called from the dev->set_rx_mode + * function of layered software devices. + */ +int dev_uc_sync(struct net_device *to, struct net_device *from) +{ + int err = 0; + + if (to->addr_len != from->addr_len) + return -EINVAL; + + netif_addr_lock_bh(to); + err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); + if (!err) + __dev_set_rx_mode(to); + netif_addr_unlock_bh(to); + return err; +} +EXPORT_SYMBOL(dev_uc_sync); + +/** + * dev_uc_unsync - Remove synchronized addresses from the destination device + * @to: destination device + * @from: source device + * + * Remove all addresses that were added to the destination device by + * dev_uc_sync(). This function is intended to be called from the + * dev->stop function of layered software devices. + */ +void dev_uc_unsync(struct net_device *to, struct net_device *from) +{ + if (to->addr_len != from->addr_len) + return; + + netif_addr_lock_bh(from); + netif_addr_lock(to); + __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); + __dev_set_rx_mode(to); + netif_addr_unlock(to); + netif_addr_unlock_bh(from); +} +EXPORT_SYMBOL(dev_uc_unsync); + +/** + * dev_uc_flush - Flush unicast addresses + * @dev: device + * + * Flush unicast addresses. + */ +void dev_uc_flush(struct net_device *dev) +{ + netif_addr_lock_bh(dev); + __hw_addr_flush(&dev->uc); + netif_addr_unlock_bh(dev); +} +EXPORT_SYMBOL(dev_uc_flush); + +/** + * dev_uc_flush - Init unicast address list + * @dev: device + * + * Init unicast address list. + */ +void dev_uc_init(struct net_device *dev) +{ + __hw_addr_init(&dev->uc); +} +EXPORT_SYMBOL(dev_uc_init); + +/* + * Multicast list handling functions + */ + +/* To be filled here */ diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 2175e6d5cc8d..8fdca56bb08f 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -67,7 +67,7 @@ static int dsa_slave_open(struct net_device *dev) return -ENETDOWN; if (compare_ether_addr(dev->dev_addr, master->dev_addr)) { - err = dev_unicast_add(master, dev->dev_addr); + err = dev_uc_add(master, dev->dev_addr); if (err < 0) goto out; } @@ -90,7 +90,7 @@ clear_allmulti: dev_set_allmulti(master, -1); del_unicast: if (compare_ether_addr(dev->dev_addr, master->dev_addr)) - dev_unicast_delete(master, dev->dev_addr); + dev_uc_del(master, dev->dev_addr); out: return err; } @@ -101,14 +101,14 @@ static int dsa_slave_close(struct net_device *dev) struct net_device *master = p->parent->dst->master_netdev; dev_mc_unsync(master, dev); - dev_unicast_unsync(master, dev); + dev_uc_unsync(master, dev); if (dev->flags & IFF_ALLMULTI) dev_set_allmulti(master, -1); if (dev->flags & IFF_PROMISC) dev_set_promiscuity(master, -1); if (compare_ether_addr(dev->dev_addr, master->dev_addr)) - dev_unicast_delete(master, dev->dev_addr); + dev_uc_del(master, dev->dev_addr); return 0; } @@ -130,7 +130,7 @@ static void dsa_slave_set_rx_mode(struct net_device *dev) struct net_device *master = p->parent->dst->master_netdev; dev_mc_sync(master, dev); - dev_unicast_sync(master, dev); + dev_uc_sync(master, dev); } static int dsa_slave_set_mac_address(struct net_device *dev, void *a) @@ -147,13 +147,13 @@ static int dsa_slave_set_mac_address(struct net_device *dev, void *a) goto out; if (compare_ether_addr(addr->sa_data, master->dev_addr)) { - err = dev_unicast_add(master, addr->sa_data); + err = dev_uc_add(master, addr->sa_data); if (err < 0) return err; } if (compare_ether_addr(dev->dev_addr, master->dev_addr)) - dev_unicast_delete(master, dev->dev_addr); 
+ dev_uc_del(master, dev->dev_addr); out: memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 1612d417d10c..48c1e0ae565f 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1705,9 +1705,9 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, if (i->alen != dev->addr_len) return -EINVAL; if (what > 0) - return dev_unicast_add(dev, i->addr); + return dev_uc_add(dev, i->addr); else - return dev_unicast_delete(dev, i->addr); + return dev_uc_del(dev, i->addr); break; default: break; -- cgit v1.2.2 From 22bedad3ce112d5ca1eaf043d4990fa2ed698c87 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 1 Apr 2010 21:22:57 +0000 Subject: net: convert multicast list to list_head Converts the list and the core manipulating with it to be the same as uc_list. +uses two functions for adding/removing mc address (normal and "global" variant) instead of a function parameter. +removes dev_mcast.c completely. +exposes netdev_hw_addr_list_* macros along with __hw_addr_* functions for manipulation with lists on a sandbox (used in bonding and 80211 drivers) Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/802/garp.c | 4 +- net/appletalk/ddp.c | 2 +- net/bluetooth/bnep/netdev.c | 8 +- net/core/Makefile | 5 +- net/core/dev.c | 145 +----------------- net/core/dev_addr_lists.c | 305 ++++++++++++++++++++++++++++++++++--- net/core/dev_mcast.c | 232 ---------------------------- net/decnet/dn_dev.c | 12 +- net/ipv4/igmp.c | 4 +- net/ipv4/netfilter/ipt_CLUSTERIP.c | 4 +- net/ipv6/mcast.c | 4 +- net/mac80211/driver-ops.h | 8 +- net/mac80211/ieee80211_i.h | 3 +- net/mac80211/iface.c | 6 +- net/mac80211/main.c | 2 +- net/packet/af_packet.c | 4 +- 16 files changed, 319 insertions(+), 429 deletions(-) delete mode 100644 net/core/dev_mcast.c (limited to 'net') diff --git a/net/802/garp.c b/net/802/garp.c index 1dcb0660c49d..78cff9ec2cb4 100644 --- a/net/802/garp.c +++ b/net/802/garp.c @@ -575,7 +575,7 @@ int garp_init_applicant(struct net_device *dev, struct garp_application *appl) if (!app) goto err2; - err = dev_mc_add(dev, appl->proto.group_address, ETH_ALEN, 0); + err = dev_mc_add(dev, appl->proto.group_address); if (err < 0) goto err3; @@ -615,7 +615,7 @@ void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl garp_pdu_queue(app); garp_queue_xmit(app); - dev_mc_delete(dev, appl->proto.group_address, ETH_ALEN, 0); + dev_mc_del(dev, appl->proto.group_address); kfree(app); garp_release_port(dev); } diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 9fc4da56fb1d..1d15a60b23af 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -781,7 +781,7 @@ static int atif_ioctl(int cmd, void __user *arg) atrtr_create(&rtdef, dev); } } - dev_mc_add(dev, aarp_mcast, 6, 1); + dev_mc_add_global(dev, aarp_mcast); return 0; case SIOCGIFADDR: diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index 326ab453edb7..260a9507e542 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -87,7 +87,7 @@ static void bnep_net_set_mc_list(struct net_device *dev) memcpy(__skb_put(skb, ETH_ALEN), dev->broadcast, ETH_ALEN); r->len = htons(ETH_ALEN * 2); } else { - struct dev_mc_list *dmi; + struct netdev_hw_addr *ha; int i, len = skb->len; if (dev->flags & IFF_BROADCAST) { @@ -98,11 +98,11 @@ static void bnep_net_set_mc_list(struct net_device *dev) /* FIXME: We should group addresses here. 
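As a quick illustration of the caller-side change described in the commit message, a hypothetical driver snippet (names invented here, not taken from this series) showing the old three-argument multicast calls collapsing into the new two-argument dev_mc_add()/dev_mc_del(); the old "global" flag becomes the separate *_global variants.

#include <linux/netdevice.h>
#include <linux/etherdevice.h>

/* Illustrative multicast group address. */
static unsigned char example_mcast[ETH_ALEN] = {
	0x01, 0x00, 0x5e, 0x00, 0x00, 0x01
};

static int example_join(struct net_device *dev)
{
	/* old API: dev_mc_add(dev, example_mcast, ETH_ALEN, 0); */
	return dev_mc_add(dev, example_mcast);
}

static void example_leave(struct net_device *dev)
{
	/* old API: dev_mc_delete(dev, example_mcast, ETH_ALEN, 0); */
	dev_mc_del(dev, example_mcast);
}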
*/ i = 0; - netdev_for_each_mc_addr(dmi, dev) { + netdev_for_each_mc_addr(ha, dev) { if (i == BNEP_MAX_MULTICAST_FILTERS) break; - memcpy(__skb_put(skb, ETH_ALEN), dmi->dmi_addr, ETH_ALEN); - memcpy(__skb_put(skb, ETH_ALEN), dmi->dmi_addr, ETH_ALEN); + memcpy(__skb_put(skb, ETH_ALEN), ha->addr, ETH_ALEN); + memcpy(__skb_put(skb, ETH_ALEN), ha->addr, ETH_ALEN); } r->len = htons(skb->len - len); } diff --git a/net/core/Makefile b/net/core/Makefile index 0a899f1aadb9..51c3eec850ef 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -7,9 +7,8 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o -obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \ - neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ - dev_addr_lists.o +obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ + neighbour.o rtnetlink.o utils.o link_watch.o filter.o obj-$(CONFIG_XFRM) += flow.o obj-y += net-sysfs.o diff --git a/net/core/dev.c b/net/core/dev.c index 949c62dba719..2a9b7dd0bb6e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3968,140 +3968,6 @@ void dev_set_rx_mode(struct net_device *dev) netif_addr_unlock_bh(dev); } -/* multicast addresses handling functions */ - -int __dev_addr_delete(struct dev_addr_list **list, int *count, - void *addr, int alen, int glbl) -{ - struct dev_addr_list *da; - - for (; (da = *list) != NULL; list = &da->next) { - if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && - alen == da->da_addrlen) { - if (glbl) { - int old_glbl = da->da_gusers; - da->da_gusers = 0; - if (old_glbl == 0) - break; - } - if (--da->da_users) - return 0; - - *list = da->next; - kfree(da); - (*count)--; - return 0; - } - } - return -ENOENT; -} - -int __dev_addr_add(struct dev_addr_list **list, int *count, - void *addr, int alen, int glbl) -{ - struct dev_addr_list *da; - - for (da = *list; da != NULL; da = da->next) { - if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && - da->da_addrlen == alen) { - if (glbl) { - int old_glbl = da->da_gusers; - da->da_gusers = 1; - if (old_glbl) - return 0; - } - da->da_users++; - return 0; - } - } - - da = kzalloc(sizeof(*da), GFP_ATOMIC); - if (da == NULL) - return -ENOMEM; - memcpy(da->da_addr, addr, alen); - da->da_addrlen = alen; - da->da_users = 1; - da->da_gusers = glbl ? 
1 : 0; - da->next = *list; - *list = da; - (*count)++; - return 0; -} - - -int __dev_addr_sync(struct dev_addr_list **to, int *to_count, - struct dev_addr_list **from, int *from_count) -{ - struct dev_addr_list *da, *next; - int err = 0; - - da = *from; - while (da != NULL) { - next = da->next; - if (!da->da_synced) { - err = __dev_addr_add(to, to_count, - da->da_addr, da->da_addrlen, 0); - if (err < 0) - break; - da->da_synced = 1; - da->da_users++; - } else if (da->da_users == 1) { - __dev_addr_delete(to, to_count, - da->da_addr, da->da_addrlen, 0); - __dev_addr_delete(from, from_count, - da->da_addr, da->da_addrlen, 0); - } - da = next; - } - return err; -} -EXPORT_SYMBOL_GPL(__dev_addr_sync); - -void __dev_addr_unsync(struct dev_addr_list **to, int *to_count, - struct dev_addr_list **from, int *from_count) -{ - struct dev_addr_list *da, *next; - - da = *from; - while (da != NULL) { - next = da->next; - if (da->da_synced) { - __dev_addr_delete(to, to_count, - da->da_addr, da->da_addrlen, 0); - da->da_synced = 0; - __dev_addr_delete(from, from_count, - da->da_addr, da->da_addrlen, 0); - } - da = next; - } -} -EXPORT_SYMBOL_GPL(__dev_addr_unsync); - -static void __dev_addr_discard(struct dev_addr_list **list) -{ - struct dev_addr_list *tmp; - - while (*list != NULL) { - tmp = *list; - *list = tmp->next; - if (tmp->da_users > tmp->da_gusers) - printk("__dev_addr_discard: address leakage! " - "da_users=%d\n", tmp->da_users); - kfree(tmp); - } -} - -void dev_addr_discard(struct net_device *dev) -{ - netif_addr_lock_bh(dev); - - __dev_addr_discard(&dev->mc_list); - netdev_mc_count(dev) = 0; - - netif_addr_unlock_bh(dev); -} -EXPORT_SYMBOL(dev_addr_discard); - /** * dev_get_flags - get flags reported to userspace * @dev: device @@ -4412,8 +4278,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) return -EINVAL; if (!netif_device_present(dev)) return -ENODEV; - return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data, - dev->addr_len, 1); + return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); case SIOCDELMULTI: if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || @@ -4421,8 +4286,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) return -EINVAL; if (!netif_device_present(dev)) return -ENODEV; - return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data, - dev->addr_len, 1); + return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data); case SIOCSIFTXQLEN: if (ifr->ifr_qlen < 0) @@ -4730,7 +4594,7 @@ static void rollback_registered_many(struct list_head *head) * Flush the unicast and multicast chains */ dev_uc_flush(dev); - dev_addr_discard(dev); + dev_mc_flush(dev); if (dev->netdev_ops->ndo_uninit) dev->netdev_ops->ndo_uninit(dev); @@ -5310,6 +5174,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, if (dev_addr_init(dev)) goto free_rx; + dev_mc_init(dev); dev_uc_init(dev); dev_net_set(dev, &init_net); @@ -5545,7 +5410,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char * Flush the unicast and multicast chains */ dev_uc_flush(dev); - dev_addr_discard(dev); + dev_mc_flush(dev); netdev_unregister_kobject(dev); diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 7e52b6d18add..37d5975e18a3 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -19,8 +19,9 @@ * General list handling functions */ -static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr, - int addr_len, unsigned char addr_type) +static int __hw_addr_add_ex(struct 
netdev_hw_addr_list *list, + unsigned char *addr, int addr_len, + unsigned char addr_type, bool global) { struct netdev_hw_addr *ha; int alloc_size; @@ -31,6 +32,13 @@ static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr, list_for_each_entry(ha, &list->list, list) { if (!memcmp(ha->addr, addr, addr_len) && ha->type == addr_type) { + if (global) { + /* check if addr is already used as global */ + if (ha->global_use) + return 0; + else + ha->global_use = true; + } ha->refcount++; return 0; } @@ -46,12 +54,19 @@ static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr, memcpy(ha->addr, addr, addr_len); ha->type = addr_type; ha->refcount = 1; + ha->global_use = global; ha->synced = false; list_add_tail_rcu(&ha->list, &list->list); list->count++; return 0; } +static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr, + int addr_len, unsigned char addr_type) +{ + return __hw_addr_add_ex(list, addr, addr_len, addr_type, false); +} + static void ha_rcu_free(struct rcu_head *head) { struct netdev_hw_addr *ha; @@ -60,14 +75,21 @@ static void ha_rcu_free(struct rcu_head *head) kfree(ha); } -static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr, - int addr_len, unsigned char addr_type) +static int __hw_addr_del_ex(struct netdev_hw_addr_list *list, + unsigned char *addr, int addr_len, + unsigned char addr_type, bool global) { struct netdev_hw_addr *ha; list_for_each_entry(ha, &list->list, list) { if (!memcmp(ha->addr, addr, addr_len) && (ha->type == addr_type || !addr_type)) { + if (global) { + if (!ha->global_use) + break; + else + ha->global_use = false; + } if (--ha->refcount) return 0; list_del_rcu(&ha->list); @@ -79,10 +101,15 @@ static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr, return -ENOENT; } -static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, - struct netdev_hw_addr_list *from_list, - int addr_len, - unsigned char addr_type) +static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr, + int addr_len, unsigned char addr_type) +{ + return __hw_addr_del_ex(list, addr, addr_len, addr_type, false); +} + +int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, + int addr_len, unsigned char addr_type) { int err; struct netdev_hw_addr *ha, *ha2; @@ -105,11 +132,11 @@ unroll: } return err; } +EXPORT_SYMBOL(__hw_addr_add_multiple); -static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, - struct netdev_hw_addr_list *from_list, - int addr_len, - unsigned char addr_type) +void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, + int addr_len, unsigned char addr_type) { struct netdev_hw_addr *ha; unsigned char type; @@ -119,10 +146,11 @@ static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, __hw_addr_del(to_list, ha->addr, addr_len, addr_type); } } +EXPORT_SYMBOL(__hw_addr_del_multiple); -static int __hw_addr_sync(struct netdev_hw_addr_list *to_list, - struct netdev_hw_addr_list *from_list, - int addr_len) +int __hw_addr_sync(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, + int addr_len) { int err = 0; struct netdev_hw_addr *ha, *tmp; @@ -142,10 +170,11 @@ static int __hw_addr_sync(struct netdev_hw_addr_list *to_list, } return err; } +EXPORT_SYMBOL(__hw_addr_sync); -static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, - struct netdev_hw_addr_list *from_list, - int 
addr_len) +void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, + int addr_len) { struct netdev_hw_addr *ha, *tmp; @@ -159,8 +188,9 @@ static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, } } } +EXPORT_SYMBOL(__hw_addr_unsync); -static void __hw_addr_flush(struct netdev_hw_addr_list *list) +void __hw_addr_flush(struct netdev_hw_addr_list *list) { struct netdev_hw_addr *ha, *tmp; @@ -170,12 +200,14 @@ static void __hw_addr_flush(struct netdev_hw_addr_list *list) } list->count = 0; } +EXPORT_SYMBOL(__hw_addr_flush); -static void __hw_addr_init(struct netdev_hw_addr_list *list) +void __hw_addr_init(struct netdev_hw_addr_list *list) { INIT_LIST_HEAD(&list->list); list->count = 0; } +EXPORT_SYMBOL(__hw_addr_init); /* * Device addresses handling functions @@ -475,4 +507,235 @@ EXPORT_SYMBOL(dev_uc_init); * Multicast list handling functions */ -/* To be filled here */ +static int __dev_mc_add(struct net_device *dev, unsigned char *addr, + bool global) +{ + int err; + + netif_addr_lock_bh(dev); + err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len, + NETDEV_HW_ADDR_T_MULTICAST, global); + if (!err) + __dev_set_rx_mode(dev); + netif_addr_unlock_bh(dev); + return err; +} +/** + * dev_mc_add - Add a multicast address + * @dev: device + * @addr: address to add + * + * Add a multicast address to the device or increase + * the reference count if it already exists. + */ +int dev_mc_add(struct net_device *dev, unsigned char *addr) +{ + return __dev_mc_add(dev, addr, false); +} +EXPORT_SYMBOL(dev_mc_add); + +/** + * dev_mc_add_global - Add a global multicast address + * @dev: device + * @addr: address to add + * + * Add a global multicast address to the device. + */ +int dev_mc_add_global(struct net_device *dev, unsigned char *addr) +{ + return __dev_mc_add(dev, addr, true); +} +EXPORT_SYMBOL(dev_mc_add_global); + +static int __dev_mc_del(struct net_device *dev, unsigned char *addr, + bool global) +{ + int err; + + netif_addr_lock_bh(dev); + err = __hw_addr_del_ex(&dev->mc, addr, dev->addr_len, + NETDEV_HW_ADDR_T_MULTICAST, global); + if (!err) + __dev_set_rx_mode(dev); + netif_addr_unlock_bh(dev); + return err; +} + +/** + * dev_mc_del - Delete a multicast address. + * @dev: device + * @addr: address to delete + * + * Release reference to a multicast address and remove it + * from the device if the reference count drops to zero. + */ +int dev_mc_del(struct net_device *dev, unsigned char *addr) +{ + return __dev_mc_del(dev, addr, false); +} +EXPORT_SYMBOL(dev_mc_del); + +/** + * dev_mc_del_global - Delete a global multicast address. + * @dev: device + * @addr: address to delete + * + * Release reference to a multicast address and remove it + * from the device if the reference count drops to zero. + */ +int dev_mc_del_global(struct net_device *dev, unsigned char *addr) +{ + return __dev_mc_del(dev, addr, true); +} +EXPORT_SYMBOL(dev_mc_del_global); + +/** + * dev_mc_sync - Synchronize device's unicast list to another device + * @to: destination device + * @from: source device + * + * Add newly added addresses to the destination device and release + * addresses that have no users left. The source device must be + * locked by netif_tx_lock_bh. + * + * This function is intended to be called from the dev->set_multicast_list + * or dev->set_rx_mode function of layered software devices. 
+ */ +int dev_mc_sync(struct net_device *to, struct net_device *from) +{ + int err = 0; + + if (to->addr_len != from->addr_len) + return -EINVAL; + + netif_addr_lock_bh(to); + err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len); + if (!err) + __dev_set_rx_mode(to); + netif_addr_unlock_bh(to); + return err; +} +EXPORT_SYMBOL(dev_mc_sync); + +/** + * dev_mc_unsync - Remove synchronized addresses from the destination device + * @to: destination device + * @from: source device + * + * Remove all addresses that were added to the destination device by + * dev_mc_sync(). This function is intended to be called from the + * dev->stop function of layered software devices. + */ +void dev_mc_unsync(struct net_device *to, struct net_device *from) +{ + if (to->addr_len != from->addr_len) + return; + + netif_addr_lock_bh(from); + netif_addr_lock(to); + __hw_addr_unsync(&to->mc, &from->mc, to->addr_len); + __dev_set_rx_mode(to); + netif_addr_unlock(to); + netif_addr_unlock_bh(from); +} +EXPORT_SYMBOL(dev_mc_unsync); + +/** + * dev_mc_flush - Flush multicast addresses + * @dev: device + * + * Flush multicast addresses. + */ +void dev_mc_flush(struct net_device *dev) +{ + netif_addr_lock_bh(dev); + __hw_addr_flush(&dev->mc); + netif_addr_unlock_bh(dev); +} +EXPORT_SYMBOL(dev_mc_flush); + +/** + * dev_mc_flush - Init multicast address list + * @dev: device + * + * Init multicast address list. + */ +void dev_mc_init(struct net_device *dev) +{ + __hw_addr_init(&dev->mc); +} +EXPORT_SYMBOL(dev_mc_init); + +#ifdef CONFIG_PROC_FS +#include +#include + +static int dev_mc_seq_show(struct seq_file *seq, void *v) +{ + struct netdev_hw_addr *ha; + struct net_device *dev = v; + + if (v == SEQ_START_TOKEN) + return 0; + + netif_addr_lock_bh(dev); + netdev_for_each_mc_addr(ha, dev) { + int i; + + seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex, + dev->name, ha->refcount, ha->global_use); + + for (i = 0; i < dev->addr_len; i++) + seq_printf(seq, "%02x", ha->addr[i]); + + seq_putc(seq, '\n'); + } + netif_addr_unlock_bh(dev); + return 0; +} + +static const struct seq_operations dev_mc_seq_ops = { + .start = dev_seq_start, + .next = dev_seq_next, + .stop = dev_seq_stop, + .show = dev_mc_seq_show, +}; + +static int dev_mc_seq_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &dev_mc_seq_ops, + sizeof(struct seq_net_private)); +} + +static const struct file_operations dev_mc_seq_fops = { + .owner = THIS_MODULE, + .open = dev_mc_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + +#endif + +static int __net_init dev_mc_net_init(struct net *net) +{ + if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops)) + return -ENOMEM; + return 0; +} + +static void __net_exit dev_mc_net_exit(struct net *net) +{ + proc_net_remove(net, "dev_mcast"); +} + +static struct pernet_operations __net_initdata dev_mc_net_ops = { + .init = dev_mc_net_init, + .exit = dev_mc_net_exit, +}; + +void __init dev_mcast_init(void) +{ + register_pernet_subsys(&dev_mc_net_ops); +} + diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c deleted file mode 100644 index 3dc295beb483..000000000000 --- a/net/core/dev_mcast.c +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Linux NET3: Multicast List maintenance. - * - * Authors: - * Tim Kordas - * Richard Underwood - * - * Stir fried together from the IP multicast and CAP patches above - * Alan Cox - * - * Fixes: - * Alan Cox : Update the device on a real delete - * rather than any time but... - * Alan Cox : IFF_ALLMULTI support. 
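To make the "layered software devices" usage described in the dev_mc_sync()/dev_mc_unsync() comments concrete, here is a hedged sketch of an upper device pushing its unicast and multicast lists down to the lower device it sits on, in the same spirit as the DSA slave changes elsewhere in this series; struct example_priv and the callback names are assumptions for illustration only.

#include <linux/netdevice.h>

struct example_priv {
	struct net_device *lower;	/* real device we sit on top of */
};

static void example_set_rx_mode(struct net_device *dev)
{
	struct example_priv *priv = netdev_priv(dev);

	/* Propagate our address lists to the lower device. */
	dev_uc_sync(priv->lower, dev);
	dev_mc_sync(priv->lower, dev);
}

static int example_stop(struct net_device *dev)
{
	struct example_priv *priv = netdev_priv(dev);

	/* Drop the addresses we added via the sync calls above. */
	dev_uc_unsync(priv->lower, dev);
	dev_mc_unsync(priv->lower, dev);
	return 0;
}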
- * Alan Cox : New format set_multicast_list() calls. - * Gleb Natapov : Remove dev_mc_lock. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -/* - * Device multicast list maintenance. - * - * This is used both by IP and by the user level maintenance functions. - * Unlike BSD we maintain a usage count on a given multicast address so - * that a casual user application can add/delete multicasts used by - * protocols without doing damage to the protocols when it deletes the - * entries. It also helps IP as it tracks overlapping maps. - * - * Device mc lists are changed by bh at least if IPv6 is enabled, - * so that it must be bh protected. - * - * We block accesses to device mc filters with netif_tx_lock. - */ - -/* - * Delete a device level multicast - */ - -int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl) -{ - int err; - - netif_addr_lock_bh(dev); - err = __dev_addr_delete(&dev->mc_list, &dev->mc_count, - addr, alen, glbl); - if (!err) { - /* - * We have altered the list, so the card - * loaded filter is now wrong. Fix it - */ - - __dev_set_rx_mode(dev); - } - netif_addr_unlock_bh(dev); - return err; -} - -/* - * Add a device level multicast - */ - -int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) -{ - int err; - - netif_addr_lock_bh(dev); - if (alen != dev->addr_len) - err = -EINVAL; - else - err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl); - if (!err) - __dev_set_rx_mode(dev); - netif_addr_unlock_bh(dev); - return err; -} - -/** - * dev_mc_sync - Synchronize device's multicast list to another device - * @to: destination device - * @from: source device - * - * Add newly added addresses to the destination device and release - * addresses that have no users left. The source device must be - * locked by netif_tx_lock_bh. - * - * This function is intended to be called from the dev->set_multicast_list - * or dev->set_rx_mode function of layered software devices. - */ -int dev_mc_sync(struct net_device *to, struct net_device *from) -{ - int err = 0; - - netif_addr_lock_bh(to); - err = __dev_addr_sync(&to->mc_list, &to->mc_count, - &from->mc_list, &from->mc_count); - if (!err) - __dev_set_rx_mode(to); - netif_addr_unlock_bh(to); - - return err; -} -EXPORT_SYMBOL(dev_mc_sync); - - -/** - * dev_mc_unsync - Remove synchronized addresses from the destination - * device - * @to: destination device - * @from: source device - * - * Remove all addresses that were added to the destination device by - * dev_mc_sync(). This function is intended to be called from the - * dev->stop function of layered software devices. 
- */ -void dev_mc_unsync(struct net_device *to, struct net_device *from) -{ - netif_addr_lock_bh(from); - netif_addr_lock(to); - - __dev_addr_unsync(&to->mc_list, &to->mc_count, - &from->mc_list, &from->mc_count); - __dev_set_rx_mode(to); - - netif_addr_unlock(to); - netif_addr_unlock_bh(from); -} -EXPORT_SYMBOL(dev_mc_unsync); - -#ifdef CONFIG_PROC_FS -static int dev_mc_seq_show(struct seq_file *seq, void *v) -{ - struct dev_addr_list *m; - struct net_device *dev = v; - - if (v == SEQ_START_TOKEN) - return 0; - - netif_addr_lock_bh(dev); - for (m = dev->mc_list; m; m = m->next) { - int i; - - seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex, - dev->name, m->dmi_users, m->dmi_gusers); - - for (i = 0; i < m->dmi_addrlen; i++) - seq_printf(seq, "%02x", m->dmi_addr[i]); - - seq_putc(seq, '\n'); - } - netif_addr_unlock_bh(dev); - return 0; -} - -static const struct seq_operations dev_mc_seq_ops = { - .start = dev_seq_start, - .next = dev_seq_next, - .stop = dev_seq_stop, - .show = dev_mc_seq_show, -}; - -static int dev_mc_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &dev_mc_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations dev_mc_seq_fops = { - .owner = THIS_MODULE, - .open = dev_mc_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -#endif - -static int __net_init dev_mc_net_init(struct net *net) -{ - if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops)) - return -ENOMEM; - return 0; -} - -static void __net_exit dev_mc_net_exit(struct net *net) -{ - proc_net_remove(net, "dev_mcast"); -} - -static struct pernet_operations __net_initdata dev_mc_net_ops = { - .init = dev_mc_net_init, - .exit = dev_mc_net_exit, -}; - -void __init dev_mcast_init(void) -{ - register_pernet_subsys(&dev_mc_net_ops); -} - -EXPORT_SYMBOL(dev_mc_add); -EXPORT_SYMBOL(dev_mc_delete); diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 238af093495b..f3e4734d207f 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -349,7 +349,7 @@ static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr **ifap, int de if (dn_db->dev->type == ARPHRD_ETHER) { if (ifa1->ifa_local != dn_eth2dn(dev->dev_addr)) { dn_dn2eth(mac_addr, ifa1->ifa_local); - dev_mc_delete(dev, mac_addr, ETH_ALEN, 0); + dev_mc_del(dev, mac_addr); } } @@ -380,7 +380,7 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa) if (dev->type == ARPHRD_ETHER) { if (ifa->ifa_local != dn_eth2dn(dev->dev_addr)) { dn_dn2eth(mac_addr, ifa->ifa_local); - dev_mc_add(dev, mac_addr, ETH_ALEN, 0); + dev_mc_add(dev, mac_addr); } } @@ -1000,9 +1000,9 @@ static int dn_eth_up(struct net_device *dev) struct dn_dev *dn_db = dev->dn_ptr; if (dn_db->parms.forwarding == 0) - dev_mc_add(dev, dn_rt_all_end_mcast, ETH_ALEN, 0); + dev_mc_add(dev, dn_rt_all_end_mcast); else - dev_mc_add(dev, dn_rt_all_rt_mcast, ETH_ALEN, 0); + dev_mc_add(dev, dn_rt_all_rt_mcast); dn_db->use_long = 1; @@ -1014,9 +1014,9 @@ static void dn_eth_down(struct net_device *dev) struct dn_dev *dn_db = dev->dn_ptr; if (dn_db->parms.forwarding == 0) - dev_mc_delete(dev, dn_rt_all_end_mcast, ETH_ALEN, 0); + dev_mc_del(dev, dn_rt_all_end_mcast); else - dev_mc_delete(dev, dn_rt_all_rt_mcast, ETH_ALEN, 0); + dev_mc_del(dev, dn_rt_all_rt_mcast); } static void dn_dev_set_timer(struct net_device *dev); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 63bf298ca109..51824c42b775 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -997,7 +997,7 @@ static 
void ip_mc_filter_add(struct in_device *in_dev, __be32 addr) --ANK */ if (arp_mc_map(addr, buf, dev, 0) == 0) - dev_mc_add(dev, buf, dev->addr_len, 0); + dev_mc_add(dev, buf); } /* @@ -1010,7 +1010,7 @@ static void ip_mc_filter_del(struct in_device *in_dev, __be32 addr) struct net_device *dev = in_dev->dev; if (arp_mc_map(addr, buf, dev, 0) == 0) - dev_mc_delete(dev, buf, dev->addr_len, 0); + dev_mc_del(dev, buf); } #ifdef CONFIG_IP_MULTICAST diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 0886f96c736b..a2208b7b313d 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -87,7 +87,7 @@ clusterip_config_entry_put(struct clusterip_config *c) list_del(&c->list); write_unlock_bh(&clusterip_lock); - dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0); + dev_mc_del(c->dev, c->clustermac); dev_put(c->dev); /* In case anyone still accesses the file, the open/close @@ -396,7 +396,7 @@ static bool clusterip_tg_check(const struct xt_tgchk_param *par) dev_put(dev); return false; } - dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); + dev_mc_add(config->dev, config->clustermac); } } cipinfo->config = config; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index bcd971915969..37d1868c0064 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -714,7 +714,7 @@ static void igmp6_group_added(struct ifmcaddr6 *mc) if (!(mc->mca_flags&MAF_LOADED)) { mc->mca_flags |= MAF_LOADED; if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0) - dev_mc_add(dev, buf, dev->addr_len, 0); + dev_mc_add(dev, buf); } spin_unlock_bh(&mc->mca_lock); @@ -740,7 +740,7 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc) if (mc->mca_flags&MAF_LOADED) { mc->mca_flags &= ~MAF_LOADED; if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0) - dev_mc_delete(dev, buf, dev->addr_len, 0); + dev_mc_del(dev, buf); } if (mc->mca_flags & MAF_NOREPORT) diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index c3d844093a2f..9179196da264 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -84,16 +84,14 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local, } static inline u64 drv_prepare_multicast(struct ieee80211_local *local, - int mc_count, - struct dev_addr_list *mc_list) + struct netdev_hw_addr_list *mc_list) { u64 ret = 0; if (local->ops->prepare_multicast) - ret = local->ops->prepare_multicast(&local->hw, mc_count, - mc_list); + ret = local->ops->prepare_multicast(&local->hw, mc_list); - trace_drv_prepare_multicast(local, mc_count, ret); + trace_drv_prepare_multicast(local, mc_list->count, ret); return ret; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index ab369e2a5282..7fdacf9408b1 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -646,8 +646,7 @@ struct ieee80211_local { struct work_struct recalc_smps; /* aggregated multicast list */ - struct dev_addr_list *mc_list; - int mc_count; + struct netdev_hw_addr_list mc_list; bool tim_in_locked_section; /* see ieee80211_beacon_get() */ diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index b4ec59a8dc03..00f3a93c6b04 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -412,8 +412,7 @@ static int ieee80211_stop(struct net_device *dev) netif_addr_lock_bh(dev); spin_lock_bh(&local->filter_lock); - __dev_addr_unsync(&local->mc_list, &local->mc_count, - &dev->mc_list, &dev->mc_count); + __hw_addr_unsync(&local->mc_list, &dev->mc, dev->addr_len); spin_unlock_bh(&local->filter_lock); 
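Since drivers such as mac80211 now receive a struct netdev_hw_addr_list instead of a dev_addr_list chain, a short sketch of walking that list in a prepare_multicast-style callback may help; the function name and the CRC-based hash filter are illustrative assumptions, only the list layout comes from this series.

#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/crc32.h>

static u64 example_prepare_multicast(struct netdev_hw_addr_list *mc_list)
{
	struct netdev_hw_addr *ha;
	u64 filter = 0;

	/* Fold each multicast address into a 64-bit hash filter. */
	list_for_each_entry(ha, &mc_list->list, list)
		filter |= 1ULL << (ether_crc_le(ETH_ALEN, ha->addr) >> 26);

	return filter;
}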
netif_addr_unlock_bh(dev); @@ -596,8 +595,7 @@ static void ieee80211_set_multicast_list(struct net_device *dev) sdata->flags ^= IEEE80211_SDATA_PROMISC; } spin_lock_bh(&local->filter_lock); - __dev_addr_sync(&local->mc_list, &local->mc_count, - &dev->mc_list, &dev->mc_count); + __hw_addr_sync(&local->mc_list, &dev->mc, dev->addr_len); spin_unlock_bh(&local->filter_lock); ieee80211_queue_work(&local->hw, &local->reconfig_filter); } diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 06c33b68d8e5..84ad249a4e2e 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -71,7 +71,7 @@ void ieee80211_configure_filter(struct ieee80211_local *local) spin_lock_bh(&local->filter_lock); changed_flags = local->filter_flags ^ new_flags; - mc = drv_prepare_multicast(local, local->mc_count, local->mc_list); + mc = drv_prepare_multicast(local, &local->mc_list); spin_unlock_bh(&local->filter_lock); /* be a bit nasty */ diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 48c1e0ae565f..b0f037cc899c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1691,9 +1691,9 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, if (i->alen != dev->addr_len) return -EINVAL; if (what > 0) - return dev_mc_add(dev, i->addr, i->alen, 0); + return dev_mc_add(dev, i->addr); else - return dev_mc_delete(dev, i->addr, i->alen, 0); + return dev_mc_del(dev, i->addr); break; case PACKET_MR_PROMISC: return dev_set_promiscuity(dev, what); -- cgit v1.2.2 From 21b4aaa14329db793832e865f15000c5c0192ac3 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Fri, 2 Apr 2010 06:18:28 +0000 Subject: l2tp: Relocate pppol2tp driver to new net/l2tp directory This patch moves the existing pppol2tp driver from drivers/net into a new net/l2tp directory, which is where the upcoming L2TPv3 code will live. The existing CONFIG_PPPOL2TP config option is left in its current place to avoid "make oldconfig" issues when an existing pppol2tp user takes this change. (This is the same approach used for the pppoatm driver, which moved to net/atm.) There are no code changes. The existing drivers/net/pppol2tp.c is simply moved to net/l2tp. Signed-off-by: James Chapman Reviewed-by: Randy Dunlap Signed-off-by: David S. Miller --- net/Makefile | 1 + net/l2tp/Makefile | 5 + net/l2tp/pppol2tp.c | 2680 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 2686 insertions(+) create mode 100644 net/l2tp/Makefile create mode 100644 net/l2tp/pppol2tp.c (limited to 'net') diff --git a/net/Makefile b/net/Makefile index a5eae27aa42d..13ca77e0eb08 100644 --- a/net/Makefile +++ b/net/Makefile @@ -40,6 +40,7 @@ obj-$(CONFIG_BT) += bluetooth/ obj-$(CONFIG_SUNRPC) += sunrpc/ obj-$(CONFIG_AF_RXRPC) += rxrpc/ obj-$(CONFIG_ATM) += atm/ +obj-$(CONFIG_PPPOL2TP) += l2tp/ obj-$(CONFIG_DECNET) += decnet/ obj-$(CONFIG_ECONET) += econet/ obj-$(CONFIG_PHONET) += phonet/ diff --git a/net/l2tp/Makefile b/net/l2tp/Makefile new file mode 100644 index 000000000000..9af41e898a04 --- /dev/null +++ b/net/l2tp/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the L2TP. 
+# + +obj-$(CONFIG_PPPOL2TP) += pppol2tp.o diff --git a/net/l2tp/pppol2tp.c b/net/l2tp/pppol2tp.c new file mode 100644 index 000000000000..449a9825200d --- /dev/null +++ b/net/l2tp/pppol2tp.c @@ -0,0 +1,2680 @@ +/***************************************************************************** + * Linux PPP over L2TP (PPPoX/PPPoL2TP) Sockets + * + * PPPoX --- Generic PPP encapsulation socket family + * PPPoL2TP --- PPP over L2TP (RFC 2661) + * + * Version: 1.0.0 + * + * Authors: Martijn van Oosterhout + * James Chapman (jchapman@katalix.com) + * Contributors: + * Michal Ostrowski + * Arnaldo Carvalho de Melo + * David S. Miller (davem@redhat.com) + * + * License: + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +/* This driver handles only L2TP data frames; control frames are handled by a + * userspace application. + * + * To send data in an L2TP session, userspace opens a PPPoL2TP socket and + * attaches it to a bound UDP socket with local tunnel_id / session_id and + * peer tunnel_id / session_id set. Data can then be sent or received using + * regular socket sendmsg() / recvmsg() calls. Kernel parameters of the socket + * can be read or modified using ioctl() or [gs]etsockopt() calls. + * + * When a PPPoL2TP socket is connected with local and peer session_id values + * zero, the socket is treated as a special tunnel management socket. + * + * Here's example userspace code to create a socket for sending/receiving data + * over an L2TP session:- + * + * struct sockaddr_pppol2tp sax; + * int fd; + * int session_fd; + * + * fd = socket(AF_PPPOX, SOCK_DGRAM, PX_PROTO_OL2TP); + * + * sax.sa_family = AF_PPPOX; + * sax.sa_protocol = PX_PROTO_OL2TP; + * sax.pppol2tp.fd = tunnel_fd; // bound UDP socket + * sax.pppol2tp.addr.sin_addr.s_addr = addr->sin_addr.s_addr; + * sax.pppol2tp.addr.sin_port = addr->sin_port; + * sax.pppol2tp.addr.sin_family = AF_INET; + * sax.pppol2tp.s_tunnel = tunnel_id; + * sax.pppol2tp.s_session = session_id; + * sax.pppol2tp.d_tunnel = peer_tunnel_id; + * sax.pppol2tp.d_session = peer_session_id; + * + * session_fd = connect(fd, (struct sockaddr *)&sax, sizeof(sax)); + * + * A pppd plugin that allows PPP traffic to be carried over L2TP using + * this driver is available from the OpenL2TP project at + * http://openl2tp.sourceforge.net. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +#define PPPOL2TP_DRV_VERSION "V1.0" + +/* L2TP header constants */ +#define L2TP_HDRFLAG_T 0x8000 +#define L2TP_HDRFLAG_L 0x4000 +#define L2TP_HDRFLAG_S 0x0800 +#define L2TP_HDRFLAG_O 0x0200 +#define L2TP_HDRFLAG_P 0x0100 + +#define L2TP_HDR_VER_MASK 0x000F +#define L2TP_HDR_VER 0x0002 + +/* Space for UDP, L2TP and PPP headers */ +#define PPPOL2TP_HEADER_OVERHEAD 40 + +/* Just some random numbers */ +#define L2TP_TUNNEL_MAGIC 0x42114DDA +#define L2TP_SESSION_MAGIC 0x0C04EB7D + +#define PPPOL2TP_HASH_BITS 4 +#define PPPOL2TP_HASH_SIZE (1 << PPPOL2TP_HASH_BITS) + +/* Default trace flags */ +#define PPPOL2TP_DEFAULT_DEBUG_FLAGS 0 + +#define PRINTK(_mask, _type, _lvl, _fmt, args...) \ + do { \ + if ((_mask) & (_type)) \ + printk(_lvl "PPPOL2TP: " _fmt, ##args); \ + } while(0) + +/* Number of bytes to build transmit L2TP headers. + * Unfortunately the size is different depending on whether sequence numbers + * are enabled. + */ +#define PPPOL2TP_L2TP_HDR_SIZE_SEQ 10 +#define PPPOL2TP_L2TP_HDR_SIZE_NOSEQ 6 + +struct pppol2tp_tunnel; + +/* Describes a session. It is the sk_user_data field in the PPPoL2TP + * socket. Contains information to determine incoming packets and transmit + * outgoing ones. + */ +struct pppol2tp_session +{ + int magic; /* should be + * L2TP_SESSION_MAGIC */ + int owner; /* pid that opened the socket */ + + struct sock *sock; /* Pointer to the session + * PPPoX socket */ + struct sock *tunnel_sock; /* Pointer to the tunnel UDP + * socket */ + + struct pppol2tp_addr tunnel_addr; /* Description of tunnel */ + + struct pppol2tp_tunnel *tunnel; /* back pointer to tunnel + * context */ + + char name[20]; /* "sess xxxxx/yyyyy", where + * x=tunnel_id, y=session_id */ + int mtu; + int mru; + int flags; /* accessed by PPPIOCGFLAGS. + * Unused. */ + unsigned recv_seq:1; /* expect receive packets with + * sequence numbers? */ + unsigned send_seq:1; /* send packets with sequence + * numbers? */ + unsigned lns_mode:1; /* behave as LNS? LAC enables + * sequence numbers under + * control of LNS. */ + int debug; /* bitmask of debug message + * categories */ + int reorder_timeout; /* configured reorder timeout + * (in jiffies) */ + u16 nr; /* session NR state (receive) */ + u16 ns; /* session NR state (send) */ + struct sk_buff_head reorder_q; /* receive reorder queue */ + struct pppol2tp_ioc_stats stats; + struct hlist_node hlist; /* Hash list node */ +}; + +/* The sk_user_data field of the tunnel's UDP socket. 
It contains info to track + * all the associated sessions so incoming packets can be sorted out + */ +struct pppol2tp_tunnel +{ + int magic; /* Should be L2TP_TUNNEL_MAGIC */ + rwlock_t hlist_lock; /* protect session_hlist */ + struct hlist_head session_hlist[PPPOL2TP_HASH_SIZE]; + /* hashed list of sessions, + * hashed by id */ + int debug; /* bitmask of debug message + * categories */ + char name[12]; /* "tunl xxxxx" */ + struct pppol2tp_ioc_stats stats; + + void (*old_sk_destruct)(struct sock *); + + struct sock *sock; /* Parent socket */ + struct list_head list; /* Keep a list of all open + * prepared sockets */ + struct net *pppol2tp_net; /* the net we belong to */ + + atomic_t ref_count; +}; + +/* Private data stored for received packets in the skb. + */ +struct pppol2tp_skb_cb { + u16 ns; + u16 nr; + u16 has_seq; + u16 length; + unsigned long expires; +}; + +#define PPPOL2TP_SKB_CB(skb) ((struct pppol2tp_skb_cb *) &skb->cb[sizeof(struct inet_skb_parm)]) + +static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb); +static void pppol2tp_tunnel_free(struct pppol2tp_tunnel *tunnel); + +static atomic_t pppol2tp_tunnel_count; +static atomic_t pppol2tp_session_count; +static struct ppp_channel_ops pppol2tp_chan_ops = { pppol2tp_xmit , NULL }; +static const struct proto_ops pppol2tp_ops; + +/* per-net private data for this module */ +static int pppol2tp_net_id __read_mostly; +struct pppol2tp_net { + struct list_head pppol2tp_tunnel_list; + rwlock_t pppol2tp_tunnel_list_lock; +}; + +static inline struct pppol2tp_net *pppol2tp_pernet(struct net *net) +{ + BUG_ON(!net); + + return net_generic(net, pppol2tp_net_id); +} + +/* Helpers to obtain tunnel/session contexts from sockets. + */ +static inline struct pppol2tp_session *pppol2tp_sock_to_session(struct sock *sk) +{ + struct pppol2tp_session *session; + + if (sk == NULL) + return NULL; + + sock_hold(sk); + session = (struct pppol2tp_session *)(sk->sk_user_data); + if (session == NULL) { + sock_put(sk); + goto out; + } + + BUG_ON(session->magic != L2TP_SESSION_MAGIC); +out: + return session; +} + +static inline struct pppol2tp_tunnel *pppol2tp_sock_to_tunnel(struct sock *sk) +{ + struct pppol2tp_tunnel *tunnel; + + if (sk == NULL) + return NULL; + + sock_hold(sk); + tunnel = (struct pppol2tp_tunnel *)(sk->sk_user_data); + if (tunnel == NULL) { + sock_put(sk); + goto out; + } + + BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC); +out: + return tunnel; +} + +/* Tunnel reference counts. Incremented per session that is added to + * the tunnel. + */ +static inline void pppol2tp_tunnel_inc_refcount(struct pppol2tp_tunnel *tunnel) +{ + atomic_inc(&tunnel->ref_count); +} + +static inline void pppol2tp_tunnel_dec_refcount(struct pppol2tp_tunnel *tunnel) +{ + if (atomic_dec_and_test(&tunnel->ref_count)) + pppol2tp_tunnel_free(tunnel); +} + +/* Session hash list. + * The session_id SHOULD be random according to RFC2661, but several + * L2TP implementations (Cisco and Microsoft) use incrementing + * session_ids. So we do a real hash on the session_id, rather than a + * simple bitmask. 
+ */ +static inline struct hlist_head * +pppol2tp_session_id_hash(struct pppol2tp_tunnel *tunnel, u16 session_id) +{ + unsigned long hash_val = (unsigned long) session_id; + return &tunnel->session_hlist[hash_long(hash_val, PPPOL2TP_HASH_BITS)]; +} + +/* Lookup a session by id + */ +static struct pppol2tp_session * +pppol2tp_session_find(struct pppol2tp_tunnel *tunnel, u16 session_id) +{ + struct hlist_head *session_list = + pppol2tp_session_id_hash(tunnel, session_id); + struct pppol2tp_session *session; + struct hlist_node *walk; + + read_lock_bh(&tunnel->hlist_lock); + hlist_for_each_entry(session, walk, session_list, hlist) { + if (session->tunnel_addr.s_session == session_id) { + read_unlock_bh(&tunnel->hlist_lock); + return session; + } + } + read_unlock_bh(&tunnel->hlist_lock); + + return NULL; +} + +/* Lookup a tunnel by id + */ +static struct pppol2tp_tunnel *pppol2tp_tunnel_find(struct net *net, u16 tunnel_id) +{ + struct pppol2tp_tunnel *tunnel; + struct pppol2tp_net *pn = pppol2tp_pernet(net); + + read_lock_bh(&pn->pppol2tp_tunnel_list_lock); + list_for_each_entry(tunnel, &pn->pppol2tp_tunnel_list, list) { + if (tunnel->stats.tunnel_id == tunnel_id) { + read_unlock_bh(&pn->pppol2tp_tunnel_list_lock); + return tunnel; + } + } + read_unlock_bh(&pn->pppol2tp_tunnel_list_lock); + + return NULL; +} + +/***************************************************************************** + * Receive data handling + *****************************************************************************/ + +/* Queue a skb in order. We come here only if the skb has an L2TP sequence + * number. + */ +static void pppol2tp_recv_queue_skb(struct pppol2tp_session *session, struct sk_buff *skb) +{ + struct sk_buff *skbp; + struct sk_buff *tmp; + u16 ns = PPPOL2TP_SKB_CB(skb)->ns; + + spin_lock_bh(&session->reorder_q.lock); + skb_queue_walk_safe(&session->reorder_q, skbp, tmp) { + if (PPPOL2TP_SKB_CB(skbp)->ns > ns) { + __skb_queue_before(&session->reorder_q, skbp, skb); + PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG, + "%s: pkt %hu, inserted before %hu, reorder_q len=%d\n", + session->name, ns, PPPOL2TP_SKB_CB(skbp)->ns, + skb_queue_len(&session->reorder_q)); + session->stats.rx_oos_packets++; + goto out; + } + } + + __skb_queue_tail(&session->reorder_q, skb); + +out: + spin_unlock_bh(&session->reorder_q.lock); +} + +/* Dequeue a single skb. + */ +static void pppol2tp_recv_dequeue_skb(struct pppol2tp_session *session, struct sk_buff *skb) +{ + struct pppol2tp_tunnel *tunnel = session->tunnel; + int length = PPPOL2TP_SKB_CB(skb)->length; + struct sock *session_sock = NULL; + + /* We're about to requeue the skb, so return resources + * to its current owner (a socket receive buffer). + */ + skb_orphan(skb); + + tunnel->stats.rx_packets++; + tunnel->stats.rx_bytes += length; + session->stats.rx_packets++; + session->stats.rx_bytes += length; + + if (PPPOL2TP_SKB_CB(skb)->has_seq) { + /* Bump our Nr */ + session->nr++; + PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG, + "%s: updated nr to %hu\n", session->name, session->nr); + } + + /* If the socket is bound, send it in to PPP's input queue. Otherwise + * queue it on the session socket. 
+ */ + session_sock = session->sock; + if (session_sock->sk_state & PPPOX_BOUND) { + struct pppox_sock *po; + PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG, + "%s: recv %d byte data frame, passing to ppp\n", + session->name, length); + + /* We need to forget all info related to the L2TP packet + * gathered in the skb as we are going to reuse the same + * skb for the inner packet. + * Namely we need to: + * - reset xfrm (IPSec) information as it applies to + * the outer L2TP packet and not to the inner one + * - release the dst to force a route lookup on the inner + * IP packet since skb->dst currently points to the dst + * of the UDP tunnel + * - reset netfilter information as it doesn't apply + * to the inner packet either + */ + secpath_reset(skb); + skb_dst_drop(skb); + nf_reset(skb); + + po = pppox_sk(session_sock); + ppp_input(&po->chan, skb); + } else { + PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_INFO, + "%s: socket not bound\n", session->name); + + /* Not bound. Nothing we can do, so discard. */ + session->stats.rx_errors++; + kfree_skb(skb); + } + + sock_put(session->sock); +} + +/* Dequeue skbs from the session's reorder_q, subject to packet order. + * Skbs that have been in the queue for too long are simply discarded. + */ +static void pppol2tp_recv_dequeue(struct pppol2tp_session *session) +{ + struct sk_buff *skb; + struct sk_buff *tmp; + + /* If the pkt at the head of the queue has the nr that we + * expect to send up next, dequeue it and any other + * in-sequence packets behind it. + */ + spin_lock_bh(&session->reorder_q.lock); + skb_queue_walk_safe(&session->reorder_q, skb, tmp) { + if (time_after(jiffies, PPPOL2TP_SKB_CB(skb)->expires)) { + session->stats.rx_seq_discards++; + session->stats.rx_errors++; + PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG, + "%s: oos pkt %hu len %d discarded (too old), " + "waiting for %hu, reorder_q_len=%d\n", + session->name, PPPOL2TP_SKB_CB(skb)->ns, + PPPOL2TP_SKB_CB(skb)->length, session->nr, + skb_queue_len(&session->reorder_q)); + __skb_unlink(skb, &session->reorder_q); + kfree_skb(skb); + sock_put(session->sock); + continue; + } + + if (PPPOL2TP_SKB_CB(skb)->has_seq) { + if (PPPOL2TP_SKB_CB(skb)->ns != session->nr) { + PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG, + "%s: holding oos pkt %hu len %d, " + "waiting for %hu, reorder_q_len=%d\n", + session->name, PPPOL2TP_SKB_CB(skb)->ns, + PPPOL2TP_SKB_CB(skb)->length, session->nr, + skb_queue_len(&session->reorder_q)); + goto out; + } + } + __skb_unlink(skb, &session->reorder_q); + + /* Process the skb. We release the queue lock while we + * do so to let other contexts process the queue. + */ + spin_unlock_bh(&session->reorder_q.lock); + pppol2tp_recv_dequeue_skb(session, skb); + spin_lock_bh(&session->reorder_q.lock); + } + +out: + spin_unlock_bh(&session->reorder_q.lock); +} + +static inline int pppol2tp_verify_udp_checksum(struct sock *sk, + struct sk_buff *skb) +{ + struct udphdr *uh = udp_hdr(skb); + u16 ulen = ntohs(uh->len); + struct inet_sock *inet; + __wsum psum; + + if (sk->sk_no_check || skb_csum_unnecessary(skb) || !uh->check) + return 0; + + inet = inet_sk(sk); + psum = csum_tcpudp_nofold(inet->inet_saddr, inet->inet_daddr, ulen, + IPPROTO_UDP, 0); + + if ((skb->ip_summed == CHECKSUM_COMPLETE) && + !csum_fold(csum_add(psum, skb->csum))) + return 0; + + skb->csum = psum; + + return __skb_checksum_complete(skb); +} + +/* Internal receive frame. Do the real work of receiving an L2TP data frame + * here. The skb is not on a list when we get here. 
+ * Returns 0 if the packet was a data packet and was successfully passed on. + * Returns 1 if the packet was not a good data packet and could not be + * forwarded. All such packets are passed up to userspace to deal with. + */ +static int pppol2tp_recv_core(struct sock *sock, struct sk_buff *skb) +{ + struct pppol2tp_session *session = NULL; + struct pppol2tp_tunnel *tunnel; + unsigned char *ptr, *optr; + u16 hdrflags; + u16 tunnel_id, session_id; + int length; + int offset; + + tunnel = pppol2tp_sock_to_tunnel(sock); + if (tunnel == NULL) + goto no_tunnel; + + if (tunnel->sock && pppol2tp_verify_udp_checksum(tunnel->sock, skb)) + goto discard_bad_csum; + + /* UDP always verifies the packet length. */ + __skb_pull(skb, sizeof(struct udphdr)); + + /* Short packet? */ + if (!pskb_may_pull(skb, 12)) { + PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_INFO, + "%s: recv short packet (len=%d)\n", tunnel->name, skb->len); + goto error; + } + + /* Point to L2TP header */ + optr = ptr = skb->data; + + /* Get L2TP header flags */ + hdrflags = ntohs(*(__be16*)ptr); + + /* Trace packet contents, if enabled */ + if (tunnel->debug & PPPOL2TP_MSG_DATA) { + length = min(16u, skb->len); + if (!pskb_may_pull(skb, length)) + goto error; + + printk(KERN_DEBUG "%s: recv: ", tunnel->name); + + offset = 0; + do { + printk(" %02X", ptr[offset]); + } while (++offset < length); + + printk("\n"); + } + + /* Get length of L2TP packet */ + length = skb->len; + + /* If type is control packet, it is handled by userspace. */ + if (hdrflags & L2TP_HDRFLAG_T) { + PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG, + "%s: recv control packet, len=%d\n", tunnel->name, length); + goto error; + } + + /* Skip flags */ + ptr += 2; + + /* If length is present, skip it */ + if (hdrflags & L2TP_HDRFLAG_L) + ptr += 2; + + /* Extract tunnel and session ID */ + tunnel_id = ntohs(*(__be16 *) ptr); + ptr += 2; + session_id = ntohs(*(__be16 *) ptr); + ptr += 2; + + /* Find the session context */ + session = pppol2tp_session_find(tunnel, session_id); + if (!session) { + /* Not found? Pass to userspace to deal with */ + PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_INFO, + "%s: no socket found (%hu/%hu). Passing up.\n", + tunnel->name, tunnel_id, session_id); + goto error; + } + sock_hold(session->sock); + + /* The ref count on the socket was increased by the above call since + * we now hold a pointer to the session. Take care to do sock_put() + * when exiting this function from now on... + */ + + /* Handle the optional sequence numbers. If we are the LAC, + * enable/disable sequence numbers under the control of the LNS. If + * no sequence numbers present but we were expecting them, discard + * frame. + */ + if (hdrflags & L2TP_HDRFLAG_S) { + u16 ns, nr; + ns = ntohs(*(__be16 *) ptr); + ptr += 2; + nr = ntohs(*(__be16 *) ptr); + ptr += 2; + + /* Received a packet with sequence numbers. If we're the LNS, + * check if we sre sending sequence numbers and if not, + * configure it so. + */ + if ((!session->lns_mode) && (!session->send_seq)) { + PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_INFO, + "%s: requested to enable seq numbers by LNS\n", + session->name); + session->send_seq = -1; + } + + /* Store L2TP info in the skb */ + PPPOL2TP_SKB_CB(skb)->ns = ns; + PPPOL2TP_SKB_CB(skb)->nr = nr; + PPPOL2TP_SKB_CB(skb)->has_seq = 1; + + PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG, + "%s: recv data ns=%hu, nr=%hu, session nr=%hu\n", + session->name, ns, nr, session->nr); + } else { + /* No sequence numbers. 
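For reference, the header that pppol2tp_recv_core() steps through above is the RFC 2661 data header: a 16-bit flags/version word, an optional 2-byte length (L bit), the 16-bit tunnel and session IDs, optional Ns/Nr (S bit) and an optional offset field (O bit). A self-contained parser for that layout might look like the sketch below; the struct name, constants and get_be16() helper are redefined locally rather than taken from the patch.

#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include <stdio.h>
#include <arpa/inet.h>          /* ntohs() */

#define HDR_T 0x8000            /* control message */
#define HDR_L 0x4000            /* length present */
#define HDR_S 0x0800            /* Ns/Nr present */
#define HDR_O 0x0200            /* offset present */

struct l2tp_v2_hdr_info {
        int is_control;
        uint16_t tunnel_id, session_id;
        int has_seq;
        uint16_t ns, nr;
        size_t payload_off;     /* offset of payload within buf */
};

static uint16_t get_be16(const uint8_t *p)
{
        uint16_t v;

        memcpy(&v, p, 2);       /* avoid unaligned 16-bit loads */
        return ntohs(v);
}

/* Returns 0 on success, -1 if the buffer is too short. */
static int parse_l2tpv2(const uint8_t *buf, size_t len,
                        struct l2tp_v2_hdr_info *out)
{
        size_t off = 0;
        uint16_t flags;

        if (len < 6)
                return -1;
        flags = get_be16(buf); off += 2;
        out->is_control = !!(flags & HDR_T);
        if (flags & HDR_L)
                off += 2;                       /* skip length field */
        if (len < off + 4)
                return -1;
        out->tunnel_id  = get_be16(buf + off); off += 2;
        out->session_id = get_be16(buf + off); off += 2;
        out->has_seq = !!(flags & HDR_S);
        if (out->has_seq) {
                if (len < off + 4)
                        return -1;
                out->ns = get_be16(buf + off); off += 2;
                out->nr = get_be16(buf + off); off += 2;
        }
        if (flags & HDR_O) {                    /* skip pad of given size */
                if (len < off + 2)
                        return -1;
                off += 2 + get_be16(buf + off);
        }
        if (off > len)
                return -1;
        out->payload_off = off;
        return 0;
}

int main(void)
{
        /* data packet: S bit set, tunnel 1, session 2, ns=nr=0, then FF 03 */
        const uint8_t pkt[] = { 0x08, 0x02, 0x00, 0x01, 0x00, 0x02,
                                0x00, 0x00, 0x00, 0x00, 0xff, 0x03 };
        struct l2tp_v2_hdr_info info;

        if (!parse_l2tpv2(pkt, sizeof(pkt), &info))
                printf("tid=%u sid=%u payload at %zu\n",
                       (unsigned)info.tunnel_id, (unsigned)info.session_id,
                       info.payload_off);
        return 0;
}

The memcpy()-based get_be16() is the cautious way to read possibly unaligned 16-bit fields; the driver makes the same point about skb->data when it checks for the PPP header bytes further down.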
+ * If user has configured mandatory sequence numbers, discard. + */ + if (session->recv_seq) { + PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_WARNING, + "%s: recv data has no seq numbers when required. " + "Discarding\n", session->name); + session->stats.rx_seq_discards++; + goto discard; + } + + /* If we're the LAC and we're sending sequence numbers, the + * LNS has requested that we no longer send sequence numbers. + * If we're the LNS and we're sending sequence numbers, the + * LAC is broken. Discard the frame. + */ + if ((!session->lns_mode) && (session->send_seq)) { + PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_INFO, + "%s: requested to disable seq numbers by LNS\n", + session->name); + session->send_seq = 0; + } else if (session->send_seq) { + PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_WARNING, + "%s: recv data has no seq numbers when required. " + "Discarding\n", session->name); + session->stats.rx_seq_discards++; + goto discard; + } + + /* Store L2TP info in the skb */ + PPPOL2TP_SKB_CB(skb)->has_seq = 0; + } + + /* If offset bit set, skip it. */ + if (hdrflags & L2TP_HDRFLAG_O) { + offset = ntohs(*(__be16 *)ptr); + ptr += 2 + offset; + } + + offset = ptr - optr; + if (!pskb_may_pull(skb, offset)) + goto discard; + + __skb_pull(skb, offset); + + /* Skip PPP header, if present. In testing, Microsoft L2TP clients + * don't send the PPP header (PPP header compression enabled), but + * other clients can include the header. So we cope with both cases + * here. The PPP header is always FF03 when using L2TP. + * + * Note that skb->data[] isn't dereferenced from a u16 ptr here since + * the field may be unaligned. + */ + if (!pskb_may_pull(skb, 2)) + goto discard; + + if ((skb->data[0] == 0xff) && (skb->data[1] == 0x03)) + skb_pull(skb, 2); + + /* Prepare skb for adding to the session's reorder_q. Hold + * packets for max reorder_timeout or 1 second if not + * reordering. + */ + PPPOL2TP_SKB_CB(skb)->length = length; + PPPOL2TP_SKB_CB(skb)->expires = jiffies + + (session->reorder_timeout ? session->reorder_timeout : HZ); + + /* Add packet to the session's receive queue. Reordering is done here, if + * enabled. Saved L2TP protocol info is stored in skb->sb[]. + */ + if (PPPOL2TP_SKB_CB(skb)->has_seq) { + if (session->reorder_timeout != 0) { + /* Packet reordering enabled. Add skb to session's + * reorder queue, in order of ns. + */ + pppol2tp_recv_queue_skb(session, skb); + } else { + /* Packet reordering disabled. Discard out-of-sequence + * packets + */ + if (PPPOL2TP_SKB_CB(skb)->ns != session->nr) { + session->stats.rx_seq_discards++; + PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG, + "%s: oos pkt %hu len %d discarded, " + "waiting for %hu, reorder_q_len=%d\n", + session->name, PPPOL2TP_SKB_CB(skb)->ns, + PPPOL2TP_SKB_CB(skb)->length, session->nr, + skb_queue_len(&session->reorder_q)); + goto discard; + } + skb_queue_tail(&session->reorder_q, skb); + } + } else { + /* No sequence numbers. Add the skb to the tail of the + * reorder queue. This ensures that it will be + * delivered after all previous sequenced skbs. + */ + skb_queue_tail(&session->reorder_q, skb); + } + + /* Try to dequeue as many skbs from reorder_q as we can. 
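Condensed, the queueing decision at the end of the receive path above has only three outcomes: unsequenced packets go straight to the tail of reorder_q, sequenced packets are inserted in ns order when a reorder timeout is configured, and with reordering disabled only the exact next expected sequence number is accepted. A small decision helper (hypothetical names, not part of the patch) makes that explicit:

#include <stdint.h>
#include <stdio.h>

enum rx_action { RX_QUEUE_TAIL, RX_QUEUE_ORDERED, RX_DROP_OOS };

/* 'ns' is the packet's sequence number, 'nr' the next number the
 * session expects to deliver. */
static enum rx_action rx_classify(int has_seq, unsigned long reorder_timeout,
                                  uint16_t ns, uint16_t nr)
{
        if (!has_seq)
                return RX_QUEUE_TAIL;     /* unsequenced: arrival order */
        if (reorder_timeout)
                return RX_QUEUE_ORDERED;  /* hold and reorder by ns */
        return (ns == nr) ? RX_QUEUE_TAIL : RX_DROP_OOS;
}

int main(void)
{
        printf("%d %d %d\n",
               rx_classify(0, 0, 0, 0),         /* RX_QUEUE_TAIL */
               rx_classify(1, 100, 7, 5),       /* RX_QUEUE_ORDERED */
               rx_classify(1, 0, 7, 5));        /* RX_DROP_OOS */
        return 0;
}

Everything the driver then does (ordered insert, tail append, rx_seq_discards accounting) hangs off which of these three cases applies.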
*/ + pppol2tp_recv_dequeue(session); + sock_put(sock); + + return 0; + +discard: + session->stats.rx_errors++; + kfree_skb(skb); + sock_put(session->sock); + sock_put(sock); + + return 0; + +discard_bad_csum: + LIMIT_NETDEBUG("%s: UDP: bad checksum\n", tunnel->name); + UDP_INC_STATS_USER(&init_net, UDP_MIB_INERRORS, 0); + tunnel->stats.rx_errors++; + kfree_skb(skb); + sock_put(sock); + + return 0; + +error: + /* Put UDP header back */ + __skb_push(skb, sizeof(struct udphdr)); + sock_put(sock); + +no_tunnel: + return 1; +} + +/* UDP encapsulation receive handler. See net/ipv4/udp.c. + * Return codes: + * 0 : success. + * <0: error + * >0: skb should be passed up to userspace as UDP. + */ +static int pppol2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) +{ + struct pppol2tp_tunnel *tunnel; + + tunnel = pppol2tp_sock_to_tunnel(sk); + if (tunnel == NULL) + goto pass_up; + + PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG, + "%s: received %d bytes\n", tunnel->name, skb->len); + + if (pppol2tp_recv_core(sk, skb)) + goto pass_up_put; + + sock_put(sk); + return 0; + +pass_up_put: + sock_put(sk); +pass_up: + return 1; +} + +/* Receive message. This is the recvmsg for the PPPoL2TP socket. + */ +static int pppol2tp_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t len, + int flags) +{ + int err; + struct sk_buff *skb; + struct sock *sk = sock->sk; + + err = -EIO; + if (sk->sk_state & PPPOX_BOUND) + goto end; + + msg->msg_namelen = 0; + + err = 0; + skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, + flags & MSG_DONTWAIT, &err); + if (!skb) + goto end; + + if (len > skb->len) + len = skb->len; + else if (len < skb->len) + msg->msg_flags |= MSG_TRUNC; + + err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len); + if (likely(err == 0)) + err = len; + + kfree_skb(skb); +end: + return err; +} + +/************************************************************************ + * Transmit handling + ***********************************************************************/ + +/* Tell how big L2TP headers are for a particular session. This + * depends on whether sequence numbers are being used. + */ +static inline int pppol2tp_l2tp_header_len(struct pppol2tp_session *session) +{ + if (session->send_seq) + return PPPOL2TP_L2TP_HDR_SIZE_SEQ; + + return PPPOL2TP_L2TP_HDR_SIZE_NOSEQ; +} + +/* Build an L2TP header for the session into the buffer provided. + */ +static void pppol2tp_build_l2tp_header(struct pppol2tp_session *session, + void *buf) +{ + __be16 *bufp = buf; + u16 flags = L2TP_HDR_VER; + + if (session->send_seq) + flags |= L2TP_HDRFLAG_S; + + /* Setup L2TP header. + * FIXME: Can this ever be unaligned? Is direct dereferencing of + * 16-bit header fields safe here for all architectures? + */ + *bufp++ = htons(flags); + *bufp++ = htons(session->tunnel_addr.d_tunnel); + *bufp++ = htons(session->tunnel_addr.d_session); + if (session->send_seq) { + *bufp++ = htons(session->ns); + *bufp++ = 0; + session->ns++; + PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG, + "%s: updated ns to %hu\n", session->name, session->ns); + } +} + +/* This is the sendmsg for the PPPoL2TP pppol2tp_session socket. We come here + * when a user application does a sendmsg() on the session socket. L2TP and + * PPP headers must be inserted into the user's data. 
+ */ +static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, + size_t total_len) +{ + static const unsigned char ppph[2] = { 0xff, 0x03 }; + struct sock *sk = sock->sk; + struct inet_sock *inet; + __wsum csum; + struct sk_buff *skb; + int error; + int hdr_len; + struct pppol2tp_session *session; + struct pppol2tp_tunnel *tunnel; + struct udphdr *uh; + unsigned int len; + struct sock *sk_tun; + u16 udp_len; + + error = -ENOTCONN; + if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) + goto error; + + /* Get session and tunnel contexts */ + error = -EBADF; + session = pppol2tp_sock_to_session(sk); + if (session == NULL) + goto error; + + sk_tun = session->tunnel_sock; + tunnel = pppol2tp_sock_to_tunnel(sk_tun); + if (tunnel == NULL) + goto error_put_sess; + + /* What header length is configured for this session? */ + hdr_len = pppol2tp_l2tp_header_len(session); + + /* Allocate a socket buffer */ + error = -ENOMEM; + skb = sock_wmalloc(sk, NET_SKB_PAD + sizeof(struct iphdr) + + sizeof(struct udphdr) + hdr_len + + sizeof(ppph) + total_len, + 0, GFP_KERNEL); + if (!skb) + goto error_put_sess_tun; + + /* Reserve space for headers. */ + skb_reserve(skb, NET_SKB_PAD); + skb_reset_network_header(skb); + skb_reserve(skb, sizeof(struct iphdr)); + skb_reset_transport_header(skb); + + /* Build UDP header */ + inet = inet_sk(sk_tun); + udp_len = hdr_len + sizeof(ppph) + total_len; + uh = (struct udphdr *) skb->data; + uh->source = inet->inet_sport; + uh->dest = inet->inet_dport; + uh->len = htons(udp_len); + uh->check = 0; + skb_put(skb, sizeof(struct udphdr)); + + /* Build L2TP header */ + pppol2tp_build_l2tp_header(session, skb->data); + skb_put(skb, hdr_len); + + /* Add PPP header */ + skb->data[0] = ppph[0]; + skb->data[1] = ppph[1]; + skb_put(skb, 2); + + /* Copy user data into skb */ + error = memcpy_fromiovec(skb->data, m->msg_iov, total_len); + if (error < 0) { + kfree_skb(skb); + goto error_put_sess_tun; + } + skb_put(skb, total_len); + + /* Calculate UDP checksum if configured to do so */ + if (sk_tun->sk_no_check == UDP_CSUM_NOXMIT) + skb->ip_summed = CHECKSUM_NONE; + else if (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) { + skb->ip_summed = CHECKSUM_COMPLETE; + csum = skb_checksum(skb, 0, udp_len, 0); + uh->check = csum_tcpudp_magic(inet->inet_saddr, + inet->inet_daddr, + udp_len, IPPROTO_UDP, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + } else { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + uh->check = ~csum_tcpudp_magic(inet->inet_saddr, + inet->inet_daddr, + udp_len, IPPROTO_UDP, 0); + } + + /* Debug */ + if (session->send_seq) + PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG, + "%s: send %Zd bytes, ns=%hu\n", session->name, + total_len, session->ns - 1); + else + PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG, + "%s: send %Zd bytes\n", session->name, total_len); + + if (session->debug & PPPOL2TP_MSG_DATA) { + int i; + unsigned char *datap = skb->data; + + printk(KERN_DEBUG "%s: xmit:", session->name); + for (i = 0; i < total_len; i++) { + printk(" %02X", *datap++); + if (i == 15) { + printk(" ..."); + break; + } + } + printk("\n"); + } + + /* Queue the packet to IP for output */ + len = skb->len; + error = ip_queue_xmit(skb, 1); + + /* Update stats */ + if (error >= 0) { + tunnel->stats.tx_packets++; + tunnel->stats.tx_bytes += len; + session->stats.tx_packets++; + session->stats.tx_bytes += len; + } else 
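The CHECKSUM_COMPLETE branch above computes the normal UDP checksum in software: a ones-complement sum over the IPv4 pseudo-header (source and destination address, protocol 17, UDP length) followed by the UDP header and payload, with an all-zero result transmitted as 0xffff. Outside the kernel, without the csum_* helpers, the same calculation looks like the sketch below (illustrative only; addresses are passed as raw bytes to sidestep byte-order questions, and the result should be stored into the header MSB first):

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/* RFC 1071 style.  'udp' points at the UDP header (checksum field set
 * to 0) followed by the payload; 'len' is the UDP length. */
static uint16_t udp_checksum(const uint8_t saddr[4], const uint8_t daddr[4],
                             const uint8_t *udp, size_t len)
{
        uint32_t sum = 0;
        size_t i;

        /* IPv4 pseudo-header: source, destination, protocol, UDP length */
        sum += (saddr[0] << 8) | saddr[1];
        sum += (saddr[2] << 8) | saddr[3];
        sum += (daddr[0] << 8) | daddr[1];
        sum += (daddr[2] << 8) | daddr[3];
        sum += 17;                      /* IPPROTO_UDP */
        sum += (uint32_t)len;

        /* UDP header plus data, 16 bits at a time */
        for (i = 0; i + 1 < len; i += 2)
                sum += (udp[i] << 8) | udp[i + 1];
        if (len & 1)
                sum += udp[len - 1] << 8;       /* odd trailing byte */

        while (sum >> 16)               /* fold carries */
                sum = (sum & 0xffff) + (sum >> 16);

        sum = ~sum & 0xffff;
        return sum ? (uint16_t)sum : 0xffff;    /* 0 is sent as 0xffff */
}

int main(void)
{
        /* 8-byte UDP header (src 1701, dst 1701, len 9, csum 0) + "x" */
        uint8_t pkt[9] = { 0x06, 0xa5, 0x06, 0xa5, 0x00, 0x09, 0, 0, 'x' };
        uint8_t sa[4] = { 192, 0, 2, 1 }, da[4] = { 192, 0, 2, 2 };

        printf("checksum = 0x%04x\n",
               (unsigned)udp_checksum(sa, da, pkt, sizeof(pkt)));
        return 0;
}

The CHECKSUM_PARTIAL branch defers the same sum to hardware by recording where the checksum field lives (csum_start/csum_offset) instead of computing it.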
{ + tunnel->stats.tx_errors++; + session->stats.tx_errors++; + } + + return error; + +error_put_sess_tun: + sock_put(session->tunnel_sock); +error_put_sess: + sock_put(sk); +error: + return error; +} + +/* Automatically called when the skb is freed. + */ +static void pppol2tp_sock_wfree(struct sk_buff *skb) +{ + sock_put(skb->sk); +} + +/* For data skbs that we transmit, we associate with the tunnel socket + * but don't do accounting. + */ +static inline void pppol2tp_skb_set_owner_w(struct sk_buff *skb, struct sock *sk) +{ + sock_hold(sk); + skb->sk = sk; + skb->destructor = pppol2tp_sock_wfree; +} + +/* Transmit function called by generic PPP driver. Sends PPP frame + * over PPPoL2TP socket. + * + * This is almost the same as pppol2tp_sendmsg(), but rather than + * being called with a msghdr from userspace, it is called with a skb + * from the kernel. + * + * The supplied skb from ppp doesn't have enough headroom for the + * insertion of L2TP, UDP and IP headers so we need to allocate more + * headroom in the skb. This will create a cloned skb. But we must be + * careful in the error case because the caller will expect to free + * the skb it supplied, not our cloned skb. So we take care to always + * leave the original skb unfreed if we return an error. + */ +static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) +{ + static const u8 ppph[2] = { 0xff, 0x03 }; + struct sock *sk = (struct sock *) chan->private; + struct sock *sk_tun; + int hdr_len; + u16 udp_len; + struct pppol2tp_session *session; + struct pppol2tp_tunnel *tunnel; + int rc; + int headroom; + int data_len = skb->len; + struct inet_sock *inet; + __wsum csum; + struct udphdr *uh; + unsigned int len; + int old_headroom; + int new_headroom; + + if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) + goto abort; + + /* Get session and tunnel contexts from the socket */ + session = pppol2tp_sock_to_session(sk); + if (session == NULL) + goto abort; + + sk_tun = session->tunnel_sock; + if (sk_tun == NULL) + goto abort_put_sess; + tunnel = pppol2tp_sock_to_tunnel(sk_tun); + if (tunnel == NULL) + goto abort_put_sess; + + /* What header length is configured for this session? */ + hdr_len = pppol2tp_l2tp_header_len(session); + + /* Check that there's enough headroom in the skb to insert IP, + * UDP and L2TP and PPP headers. If not enough, expand it to + * make room. Adjust truesize. 
+ */ + headroom = NET_SKB_PAD + sizeof(struct iphdr) + + sizeof(struct udphdr) + hdr_len + sizeof(ppph); + old_headroom = skb_headroom(skb); + if (skb_cow_head(skb, headroom)) + goto abort_put_sess_tun; + + new_headroom = skb_headroom(skb); + skb_orphan(skb); + skb->truesize += new_headroom - old_headroom; + + /* Setup PPP header */ + __skb_push(skb, sizeof(ppph)); + skb->data[0] = ppph[0]; + skb->data[1] = ppph[1]; + + /* Setup L2TP header */ + pppol2tp_build_l2tp_header(session, __skb_push(skb, hdr_len)); + + udp_len = sizeof(struct udphdr) + hdr_len + sizeof(ppph) + data_len; + + /* Setup UDP header */ + inet = inet_sk(sk_tun); + __skb_push(skb, sizeof(*uh)); + skb_reset_transport_header(skb); + uh = udp_hdr(skb); + uh->source = inet->inet_sport; + uh->dest = inet->inet_dport; + uh->len = htons(udp_len); + uh->check = 0; + + /* Debug */ + if (session->send_seq) + PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG, + "%s: send %d bytes, ns=%hu\n", session->name, + data_len, session->ns - 1); + else + PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG, + "%s: send %d bytes\n", session->name, data_len); + + if (session->debug & PPPOL2TP_MSG_DATA) { + int i; + unsigned char *datap = skb->data; + + printk(KERN_DEBUG "%s: xmit:", session->name); + for (i = 0; i < data_len; i++) { + printk(" %02X", *datap++); + if (i == 31) { + printk(" ..."); + break; + } + } + printk("\n"); + } + + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | + IPSKB_REROUTED); + nf_reset(skb); + + /* Get routing info from the tunnel socket */ + skb_dst_drop(skb); + skb_dst_set(skb, dst_clone(__sk_dst_get(sk_tun))); + pppol2tp_skb_set_owner_w(skb, sk_tun); + + /* Calculate UDP checksum if configured to do so */ + if (sk_tun->sk_no_check == UDP_CSUM_NOXMIT) + skb->ip_summed = CHECKSUM_NONE; + else if ((skb_dst(skb) && skb_dst(skb)->dev) && + (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) { + skb->ip_summed = CHECKSUM_COMPLETE; + csum = skb_checksum(skb, 0, udp_len, 0); + uh->check = csum_tcpudp_magic(inet->inet_saddr, + inet->inet_daddr, + udp_len, IPPROTO_UDP, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + } else { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + uh->check = ~csum_tcpudp_magic(inet->inet_saddr, + inet->inet_daddr, + udp_len, IPPROTO_UDP, 0); + } + + /* Queue the packet to IP for output */ + len = skb->len; + rc = ip_queue_xmit(skb, 1); + + /* Update stats */ + if (rc >= 0) { + tunnel->stats.tx_packets++; + tunnel->stats.tx_bytes += len; + session->stats.tx_packets++; + session->stats.tx_bytes += len; + } else { + tunnel->stats.tx_errors++; + session->stats.tx_errors++; + } + + sock_put(sk_tun); + sock_put(sk); + return 1; + +abort_put_sess_tun: + sock_put(sk_tun); +abort_put_sess: + sock_put(sk); +abort: + /* Free the original skb */ + kfree_skb(skb); + return 1; +} + +/***************************************************************************** + * Session (and tunnel control) socket create/destroy. + *****************************************************************************/ + +/* When the tunnel UDP socket is closed, all the attached sockets need to go + * too. 
+ */ +static void pppol2tp_tunnel_closeall(struct pppol2tp_tunnel *tunnel) +{ + int hash; + struct hlist_node *walk; + struct hlist_node *tmp; + struct pppol2tp_session *session; + struct sock *sk; + + BUG_ON(tunnel == NULL); + + PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: closing all sessions...\n", tunnel->name); + + write_lock_bh(&tunnel->hlist_lock); + for (hash = 0; hash < PPPOL2TP_HASH_SIZE; hash++) { +again: + hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[hash]) { + struct sk_buff *skb; + + session = hlist_entry(walk, struct pppol2tp_session, hlist); + + sk = session->sock; + + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: closing session\n", session->name); + + hlist_del_init(&session->hlist); + + /* Since we should hold the sock lock while + * doing any unbinding, we need to release the + * lock we're holding before taking that lock. + * Hold a reference to the sock so it doesn't + * disappear as we're jumping between locks. + */ + sock_hold(sk); + write_unlock_bh(&tunnel->hlist_lock); + lock_sock(sk); + + if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) { + pppox_unbind_sock(sk); + sk->sk_state = PPPOX_DEAD; + sk->sk_state_change(sk); + } + + /* Purge any queued data */ + skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&sk->sk_write_queue); + while ((skb = skb_dequeue(&session->reorder_q))) { + kfree_skb(skb); + sock_put(sk); + } + + release_sock(sk); + sock_put(sk); + + /* Now restart from the beginning of this hash + * chain. We always remove a session from the + * list so we are guaranteed to make forward + * progress. + */ + write_lock_bh(&tunnel->hlist_lock); + goto again; + } + } + write_unlock_bh(&tunnel->hlist_lock); +} + +/* Really kill the tunnel. + * Come here only when all sessions have been cleared from the tunnel. + */ +static void pppol2tp_tunnel_free(struct pppol2tp_tunnel *tunnel) +{ + struct pppol2tp_net *pn = pppol2tp_pernet(tunnel->pppol2tp_net); + + /* Remove from socket list */ + write_lock_bh(&pn->pppol2tp_tunnel_list_lock); + list_del_init(&tunnel->list); + write_unlock_bh(&pn->pppol2tp_tunnel_list_lock); + + atomic_dec(&pppol2tp_tunnel_count); + kfree(tunnel); +} + +/* Tunnel UDP socket destruct hook. + * The tunnel context is deleted only when all session sockets have been + * closed. + */ +static void pppol2tp_tunnel_destruct(struct sock *sk) +{ + struct pppol2tp_tunnel *tunnel; + + tunnel = sk->sk_user_data; + if (tunnel == NULL) + goto end; + + PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: closing...\n", tunnel->name); + + /* Close all sessions */ + pppol2tp_tunnel_closeall(tunnel); + + /* No longer an encapsulation socket. See net/ipv4/udp.c */ + (udp_sk(sk))->encap_type = 0; + (udp_sk(sk))->encap_rcv = NULL; + + /* Remove hooks into tunnel socket */ + tunnel->sock = NULL; + sk->sk_destruct = tunnel->old_sk_destruct; + sk->sk_user_data = NULL; + + /* Call original (UDP) socket descructor */ + if (sk->sk_destruct != NULL) + (*sk->sk_destruct)(sk); + + pppol2tp_tunnel_dec_refcount(tunnel); + +end: + return; +} + +/* Really kill the session socket. (Called from sock_put() if + * refcnt == 0.) 
+ */ +static void pppol2tp_session_destruct(struct sock *sk) +{ + struct pppol2tp_session *session = NULL; + + if (sk->sk_user_data != NULL) { + struct pppol2tp_tunnel *tunnel; + + session = sk->sk_user_data; + if (session == NULL) + goto out; + + BUG_ON(session->magic != L2TP_SESSION_MAGIC); + + /* Don't use pppol2tp_sock_to_tunnel() here to + * get the tunnel context because the tunnel + * socket might have already been closed (its + * sk->sk_user_data will be NULL) so use the + * session's private tunnel ptr instead. + */ + tunnel = session->tunnel; + if (tunnel != NULL) { + BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC); + + /* If session_id is zero, this is a null + * session context, which was created for a + * socket that is being used only to manage + * tunnels. + */ + if (session->tunnel_addr.s_session != 0) { + /* Delete the session socket from the + * hash + */ + write_lock_bh(&tunnel->hlist_lock); + hlist_del_init(&session->hlist); + write_unlock_bh(&tunnel->hlist_lock); + + atomic_dec(&pppol2tp_session_count); + } + + /* This will delete the tunnel context if this + * is the last session on the tunnel. + */ + session->tunnel = NULL; + session->tunnel_sock = NULL; + pppol2tp_tunnel_dec_refcount(tunnel); + } + } + + kfree(session); +out: + return; +} + +/* Called when the PPPoX socket (session) is closed. + */ +static int pppol2tp_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + struct pppol2tp_session *session; + int error; + + if (!sk) + return 0; + + error = -EBADF; + lock_sock(sk); + if (sock_flag(sk, SOCK_DEAD) != 0) + goto error; + + pppox_unbind_sock(sk); + + /* Signal the death of the socket. */ + sk->sk_state = PPPOX_DEAD; + sock_orphan(sk); + sock->sk = NULL; + + session = pppol2tp_sock_to_session(sk); + + /* Purge any queued data */ + skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&sk->sk_write_queue); + if (session != NULL) { + struct sk_buff *skb; + while ((skb = skb_dequeue(&session->reorder_q))) { + kfree_skb(skb); + sock_put(sk); + } + sock_put(sk); + } + + release_sock(sk); + + /* This will delete the session context via + * pppol2tp_session_destruct() if the socket's refcnt drops to + * zero. + */ + sock_put(sk); + + return 0; + +error: + release_sock(sk); + return error; +} + +/* Internal function to prepare a tunnel (UDP) socket to have PPPoX + * sockets attached to it. + */ +static struct sock *pppol2tp_prepare_tunnel_socket(struct net *net, + int fd, u16 tunnel_id, int *error) +{ + int err; + struct socket *sock = NULL; + struct sock *sk; + struct pppol2tp_tunnel *tunnel; + struct pppol2tp_net *pn; + struct sock *ret = NULL; + + /* Get the tunnel UDP socket from the fd, which was opened by + * the userspace L2TP daemon. 
+ */ + err = -EBADF; + sock = sockfd_lookup(fd, &err); + if (!sock) { + PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_ERR, + "tunl %hu: sockfd_lookup(fd=%d) returned %d\n", + tunnel_id, fd, err); + goto err; + } + + sk = sock->sk; + + /* Quick sanity checks */ + err = -EPROTONOSUPPORT; + if (sk->sk_protocol != IPPROTO_UDP) { + PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_ERR, + "tunl %hu: fd %d wrong protocol, got %d, expected %d\n", + tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP); + goto err; + } + err = -EAFNOSUPPORT; + if (sock->ops->family != AF_INET) { + PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_ERR, + "tunl %hu: fd %d wrong family, got %d, expected %d\n", + tunnel_id, fd, sock->ops->family, AF_INET); + goto err; + } + + err = -ENOTCONN; + + /* Check if this socket has already been prepped */ + tunnel = (struct pppol2tp_tunnel *)sk->sk_user_data; + if (tunnel != NULL) { + /* User-data field already set */ + err = -EBUSY; + BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC); + + /* This socket has already been prepped */ + ret = tunnel->sock; + goto out; + } + + /* This socket is available and needs prepping. Create a new tunnel + * context and init it. + */ + sk->sk_user_data = tunnel = kzalloc(sizeof(struct pppol2tp_tunnel), GFP_KERNEL); + if (sk->sk_user_data == NULL) { + err = -ENOMEM; + goto err; + } + + tunnel->magic = L2TP_TUNNEL_MAGIC; + sprintf(&tunnel->name[0], "tunl %hu", tunnel_id); + + tunnel->stats.tunnel_id = tunnel_id; + tunnel->debug = PPPOL2TP_DEFAULT_DEBUG_FLAGS; + + /* Hook on the tunnel socket destructor so that we can cleanup + * if the tunnel socket goes away. + */ + tunnel->old_sk_destruct = sk->sk_destruct; + sk->sk_destruct = pppol2tp_tunnel_destruct; + + tunnel->sock = sk; + sk->sk_allocation = GFP_ATOMIC; + + /* Misc init */ + rwlock_init(&tunnel->hlist_lock); + + /* The net we belong to */ + tunnel->pppol2tp_net = net; + pn = pppol2tp_pernet(net); + + /* Add tunnel to our list */ + INIT_LIST_HEAD(&tunnel->list); + write_lock_bh(&pn->pppol2tp_tunnel_list_lock); + list_add(&tunnel->list, &pn->pppol2tp_tunnel_list); + write_unlock_bh(&pn->pppol2tp_tunnel_list_lock); + atomic_inc(&pppol2tp_tunnel_count); + + /* Bump the reference count. The tunnel context is deleted + * only when this drops to zero. + */ + pppol2tp_tunnel_inc_refcount(tunnel); + + /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ + (udp_sk(sk))->encap_type = UDP_ENCAP_L2TPINUDP; + (udp_sk(sk))->encap_rcv = pppol2tp_udp_encap_recv; + + ret = tunnel->sock; + + *error = 0; +out: + if (sock) + sockfd_put(sock); + + return ret; + +err: + *error = err; + goto out; +} + +static struct proto pppol2tp_sk_proto = { + .name = "PPPOL2TP", + .owner = THIS_MODULE, + .obj_size = sizeof(struct pppox_sock), +}; + +/* socket() handler. Initialize a new struct sock. + */ +static int pppol2tp_create(struct net *net, struct socket *sock) +{ + int error = -ENOMEM; + struct sock *sk; + + sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppol2tp_sk_proto); + if (!sk) + goto out; + + sock_init_data(sock, sk); + + sock->state = SS_UNCONNECTED; + sock->ops = &pppol2tp_ops; + + sk->sk_backlog_rcv = pppol2tp_recv_core; + sk->sk_protocol = PX_PROTO_OL2TP; + sk->sk_family = PF_PPPOX; + sk->sk_state = PPPOX_NONE; + sk->sk_type = SOCK_STREAM; + sk->sk_destruct = pppol2tp_session_destruct; + + error = 0; + +out: + return error; +} + +/* connect() handler. 
Attach a PPPoX socket to a tunnel UDP socket + */ +static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, + int sockaddr_len, int flags) +{ + struct sock *sk = sock->sk; + struct sockaddr_pppol2tp *sp = (struct sockaddr_pppol2tp *) uservaddr; + struct pppox_sock *po = pppox_sk(sk); + struct sock *tunnel_sock = NULL; + struct pppol2tp_session *session = NULL; + struct pppol2tp_tunnel *tunnel; + struct dst_entry *dst; + int error = 0; + + lock_sock(sk); + + error = -EINVAL; + if (sp->sa_protocol != PX_PROTO_OL2TP) + goto end; + + /* Check for already bound sockets */ + error = -EBUSY; + if (sk->sk_state & PPPOX_CONNECTED) + goto end; + + /* We don't supporting rebinding anyway */ + error = -EALREADY; + if (sk->sk_user_data) + goto end; /* socket is already attached */ + + /* Don't bind if s_tunnel is 0 */ + error = -EINVAL; + if (sp->pppol2tp.s_tunnel == 0) + goto end; + + /* Special case: prepare tunnel socket if s_session and + * d_session is 0. Otherwise look up tunnel using supplied + * tunnel id. + */ + if ((sp->pppol2tp.s_session == 0) && (sp->pppol2tp.d_session == 0)) { + tunnel_sock = pppol2tp_prepare_tunnel_socket(sock_net(sk), + sp->pppol2tp.fd, + sp->pppol2tp.s_tunnel, + &error); + if (tunnel_sock == NULL) + goto end; + + sock_hold(tunnel_sock); + tunnel = tunnel_sock->sk_user_data; + } else { + tunnel = pppol2tp_tunnel_find(sock_net(sk), sp->pppol2tp.s_tunnel); + + /* Error if we can't find the tunnel */ + error = -ENOENT; + if (tunnel == NULL) + goto end; + + tunnel_sock = tunnel->sock; + } + + /* Check that this session doesn't already exist */ + error = -EEXIST; + session = pppol2tp_session_find(tunnel, sp->pppol2tp.s_session); + if (session != NULL) + goto end; + + /* Allocate and initialize a new session context. */ + session = kzalloc(sizeof(struct pppol2tp_session), GFP_KERNEL); + if (session == NULL) { + error = -ENOMEM; + goto end; + } + + skb_queue_head_init(&session->reorder_q); + + session->magic = L2TP_SESSION_MAGIC; + session->owner = current->pid; + session->sock = sk; + session->tunnel = tunnel; + session->tunnel_sock = tunnel_sock; + session->tunnel_addr = sp->pppol2tp; + sprintf(&session->name[0], "sess %hu/%hu", + session->tunnel_addr.s_tunnel, + session->tunnel_addr.s_session); + + session->stats.tunnel_id = session->tunnel_addr.s_tunnel; + session->stats.session_id = session->tunnel_addr.s_session; + + INIT_HLIST_NODE(&session->hlist); + + /* Inherit debug options from tunnel */ + session->debug = tunnel->debug; + + /* Default MTU must allow space for UDP/L2TP/PPP + * headers. + */ + session->mtu = session->mru = 1500 - PPPOL2TP_HEADER_OVERHEAD; + + /* If PMTU discovery was enabled, use the MTU that was discovered */ + dst = sk_dst_get(sk); + if (dst != NULL) { + u32 pmtu = dst_mtu(__sk_dst_get(sk)); + if (pmtu != 0) + session->mtu = session->mru = pmtu - + PPPOL2TP_HEADER_OVERHEAD; + dst_release(dst); + } + + /* Special case: if source & dest session_id == 0x0000, this socket is + * being created to manage the tunnel. Don't add the session to the + * session hash list, just set up the internal context for use by + * ioctl() and sockopt() handlers. 
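The default MTU/MRU chosen above is simply the link MTU minus the fixed tunnel overhead, and the PMTU branch substitutes the discovered path MTU for 1500. Assuming PPPOL2TP_HEADER_OVERHEAD (defined earlier in this file) covers the worst case of IPv4, UDP, a sequenced L2TP header and the PPP address/control bytes, the arithmetic works out as in this small sketch:

#include <stdio.h>

/* Worst-case per-packet overhead between the PPP payload and the wire
 * (no IP options): outer IPv4 + UDP + L2TPv2 header with Ns/Nr + PPP
 * address/control bytes. */
#define IPV4_HDR        20
#define UDP_HDR         8
#define L2TP_HDR_SEQ    10      /* 6 without sequence numbers */
#define PPP_AC          2

int main(void)
{
        int link_mtu = 1500;    /* MTU of the path carrying the tunnel */
        int overhead = IPV4_HDR + UDP_HDR + L2TP_HDR_SEQ + PPP_AC;

        printf("overhead = %d bytes\n", overhead);              /* 40 */
        printf("session mtu/mru = %d\n", link_mtu - overhead);  /* 1460 */
        return 0;
}

The driver sizes for the sequenced header even when sequencing is off, presumably so that enabling sequence numbers later cannot push frames over the path MTU.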
+ */ + if ((session->tunnel_addr.s_session == 0) && + (session->tunnel_addr.d_session == 0)) { + error = 0; + sk->sk_user_data = session; + goto out_no_ppp; + } + + /* Get tunnel context from the tunnel socket */ + tunnel = pppol2tp_sock_to_tunnel(tunnel_sock); + if (tunnel == NULL) { + error = -EBADF; + goto end; + } + + /* Right now, because we don't have a way to push the incoming skb's + * straight through the UDP layer, the only header we need to worry + * about is the L2TP header. This size is different depending on + * whether sequence numbers are enabled for the data channel. + */ + po->chan.hdrlen = PPPOL2TP_L2TP_HDR_SIZE_NOSEQ; + + po->chan.private = sk; + po->chan.ops = &pppol2tp_chan_ops; + po->chan.mtu = session->mtu; + + error = ppp_register_net_channel(sock_net(sk), &po->chan); + if (error) + goto end_put_tun; + + /* This is how we get the session context from the socket. */ + sk->sk_user_data = session; + + /* Add session to the tunnel's hash list */ + write_lock_bh(&tunnel->hlist_lock); + hlist_add_head(&session->hlist, + pppol2tp_session_id_hash(tunnel, + session->tunnel_addr.s_session)); + write_unlock_bh(&tunnel->hlist_lock); + + atomic_inc(&pppol2tp_session_count); + +out_no_ppp: + pppol2tp_tunnel_inc_refcount(tunnel); + sk->sk_state = PPPOX_CONNECTED; + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: created\n", session->name); + +end_put_tun: + sock_put(tunnel_sock); +end: + release_sock(sk); + + if (error != 0) { + if (session) + PRINTK(session->debug, + PPPOL2TP_MSG_CONTROL, KERN_WARNING, + "%s: connect failed: %d\n", + session->name, error); + else + PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_WARNING, + "connect failed: %d\n", error); + } + + return error; +} + +/* getname() support. + */ +static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, + int *usockaddr_len, int peer) +{ + int len = sizeof(struct sockaddr_pppol2tp); + struct sockaddr_pppol2tp sp; + int error = 0; + struct pppol2tp_session *session; + + error = -ENOTCONN; + if (sock->sk->sk_state != PPPOX_CONNECTED) + goto end; + + session = pppol2tp_sock_to_session(sock->sk); + if (session == NULL) { + error = -EBADF; + goto end; + } + + sp.sa_family = AF_PPPOX; + sp.sa_protocol = PX_PROTO_OL2TP; + memcpy(&sp.pppol2tp, &session->tunnel_addr, + sizeof(struct pppol2tp_addr)); + + memcpy(uaddr, &sp, len); + + *usockaddr_len = len; + + error = 0; + sock_put(sock->sk); + +end: + return error; +} + +/**************************************************************************** + * ioctl() handlers. + * + * The PPPoX socket is created for L2TP sessions: tunnels have their own UDP + * sockets. However, in order to control kernel tunnel features, we allow + * userspace to create a special "tunnel" PPPoX socket which is used for + * control only. Tunnel PPPoX sockets have session_id == 0 and simply allow + * the user application to issue L2TP setsockopt(), getsockopt() and ioctl() + * calls. + ****************************************************************************/ + +/* Session ioctl helper. 
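Seen from userspace, the connect() handler above is driven by an ordinary connect() call on an AF_PPPOX/PX_PROTO_OL2TP socket whose sockaddr carries the tunnel UDP socket's fd and the negotiated IDs. The sketch below shows one plausible shape for that call; the control-channel exchange that produces the IDs, the SOCK_DGRAM socket type and the choice not to fill pppol2tp.addr follow common daemon practice rather than anything mandated by this driver, and error handling is minimal.

#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/if_pppox.h>
#include <linux/if_pppol2tp.h>

/* Attach a PPPoL2TP session socket to an already-connected tunnel UDP
 * socket.  tunnel_fd is the UDP socket talking to the peer's port 1701;
 * the IDs come from the L2TP control exchange. */
static int open_l2tp_session(int tunnel_fd,
                             uint16_t local_tid, uint16_t peer_tid,
                             uint16_t local_sid, uint16_t peer_sid)
{
        struct sockaddr_pppol2tp sax;
        int fd;

        fd = socket(AF_PPPOX, SOCK_DGRAM, PX_PROTO_OL2TP);
        if (fd < 0)
                return -1;

        memset(&sax, 0, sizeof(sax));
        sax.sa_family = AF_PPPOX;
        sax.sa_protocol = PX_PROTO_OL2TP;
        sax.pppol2tp.fd = tunnel_fd;    /* hand the UDP socket to the kernel */
        sax.pppol2tp.s_tunnel = local_tid;
        sax.pppol2tp.s_session = local_sid;
        sax.pppol2tp.d_tunnel = peer_tid;
        sax.pppol2tp.d_session = peer_sid;

        if (connect(fd, (struct sockaddr *)&sax, sizeof(sax)) < 0) {
                close(fd);
                return -1;
        }
        return fd;      /* hand to pppd, or drive via sockopts/ioctls */
}

Passing s_session == 0 and d_session == 0 instead creates the special tunnel-management socket described above, which exists only so the daemon can issue the tunnel-level sockopts and ioctls.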
+ */ +static int pppol2tp_session_ioctl(struct pppol2tp_session *session, + unsigned int cmd, unsigned long arg) +{ + struct ifreq ifr; + int err = 0; + struct sock *sk = session->sock; + int val = (int) arg; + + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_DEBUG, + "%s: pppol2tp_session_ioctl(cmd=%#x, arg=%#lx)\n", + session->name, cmd, arg); + + sock_hold(sk); + + switch (cmd) { + case SIOCGIFMTU: + err = -ENXIO; + if (!(sk->sk_state & PPPOX_CONNECTED)) + break; + + err = -EFAULT; + if (copy_from_user(&ifr, (void __user *) arg, sizeof(struct ifreq))) + break; + ifr.ifr_mtu = session->mtu; + if (copy_to_user((void __user *) arg, &ifr, sizeof(struct ifreq))) + break; + + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: get mtu=%d\n", session->name, session->mtu); + err = 0; + break; + + case SIOCSIFMTU: + err = -ENXIO; + if (!(sk->sk_state & PPPOX_CONNECTED)) + break; + + err = -EFAULT; + if (copy_from_user(&ifr, (void __user *) arg, sizeof(struct ifreq))) + break; + + session->mtu = ifr.ifr_mtu; + + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: set mtu=%d\n", session->name, session->mtu); + err = 0; + break; + + case PPPIOCGMRU: + err = -ENXIO; + if (!(sk->sk_state & PPPOX_CONNECTED)) + break; + + err = -EFAULT; + if (put_user(session->mru, (int __user *) arg)) + break; + + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: get mru=%d\n", session->name, session->mru); + err = 0; + break; + + case PPPIOCSMRU: + err = -ENXIO; + if (!(sk->sk_state & PPPOX_CONNECTED)) + break; + + err = -EFAULT; + if (get_user(val,(int __user *) arg)) + break; + + session->mru = val; + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: set mru=%d\n", session->name, session->mru); + err = 0; + break; + + case PPPIOCGFLAGS: + err = -EFAULT; + if (put_user(session->flags, (int __user *) arg)) + break; + + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: get flags=%d\n", session->name, session->flags); + err = 0; + break; + + case PPPIOCSFLAGS: + err = -EFAULT; + if (get_user(val, (int __user *) arg)) + break; + session->flags = val; + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: set flags=%d\n", session->name, session->flags); + err = 0; + break; + + case PPPIOCGL2TPSTATS: + err = -ENXIO; + if (!(sk->sk_state & PPPOX_CONNECTED)) + break; + + if (copy_to_user((void __user *) arg, &session->stats, + sizeof(session->stats))) + break; + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: get L2TP stats\n", session->name); + err = 0; + break; + + default: + err = -ENOSYS; + break; + } + + sock_put(sk); + + return err; +} + +/* Tunnel ioctl helper. + * + * Note the special handling for PPPIOCGL2TPSTATS below. If the ioctl data + * specifies a session_id, the session ioctl handler is called. This allows an + * application to retrieve session stats via a tunnel socket. 
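That special handling means an application holding only the tunnel-management socket can still read per-session counters: it fills in the session_id of interest and issues PPPIOCGL2TPSTATS there. A sketch of that call follows, assuming the structure comes from <linux/if_pppol2tp.h> and the ioctl number from the PPP ioctl header, <linux/if_ppp.h>, on kernels of this vintage:

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/if_ppp.h>       /* PPPIOCGL2TPSTATS */
#include <linux/if_pppol2tp.h>  /* struct pppol2tp_ioc_stats */

/* Read one session's statistics through the tunnel-management socket
 * by filling in the session_id, as the tunnel ioctl handler allows. */
static int print_session_stats(int tunnel_fd, uint16_t tunnel_id,
                               uint16_t session_id)
{
        struct pppol2tp_ioc_stats stats;

        memset(&stats, 0, sizeof(stats));
        stats.tunnel_id = tunnel_id;
        stats.session_id = session_id;

        if (ioctl(tunnel_fd, PPPIOCGL2TPSTATS, &stats) < 0)
                return -1;

        printf("tx %llu pkts / rx %llu pkts\n",
               (unsigned long long)stats.tx_packets,
               (unsigned long long)stats.rx_packets);
        return 0;
}

With session_id left at zero the same ioctl returns the tunnel's own counters, including the using_ipsec flag set in the handler below.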
+ */ +static int pppol2tp_tunnel_ioctl(struct pppol2tp_tunnel *tunnel, + unsigned int cmd, unsigned long arg) +{ + int err = 0; + struct sock *sk = tunnel->sock; + struct pppol2tp_ioc_stats stats_req; + + PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_DEBUG, + "%s: pppol2tp_tunnel_ioctl(cmd=%#x, arg=%#lx)\n", tunnel->name, + cmd, arg); + + sock_hold(sk); + + switch (cmd) { + case PPPIOCGL2TPSTATS: + err = -ENXIO; + if (!(sk->sk_state & PPPOX_CONNECTED)) + break; + + if (copy_from_user(&stats_req, (void __user *) arg, + sizeof(stats_req))) { + err = -EFAULT; + break; + } + if (stats_req.session_id != 0) { + /* resend to session ioctl handler */ + struct pppol2tp_session *session = + pppol2tp_session_find(tunnel, stats_req.session_id); + if (session != NULL) + err = pppol2tp_session_ioctl(session, cmd, arg); + else + err = -EBADR; + break; + } +#ifdef CONFIG_XFRM + tunnel->stats.using_ipsec = (sk->sk_policy[0] || sk->sk_policy[1]) ? 1 : 0; +#endif + if (copy_to_user((void __user *) arg, &tunnel->stats, + sizeof(tunnel->stats))) { + err = -EFAULT; + break; + } + PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: get L2TP stats\n", tunnel->name); + err = 0; + break; + + default: + err = -ENOSYS; + break; + } + + sock_put(sk); + + return err; +} + +/* Main ioctl() handler. + * Dispatch to tunnel or session helpers depending on the socket. + */ +static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + struct sock *sk = sock->sk; + struct pppol2tp_session *session; + struct pppol2tp_tunnel *tunnel; + int err; + + if (!sk) + return 0; + + err = -EBADF; + if (sock_flag(sk, SOCK_DEAD) != 0) + goto end; + + err = -ENOTCONN; + if ((sk->sk_user_data == NULL) || + (!(sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)))) + goto end; + + /* Get session context from the socket */ + err = -EBADF; + session = pppol2tp_sock_to_session(sk); + if (session == NULL) + goto end; + + /* Special case: if session's session_id is zero, treat ioctl as a + * tunnel ioctl + */ + if ((session->tunnel_addr.s_session == 0) && + (session->tunnel_addr.d_session == 0)) { + err = -EBADF; + tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock); + if (tunnel == NULL) + goto end_put_sess; + + err = pppol2tp_tunnel_ioctl(tunnel, cmd, arg); + sock_put(session->tunnel_sock); + goto end_put_sess; + } + + err = pppol2tp_session_ioctl(session, cmd, arg); + +end_put_sess: + sock_put(sk); +end: + return err; +} + +/***************************************************************************** + * setsockopt() / getsockopt() support. + * + * The PPPoX socket is created for L2TP sessions: tunnels have their own UDP + * sockets. In order to control kernel tunnel features, we allow userspace to + * create a special "tunnel" PPPoX socket which is used for control only. + * Tunnel PPPoX sockets have session_id == 0 and simply allow the user + * application to issue L2TP setsockopt(), getsockopt() and ioctl() calls. + *****************************************************************************/ + +/* Tunnel setsockopt() helper. + */ +static int pppol2tp_tunnel_setsockopt(struct sock *sk, + struct pppol2tp_tunnel *tunnel, + int optname, int val) +{ + int err = 0; + + switch (optname) { + case PPPOL2TP_SO_DEBUG: + tunnel->debug = val; + PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: set debug=%x\n", tunnel->name, tunnel->debug); + break; + + default: + err = -ENOPROTOOPT; + break; + } + + return err; +} + +/* Session setsockopt helper. 
+ */ +static int pppol2tp_session_setsockopt(struct sock *sk, + struct pppol2tp_session *session, + int optname, int val) +{ + int err = 0; + + switch (optname) { + case PPPOL2TP_SO_RECVSEQ: + if ((val != 0) && (val != 1)) { + err = -EINVAL; + break; + } + session->recv_seq = val ? -1 : 0; + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: set recv_seq=%d\n", session->name, + session->recv_seq); + break; + + case PPPOL2TP_SO_SENDSEQ: + if ((val != 0) && (val != 1)) { + err = -EINVAL; + break; + } + session->send_seq = val ? -1 : 0; + { + struct sock *ssk = session->sock; + struct pppox_sock *po = pppox_sk(ssk); + po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ : + PPPOL2TP_L2TP_HDR_SIZE_NOSEQ; + } + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: set send_seq=%d\n", session->name, session->send_seq); + break; + + case PPPOL2TP_SO_LNSMODE: + if ((val != 0) && (val != 1)) { + err = -EINVAL; + break; + } + session->lns_mode = val ? -1 : 0; + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: set lns_mode=%d\n", session->name, + session->lns_mode); + break; + + case PPPOL2TP_SO_DEBUG: + session->debug = val; + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: set debug=%x\n", session->name, session->debug); + break; + + case PPPOL2TP_SO_REORDERTO: + session->reorder_timeout = msecs_to_jiffies(val); + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: set reorder_timeout=%d\n", session->name, + session->reorder_timeout); + break; + + default: + err = -ENOPROTOOPT; + break; + } + + return err; +} + +/* Main setsockopt() entry point. + * Does API checks, then calls either the tunnel or session setsockopt + * handler, according to whether the PPPoL2TP socket is a for a regular + * session or the special tunnel type. + */ +static int pppol2tp_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, unsigned int optlen) +{ + struct sock *sk = sock->sk; + struct pppol2tp_session *session = sk->sk_user_data; + struct pppol2tp_tunnel *tunnel; + int val; + int err; + + if (level != SOL_PPPOL2TP) + return udp_prot.setsockopt(sk, level, optname, optval, optlen); + + if (optlen < sizeof(int)) + return -EINVAL; + + if (get_user(val, (int __user *)optval)) + return -EFAULT; + + err = -ENOTCONN; + if (sk->sk_user_data == NULL) + goto end; + + /* Get session context from the socket */ + err = -EBADF; + session = pppol2tp_sock_to_session(sk); + if (session == NULL) + goto end; + + /* Special case: if session_id == 0x0000, treat as operation on tunnel + */ + if ((session->tunnel_addr.s_session == 0) && + (session->tunnel_addr.d_session == 0)) { + err = -EBADF; + tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock); + if (tunnel == NULL) + goto end_put_sess; + + err = pppol2tp_tunnel_setsockopt(sk, tunnel, optname, val); + sock_put(session->tunnel_sock); + } else + err = pppol2tp_session_setsockopt(sk, session, optname, val); + + err = 0; + +end_put_sess: + sock_put(sk); +end: + return err; +} + +/* Tunnel getsockopt helper. Called with sock locked. + */ +static int pppol2tp_tunnel_getsockopt(struct sock *sk, + struct pppol2tp_tunnel *tunnel, + int optname, int *val) +{ + int err = 0; + + switch (optname) { + case PPPOL2TP_SO_DEBUG: + *val = tunnel->debug; + PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: get debug=%x\n", tunnel->name, tunnel->debug); + break; + + default: + err = -ENOPROTOOPT; + break; + } + + return err; +} + +/* Session getsockopt helper. Called with sock locked. 
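From an application's point of view the options above are plain integer sockopts at level SOL_PPPOL2TP on the PPPoL2TP socket: session options on a session socket, PPPOL2TP_SO_DEBUG on either that or the tunnel-management socket. A short usage sketch, assuming fd is a connected PPPoL2TP socket such as the one returned by the earlier connect() example and the constants come from <linux/if_pppol2tp.h>:

#include <stdio.h>
#include <sys/socket.h>
#include <linux/if_pppol2tp.h>

static int tune_session(int fd)
{
        int on = 1;
        int reorder_ms = 100;   /* hold out-of-order packets up to 100 ms */
        int debug = 0;
        socklen_t len = sizeof(debug);

        if (setsockopt(fd, SOL_PPPOL2TP, PPPOL2TP_SO_SENDSEQ,
                       &on, sizeof(on)) < 0)
                return -1;
        if (setsockopt(fd, SOL_PPPOL2TP, PPPOL2TP_SO_REORDERTO,
                       &reorder_ms, sizeof(reorder_ms)) < 0)
                return -1;
        if (getsockopt(fd, SOL_PPPOL2TP, PPPOL2TP_SO_DEBUG,
                       &debug, &len) < 0)
                return -1;

        printf("debug mask is %#x\n", debug);
        return 0;
}

PPPOL2TP_SO_REORDERTO is given in milliseconds; the handler converts it to jiffies with msecs_to_jiffies() as shown above.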
+ */ +static int pppol2tp_session_getsockopt(struct sock *sk, + struct pppol2tp_session *session, + int optname, int *val) +{ + int err = 0; + + switch (optname) { + case PPPOL2TP_SO_RECVSEQ: + *val = session->recv_seq; + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: get recv_seq=%d\n", session->name, *val); + break; + + case PPPOL2TP_SO_SENDSEQ: + *val = session->send_seq; + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: get send_seq=%d\n", session->name, *val); + break; + + case PPPOL2TP_SO_LNSMODE: + *val = session->lns_mode; + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: get lns_mode=%d\n", session->name, *val); + break; + + case PPPOL2TP_SO_DEBUG: + *val = session->debug; + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: get debug=%d\n", session->name, *val); + break; + + case PPPOL2TP_SO_REORDERTO: + *val = (int) jiffies_to_msecs(session->reorder_timeout); + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: get reorder_timeout=%d\n", session->name, *val); + break; + + default: + err = -ENOPROTOOPT; + } + + return err; +} + +/* Main getsockopt() entry point. + * Does API checks, then calls either the tunnel or session getsockopt + * handler, according to whether the PPPoX socket is a for a regular session + * or the special tunnel type. + */ +static int pppol2tp_getsockopt(struct socket *sock, int level, + int optname, char __user *optval, int __user *optlen) +{ + struct sock *sk = sock->sk; + struct pppol2tp_session *session = sk->sk_user_data; + struct pppol2tp_tunnel *tunnel; + int val, len; + int err; + + if (level != SOL_PPPOL2TP) + return udp_prot.getsockopt(sk, level, optname, optval, optlen); + + if (get_user(len, (int __user *) optlen)) + return -EFAULT; + + len = min_t(unsigned int, len, sizeof(int)); + + if (len < 0) + return -EINVAL; + + err = -ENOTCONN; + if (sk->sk_user_data == NULL) + goto end; + + /* Get the session context */ + err = -EBADF; + session = pppol2tp_sock_to_session(sk); + if (session == NULL) + goto end; + + /* Special case: if session_id == 0x0000, treat as operation on tunnel */ + if ((session->tunnel_addr.s_session == 0) && + (session->tunnel_addr.d_session == 0)) { + err = -EBADF; + tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock); + if (tunnel == NULL) + goto end_put_sess; + + err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val); + sock_put(session->tunnel_sock); + } else + err = pppol2tp_session_getsockopt(sk, session, optname, &val); + + err = -EFAULT; + if (put_user(len, (int __user *) optlen)) + goto end_put_sess; + + if (copy_to_user((void __user *) optval, &val, len)) + goto end_put_sess; + + err = 0; + +end_put_sess: + sock_put(sk); +end: + return err; +} + +/***************************************************************************** + * /proc filesystem for debug + *****************************************************************************/ + +#ifdef CONFIG_PROC_FS + +#include + +struct pppol2tp_seq_data { + struct seq_net_private p; + struct pppol2tp_tunnel *tunnel; /* current tunnel */ + struct pppol2tp_session *session; /* NULL means get first session in tunnel */ +}; + +static struct pppol2tp_session *next_session(struct pppol2tp_tunnel *tunnel, struct pppol2tp_session *curr) +{ + struct pppol2tp_session *session = NULL; + struct hlist_node *walk; + int found = 0; + int next = 0; + int i; + + read_lock_bh(&tunnel->hlist_lock); + for (i = 0; i < PPPOL2TP_HASH_SIZE; i++) { + hlist_for_each_entry(session, walk, &tunnel->session_hlist[i], hlist) { 
+ if (curr == NULL) { + found = 1; + goto out; + } + if (session == curr) { + next = 1; + continue; + } + if (next) { + found = 1; + goto out; + } + } + } +out: + read_unlock_bh(&tunnel->hlist_lock); + if (!found) + session = NULL; + + return session; +} + +static struct pppol2tp_tunnel *next_tunnel(struct pppol2tp_net *pn, + struct pppol2tp_tunnel *curr) +{ + struct pppol2tp_tunnel *tunnel = NULL; + + read_lock_bh(&pn->pppol2tp_tunnel_list_lock); + if (list_is_last(&curr->list, &pn->pppol2tp_tunnel_list)) { + goto out; + } + tunnel = list_entry(curr->list.next, struct pppol2tp_tunnel, list); +out: + read_unlock_bh(&pn->pppol2tp_tunnel_list_lock); + + return tunnel; +} + +static void *pppol2tp_seq_start(struct seq_file *m, loff_t *offs) +{ + struct pppol2tp_seq_data *pd = SEQ_START_TOKEN; + struct pppol2tp_net *pn; + loff_t pos = *offs; + + if (!pos) + goto out; + + BUG_ON(m->private == NULL); + pd = m->private; + pn = pppol2tp_pernet(seq_file_net(m)); + + if (pd->tunnel == NULL) { + if (!list_empty(&pn->pppol2tp_tunnel_list)) + pd->tunnel = list_entry(pn->pppol2tp_tunnel_list.next, struct pppol2tp_tunnel, list); + } else { + pd->session = next_session(pd->tunnel, pd->session); + if (pd->session == NULL) { + pd->tunnel = next_tunnel(pn, pd->tunnel); + } + } + + /* NULL tunnel and session indicates end of list */ + if ((pd->tunnel == NULL) && (pd->session == NULL)) + pd = NULL; + +out: + return pd; +} + +static void *pppol2tp_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + (*pos)++; + return NULL; +} + +static void pppol2tp_seq_stop(struct seq_file *p, void *v) +{ + /* nothing to do */ +} + +static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v) +{ + struct pppol2tp_tunnel *tunnel = v; + + seq_printf(m, "\nTUNNEL '%s', %c %d\n", + tunnel->name, + (tunnel == tunnel->sock->sk_user_data) ? 'Y':'N', + atomic_read(&tunnel->ref_count) - 1); + seq_printf(m, " %08x %llu/%llu/%llu %llu/%llu/%llu\n", + tunnel->debug, + (unsigned long long)tunnel->stats.tx_packets, + (unsigned long long)tunnel->stats.tx_bytes, + (unsigned long long)tunnel->stats.tx_errors, + (unsigned long long)tunnel->stats.rx_packets, + (unsigned long long)tunnel->stats.rx_bytes, + (unsigned long long)tunnel->stats.rx_errors); +} + +static void pppol2tp_seq_session_show(struct seq_file *m, void *v) +{ + struct pppol2tp_session *session = v; + + seq_printf(m, " SESSION '%s' %08X/%d %04X/%04X -> " + "%04X/%04X %d %c\n", + session->name, + ntohl(session->tunnel_addr.addr.sin_addr.s_addr), + ntohs(session->tunnel_addr.addr.sin_port), + session->tunnel_addr.s_tunnel, + session->tunnel_addr.s_session, + session->tunnel_addr.d_tunnel, + session->tunnel_addr.d_session, + session->sock->sk_state, + (session == session->sock->sk_user_data) ? + 'Y' : 'N'); + seq_printf(m, " %d/%d/%c/%c/%s %08x %u\n", + session->mtu, session->mru, + session->recv_seq ? 'R' : '-', + session->send_seq ? 'S' : '-', + session->lns_mode ? 
"LNS" : "LAC", + session->debug, + jiffies_to_msecs(session->reorder_timeout)); + seq_printf(m, " %hu/%hu %llu/%llu/%llu %llu/%llu/%llu\n", + session->nr, session->ns, + (unsigned long long)session->stats.tx_packets, + (unsigned long long)session->stats.tx_bytes, + (unsigned long long)session->stats.tx_errors, + (unsigned long long)session->stats.rx_packets, + (unsigned long long)session->stats.rx_bytes, + (unsigned long long)session->stats.rx_errors); +} + +static int pppol2tp_seq_show(struct seq_file *m, void *v) +{ + struct pppol2tp_seq_data *pd = v; + + /* display header on line 1 */ + if (v == SEQ_START_TOKEN) { + seq_puts(m, "PPPoL2TP driver info, " PPPOL2TP_DRV_VERSION "\n"); + seq_puts(m, "TUNNEL name, user-data-ok session-count\n"); + seq_puts(m, " debug tx-pkts/bytes/errs rx-pkts/bytes/errs\n"); + seq_puts(m, " SESSION name, addr/port src-tid/sid " + "dest-tid/sid state user-data-ok\n"); + seq_puts(m, " mtu/mru/rcvseq/sendseq/lns debug reorderto\n"); + seq_puts(m, " nr/ns tx-pkts/bytes/errs rx-pkts/bytes/errs\n"); + goto out; + } + + /* Show the tunnel or session context. + */ + if (pd->session == NULL) + pppol2tp_seq_tunnel_show(m, pd->tunnel); + else + pppol2tp_seq_session_show(m, pd->session); + +out: + return 0; +} + +static const struct seq_operations pppol2tp_seq_ops = { + .start = pppol2tp_seq_start, + .next = pppol2tp_seq_next, + .stop = pppol2tp_seq_stop, + .show = pppol2tp_seq_show, +}; + +/* Called when our /proc file is opened. We allocate data for use when + * iterating our tunnel / session contexts and store it in the private + * data of the seq_file. + */ +static int pppol2tp_proc_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &pppol2tp_seq_ops, + sizeof(struct pppol2tp_seq_data)); +} + +static const struct file_operations pppol2tp_proc_fops = { + .owner = THIS_MODULE, + .open = pppol2tp_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + +#endif /* CONFIG_PROC_FS */ + +/***************************************************************************** + * Init and cleanup + *****************************************************************************/ + +static const struct proto_ops pppol2tp_ops = { + .family = AF_PPPOX, + .owner = THIS_MODULE, + .release = pppol2tp_release, + .bind = sock_no_bind, + .connect = pppol2tp_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = pppol2tp_getname, + .poll = datagram_poll, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = pppol2tp_setsockopt, + .getsockopt = pppol2tp_getsockopt, + .sendmsg = pppol2tp_sendmsg, + .recvmsg = pppol2tp_recvmsg, + .mmap = sock_no_mmap, + .ioctl = pppox_ioctl, +}; + +static struct pppox_proto pppol2tp_proto = { + .create = pppol2tp_create, + .ioctl = pppol2tp_ioctl +}; + +static __net_init int pppol2tp_init_net(struct net *net) +{ + struct pppol2tp_net *pn = pppol2tp_pernet(net); + struct proc_dir_entry *pde; + + INIT_LIST_HEAD(&pn->pppol2tp_tunnel_list); + rwlock_init(&pn->pppol2tp_tunnel_list_lock); + + pde = proc_net_fops_create(net, "pppol2tp", S_IRUGO, &pppol2tp_proc_fops); +#ifdef CONFIG_PROC_FS + if (!pde) + return -ENOMEM; +#endif + + return 0; +} + +static __net_exit void pppol2tp_exit_net(struct net *net) +{ + proc_net_remove(net, "pppol2tp"); +} + +static struct pernet_operations pppol2tp_net_ops = { + .init = pppol2tp_init_net, + .exit = pppol2tp_exit_net, + .id = &pppol2tp_net_id, + .size = sizeof(struct pppol2tp_net), +}; + +static int __init 
pppol2tp_init(void) +{ + int err; + + err = proto_register(&pppol2tp_sk_proto, 0); + if (err) + goto out; + err = register_pppox_proto(PX_PROTO_OL2TP, &pppol2tp_proto); + if (err) + goto out_unregister_pppol2tp_proto; + + err = register_pernet_device(&pppol2tp_net_ops); + if (err) + goto out_unregister_pppox_proto; + + printk(KERN_INFO "PPPoL2TP kernel driver, %s\n", + PPPOL2TP_DRV_VERSION); + +out: + return err; +out_unregister_pppox_proto: + unregister_pppox_proto(PX_PROTO_OL2TP); +out_unregister_pppol2tp_proto: + proto_unregister(&pppol2tp_sk_proto); + goto out; +} + +static void __exit pppol2tp_exit(void) +{ + unregister_pppox_proto(PX_PROTO_OL2TP); + unregister_pernet_device(&pppol2tp_net_ops); + proto_unregister(&pppol2tp_sk_proto); +} + +module_init(pppol2tp_init); +module_exit(pppol2tp_exit); + +MODULE_AUTHOR("Martijn van Oosterhout , " + "James Chapman "); +MODULE_DESCRIPTION("PPP over L2TP over UDP"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(PPPOL2TP_DRV_VERSION); -- cgit v1.2.2 From fd558d186df2c13a22455373858bae634a4795af Mon Sep 17 00:00:00 2001 From: James Chapman Date: Fri, 2 Apr 2010 06:18:33 +0000 Subject: l2tp: Split pppol2tp patch into separate l2tp and ppp parts This patch splits the pppol2tp driver into separate L2TP and PPP parts to prepare for L2TPv3 support. In L2TPv3, protocols other than PPP can be carried, so this split creates a common L2TP core that will handle the common L2TP bits which protocol support modules such as PPP will use. Note that the existing pppol2tp module is split into l2tp_core and l2tp_ppp by this change. There are no feature changes here. Internally, however, there are significant changes, mostly to handle the separation of PPP-specific data from the L2TP session and to provide hooks in the core for modules like PPP to access. Signed-off-by: James Chapman Reviewed-by: Randy Dunlap Signed-off-by: David S. 
Miller --- net/Kconfig | 1 + net/Makefile | 2 +- net/l2tp/Kconfig | 28 + net/l2tp/Makefile | 5 +- net/l2tp/l2tp_core.c | 1227 +++++++++++++++++++++++ net/l2tp/l2tp_core.h | 254 +++++ net/l2tp/l2tp_ppp.c | 1658 +++++++++++++++++++++++++++++++ net/l2tp/pppol2tp.c | 2680 -------------------------------------------------- 8 files changed, 3173 insertions(+), 2682 deletions(-) create mode 100644 net/l2tp/Kconfig create mode 100644 net/l2tp/l2tp_core.c create mode 100644 net/l2tp/l2tp_core.h create mode 100644 net/l2tp/l2tp_ppp.c delete mode 100644 net/l2tp/pppol2tp.c (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index e10d55c8ee5c..0d68b40fc0e6 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -186,6 +186,7 @@ source "net/sctp/Kconfig" source "net/rds/Kconfig" source "net/tipc/Kconfig" source "net/atm/Kconfig" +source "net/l2tp/Kconfig" source "net/802/Kconfig" source "net/bridge/Kconfig" source "net/dsa/Kconfig" diff --git a/net/Makefile b/net/Makefile index 13ca77e0eb08..cb7bdc1210cb 100644 --- a/net/Makefile +++ b/net/Makefile @@ -40,7 +40,7 @@ obj-$(CONFIG_BT) += bluetooth/ obj-$(CONFIG_SUNRPC) += sunrpc/ obj-$(CONFIG_AF_RXRPC) += rxrpc/ obj-$(CONFIG_ATM) += atm/ -obj-$(CONFIG_PPPOL2TP) += l2tp/ +obj-$(CONFIG_L2TP) += l2tp/ obj-$(CONFIG_DECNET) += decnet/ obj-$(CONFIG_ECONET) += econet/ obj-$(CONFIG_PHONET) += phonet/ diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig new file mode 100644 index 000000000000..ec88c5cdc397 --- /dev/null +++ b/net/l2tp/Kconfig @@ -0,0 +1,28 @@ +# +# Layer Two Tunneling Protocol (L2TP) +# + +menuconfig L2TP + tristate "Layer Two Tunneling Protocol (L2TP)" + depends on INET + ---help--- + Layer Two Tunneling Protocol + + From RFC 2661 . + + L2TP facilitates the tunneling of packets across an + intervening network in a way that is as transparent as + possible to both end-users and applications. + + L2TP is often used to tunnel PPP traffic over IP + tunnels. One IP tunnel may carry thousands of individual PPP + connections. L2TP is also used as a VPN protocol, popular + with home workers to connect to their offices. + + The kernel component handles only L2TP data packets: a + userland daemon handles L2TP the control protocol (tunnel + and session setup). One such daemon is OpenL2TP + (http://openl2tp.org/). + + If you don't need L2TP, say N. To compile all L2TP code as + modules, choose M here. diff --git a/net/l2tp/Makefile b/net/l2tp/Makefile index 9af41e898a04..c91f208b1693 100644 --- a/net/l2tp/Makefile +++ b/net/l2tp/Makefile @@ -2,4 +2,7 @@ # Makefile for the L2TP. # -obj-$(CONFIG_PPPOL2TP) += pppol2tp.o +obj-$(CONFIG_L2TP) += l2tp_core.o + +# Build l2tp as modules if L2TP is M +obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_PPPOL2TP)) += l2tp_ppp.o diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c new file mode 100644 index 000000000000..4b6da3689893 --- /dev/null +++ b/net/l2tp/l2tp_core.c @@ -0,0 +1,1227 @@ +/* + * L2TP core. + * + * Copyright (c) 2008,2009,2010 Katalix Systems Ltd + * + * This file contains some code of the original L2TPv2 pppol2tp + * driver, which has the following copyright: + * + * Authors: Martijn van Oosterhout + * James Chapman (jchapman@katalix.com) + * Contributors: + * Michal Ostrowski + * Arnaldo Carvalho de Melo + * David S. Miller (davem@redhat.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "l2tp_core.h" + +#define L2TP_DRV_VERSION "V2.0" + +/* L2TP header constants */ +#define L2TP_HDRFLAG_T 0x8000 +#define L2TP_HDRFLAG_L 0x4000 +#define L2TP_HDRFLAG_S 0x0800 +#define L2TP_HDRFLAG_O 0x0200 +#define L2TP_HDRFLAG_P 0x0100 + +#define L2TP_HDR_VER_MASK 0x000F +#define L2TP_HDR_VER_2 0x0002 + +/* L2TPv3 default L2-specific sublayer */ +#define L2TP_SLFLAG_S 0x40000000 +#define L2TP_SL_SEQ_MASK 0x00ffffff + +#define L2TP_HDR_SIZE_SEQ 10 +#define L2TP_HDR_SIZE_NOSEQ 6 + +/* Default trace flags */ +#define L2TP_DEFAULT_DEBUG_FLAGS 0 + +#define PRINTK(_mask, _type, _lvl, _fmt, args...) \ + do { \ + if ((_mask) & (_type)) \ + printk(_lvl "L2TP: " _fmt, ##args); \ + } while (0) + +/* Private data stored for received packets in the skb. + */ +struct l2tp_skb_cb { + u16 ns; + u16 has_seq; + u16 length; + unsigned long expires; +}; + +#define L2TP_SKB_CB(skb) ((struct l2tp_skb_cb *) &skb->cb[sizeof(struct inet_skb_parm)]) + +static atomic_t l2tp_tunnel_count; +static atomic_t l2tp_session_count; + +/* per-net private data for this module */ +static unsigned int l2tp_net_id; +struct l2tp_net { + struct list_head l2tp_tunnel_list; + rwlock_t l2tp_tunnel_list_lock; +}; + +static inline struct l2tp_net *l2tp_pernet(struct net *net) +{ + BUG_ON(!net); + + return net_generic(net, l2tp_net_id); +} + +/* Session hash list. + * The session_id SHOULD be random according to RFC2661, but several + * L2TP implementations (Cisco and Microsoft) use incrementing + * session_ids. So we do a real hash on the session_id, rather than a + * simple bitmask. 
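
Since incrementing session IDs would otherwise pile into a single bucket, the code hashes the ID before indexing the per-tunnel table. A minimal userspace sketch of that bucket selection; the multiplicative constant stands in for the kernel's hash_32() and is an assumption here, only the "multiply, then take the top bits" shape matters:

    #include <stdio.h>
    #include <stdint.h>

    #define HASH_BITS 4                    /* L2TP_HASH_BITS */
    #define HASH_SIZE (1u << HASH_BITS)    /* 16 buckets per tunnel */

    /* Multiplicative hash in the spirit of the kernel's hash_32();
     * the multiplier is an illustrative assumption, not the driver's code. */
    static unsigned int hash32(uint32_t val, unsigned int bits)
    {
        return (uint32_t)(val * 0x9e370001u) >> (32 - bits);
    }

    int main(void)
    {
        /* Incrementing session IDs (as Cisco/Microsoft use) still spread
         * across the 16 buckets because of the multiplication. */
        for (uint32_t id = 1; id <= 8; id++)
            printf("session_id %u -> bucket %u of %u\n",
                   id, hash32(id, HASH_BITS), HASH_SIZE);
        return 0;
    }

The real table lives in tunnel->session_hlist[], sized L2TP_HASH_SIZE entries and protected by tunnel->hlist_lock, as the code below shows.
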
+ */ +static inline struct hlist_head * +l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id) +{ + return &tunnel->session_hlist[hash_32(session_id, L2TP_HASH_BITS)]; +} + +/* Lookup a session by id + */ +struct l2tp_session *l2tp_session_find(struct l2tp_tunnel *tunnel, u32 session_id) +{ + struct hlist_head *session_list = + l2tp_session_id_hash(tunnel, session_id); + struct l2tp_session *session; + struct hlist_node *walk; + + read_lock_bh(&tunnel->hlist_lock); + hlist_for_each_entry(session, walk, session_list, hlist) { + if (session->session_id == session_id) { + read_unlock_bh(&tunnel->hlist_lock); + return session; + } + } + read_unlock_bh(&tunnel->hlist_lock); + + return NULL; +} +EXPORT_SYMBOL_GPL(l2tp_session_find); + +struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) +{ + int hash; + struct hlist_node *walk; + struct l2tp_session *session; + int count = 0; + + read_lock_bh(&tunnel->hlist_lock); + for (hash = 0; hash < L2TP_HASH_SIZE; hash++) { + hlist_for_each_entry(session, walk, &tunnel->session_hlist[hash], hlist) { + if (++count > nth) { + read_unlock_bh(&tunnel->hlist_lock); + return session; + } + } + } + + read_unlock_bh(&tunnel->hlist_lock); + + return NULL; +} +EXPORT_SYMBOL_GPL(l2tp_session_find_nth); + +/* Lookup a tunnel by id + */ +struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id) +{ + struct l2tp_tunnel *tunnel; + struct l2tp_net *pn = l2tp_pernet(net); + + read_lock_bh(&pn->l2tp_tunnel_list_lock); + list_for_each_entry(tunnel, &pn->l2tp_tunnel_list, list) { + if (tunnel->tunnel_id == tunnel_id) { + read_unlock_bh(&pn->l2tp_tunnel_list_lock); + return tunnel; + } + } + read_unlock_bh(&pn->l2tp_tunnel_list_lock); + + return NULL; +} +EXPORT_SYMBOL_GPL(l2tp_tunnel_find); + +struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth) +{ + struct l2tp_net *pn = l2tp_pernet(net); + struct l2tp_tunnel *tunnel; + int count = 0; + + read_lock_bh(&pn->l2tp_tunnel_list_lock); + list_for_each_entry(tunnel, &pn->l2tp_tunnel_list, list) { + if (++count > nth) { + read_unlock_bh(&pn->l2tp_tunnel_list_lock); + return tunnel; + } + } + + read_unlock_bh(&pn->l2tp_tunnel_list_lock); + + return NULL; +} +EXPORT_SYMBOL_GPL(l2tp_tunnel_find_nth); + +/***************************************************************************** + * Receive data handling + *****************************************************************************/ + +/* Queue a skb in order. We come here only if the skb has an L2TP sequence + * number. + */ +static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *skb) +{ + struct sk_buff *skbp; + struct sk_buff *tmp; + u16 ns = L2TP_SKB_CB(skb)->ns; + + spin_lock_bh(&session->reorder_q.lock); + skb_queue_walk_safe(&session->reorder_q, skbp, tmp) { + if (L2TP_SKB_CB(skbp)->ns > ns) { + __skb_queue_before(&session->reorder_q, skbp, skb); + PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG, + "%s: pkt %hu, inserted before %hu, reorder_q len=%d\n", + session->name, ns, L2TP_SKB_CB(skbp)->ns, + skb_queue_len(&session->reorder_q)); + session->stats.rx_oos_packets++; + goto out; + } + } + + __skb_queue_tail(&session->reorder_q, skb); + +out: + spin_unlock_bh(&session->reorder_q.lock); +} + +/* Dequeue a single skb. 
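
Together with the dequeue routines that follow, the insert above keeps reorder_q sorted by ns and releases packets only while the head carries the next expected sequence number. A minimal single-threaded model of that behaviour (locking, expiry and reference counting omitted; names are illustrative):

    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>

    struct pkt {
        uint16_t ns;
        struct pkt *next;
    };

    /* Insert in ascending ns order, like l2tp_recv_queue_skb(). */
    static void queue_in_order(struct pkt **q, struct pkt *p)
    {
        while (*q && (*q)->ns <= p->ns)
            q = &(*q)->next;
        p->next = *q;
        *q = p;
    }

    /* Deliver from the head while it matches the expected nr,
     * like l2tp_recv_dequeue() (expiry handling left out). */
    static void dequeue_in_seq(struct pkt **q, uint16_t *nr)
    {
        while (*q && (*q)->ns == *nr) {
            struct pkt *p = *q;

            *q = p->next;
            printf("deliver ns=%u\n", p->ns);
            (*nr)++;
            free(p);
        }
    }

    int main(void)
    {
        struct pkt *q = NULL;
        uint16_t nr = 0;
        uint16_t arrival[] = { 0, 2, 3, 1, 4 };   /* packet 1 arrives late */

        for (size_t i = 0; i < sizeof(arrival) / sizeof(arrival[0]); i++) {
            struct pkt *p = malloc(sizeof(*p));

            p->ns = arrival[i];
            queue_in_order(&q, p);
            dequeue_in_seq(&q, &nr);
        }
        return 0;
    }
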
+ */ +static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff *skb) +{ + struct l2tp_tunnel *tunnel = session->tunnel; + int length = L2TP_SKB_CB(skb)->length; + + /* We're about to requeue the skb, so return resources + * to its current owner (a socket receive buffer). + */ + skb_orphan(skb); + + tunnel->stats.rx_packets++; + tunnel->stats.rx_bytes += length; + session->stats.rx_packets++; + session->stats.rx_bytes += length; + + if (L2TP_SKB_CB(skb)->has_seq) { + /* Bump our Nr */ + session->nr++; + PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG, + "%s: updated nr to %hu\n", session->name, session->nr); + } + + /* call private receive handler */ + if (session->recv_skb != NULL) + (*session->recv_skb)(session, skb, L2TP_SKB_CB(skb)->length); + else + kfree_skb(skb); + + if (session->deref) + (*session->deref)(session); +} + +/* Dequeue skbs from the session's reorder_q, subject to packet order. + * Skbs that have been in the queue for too long are simply discarded. + */ +static void l2tp_recv_dequeue(struct l2tp_session *session) +{ + struct sk_buff *skb; + struct sk_buff *tmp; + + /* If the pkt at the head of the queue has the nr that we + * expect to send up next, dequeue it and any other + * in-sequence packets behind it. + */ + spin_lock_bh(&session->reorder_q.lock); + skb_queue_walk_safe(&session->reorder_q, skb, tmp) { + if (time_after(jiffies, L2TP_SKB_CB(skb)->expires)) { + session->stats.rx_seq_discards++; + session->stats.rx_errors++; + PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG, + "%s: oos pkt %hu len %d discarded (too old), " + "waiting for %hu, reorder_q_len=%d\n", + session->name, L2TP_SKB_CB(skb)->ns, + L2TP_SKB_CB(skb)->length, session->nr, + skb_queue_len(&session->reorder_q)); + __skb_unlink(skb, &session->reorder_q); + kfree_skb(skb); + if (session->deref) + (*session->deref)(session); + continue; + } + + if (L2TP_SKB_CB(skb)->has_seq) { + if (L2TP_SKB_CB(skb)->ns != session->nr) { + PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG, + "%s: holding oos pkt %hu len %d, " + "waiting for %hu, reorder_q_len=%d\n", + session->name, L2TP_SKB_CB(skb)->ns, + L2TP_SKB_CB(skb)->length, session->nr, + skb_queue_len(&session->reorder_q)); + goto out; + } + } + __skb_unlink(skb, &session->reorder_q); + + /* Process the skb. We release the queue lock while we + * do so to let other contexts process the queue. + */ + spin_unlock_bh(&session->reorder_q.lock); + l2tp_recv_dequeue_skb(session, skb); + spin_lock_bh(&session->reorder_q.lock); + } + +out: + spin_unlock_bh(&session->reorder_q.lock); +} + +static inline int l2tp_verify_udp_checksum(struct sock *sk, + struct sk_buff *skb) +{ + struct udphdr *uh = udp_hdr(skb); + u16 ulen = ntohs(uh->len); + struct inet_sock *inet; + __wsum psum; + + if (sk->sk_no_check || skb_csum_unnecessary(skb) || !uh->check) + return 0; + + inet = inet_sk(sk); + psum = csum_tcpudp_nofold(inet->inet_saddr, inet->inet_daddr, ulen, + IPPROTO_UDP, 0); + + if ((skb->ip_summed == CHECKSUM_COMPLETE) && + !csum_fold(csum_add(psum, skb->csum))) + return 0; + + skb->csum = psum; + + return __skb_checksum_complete(skb); +} + +/* Internal UDP receive frame. Do the real work of receiving an L2TP data frame + * here. The skb is not on a list when we get here. + * Returns 0 if the packet was a data packet and was successfully passed on. + * Returns 1 if the packet was not a good data packet and could not be + * forwarded. All such packets are passed up to userspace to deal with. 
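
Stripped of the skb plumbing, the data-packet walk that follows is a handful of fixed-size reads driven by the flag bits. A compilable sketch of the same walk over a raw buffer (sequence-state handling and most validation omitted):

    #include <stdio.h>
    #include <stdint.h>

    static uint16_t get_be16(const uint8_t *p)
    {
        return (uint16_t)((p[0] << 8) | p[1]);
    }

    /* Walk an L2TPv2 data header the way l2tp_udp_recv_core() does.
     * Returns the payload offset, or -1 for a control (T bit) packet. */
    static int parse_l2tpv2(const uint8_t *buf, uint16_t *tid, uint16_t *sid,
                            int *has_seq, uint16_t *ns)
    {
        const uint8_t *ptr = buf;
        uint16_t flags = get_be16(ptr);

        ptr += 2;
        if (flags & 0x8000)                 /* T: control, goes to userspace */
            return -1;
        if (flags & 0x4000)                 /* L: optional length field */
            ptr += 2;

        *tid = get_be16(ptr); ptr += 2;     /* 16-bit IDs in L2TPv2 */
        *sid = get_be16(ptr); ptr += 2;

        *has_seq = !!(flags & 0x0800);      /* S: ns/nr present */
        if (*has_seq) {
            *ns = get_be16(ptr);
            ptr += 4;                       /* skip ns and nr */
        }
        if (flags & 0x0200)                 /* O: skip offset pad */
            ptr += 2 + get_be16(ptr);

        return (int)(ptr - buf);
    }

    int main(void)
    {
        /* flags 0x4802 (L+S, ver 2), length 16, tunnel 1, session 2, ns=0, nr=0 */
        const uint8_t pkt[] = { 0x48, 0x02, 0x00, 0x10, 0x00, 0x01,
                                0x00, 0x02, 0x00, 0x00, 0x00, 0x00 };
        uint16_t tid, sid, ns = 0;
        int has_seq;
        int off = parse_l2tpv2(pkt, &tid, &sid, &has_seq, &ns);

        printf("payload at offset %d, tunnel %u, session %u, seq %d, ns %u\n",
               off, tid, sid, has_seq, ns);
        return 0;
    }

A control packet (T bit) returns -1 in this sketch; the driver instead returns 1 to its caller so the frame reaches the userspace control daemon through the normal UDP receive path.
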
+ */ +int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, + int (*payload_hook)(struct sk_buff *skb)) +{ + struct l2tp_session *session = NULL; + unsigned char *ptr, *optr; + u16 hdrflags; + u32 tunnel_id, session_id; + int length; + int offset; + u16 version; + u16 ns, nr; + + if (tunnel->sock && l2tp_verify_udp_checksum(tunnel->sock, skb)) + goto discard_bad_csum; + + /* UDP always verifies the packet length. */ + __skb_pull(skb, sizeof(struct udphdr)); + + /* Short packet? */ + if (!pskb_may_pull(skb, L2TP_HDR_SIZE_SEQ)) { + PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO, + "%s: recv short packet (len=%d)\n", tunnel->name, skb->len); + goto error; + } + + /* Point to L2TP header */ + optr = ptr = skb->data; + + /* Trace packet contents, if enabled */ + if (tunnel->debug & L2TP_MSG_DATA) { + length = min(32u, skb->len); + if (!pskb_may_pull(skb, length)) + goto error; + + printk(KERN_DEBUG "%s: recv: ", tunnel->name); + + offset = 0; + do { + printk(" %02X", ptr[offset]); + } while (++offset < length); + + printk("\n"); + } + + /* Get L2TP header flags */ + hdrflags = ntohs(*(__be16 *)ptr); + + /* Check protocol version */ + version = hdrflags & L2TP_HDR_VER_MASK; + if (version != tunnel->version) { + PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO, + "%s: recv protocol version mismatch: got %d expected %d\n", + tunnel->name, version, tunnel->version); + goto error; + } + + /* Get length of L2TP packet */ + length = skb->len; + + /* If type is control packet, it is handled by userspace. */ + if (hdrflags & L2TP_HDRFLAG_T) { + PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_DEBUG, + "%s: recv control packet, len=%d\n", tunnel->name, length); + goto error; + } + + /* Skip flags */ + ptr += 2; + + /* If length is present, skip it */ + if (hdrflags & L2TP_HDRFLAG_L) + ptr += 2; + + /* Extract tunnel and session ID */ + tunnel_id = ntohs(*(__be16 *) ptr); + ptr += 2; + session_id = ntohs(*(__be16 *) ptr); + ptr += 2; + + /* Find the session context */ + session = l2tp_session_find(tunnel, session_id); + if (!session) { + /* Not found? Pass to userspace to deal with */ + PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO, + "%s: no session found (%hu/%hu). Passing up.\n", + tunnel->name, tunnel_id, session_id); + goto error; + } + + /* The ref count is increased since we now hold a pointer to + * the session. Take care to decrement the refcnt when exiting + * this function from now on... + */ + l2tp_session_inc_refcount(session); + if (session->ref) + (*session->ref)(session); + + /* Handle the optional sequence numbers. Sequence numbers are + * in different places for L2TPv2 and L2TPv3. + * + * If we are the LAC, enable/disable sequence numbers under + * the control of the LNS. If no sequence numbers present but + * we were expecting them, discard frame. + */ + ns = nr = 0; + L2TP_SKB_CB(skb)->has_seq = 0; + if (hdrflags & L2TP_HDRFLAG_S) { + ns = (u16) ntohs(*(__be16 *) ptr); + ptr += 2; + nr = ntohs(*(__be16 *) ptr); + ptr += 2; + + /* Store L2TP info in the skb */ + L2TP_SKB_CB(skb)->ns = ns; + L2TP_SKB_CB(skb)->has_seq = 1; + + PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG, + "%s: recv data ns=%hu, nr=%hu, session nr=%hu\n", + session->name, ns, nr, session->nr); + } + + if (L2TP_SKB_CB(skb)->has_seq) { + /* Received a packet with sequence numbers. If we're the LNS, + * check if we sre sending sequence numbers and if not, + * configure it so. 
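
The LAC/LNS negotiation described above and implemented just below reduces to a small decision table keyed on whether the frame carries sequence numbers, whether we are the LNS, and the current send/recv settings. A sketch of that table (the enum names are illustrative):

    #include <stdio.h>

    enum seq_action { SEQ_OK, SEQ_ENABLE_TX, SEQ_DISABLE_TX, SEQ_DISCARD };

    /* Decision table mirroring the sequence-number handling in
     * l2tp_udp_recv_core():
     *  - a LAC follows the LNS's lead and turns its own tx sequencing
     *    on or off to match what it receives;
     *  - an LNS (lns_mode) never changes state and discards frames that
     *    should have carried sequence numbers but did not. */
    static enum seq_action seq_decide(int pkt_has_seq, int lns_mode,
                                      int send_seq, int recv_seq)
    {
        if (pkt_has_seq)
            return (!lns_mode && !send_seq) ? SEQ_ENABLE_TX : SEQ_OK;

        if (recv_seq)
            return SEQ_DISCARD;          /* sequencing is mandatory */
        if (!lns_mode && send_seq)
            return SEQ_DISABLE_TX;       /* LNS stopped sending seq */
        if (send_seq)
            return SEQ_DISCARD;          /* we are the LNS; peer is broken */
        return SEQ_OK;
    }

    int main(void)
    {
        static const char *names[] = { "ok", "enable tx seq",
                                       "disable tx seq", "discard" };

        /* LAC with sequencing off receives a sequenced frame. */
        printf("LAC, seq frame:    %s\n", names[seq_decide(1, 0, 0, 0)]);
        /* LAC with sequencing on receives an unsequenced frame. */
        printf("LAC, no-seq frame: %s\n", names[seq_decide(0, 0, 1, 0)]);
        /* LNS sending sequence numbers receives an unsequenced frame. */
        printf("LNS, no-seq frame: %s\n", names[seq_decide(0, 1, 1, 0)]);
        return 0;
    }
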
+ */ + if ((!session->lns_mode) && (!session->send_seq)) { + PRINTK(session->debug, L2TP_MSG_SEQ, KERN_INFO, + "%s: requested to enable seq numbers by LNS\n", + session->name); + session->send_seq = -1; + } + } else { + /* No sequence numbers. + * If user has configured mandatory sequence numbers, discard. + */ + if (session->recv_seq) { + PRINTK(session->debug, L2TP_MSG_SEQ, KERN_WARNING, + "%s: recv data has no seq numbers when required. " + "Discarding\n", session->name); + session->stats.rx_seq_discards++; + goto discard; + } + + /* If we're the LAC and we're sending sequence numbers, the + * LNS has requested that we no longer send sequence numbers. + * If we're the LNS and we're sending sequence numbers, the + * LAC is broken. Discard the frame. + */ + if ((!session->lns_mode) && (session->send_seq)) { + PRINTK(session->debug, L2TP_MSG_SEQ, KERN_INFO, + "%s: requested to disable seq numbers by LNS\n", + session->name); + session->send_seq = 0; + } else if (session->send_seq) { + PRINTK(session->debug, L2TP_MSG_SEQ, KERN_WARNING, + "%s: recv data has no seq numbers when required. " + "Discarding\n", session->name); + session->stats.rx_seq_discards++; + goto discard; + } + } + + /* If offset bit set, skip it. */ + if (hdrflags & L2TP_HDRFLAG_O) { + offset = ntohs(*(__be16 *)ptr); + ptr += 2 + offset; + } + + offset = ptr - optr; + if (!pskb_may_pull(skb, offset)) + goto discard; + + __skb_pull(skb, offset); + + /* If caller wants to process the payload before we queue the + * packet, do so now. + */ + if (payload_hook) + if ((*payload_hook)(skb)) + goto discard; + + /* Prepare skb for adding to the session's reorder_q. Hold + * packets for max reorder_timeout or 1 second if not + * reordering. + */ + L2TP_SKB_CB(skb)->length = length; + L2TP_SKB_CB(skb)->expires = jiffies + + (session->reorder_timeout ? session->reorder_timeout : HZ); + + /* Add packet to the session's receive queue. Reordering is done here, if + * enabled. Saved L2TP protocol info is stored in skb->sb[]. + */ + if (L2TP_SKB_CB(skb)->has_seq) { + if (session->reorder_timeout != 0) { + /* Packet reordering enabled. Add skb to session's + * reorder queue, in order of ns. + */ + l2tp_recv_queue_skb(session, skb); + } else { + /* Packet reordering disabled. Discard out-of-sequence + * packets + */ + if (L2TP_SKB_CB(skb)->ns != session->nr) { + session->stats.rx_seq_discards++; + PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG, + "%s: oos pkt %hu len %d discarded, " + "waiting for %hu, reorder_q_len=%d\n", + session->name, L2TP_SKB_CB(skb)->ns, + L2TP_SKB_CB(skb)->length, session->nr, + skb_queue_len(&session->reorder_q)); + goto discard; + } + skb_queue_tail(&session->reorder_q, skb); + } + } else { + /* No sequence numbers. Add the skb to the tail of the + * reorder queue. This ensures that it will be + * delivered after all previous sequenced skbs. + */ + skb_queue_tail(&session->reorder_q, skb); + } + + /* Try to dequeue as many skbs from reorder_q as we can. 
*/ + l2tp_recv_dequeue(session); + + l2tp_session_dec_refcount(session); + + return 0; + +discard: + session->stats.rx_errors++; + kfree_skb(skb); + + if (session->deref) + (*session->deref)(session); + + l2tp_session_dec_refcount(session); + + return 0; + +discard_bad_csum: + LIMIT_NETDEBUG("%s: UDP: bad checksum\n", tunnel->name); + UDP_INC_STATS_USER(tunnel->l2tp_net, UDP_MIB_INERRORS, 0); + tunnel->stats.rx_errors++; + kfree_skb(skb); + + return 0; + +error: + /* Put UDP header back */ + __skb_push(skb, sizeof(struct udphdr)); + + return 1; +} +EXPORT_SYMBOL_GPL(l2tp_udp_recv_core); + +/* UDP encapsulation receive handler. See net/ipv4/udp.c. + * Return codes: + * 0 : success. + * <0: error + * >0: skb should be passed up to userspace as UDP. + */ +int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) +{ + struct l2tp_tunnel *tunnel; + + tunnel = l2tp_sock_to_tunnel(sk); + if (tunnel == NULL) + goto pass_up; + + PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_DEBUG, + "%s: received %d bytes\n", tunnel->name, skb->len); + + if (l2tp_udp_recv_core(tunnel, skb, tunnel->recv_payload_hook)) + goto pass_up_put; + + sock_put(sk); + return 0; + +pass_up_put: + sock_put(sk); +pass_up: + return 1; +} +EXPORT_SYMBOL_GPL(l2tp_udp_encap_recv); + +/************************************************************************ + * Transmit handling + ***********************************************************************/ + +/* Build an L2TP header for the session into the buffer provided. + */ +static void l2tp_build_l2tpv2_header(struct l2tp_tunnel *tunnel, + struct l2tp_session *session, + void *buf) +{ + __be16 *bufp = buf; + u16 flags = L2TP_HDR_VER_2; + u32 tunnel_id = tunnel->peer_tunnel_id; + u32 session_id = session->peer_session_id; + + if (session->send_seq) + flags |= L2TP_HDRFLAG_S; + + /* Setup L2TP header. 
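
The writes that follow emit the six- or ten-byte L2TPv2 data header: flags/version, the peer's tunnel and session IDs, then ns plus a zero nr when sequencing is on. The same steps as a standalone sketch (the 6- and 10-byte results match L2TP_HDR_SIZE_NOSEQ and L2TP_HDR_SIZE_SEQ):

    #include <stdio.h>
    #include <stdint.h>

    static void put_be16(uint8_t *p, uint16_t v)
    {
        p[0] = (uint8_t)(v >> 8);
        p[1] = (uint8_t)v;
    }

    /* Emit a v2 data header the way l2tp_build_l2tpv2_header() does.
     * Returns the header length: 6 bytes, or 10 with sequencing. */
    static int build_l2tpv2(uint8_t *buf, uint16_t peer_tid, uint16_t peer_sid,
                            int send_seq, uint16_t ns)
    {
        uint16_t flags = 0x0002;              /* version 2 */
        uint8_t *p = buf;

        if (send_seq)
            flags |= 0x0800;                  /* S bit */

        put_be16(p, flags);    p += 2;
        put_be16(p, peer_tid); p += 2;
        put_be16(p, peer_sid); p += 2;
        if (send_seq) {
            put_be16(p, ns);   p += 2;        /* ns */
            put_be16(p, 0);    p += 2;        /* nr, always 0 on data */
        }
        return (int)(p - buf);
    }

    int main(void)
    {
        uint8_t hdr[10];
        int len = build_l2tpv2(hdr, 1, 2, 1, 7);

        printf("header (%d bytes):", len);
        for (int i = 0; i < len; i++)
            printf(" %02x", hdr[i]);
        printf("\n");
        return 0;
    }
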
*/ + *bufp++ = htons(flags); + *bufp++ = htons(tunnel_id); + *bufp++ = htons(session_id); + if (session->send_seq) { + *bufp++ = htons(session->ns); + *bufp++ = 0; + session->ns++; + PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG, + "%s: updated ns to %hu\n", session->name, session->ns); + } +} + +void l2tp_build_l2tp_header(struct l2tp_session *session, void *buf) +{ + struct l2tp_tunnel *tunnel = session->tunnel; + + BUG_ON(tunnel->version != L2TP_HDR_VER_2); + l2tp_build_l2tpv2_header(tunnel, session, buf); +} +EXPORT_SYMBOL_GPL(l2tp_build_l2tp_header); + +int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t data_len) +{ + struct l2tp_tunnel *tunnel = session->tunnel; + unsigned int len = skb->len; + int error; + + /* Debug */ + if (session->send_seq) + PRINTK(session->debug, L2TP_MSG_DATA, KERN_DEBUG, + "%s: send %Zd bytes, ns=%hu\n", session->name, + data_len, session->ns - 1); + else + PRINTK(session->debug, L2TP_MSG_DATA, KERN_DEBUG, + "%s: send %Zd bytes\n", session->name, data_len); + + if (session->debug & L2TP_MSG_DATA) { + int i; + unsigned char *datap = skb->data + sizeof(struct udphdr); + + printk(KERN_DEBUG "%s: xmit:", session->name); + for (i = 0; i < (len - sizeof(struct udphdr)); i++) { + printk(" %02X", *datap++); + if (i == 31) { + printk(" ..."); + break; + } + } + printk("\n"); + } + + /* Queue the packet to IP for output */ + error = ip_queue_xmit(skb, 1); + + /* Update stats */ + if (error >= 0) { + tunnel->stats.tx_packets++; + tunnel->stats.tx_bytes += len; + session->stats.tx_packets++; + session->stats.tx_bytes += len; + } else { + tunnel->stats.tx_errors++; + session->stats.tx_errors++; + } + + return 0; +} +EXPORT_SYMBOL_GPL(l2tp_xmit_core); + +/* Automatically called when the skb is freed. + */ +static void l2tp_sock_wfree(struct sk_buff *skb) +{ + sock_put(skb->sk); +} + +/* For data skbs that we transmit, we associate with the tunnel socket + * but don't do accounting. + */ +static inline void l2tp_skb_set_owner_w(struct sk_buff *skb, struct sock *sk) +{ + sock_hold(sk); + skb->sk = sk; + skb->destructor = l2tp_sock_wfree; +} + +/* If caller requires the skb to have a ppp header, the header must be + * inserted in the skb data before calling this function. + */ +int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len) +{ + int data_len = skb->len; + struct sock *sk = session->tunnel->sock; + struct udphdr *uh; + unsigned int udp_len; + struct inet_sock *inet; + __wsum csum; + int old_headroom; + int new_headroom; + int headroom; + + /* Check that there's enough headroom in the skb to insert IP, + * UDP and L2TP headers. If not enough, expand it to + * make room. Adjust truesize. 
+ */ + headroom = NET_SKB_PAD + sizeof(struct iphdr) + + sizeof(struct udphdr) + hdr_len; + old_headroom = skb_headroom(skb); + if (skb_cow_head(skb, headroom)) + goto abort; + + new_headroom = skb_headroom(skb); + skb_orphan(skb); + skb->truesize += new_headroom - old_headroom; + + /* Setup L2TP header */ + l2tp_build_l2tp_header(session, __skb_push(skb, hdr_len)); + udp_len = sizeof(struct udphdr) + hdr_len + data_len; + + /* Setup UDP header */ + inet = inet_sk(sk); + __skb_push(skb, sizeof(*uh)); + skb_reset_transport_header(skb); + uh = udp_hdr(skb); + uh->source = inet->inet_sport; + uh->dest = inet->inet_dport; + uh->len = htons(udp_len); + + uh->check = 0; + + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | + IPSKB_REROUTED); + nf_reset(skb); + + /* Get routing info from the tunnel socket */ + skb_dst_drop(skb); + skb_dst_set(skb, dst_clone(__sk_dst_get(sk))); + l2tp_skb_set_owner_w(skb, sk); + + /* Calculate UDP checksum if configured to do so */ + if (sk->sk_no_check == UDP_CSUM_NOXMIT) + skb->ip_summed = CHECKSUM_NONE; + else if ((skb_dst(skb) && skb_dst(skb)->dev) && + (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) { + skb->ip_summed = CHECKSUM_COMPLETE; + csum = skb_checksum(skb, 0, udp_len, 0); + uh->check = csum_tcpudp_magic(inet->inet_saddr, + inet->inet_daddr, + udp_len, IPPROTO_UDP, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + } else { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + uh->check = ~csum_tcpudp_magic(inet->inet_saddr, + inet->inet_daddr, + udp_len, IPPROTO_UDP, 0); + } + + l2tp_xmit_core(session, skb, data_len); + +abort: + return 0; +} +EXPORT_SYMBOL_GPL(l2tp_xmit_skb); + +/***************************************************************************** + * Tinnel and session create/destroy. + *****************************************************************************/ + +/* Tunnel socket destruct hook. + * The tunnel context is deleted only when all session sockets have been + * closed. + */ +void l2tp_tunnel_destruct(struct sock *sk) +{ + struct l2tp_tunnel *tunnel; + + tunnel = sk->sk_user_data; + if (tunnel == NULL) + goto end; + + PRINTK(tunnel->debug, L2TP_MSG_CONTROL, KERN_INFO, + "%s: closing...\n", tunnel->name); + + /* Close all sessions */ + l2tp_tunnel_closeall(tunnel); + + /* No longer an encapsulation socket. See net/ipv4/udp.c */ + (udp_sk(sk))->encap_type = 0; + (udp_sk(sk))->encap_rcv = NULL; + + /* Remove hooks into tunnel socket */ + tunnel->sock = NULL; + sk->sk_destruct = tunnel->old_sk_destruct; + sk->sk_user_data = NULL; + + /* Call the original destructor */ + if (sk->sk_destruct) + (*sk->sk_destruct)(sk); + + /* We're finished with the socket */ + l2tp_tunnel_dec_refcount(tunnel); + +end: + return; +} +EXPORT_SYMBOL(l2tp_tunnel_destruct); + +/* When the tunnel is closed, all the attached sessions need to go too. 
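
Back in l2tp_xmit_skb() above: when the socket allows checksums but the output device cannot compute them (no NETIF_F_V4_CSUM), the driver does the UDP checksum in software, a one's-complement sum over the IPv4 pseudo-header plus the whole UDP datagram. A worked userspace version of that arithmetic (addresses, ports and payload are made up):

    #include <stdio.h>
    #include <stdint.h>

    /* RFC 1071 one's-complement sum, as used for the UDP checksum. */
    static uint16_t csum(const uint8_t *data, size_t len, uint32_t sum)
    {
        for (size_t i = 0; i + 1 < len; i += 2)
            sum += (uint32_t)(data[i] << 8) | data[i + 1];
        if (len & 1)
            sum += (uint32_t)data[len - 1] << 8;
        while (sum >> 16)
            sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)~sum;
    }

    /* UDP checksum: pseudo header (saddr, daddr, protocol, UDP length)
     * plus the UDP header and payload, checksum field zeroed first. */
    static uint16_t udp_csum(uint32_t saddr, uint32_t daddr,
                             const uint8_t *udp, uint16_t udp_len)
    {
        uint32_t sum = 0;

        sum += (saddr >> 16) + (saddr & 0xffff);
        sum += (daddr >> 16) + (daddr & 0xffff);
        sum += 17;             /* IPPROTO_UDP */
        sum += udp_len;
        return csum(udp, udp_len, sum);
    }

    int main(void)
    {
        /* 8-byte UDP header (ports 1701 -> 1701, len 12) + 4 payload bytes. */
        uint8_t dgram[12] = { 0x06, 0xa5, 0x06, 0xa5, 0x00, 0x0c, 0x00, 0x00,
                              0xc0, 0x02, 0x00, 0x01 };
        uint16_t c = udp_csum(0xc0a80001u, 0xc0a80002u, dgram, sizeof(dgram));

        printf("udp checksum = 0x%04x\n", c);
        return 0;
    }

As in the code above, a computed value of zero has to be transmitted as 0xFFFF (CSUM_MANGLED_0), because an all-zero checksum field means "no checksum" for UDP over IPv4.
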
+ */ +void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) +{ + int hash; + struct hlist_node *walk; + struct hlist_node *tmp; + struct l2tp_session *session; + + BUG_ON(tunnel == NULL); + + PRINTK(tunnel->debug, L2TP_MSG_CONTROL, KERN_INFO, + "%s: closing all sessions...\n", tunnel->name); + + write_lock_bh(&tunnel->hlist_lock); + for (hash = 0; hash < L2TP_HASH_SIZE; hash++) { +again: + hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[hash]) { + session = hlist_entry(walk, struct l2tp_session, hlist); + + PRINTK(session->debug, L2TP_MSG_CONTROL, KERN_INFO, + "%s: closing session\n", session->name); + + hlist_del_init(&session->hlist); + + /* Since we should hold the sock lock while + * doing any unbinding, we need to release the + * lock we're holding before taking that lock. + * Hold a reference to the sock so it doesn't + * disappear as we're jumping between locks. + */ + if (session->ref != NULL) + (*session->ref)(session); + + write_unlock_bh(&tunnel->hlist_lock); + + if (session->session_close != NULL) + (*session->session_close)(session); + + if (session->deref != NULL) + (*session->deref)(session); + + write_lock_bh(&tunnel->hlist_lock); + + /* Now restart from the beginning of this hash + * chain. We always remove a session from the + * list so we are guaranteed to make forward + * progress. + */ + goto again; + } + } + write_unlock_bh(&tunnel->hlist_lock); +} +EXPORT_SYMBOL_GPL(l2tp_tunnel_closeall); + +/* Really kill the tunnel. + * Come here only when all sessions have been cleared from the tunnel. + */ +void l2tp_tunnel_free(struct l2tp_tunnel *tunnel) +{ + struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); + + BUG_ON(atomic_read(&tunnel->ref_count) != 0); + BUG_ON(tunnel->sock != NULL); + + PRINTK(tunnel->debug, L2TP_MSG_CONTROL, KERN_INFO, + "%s: free...\n", tunnel->name); + + /* Remove from tunnel list */ + write_lock_bh(&pn->l2tp_tunnel_list_lock); + list_del_init(&tunnel->list); + write_unlock_bh(&pn->l2tp_tunnel_list_lock); + + atomic_dec(&l2tp_tunnel_count); + kfree(tunnel); +} +EXPORT_SYMBOL_GPL(l2tp_tunnel_free); + +int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp) +{ + struct l2tp_tunnel *tunnel = NULL; + int err; + struct socket *sock = NULL; + struct sock *sk = NULL; + struct l2tp_net *pn; + + /* Get the tunnel socket from the fd, which was opened by + * the userspace L2TP daemon. 
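
The close-all loop above has to drop hlist_lock around the session_close()/deref() callbacks and then restart the scan, because the chain may have changed while the lock was released. A userspace analogy of that shape, using a pthread mutex purely for illustration (the driver uses a rwlock over hlists):

    #include <stdio.h>
    #include <stdlib.h>
    #include <pthread.h>

    struct item {
        int id;
        struct item *next;
    };

    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct item *head;

    /* Callback that must not run with list_lock held
     * (stands in for session_close()/deref() in the driver). */
    static void close_item(struct item *it)
    {
        printf("closing item %d\n", it->id);
        free(it);
    }

    static void close_all(void)
    {
        pthread_mutex_lock(&list_lock);
    again:
        while (head) {
            struct item *it = head;

            head = it->next;                 /* unlink under the lock */
            pthread_mutex_unlock(&list_lock);
            close_item(it);                  /* call back without the lock */
            pthread_mutex_lock(&list_lock);
            /* In the driver the restart matters because the hlist cursor
             * may be stale after the lock was dropped. */
            goto again;
        }
        pthread_mutex_unlock(&list_lock);
    }

    int main(void)
    {
        for (int i = 0; i < 3; i++) {
            struct item *it = malloc(sizeof(*it));

            it->id = i;
            it->next = head;
            head = it;
        }
        close_all();
        return 0;
    }
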
+ */ + err = -EBADF; + sock = sockfd_lookup(fd, &err); + if (!sock) { + printk(KERN_ERR "tunl %hu: sockfd_lookup(fd=%d) returned %d\n", + tunnel_id, fd, err); + goto err; + } + + sk = sock->sk; + + /* Quick sanity checks */ + err = -EPROTONOSUPPORT; + if (sk->sk_protocol != IPPROTO_UDP) { + printk(KERN_ERR "tunl %hu: fd %d wrong protocol, got %d, expected %d\n", + tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP); + goto err; + } + err = -EAFNOSUPPORT; + if (sock->ops->family != AF_INET) { + printk(KERN_ERR "tunl %hu: fd %d wrong family, got %d, expected %d\n", + tunnel_id, fd, sock->ops->family, AF_INET); + goto err; + } + + /* Check if this socket has already been prepped */ + tunnel = (struct l2tp_tunnel *)sk->sk_user_data; + if (tunnel != NULL) { + /* This socket has already been prepped */ + err = -EBUSY; + goto err; + } + + if (version != L2TP_HDR_VER_2) + goto err; + + tunnel = kzalloc(sizeof(struct l2tp_tunnel), GFP_KERNEL); + if (tunnel == NULL) { + err = -ENOMEM; + goto err; + } + + tunnel->version = version; + tunnel->tunnel_id = tunnel_id; + tunnel->peer_tunnel_id = peer_tunnel_id; + tunnel->debug = L2TP_DEFAULT_DEBUG_FLAGS; + + tunnel->magic = L2TP_TUNNEL_MAGIC; + sprintf(&tunnel->name[0], "tunl %u", tunnel_id); + rwlock_init(&tunnel->hlist_lock); + + /* The net we belong to */ + tunnel->l2tp_net = net; + pn = l2tp_pernet(net); + + if (cfg) + tunnel->debug = cfg->debug; + + /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ + udp_sk(sk)->encap_type = UDP_ENCAP_L2TPINUDP; + udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv; + + sk->sk_user_data = tunnel; + + /* Hook on the tunnel socket destructor so that we can cleanup + * if the tunnel socket goes away. + */ + tunnel->old_sk_destruct = sk->sk_destruct; + sk->sk_destruct = &l2tp_tunnel_destruct; + tunnel->sock = sk; + sk->sk_allocation = GFP_ATOMIC; + + /* Add tunnel to our list */ + INIT_LIST_HEAD(&tunnel->list); + write_lock_bh(&pn->l2tp_tunnel_list_lock); + list_add(&tunnel->list, &pn->l2tp_tunnel_list); + write_unlock_bh(&pn->l2tp_tunnel_list_lock); + atomic_inc(&l2tp_tunnel_count); + + /* Bump the reference count. The tunnel context is deleted + * only when this drops to zero. + */ + l2tp_tunnel_inc_refcount(tunnel); + + err = 0; +err: + if (tunnelp) + *tunnelp = tunnel; + + if (sock) + sockfd_put(sock); + + return err; +} +EXPORT_SYMBOL_GPL(l2tp_tunnel_create); + +/* Really kill the session. + */ +void l2tp_session_free(struct l2tp_session *session) +{ + struct l2tp_tunnel *tunnel; + + BUG_ON(atomic_read(&session->ref_count) != 0); + + tunnel = session->tunnel; + if (tunnel != NULL) { + BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC); + + /* Delete the session from the hash */ + write_lock_bh(&tunnel->hlist_lock); + hlist_del_init(&session->hlist); + write_unlock_bh(&tunnel->hlist_lock); + + if (session->session_id != 0) + atomic_dec(&l2tp_session_count); + + sock_put(tunnel->sock); + + /* This will delete the tunnel context if this + * is the last session on the tunnel. 
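
The fd that l2tp_tunnel_create() above resolves with sockfd_lookup() is an ordinary AF_INET/UDP socket opened by the userspace control daemon, which is all the sanity checks verify. A sketch of the daemon-side setup that produces such an fd (addresses are examples; 1701 is the conventional L2TP port):

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>

    int main(void)
    {
        struct sockaddr_in local = { 0 }, peer = { 0 };
        int tunnel_fd;

        /* Plain AF_INET/UDP socket: exactly what the checks in
         * l2tp_tunnel_create() expect to find behind the fd. */
        tunnel_fd = socket(AF_INET, SOCK_DGRAM, 0);
        if (tunnel_fd < 0) {
            perror("socket");
            return 1;
        }

        local.sin_family = AF_INET;
        local.sin_port = htons(1701);              /* conventional L2TP port */
        local.sin_addr.s_addr = htonl(INADDR_ANY);
        if (bind(tunnel_fd, (struct sockaddr *)&local, sizeof(local)) < 0) {
            perror("bind");
            return 1;
        }

        peer.sin_family = AF_INET;
        peer.sin_port = htons(1701);
        inet_pton(AF_INET, "192.0.2.1", &peer.sin_addr);   /* example peer */
        if (connect(tunnel_fd, (struct sockaddr *)&peer, sizeof(peer)) < 0) {
            perror("connect");
            return 1;
        }

        /* tunnel_fd is what goes into sockaddr_pppol2tp.fd when the PPPoX
         * tunnel-management socket is connected (see the example in the
         * l2tp_ppp.c header comment below). */
        printf("tunnel UDP socket fd=%d ready\n", tunnel_fd);
        close(tunnel_fd);
        return 0;
    }
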
+ */ + session->tunnel = NULL; + l2tp_tunnel_dec_refcount(tunnel); + } + + kfree(session); + + return; +} +EXPORT_SYMBOL_GPL(l2tp_session_free); + +struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) +{ + struct l2tp_session *session; + + session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL); + if (session != NULL) { + session->magic = L2TP_SESSION_MAGIC; + session->tunnel = tunnel; + + session->session_id = session_id; + session->peer_session_id = peer_session_id; + + sprintf(&session->name[0], "sess %u/%u", + tunnel->tunnel_id, session->session_id); + + skb_queue_head_init(&session->reorder_q); + + INIT_HLIST_NODE(&session->hlist); + + /* Inherit debug options from tunnel */ + session->debug = tunnel->debug; + + if (cfg) { + session->debug = cfg->debug; + session->hdr_len = cfg->hdr_len; + session->mtu = cfg->mtu; + session->mru = cfg->mru; + session->send_seq = cfg->send_seq; + session->recv_seq = cfg->recv_seq; + session->lns_mode = cfg->lns_mode; + } + + /* Bump the reference count. The session context is deleted + * only when this drops to zero. + */ + l2tp_session_inc_refcount(session); + l2tp_tunnel_inc_refcount(tunnel); + + /* Ensure tunnel socket isn't deleted */ + sock_hold(tunnel->sock); + + /* Add session to the tunnel's hash list */ + write_lock_bh(&tunnel->hlist_lock); + hlist_add_head(&session->hlist, + l2tp_session_id_hash(tunnel, session_id)); + write_unlock_bh(&tunnel->hlist_lock); + + /* Ignore management session in session count value */ + if (session->session_id != 0) + atomic_inc(&l2tp_session_count); + } + + return session; +} +EXPORT_SYMBOL_GPL(l2tp_session_create); + +/***************************************************************************** + * Init and cleanup + *****************************************************************************/ + +static __net_init int l2tp_init_net(struct net *net) +{ + struct l2tp_net *pn; + int err; + + pn = kzalloc(sizeof(*pn), GFP_KERNEL); + if (!pn) + return -ENOMEM; + + INIT_LIST_HEAD(&pn->l2tp_tunnel_list); + rwlock_init(&pn->l2tp_tunnel_list_lock); + + err = net_assign_generic(net, l2tp_net_id, pn); + if (err) + goto out; + + return 0; + +out: + kfree(pn); + return err; +} + +static __net_exit void l2tp_exit_net(struct net *net) +{ + struct l2tp_net *pn; + + pn = net_generic(net, l2tp_net_id); + /* + * if someone has cached our net then + * further net_generic call will return NULL + */ + net_assign_generic(net, l2tp_net_id, NULL); + kfree(pn); +} + +static struct pernet_operations l2tp_net_ops = { + .init = l2tp_init_net, + .exit = l2tp_exit_net, + .id = &l2tp_net_id, + .size = sizeof(struct l2tp_net), +}; + +static int __init l2tp_init(void) +{ + int rc = 0; + + rc = register_pernet_device(&l2tp_net_ops); + if (rc) + goto out; + + printk(KERN_INFO "L2TP core driver, %s\n", L2TP_DRV_VERSION); + +out: + return rc; +} + +static void __exit l2tp_exit(void) +{ + unregister_pernet_device(&l2tp_net_ops); +} + +module_init(l2tp_init); +module_exit(l2tp_exit); + +MODULE_AUTHOR("James Chapman "); +MODULE_DESCRIPTION("L2TP core"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(L2TP_DRV_VERSION); + diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h new file mode 100644 index 000000000000..2efe1a3ada98 --- /dev/null +++ b/net/l2tp/l2tp_core.h @@ -0,0 +1,254 @@ +/* + * L2TP internal definitions. 
+ * + * Copyright (c) 2008,2009 Katalix Systems Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _L2TP_CORE_H_ +#define _L2TP_CORE_H_ + +/* Just some random numbers */ +#define L2TP_TUNNEL_MAGIC 0x42114DDA +#define L2TP_SESSION_MAGIC 0x0C04EB7D + +#define L2TP_HASH_BITS 4 +#define L2TP_HASH_SIZE (1 << L2TP_HASH_BITS) + +/* Debug message categories for the DEBUG socket option */ +enum { + L2TP_MSG_DEBUG = (1 << 0), /* verbose debug (if + * compiled in) */ + L2TP_MSG_CONTROL = (1 << 1), /* userspace - kernel + * interface */ + L2TP_MSG_SEQ = (1 << 2), /* sequence numbers */ + L2TP_MSG_DATA = (1 << 3), /* data packets */ +}; + +struct sk_buff; + +struct l2tp_stats { + u64 tx_packets; + u64 tx_bytes; + u64 tx_errors; + u64 rx_packets; + u64 rx_bytes; + u64 rx_seq_discards; + u64 rx_oos_packets; + u64 rx_errors; +}; + +struct l2tp_tunnel; + +/* Describes a session. Contains information to determine incoming + * packets and transmit outgoing ones. + */ +struct l2tp_session_cfg { + unsigned data_seq:2; /* data sequencing level + * 0 => none, 1 => IP only, + * 2 => all + */ + unsigned recv_seq:1; /* expect receive packets with + * sequence numbers? */ + unsigned send_seq:1; /* send packets with sequence + * numbers? */ + unsigned lns_mode:1; /* behave as LNS? LAC enables + * sequence numbers under + * control of LNS. */ + int debug; /* bitmask of debug message + * categories */ + int offset; /* offset to payload */ + int reorder_timeout; /* configured reorder timeout + * (in jiffies) */ + int mtu; + int mru; + int hdr_len; +}; + +struct l2tp_session { + int magic; /* should be + * L2TP_SESSION_MAGIC */ + + struct l2tp_tunnel *tunnel; /* back pointer to tunnel + * context */ + u32 session_id; + u32 peer_session_id; + u16 nr; /* session NR state (receive) */ + u16 ns; /* session NR state (send) */ + struct sk_buff_head reorder_q; /* receive reorder queue */ + struct hlist_node hlist; /* Hash list node */ + atomic_t ref_count; + + char name[32]; /* for logging */ + unsigned data_seq:2; /* data sequencing level + * 0 => none, 1 => IP only, + * 2 => all + */ + unsigned recv_seq:1; /* expect receive packets with + * sequence numbers? */ + unsigned send_seq:1; /* send packets with sequence + * numbers? */ + unsigned lns_mode:1; /* behave as LNS? LAC enables + * sequence numbers under + * control of LNS. */ + int debug; /* bitmask of debug message + * categories */ + int reorder_timeout; /* configured reorder timeout + * (in jiffies) */ + int mtu; + int mru; + int hdr_len; + struct l2tp_stats stats; + + void (*recv_skb)(struct l2tp_session *session, struct sk_buff *skb, int data_len); + void (*session_close)(struct l2tp_session *session); + void (*ref)(struct l2tp_session *session); + void (*deref)(struct l2tp_session *session); + + uint8_t priv[0]; /* private data */ +}; + +/* Describes the tunnel. 
It contains info to track all the associated + * sessions so incoming packets can be sorted out + */ +struct l2tp_tunnel_cfg { + int debug; /* bitmask of debug message + * categories */ +}; + +struct l2tp_tunnel { + int magic; /* Should be L2TP_TUNNEL_MAGIC */ + rwlock_t hlist_lock; /* protect session_hlist */ + struct hlist_head session_hlist[L2TP_HASH_SIZE]; + /* hashed list of sessions, + * hashed by id */ + u32 tunnel_id; + u32 peer_tunnel_id; + int version; /* 2=>L2TPv2, 3=>L2TPv3 */ + + char name[20]; /* for logging */ + int debug; /* bitmask of debug message + * categories */ + int hdr_len; + struct l2tp_stats stats; + + struct list_head list; /* Keep a list of all tunnels */ + struct net *l2tp_net; /* the net we belong to */ + + atomic_t ref_count; + + int (*recv_payload_hook)(struct sk_buff *skb); + void (*old_sk_destruct)(struct sock *); + struct sock *sock; /* Parent socket */ + int fd; + + uint8_t priv[0]; /* private data */ +}; + +static inline void *l2tp_tunnel_priv(struct l2tp_tunnel *tunnel) +{ + return &tunnel->priv[0]; +} + +static inline void *l2tp_session_priv(struct l2tp_session *session) +{ + return &session->priv[0]; +} + +static inline struct l2tp_tunnel *l2tp_sock_to_tunnel(struct sock *sk) +{ + struct l2tp_tunnel *tunnel; + + if (sk == NULL) + return NULL; + + sock_hold(sk); + tunnel = (struct l2tp_tunnel *)(sk->sk_user_data); + if (tunnel == NULL) { + sock_put(sk); + goto out; + } + + BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC); + +out: + return tunnel; +} + +extern struct l2tp_session *l2tp_session_find(struct l2tp_tunnel *tunnel, u32 session_id); +extern struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth); +extern struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id); +extern struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth); + +extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp); +extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); +extern void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); +extern void l2tp_session_free(struct l2tp_session *session); +extern int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, int (*payload_hook)(struct sk_buff *skb)); +extern int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb); + +extern void l2tp_build_l2tp_header(struct l2tp_session *session, void *buf); +extern int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t data_len); +extern int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len); +extern void l2tp_tunnel_destruct(struct sock *sk); +extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); + +/* Tunnel reference counts. Incremented per session that is added to + * the tunnel. 
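
The priv[0] members above are the usual trailing-storage idiom: l2tp_session_create() allocates sizeof(struct l2tp_session) + priv_size in one block, and a protocol module such as PPP keeps its per-session state behind l2tp_session_priv(). The same idiom in plain C (names are stand-ins, not the driver's):

    #include <stdio.h>
    #include <stdlib.h>

    /* Core object with protocol-private data in a trailing array,
     * mirroring struct l2tp_session / l2tp_session_priv(). */
    struct session {
        unsigned int id;
        char name[32];
        unsigned char priv[];      /* flexible array; the driver uses priv[0] */
    };

    struct ppp_priv {              /* stand-in for struct pppol2tp_session */
        int owner_pid;
        int flags;
    };

    static void *session_priv(struct session *s)
    {
        return &s->priv[0];
    }

    static struct session *session_create(size_t priv_size, unsigned int id)
    {
        struct session *s = calloc(1, sizeof(*s) + priv_size);

        if (s) {
            s->id = id;
            snprintf(s->name, sizeof(s->name), "sess %u", id);
        }
        return s;
    }

    int main(void)
    {
        struct session *s = session_create(sizeof(struct ppp_priv), 42);
        struct ppp_priv *ps = session_priv(s);

        ps->owner_pid = 1234;
        printf("%s: priv at %p, owner=%d\n", s->name, (void *)ps, ps->owner_pid);
        free(s);
        return 0;
    }
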
+ */ +static inline void l2tp_tunnel_inc_refcount_1(struct l2tp_tunnel *tunnel) +{ + atomic_inc(&tunnel->ref_count); +} + +static inline void l2tp_tunnel_dec_refcount_1(struct l2tp_tunnel *tunnel) +{ + if (atomic_dec_and_test(&tunnel->ref_count)) + l2tp_tunnel_free(tunnel); +} +#ifdef L2TP_REFCNT_DEBUG +#define l2tp_tunnel_inc_refcount(_t) do { \ + printk(KERN_DEBUG "l2tp_tunnel_inc_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&_t->ref_count)); \ + l2tp_tunnel_inc_refcount_1(_t); \ + } while (0) +#define l2tp_tunnel_dec_refcount(_t) do { \ + printk(KERN_DEBUG "l2tp_tunnel_dec_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&_t->ref_count)); \ + l2tp_tunnel_dec_refcount_1(_t); \ + } while (0) +#else +#define l2tp_tunnel_inc_refcount(t) l2tp_tunnel_inc_refcount_1(t) +#define l2tp_tunnel_dec_refcount(t) l2tp_tunnel_dec_refcount_1(t) +#endif + +/* Session reference counts. Incremented when code obtains a reference + * to a session. + */ +static inline void l2tp_session_inc_refcount_1(struct l2tp_session *session) +{ + atomic_inc(&session->ref_count); +} + +static inline void l2tp_session_dec_refcount_1(struct l2tp_session *session) +{ + if (atomic_dec_and_test(&session->ref_count)) + l2tp_session_free(session); +} + +#ifdef L2TP_REFCNT_DEBUG +#define l2tp_session_inc_refcount(_s) do { \ + printk(KERN_DEBUG "l2tp_session_inc_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_s)->name, atomic_read(&_s->ref_count)); \ + l2tp_session_inc_refcount_1(_s); \ + } while (0) +#define l2tp_session_dec_refcount(_s) do { \ + printk(KERN_DEBUG "l2tp_session_dec_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_s)->name, atomic_read(&_s->ref_count)); \ + l2tp_session_dec_refcount_1(_s); \ + } while (0) +#else +#define l2tp_session_inc_refcount(s) l2tp_session_inc_refcount_1(s) +#define l2tp_session_dec_refcount(s) l2tp_session_dec_refcount_1(s) +#endif + +#endif /* _L2TP_CORE_H_ */ diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c new file mode 100644 index 000000000000..baac072761aa --- /dev/null +++ b/net/l2tp/l2tp_ppp.c @@ -0,0 +1,1658 @@ +/***************************************************************************** + * Linux PPP over L2TP (PPPoX/PPPoL2TP) Sockets + * + * PPPoX --- Generic PPP encapsulation socket family + * PPPoL2TP --- PPP over L2TP (RFC 2661) + * + * Version: 2.0.0 + * + * Authors: James Chapman (jchapman@katalix.com) + * + * Based on original work by Martijn van Oosterhout + * + * License: + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +/* This driver handles only L2TP data frames; control frames are handled by a + * userspace application. + * + * To send data in an L2TP session, userspace opens a PPPoL2TP socket and + * attaches it to a bound UDP socket with local tunnel_id / session_id and + * peer tunnel_id / session_id set. Data can then be sent or received using + * regular socket sendmsg() / recvmsg() calls. Kernel parameters of the socket + * can be read or modified using ioctl() or [gs]etsockopt() calls. + * + * When a PPPoL2TP socket is connected with local and peer session_id values + * zero, the socket is treated as a special tunnel management socket. 
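
The reference-count helpers in l2tp_core.h above encode a simple lifetime rule: every path that holds a pointer takes a reference, and the object is freed only on the transition to zero. A stripped-down, single-threaded model (the kernel helpers use atomic_t plus the optional L2TP_REFCNT_DEBUG tracing):

    #include <stdio.h>
    #include <stdlib.h>

    struct session {
        int ref_count;
        char name[16];
    };

    static void session_free(struct session *s)
    {
        printf("%s: freed\n", s->name);
        free(s);
    }

    /* Mirrors l2tp_session_inc_refcount()/l2tp_session_dec_refcount():
     * the object dies on the transition to zero, nowhere else. */
    static void session_get(struct session *s)
    {
        s->ref_count++;
    }

    static void session_put(struct session *s)
    {
        if (--s->ref_count == 0)
            session_free(s);
    }

    int main(void)
    {
        struct session *s = calloc(1, sizeof(*s));

        snprintf(s->name, sizeof(s->name), "sess 1");
        session_get(s);          /* creation reference */
        session_get(s);          /* e.g. the receive path holds the session */
        session_put(s);          /* receive path done */
        session_put(s);          /* final put frees */
        return 0;
    }
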
+ * + * Here's example userspace code to create a socket for sending/receiving data + * over an L2TP session:- + * + * struct sockaddr_pppol2tp sax; + * int fd; + * int session_fd; + * + * fd = socket(AF_PPPOX, SOCK_DGRAM, PX_PROTO_OL2TP); + * + * sax.sa_family = AF_PPPOX; + * sax.sa_protocol = PX_PROTO_OL2TP; + * sax.pppol2tp.fd = tunnel_fd; // bound UDP socket + * sax.pppol2tp.addr.sin_addr.s_addr = addr->sin_addr.s_addr; + * sax.pppol2tp.addr.sin_port = addr->sin_port; + * sax.pppol2tp.addr.sin_family = AF_INET; + * sax.pppol2tp.s_tunnel = tunnel_id; + * sax.pppol2tp.s_session = session_id; + * sax.pppol2tp.d_tunnel = peer_tunnel_id; + * sax.pppol2tp.d_session = peer_session_id; + * + * session_fd = connect(fd, (struct sockaddr *)&sax, sizeof(sax)); + * + * A pppd plugin that allows PPP traffic to be carried over L2TP using + * this driver is available from the OpenL2TP project at + * http://openl2tp.sourceforge.net. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "l2tp_core.h" + +#define PPPOL2TP_DRV_VERSION "V2.0" + +/* Space for UDP, L2TP and PPP headers */ +#define PPPOL2TP_HEADER_OVERHEAD 40 + +#define PRINTK(_mask, _type, _lvl, _fmt, args...) \ + do { \ + if ((_mask) & (_type)) \ + printk(_lvl "PPPOL2TP: " _fmt, ##args); \ + } while (0) + +/* Number of bytes to build transmit L2TP headers. + * Unfortunately the size is different depending on whether sequence numbers + * are enabled. + */ +#define PPPOL2TP_L2TP_HDR_SIZE_SEQ 10 +#define PPPOL2TP_L2TP_HDR_SIZE_NOSEQ 6 + +/* Private data of each session. This data lives at the end of struct + * l2tp_session, referenced via session->priv[]. + */ +struct pppol2tp_session { + int owner; /* pid that opened the socket */ + + struct sock *sock; /* Pointer to the session + * PPPoX socket */ + struct sock *tunnel_sock; /* Pointer to the tunnel UDP + * socket */ + int flags; /* accessed by PPPIOCGFLAGS. + * Unused. */ +}; + +static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb); + +static struct ppp_channel_ops pppol2tp_chan_ops = { pppol2tp_xmit , NULL }; +static const struct proto_ops pppol2tp_ops; + +/* Helpers to obtain tunnel/session contexts from sockets. + */ +static inline struct l2tp_session *pppol2tp_sock_to_session(struct sock *sk) +{ + struct l2tp_session *session; + + if (sk == NULL) + return NULL; + + sock_hold(sk); + session = (struct l2tp_session *)(sk->sk_user_data); + if (session == NULL) { + sock_put(sk); + goto out; + } + + BUG_ON(session->magic != L2TP_SESSION_MAGIC); + +out: + return session; +} + +/***************************************************************************** + * Receive data handling + *****************************************************************************/ + +static int pppol2tp_recv_payload_hook(struct sk_buff *skb) +{ + /* Skip PPP header, if present. In testing, Microsoft L2TP clients + * don't send the PPP header (PPP header compression enabled), but + * other clients can include the header. So we cope with both cases + * here. The PPP header is always FF03 when using L2TP. + * + * Note that skb->data[] isn't dereferenced from a u16 ptr here since + * the field may be unaligned. 
+ */ + if (!pskb_may_pull(skb, 2)) + return 1; + + if ((skb->data[0] == 0xff) && (skb->data[1] == 0x03)) + skb_pull(skb, 2); + + return 0; +} + +/* Receive message. This is the recvmsg for the PPPoL2TP socket. + */ +static int pppol2tp_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t len, + int flags) +{ + int err; + struct sk_buff *skb; + struct sock *sk = sock->sk; + + err = -EIO; + if (sk->sk_state & PPPOX_BOUND) + goto end; + + msg->msg_namelen = 0; + + err = 0; + skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, + flags & MSG_DONTWAIT, &err); + if (!skb) + goto end; + + if (len > skb->len) + len = skb->len; + else if (len < skb->len) + msg->msg_flags |= MSG_TRUNC; + + err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len); + if (likely(err == 0)) + err = len; + + kfree_skb(skb); +end: + return err; +} + +static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len) +{ + struct pppol2tp_session *ps = l2tp_session_priv(session); + struct sock *sk = NULL; + + /* If the socket is bound, send it in to PPP's input queue. Otherwise + * queue it on the session socket. + */ + sk = ps->sock; + if (sk == NULL) + goto no_sock; + + if (sk->sk_state & PPPOX_BOUND) { + struct pppox_sock *po; + PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG, + "%s: recv %d byte data frame, passing to ppp\n", + session->name, data_len); + + /* We need to forget all info related to the L2TP packet + * gathered in the skb as we are going to reuse the same + * skb for the inner packet. + * Namely we need to: + * - reset xfrm (IPSec) information as it applies to + * the outer L2TP packet and not to the inner one + * - release the dst to force a route lookup on the inner + * IP packet since skb->dst currently points to the dst + * of the UDP tunnel + * - reset netfilter information as it doesn't apply + * to the inner packet either + */ + secpath_reset(skb); + skb_dst_drop(skb); + nf_reset(skb); + + po = pppox_sk(sk); + ppp_input(&po->chan, skb); + } else { + PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_INFO, + "%s: socket not bound\n", session->name); + + /* Not bound. Nothing we can do, so discard. */ + session->stats.rx_errors++; + kfree_skb(skb); + } + + return; + +no_sock: + PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_INFO, + "%s: no socket\n", session->name); + kfree_skb(skb); +} + +static void pppol2tp_session_sock_hold(struct l2tp_session *session) +{ + struct pppol2tp_session *ps = l2tp_session_priv(session); + + if (ps->sock) + sock_hold(ps->sock); +} + +static void pppol2tp_session_sock_put(struct l2tp_session *session) +{ + struct pppol2tp_session *ps = l2tp_session_priv(session); + + if (ps->sock) + sock_put(ps->sock); +} + +/************************************************************************ + * Transmit handling + ***********************************************************************/ + +/* Tell how big L2TP headers are for a particular session. This + * depends on whether sequence numbers are being used. + */ +static inline int pppol2tp_l2tp_header_len(struct l2tp_session *session) +{ + if (session->send_seq) + return PPPOL2TP_L2TP_HDR_SIZE_SEQ; + + return PPPOL2TP_L2TP_HDR_SIZE_NOSEQ; +} + +/* This is the sendmsg for the PPPoL2TP pppol2tp_session socket. We come here + * when a user application does a sendmsg() on the session socket. L2TP and + * PPP headers must be inserted into the user's data. 
+ */ +static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, + size_t total_len) +{ + static const unsigned char ppph[2] = { 0xff, 0x03 }; + struct sock *sk = sock->sk; + struct sk_buff *skb; + int error; + struct l2tp_session *session; + struct l2tp_tunnel *tunnel; + struct pppol2tp_session *ps; + + error = -ENOTCONN; + if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) + goto error; + + /* Get session and tunnel contexts */ + error = -EBADF; + session = pppol2tp_sock_to_session(sk); + if (session == NULL) + goto error; + + ps = l2tp_session_priv(session); + tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock); + if (tunnel == NULL) + goto error_put_sess; + + /* Allocate a socket buffer */ + error = -ENOMEM; + skb = sock_wmalloc(sk, NET_SKB_PAD + sizeof(struct iphdr) + + sizeof(struct udphdr) + session->hdr_len + + sizeof(ppph) + total_len, + 0, GFP_KERNEL); + if (!skb) + goto error_put_sess_tun; + + /* Reserve space for headers. */ + skb_reserve(skb, NET_SKB_PAD); + skb_reset_network_header(skb); + skb_reserve(skb, sizeof(struct iphdr)); + skb_reset_transport_header(skb); + skb_reserve(skb, sizeof(struct udphdr)); + + /* Add PPP header */ + skb->data[0] = ppph[0]; + skb->data[1] = ppph[1]; + skb_put(skb, 2); + + /* Copy user data into skb */ + error = memcpy_fromiovec(skb->data, m->msg_iov, total_len); + if (error < 0) { + kfree_skb(skb); + goto error_put_sess_tun; + } + skb_put(skb, total_len); + + l2tp_xmit_skb(session, skb, session->hdr_len); + + sock_put(ps->tunnel_sock); + + return error; + +error_put_sess_tun: + sock_put(ps->tunnel_sock); +error_put_sess: + sock_put(sk); +error: + return error; +} + +/* Transmit function called by generic PPP driver. Sends PPP frame + * over PPPoL2TP socket. + * + * This is almost the same as pppol2tp_sendmsg(), but rather than + * being called with a msghdr from userspace, it is called with a skb + * from the kernel. + * + * The supplied skb from ppp doesn't have enough headroom for the + * insertion of L2TP, UDP and IP headers so we need to allocate more + * headroom in the skb. This will create a cloned skb. But we must be + * careful in the error case because the caller will expect to free + * the skb it supplied, not our cloned skb. So we take care to always + * leave the original skb unfreed if we return an error. + */ +static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) +{ + static const u8 ppph[2] = { 0xff, 0x03 }; + struct sock *sk = (struct sock *) chan->private; + struct sock *sk_tun; + int hdr_len; + struct l2tp_session *session; + struct l2tp_tunnel *tunnel; + struct pppol2tp_session *ps; + int old_headroom; + int new_headroom; + + if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) + goto abort; + + /* Get session and tunnel contexts from the socket */ + session = pppol2tp_sock_to_session(sk); + if (session == NULL) + goto abort; + + ps = l2tp_session_priv(session); + sk_tun = ps->tunnel_sock; + if (sk_tun == NULL) + goto abort_put_sess; + tunnel = l2tp_sock_to_tunnel(sk_tun); + if (tunnel == NULL) + goto abort_put_sess; + + /* What header length is configured for this session? 
*/ + hdr_len = pppol2tp_l2tp_header_len(session); + + old_headroom = skb_headroom(skb); + if (skb_cow_head(skb, sizeof(ppph))) + goto abort_put_sess_tun; + + new_headroom = skb_headroom(skb); + skb->truesize += new_headroom - old_headroom; + + /* Setup PPP header */ + __skb_push(skb, sizeof(ppph)); + skb->data[0] = ppph[0]; + skb->data[1] = ppph[1]; + + l2tp_xmit_skb(session, skb, hdr_len); + + sock_put(sk_tun); + sock_put(sk); + return 1; + +abort_put_sess_tun: + sock_put(sk_tun); +abort_put_sess: + sock_put(sk); +abort: + /* Free the original skb */ + kfree_skb(skb); + return 1; +} + +/***************************************************************************** + * Session (and tunnel control) socket create/destroy. + *****************************************************************************/ + +/* Called by l2tp_core when a session socket is being closed. + */ +static void pppol2tp_session_close(struct l2tp_session *session) +{ + struct pppol2tp_session *ps = l2tp_session_priv(session); + struct sock *sk = ps->sock; + struct sk_buff *skb; + + BUG_ON(session->magic != L2TP_SESSION_MAGIC); + + if (session->session_id == 0) + goto out; + + if (sk != NULL) { + lock_sock(sk); + + if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) { + pppox_unbind_sock(sk); + sk->sk_state = PPPOX_DEAD; + sk->sk_state_change(sk); + } + + /* Purge any queued data */ + skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&sk->sk_write_queue); + while ((skb = skb_dequeue(&session->reorder_q))) { + kfree_skb(skb); + sock_put(sk); + } + + release_sock(sk); + } + +out: + return; +} + +/* Really kill the session socket. (Called from sock_put() if + * refcnt == 0.) + */ +static void pppol2tp_session_destruct(struct sock *sk) +{ + struct l2tp_session *session; + + if (sk->sk_user_data != NULL) { + session = sk->sk_user_data; + if (session == NULL) + goto out; + + sk->sk_user_data = NULL; + BUG_ON(session->magic != L2TP_SESSION_MAGIC); + l2tp_session_dec_refcount(session); + } + +out: + return; +} + +/* Called when the PPPoX socket (session) is closed. + */ +static int pppol2tp_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + struct l2tp_session *session; + int error; + + if (!sk) + return 0; + + error = -EBADF; + lock_sock(sk); + if (sock_flag(sk, SOCK_DEAD) != 0) + goto error; + + pppox_unbind_sock(sk); + + /* Signal the death of the socket. */ + sk->sk_state = PPPOX_DEAD; + sock_orphan(sk); + sock->sk = NULL; + + session = pppol2tp_sock_to_session(sk); + + /* Purge any queued data */ + skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&sk->sk_write_queue); + if (session != NULL) { + struct sk_buff *skb; + while ((skb = skb_dequeue(&session->reorder_q))) { + kfree_skb(skb); + sock_put(sk); + } + sock_put(sk); + } + + release_sock(sk); + + /* This will delete the session context via + * pppol2tp_session_destruct() if the socket's refcnt drops to + * zero. + */ + sock_put(sk); + + return 0; + +error: + release_sock(sk); + return error; +} + +static struct proto pppol2tp_sk_proto = { + .name = "PPPOL2TP", + .owner = THIS_MODULE, + .obj_size = sizeof(struct pppox_sock), +}; + +static int pppol2tp_backlog_recv(struct sock *sk, struct sk_buff *skb) +{ + int rc; + + rc = l2tp_udp_encap_recv(sk, skb); + if (rc) + kfree_skb(skb); + + return NET_RX_SUCCESS; +} + +/* socket() handler. Initialize a new struct sock. 
+ */ +static int pppol2tp_create(struct net *net, struct socket *sock) +{ + int error = -ENOMEM; + struct sock *sk; + + sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppol2tp_sk_proto); + if (!sk) + goto out; + + sock_init_data(sock, sk); + + sock->state = SS_UNCONNECTED; + sock->ops = &pppol2tp_ops; + + sk->sk_backlog_rcv = pppol2tp_backlog_recv; + sk->sk_protocol = PX_PROTO_OL2TP; + sk->sk_family = PF_PPPOX; + sk->sk_state = PPPOX_NONE; + sk->sk_type = SOCK_STREAM; + sk->sk_destruct = pppol2tp_session_destruct; + + error = 0; + +out: + return error; +} + +/* connect() handler. Attach a PPPoX socket to a tunnel UDP socket + */ +static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, + int sockaddr_len, int flags) +{ + struct sock *sk = sock->sk; + struct sockaddr_pppol2tp *sp = (struct sockaddr_pppol2tp *) uservaddr; + struct pppox_sock *po = pppox_sk(sk); + struct l2tp_session *session = NULL; + struct l2tp_tunnel *tunnel; + struct pppol2tp_session *ps; + struct dst_entry *dst; + struct l2tp_session_cfg cfg = { 0, }; + int error = 0; + + lock_sock(sk); + + error = -EINVAL; + if (sp->sa_protocol != PX_PROTO_OL2TP) + goto end; + + /* Check for already bound sockets */ + error = -EBUSY; + if (sk->sk_state & PPPOX_CONNECTED) + goto end; + + /* We don't supporting rebinding anyway */ + error = -EALREADY; + if (sk->sk_user_data) + goto end; /* socket is already attached */ + + /* Don't bind if s_tunnel is 0 */ + error = -EINVAL; + if (sp->pppol2tp.s_tunnel == 0) + goto end; + + /* Special case: create tunnel context if s_session and + * d_session is 0. Otherwise look up tunnel using supplied + * tunnel id. + */ + if ((sp->pppol2tp.s_session == 0) && (sp->pppol2tp.d_session == 0)) { + error = l2tp_tunnel_create(sock_net(sk), sp->pppol2tp.fd, 2, sp->pppol2tp.s_tunnel, sp->pppol2tp.d_tunnel, NULL, &tunnel); + if (error < 0) + goto end; + } else { + tunnel = l2tp_tunnel_find(sock_net(sk), sp->pppol2tp.s_tunnel); + + /* Error if we can't find the tunnel */ + error = -ENOENT; + if (tunnel == NULL) + goto end; + + /* Error if socket is not prepped */ + if (tunnel->sock == NULL) + goto end; + } + + if (tunnel->recv_payload_hook == NULL) + tunnel->recv_payload_hook = pppol2tp_recv_payload_hook; + + /* Check that this session doesn't already exist */ + error = -EEXIST; + session = l2tp_session_find(tunnel, sp->pppol2tp.s_session); + if (session != NULL) + goto end; + + /* Default MTU must allow space for UDP/L2TP/PPP + * headers. + */ + cfg.mtu = cfg.mru = 1500 - PPPOL2TP_HEADER_OVERHEAD; + cfg.hdr_len = PPPOL2TP_L2TP_HDR_SIZE_NOSEQ; + cfg.debug = tunnel->debug; + + /* Allocate and initialize a new session context. */ + session = l2tp_session_create(sizeof(struct pppol2tp_session), + tunnel, sp->pppol2tp.s_session, + sp->pppol2tp.d_session, &cfg); + if (session == NULL) { + error = -ENOMEM; + goto end; + } + + ps = l2tp_session_priv(session); + ps->owner = current->pid; + ps->sock = sk; + ps->tunnel_sock = tunnel->sock; + + session->recv_skb = pppol2tp_recv; + session->session_close = pppol2tp_session_close; + + /* We need to know each time a skb is dropped from the reorder + * queue. 
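
The default MTU set in pppol2tp_connect() above is plain header arithmetic: PPPOL2TP_HEADER_OVERHEAD (40) budgets 20 bytes of IPv4, 8 of UDP, up to 10 of L2TPv2 header (with sequence numbers) and 2 of PPP address/control, and the PMTU branch just below applies the same subtraction to the discovered path MTU. A small worked version of that arithmetic:

    #include <stdio.h>

    /* Mirror of the budget behind PPPOL2TP_HEADER_OVERHEAD (40 bytes). */
    #define IPV4_HDR   20
    #define UDP_HDR     8
    #define L2TP_HDR   10   /* v2 header with ns/nr present */
    #define PPP_HDR     2   /* address/control 0xff 0x03 */
    #define OVERHEAD  (IPV4_HDR + UDP_HDR + L2TP_HDR + PPP_HDR)

    int main(void)
    {
        int link_mtu = 1500;     /* default when no PMTU information */
        int pmtu = 1450;         /* example value from PMTU discovery */

        printf("overhead            = %d bytes\n", OVERHEAD);
        printf("default session mtu = %d\n", link_mtu - OVERHEAD);
        printf("pmtu-derived mtu    = %d\n", pmtu - OVERHEAD);
        return 0;
    }
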
+ */ + session->ref = pppol2tp_session_sock_hold; + session->deref = pppol2tp_session_sock_put; + + /* If PMTU discovery was enabled, use the MTU that was discovered */ + dst = sk_dst_get(sk); + if (dst != NULL) { + u32 pmtu = dst_mtu(__sk_dst_get(sk)); + if (pmtu != 0) + session->mtu = session->mru = pmtu - + PPPOL2TP_HEADER_OVERHEAD; + dst_release(dst); + } + + /* Special case: if source & dest session_id == 0x0000, this + * socket is being created to manage the tunnel. Just set up + * the internal context for use by ioctl() and sockopt() + * handlers. + */ + if ((session->session_id == 0) && + (session->peer_session_id == 0)) { + error = 0; + goto out_no_ppp; + } + + /* The only header we need to worry about is the L2TP + * header. This size is different depending on whether + * sequence numbers are enabled for the data channel. + */ + po->chan.hdrlen = PPPOL2TP_L2TP_HDR_SIZE_NOSEQ; + + po->chan.private = sk; + po->chan.ops = &pppol2tp_chan_ops; + po->chan.mtu = session->mtu; + + error = ppp_register_net_channel(sock_net(sk), &po->chan); + if (error) + goto end; + +out_no_ppp: + /* This is how we get the session context from the socket. */ + sk->sk_user_data = session; + sk->sk_state = PPPOX_CONNECTED; + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: created\n", session->name); + +end: + release_sock(sk); + + return error; +} + +/* getname() support. + */ +static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, + int *usockaddr_len, int peer) +{ + int len = sizeof(struct sockaddr_pppol2tp); + struct sockaddr_pppol2tp sp; + int error = 0; + struct l2tp_session *session; + struct l2tp_tunnel *tunnel; + struct sock *sk = sock->sk; + struct inet_sock *inet; + struct pppol2tp_session *pls; + + error = -ENOTCONN; + if (sk == NULL) + goto end; + if (sk->sk_state != PPPOX_CONNECTED) + goto end; + + error = -EBADF; + session = pppol2tp_sock_to_session(sk); + if (session == NULL) + goto end; + + pls = l2tp_session_priv(session); + tunnel = l2tp_sock_to_tunnel(pls->tunnel_sock); + if (tunnel == NULL) { + error = -EBADF; + goto end_put_sess; + } + + memset(&sp, 0, len); + sp.sa_family = AF_PPPOX; + sp.sa_protocol = PX_PROTO_OL2TP; + sp.pppol2tp.fd = tunnel->fd; + sp.pppol2tp.pid = pls->owner; + sp.pppol2tp.s_tunnel = tunnel->tunnel_id; + sp.pppol2tp.d_tunnel = tunnel->peer_tunnel_id; + sp.pppol2tp.s_session = session->session_id; + sp.pppol2tp.d_session = session->peer_session_id; + inet = inet_sk(sk); + sp.pppol2tp.addr.sin_family = AF_INET; + sp.pppol2tp.addr.sin_port = inet->inet_dport; + sp.pppol2tp.addr.sin_addr.s_addr = inet->inet_daddr; + + memcpy(uaddr, &sp, len); + + *usockaddr_len = len; + + sock_put(pls->tunnel_sock); +end_put_sess: + sock_put(sk); + error = 0; + +end: + return error; +} + +/**************************************************************************** + * ioctl() handlers. + * + * The PPPoX socket is created for L2TP sessions: tunnels have their own UDP + * sockets. However, in order to control kernel tunnel features, we allow + * userspace to create a special "tunnel" PPPoX socket which is used for + * control only. Tunnel PPPoX sockets have session_id == 0 and simply allow + * the user application to issue L2TP setsockopt(), getsockopt() and ioctl() + * calls. 
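For reference, such a control-only socket is obtained from userspace with an ordinary connect() in which both session IDs are zero, which is the tunnel-creation special case handled in pppol2tp_connect() above. A minimal sketch, not part of the patch; error handling is trimmed, the tunnel_fd and ID values are illustrative, and the exact header locations are assumptions:

#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/if_pppox.h>	/* PX_PROTO_OL2TP (assumed location) */
#include <linux/if_pppol2tp.h>	/* struct sockaddr_pppol2tp (assumed location) */

/* tunnel_fd: an already-bound UDP socket carrying the L2TP tunnel */
static int open_l2tp_tunnel_mgmt_socket(int tunnel_fd, int tid, int peer_tid)
{
	struct sockaddr_pppol2tp sax;
	int fd;

	fd = socket(AF_PPPOX, SOCK_DGRAM, PX_PROTO_OL2TP);
	if (fd < 0)
		return -1;

	memset(&sax, 0, sizeof(sax));
	sax.sa_family = AF_PPPOX;
	sax.sa_protocol = PX_PROTO_OL2TP;
	sax.pppol2tp.fd = tunnel_fd;	/* kernel looks up / creates the tunnel from this fd */
	sax.pppol2tp.s_tunnel = tid;
	sax.pppol2tp.d_tunnel = peer_tid;
	sax.pppol2tp.s_session = 0;	/* zero local and peer session IDs select */
	sax.pppol2tp.d_session = 0;	/* the tunnel-management special case */

	if (connect(fd, (struct sockaddr *)&sax, sizeof(sax)) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}

A regular data session is set up the same way but with non-zero s_session/d_session values, in which case the connect() path above allocates the session context and registers the PPP channel instead.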
+ ****************************************************************************/ + +static void pppol2tp_copy_stats(struct pppol2tp_ioc_stats *dest, + struct l2tp_stats *stats) +{ + dest->tx_packets = stats->tx_packets; + dest->tx_bytes = stats->tx_bytes; + dest->tx_errors = stats->tx_errors; + dest->rx_packets = stats->rx_packets; + dest->rx_bytes = stats->rx_bytes; + dest->rx_seq_discards = stats->rx_seq_discards; + dest->rx_oos_packets = stats->rx_oos_packets; + dest->rx_errors = stats->rx_errors; +} + +/* Session ioctl helper. + */ +static int pppol2tp_session_ioctl(struct l2tp_session *session, + unsigned int cmd, unsigned long arg) +{ + struct ifreq ifr; + int err = 0; + struct sock *sk; + int val = (int) arg; + struct pppol2tp_session *ps = l2tp_session_priv(session); + struct l2tp_tunnel *tunnel = session->tunnel; + struct pppol2tp_ioc_stats stats; + + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_DEBUG, + "%s: pppol2tp_session_ioctl(cmd=%#x, arg=%#lx)\n", + session->name, cmd, arg); + + sk = ps->sock; + sock_hold(sk); + + switch (cmd) { + case SIOCGIFMTU: + err = -ENXIO; + if (!(sk->sk_state & PPPOX_CONNECTED)) + break; + + err = -EFAULT; + if (copy_from_user(&ifr, (void __user *) arg, sizeof(struct ifreq))) + break; + ifr.ifr_mtu = session->mtu; + if (copy_to_user((void __user *) arg, &ifr, sizeof(struct ifreq))) + break; + + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: get mtu=%d\n", session->name, session->mtu); + err = 0; + break; + + case SIOCSIFMTU: + err = -ENXIO; + if (!(sk->sk_state & PPPOX_CONNECTED)) + break; + + err = -EFAULT; + if (copy_from_user(&ifr, (void __user *) arg, sizeof(struct ifreq))) + break; + + session->mtu = ifr.ifr_mtu; + + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: set mtu=%d\n", session->name, session->mtu); + err = 0; + break; + + case PPPIOCGMRU: + err = -ENXIO; + if (!(sk->sk_state & PPPOX_CONNECTED)) + break; + + err = -EFAULT; + if (put_user(session->mru, (int __user *) arg)) + break; + + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: get mru=%d\n", session->name, session->mru); + err = 0; + break; + + case PPPIOCSMRU: + err = -ENXIO; + if (!(sk->sk_state & PPPOX_CONNECTED)) + break; + + err = -EFAULT; + if (get_user(val, (int __user *) arg)) + break; + + session->mru = val; + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: set mru=%d\n", session->name, session->mru); + err = 0; + break; + + case PPPIOCGFLAGS: + err = -EFAULT; + if (put_user(ps->flags, (int __user *) arg)) + break; + + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: get flags=%d\n", session->name, ps->flags); + err = 0; + break; + + case PPPIOCSFLAGS: + err = -EFAULT; + if (get_user(val, (int __user *) arg)) + break; + ps->flags = val; + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: set flags=%d\n", session->name, ps->flags); + err = 0; + break; + + case PPPIOCGL2TPSTATS: + err = -ENXIO; + if (!(sk->sk_state & PPPOX_CONNECTED)) + break; + + memset(&stats, 0, sizeof(stats)); + stats.tunnel_id = tunnel->tunnel_id; + stats.session_id = session->session_id; + pppol2tp_copy_stats(&stats, &session->stats); + if (copy_to_user((void __user *) arg, &stats, + sizeof(stats))) + break; + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: get L2TP stats\n", session->name); + err = 0; + break; + + default: + err = -ENOSYS; + break; + } + + sock_put(sk); + + return err; +} + +/* Tunnel ioctl helper. + * + * Note the special handling for PPPIOCGL2TPSTATS below. 
If the ioctl data + * specifies a session_id, the session ioctl handler is called. This allows an + * application to retrieve session stats via a tunnel socket. + */ +static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel, + unsigned int cmd, unsigned long arg) +{ + int err = 0; + struct sock *sk; + struct pppol2tp_ioc_stats stats; + + PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_DEBUG, + "%s: pppol2tp_tunnel_ioctl(cmd=%#x, arg=%#lx)\n", + tunnel->name, cmd, arg); + + sk = tunnel->sock; + sock_hold(sk); + + switch (cmd) { + case PPPIOCGL2TPSTATS: + err = -ENXIO; + if (!(sk->sk_state & PPPOX_CONNECTED)) + break; + + if (copy_from_user(&stats, (void __user *) arg, + sizeof(stats))) { + err = -EFAULT; + break; + } + if (stats.session_id != 0) { + /* resend to session ioctl handler */ + struct l2tp_session *session = + l2tp_session_find(tunnel, stats.session_id); + if (session != NULL) + err = pppol2tp_session_ioctl(session, cmd, arg); + else + err = -EBADR; + break; + } +#ifdef CONFIG_XFRM + stats.using_ipsec = (sk->sk_policy[0] || sk->sk_policy[1]) ? 1 : 0; +#endif + pppol2tp_copy_stats(&stats, &tunnel->stats); + if (copy_to_user((void __user *) arg, &stats, sizeof(stats))) { + err = -EFAULT; + break; + } + PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: get L2TP stats\n", tunnel->name); + err = 0; + break; + + default: + err = -ENOSYS; + break; + } + + sock_put(sk); + + return err; +} + +/* Main ioctl() handler. + * Dispatch to tunnel or session helpers depending on the socket. + */ +static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + struct sock *sk = sock->sk; + struct l2tp_session *session; + struct l2tp_tunnel *tunnel; + struct pppol2tp_session *ps; + int err; + + if (!sk) + return 0; + + err = -EBADF; + if (sock_flag(sk, SOCK_DEAD) != 0) + goto end; + + err = -ENOTCONN; + if ((sk->sk_user_data == NULL) || + (!(sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)))) + goto end; + + /* Get session context from the socket */ + err = -EBADF; + session = pppol2tp_sock_to_session(sk); + if (session == NULL) + goto end; + + /* Special case: if session's session_id is zero, treat ioctl as a + * tunnel ioctl + */ + ps = l2tp_session_priv(session); + if ((session->session_id == 0) && + (session->peer_session_id == 0)) { + err = -EBADF; + tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock); + if (tunnel == NULL) + goto end_put_sess; + + err = pppol2tp_tunnel_ioctl(tunnel, cmd, arg); + sock_put(ps->tunnel_sock); + goto end_put_sess; + } + + err = pppol2tp_session_ioctl(session, cmd, arg); + +end_put_sess: + sock_put(sk); +end: + return err; +} + +/***************************************************************************** + * setsockopt() / getsockopt() support. + * + * The PPPoX socket is created for L2TP sessions: tunnels have their own UDP + * sockets. In order to control kernel tunnel features, we allow userspace to + * create a special "tunnel" PPPoX socket which is used for control only. + * Tunnel PPPoX sockets have session_id == 0 and simply allow the user + * application to issue L2TP setsockopt(), getsockopt() and ioctl() calls. + *****************************************************************************/ + +/* Tunnel setsockopt() helper. 
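As a usage illustration of the two dispatch paths described above, a control application can tune the debug mask with setsockopt() on the tunnel-management socket and pull per-session counters through that same socket by passing a non-zero session_id in the stats request, which pppol2tp_tunnel_ioctl() re-dispatches to the session handler. A rough sketch, not part of the patch; tunfd is a connected tunnel-management PPPoX socket as shown earlier, and the header locations for SOL_PPPOL2TP, PPPOL2TP_SO_DEBUG, PPPIOCGL2TPSTATS and struct pppol2tp_ioc_stats are assumptions:

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/if_pppol2tp.h>	/* PPPOL2TP_SO_*, struct pppol2tp_ioc_stats (assumed) */
#include <linux/if_ppp.h>	/* PPPIOCGL2TPSTATS (assumed) */

static int tune_and_read_stats(int tunfd, int session_id)
{
	struct pppol2tp_ioc_stats stats;
	int debug = 0xffff;	/* illustrative: enable all message categories */

	/* Routed to the tunnel setsockopt helper because this socket's
	 * session_id and peer_session_id are both zero. */
	if (setsockopt(tunfd, SOL_PPPOL2TP, PPPOL2TP_SO_DEBUG,
		       &debug, sizeof(debug)) < 0)
		return -1;

	/* A non-zero session_id makes the tunnel ioctl handler forward the
	 * request to the session ioctl handler, so per-session stats can be
	 * read without holding the session socket. */
	memset(&stats, 0, sizeof(stats));
	stats.session_id = session_id;
	if (ioctl(tunfd, PPPIOCGL2TPSTATS, &stats) < 0)
		return -1;

	printf("session %d: tx %llu pkts, rx %llu pkts\n", session_id,
	       (unsigned long long)stats.tx_packets,
	       (unsigned long long)stats.rx_packets);
	return 0;
}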
+ */ +static int pppol2tp_tunnel_setsockopt(struct sock *sk, + struct l2tp_tunnel *tunnel, + int optname, int val) +{ + int err = 0; + + switch (optname) { + case PPPOL2TP_SO_DEBUG: + tunnel->debug = val; + PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: set debug=%x\n", tunnel->name, tunnel->debug); + break; + + default: + err = -ENOPROTOOPT; + break; + } + + return err; +} + +/* Session setsockopt helper. + */ +static int pppol2tp_session_setsockopt(struct sock *sk, + struct l2tp_session *session, + int optname, int val) +{ + int err = 0; + struct pppol2tp_session *ps = l2tp_session_priv(session); + + switch (optname) { + case PPPOL2TP_SO_RECVSEQ: + if ((val != 0) && (val != 1)) { + err = -EINVAL; + break; + } + session->recv_seq = val ? -1 : 0; + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: set recv_seq=%d\n", session->name, session->recv_seq); + break; + + case PPPOL2TP_SO_SENDSEQ: + if ((val != 0) && (val != 1)) { + err = -EINVAL; + break; + } + session->send_seq = val ? -1 : 0; + { + struct sock *ssk = ps->sock; + struct pppox_sock *po = pppox_sk(ssk); + po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ : + PPPOL2TP_L2TP_HDR_SIZE_NOSEQ; + } + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: set send_seq=%d\n", session->name, session->send_seq); + break; + + case PPPOL2TP_SO_LNSMODE: + if ((val != 0) && (val != 1)) { + err = -EINVAL; + break; + } + session->lns_mode = val ? -1 : 0; + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: set lns_mode=%d\n", session->name, session->lns_mode); + break; + + case PPPOL2TP_SO_DEBUG: + session->debug = val; + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: set debug=%x\n", session->name, session->debug); + break; + + case PPPOL2TP_SO_REORDERTO: + session->reorder_timeout = msecs_to_jiffies(val); + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: set reorder_timeout=%d\n", session->name, session->reorder_timeout); + break; + + default: + err = -ENOPROTOOPT; + break; + } + + return err; +} + +/* Main setsockopt() entry point. + * Does API checks, then calls either the tunnel or session setsockopt + * handler, according to whether the PPPoL2TP socket is a for a regular + * session or the special tunnel type. + */ +static int pppol2tp_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, unsigned int optlen) +{ + struct sock *sk = sock->sk; + struct l2tp_session *session; + struct l2tp_tunnel *tunnel; + struct pppol2tp_session *ps; + int val; + int err; + + if (level != SOL_PPPOL2TP) + return udp_prot.setsockopt(sk, level, optname, optval, optlen); + + if (optlen < sizeof(int)) + return -EINVAL; + + if (get_user(val, (int __user *)optval)) + return -EFAULT; + + err = -ENOTCONN; + if (sk->sk_user_data == NULL) + goto end; + + /* Get session context from the socket */ + err = -EBADF; + session = pppol2tp_sock_to_session(sk); + if (session == NULL) + goto end; + + /* Special case: if session_id == 0x0000, treat as operation on tunnel + */ + ps = l2tp_session_priv(session); + if ((session->session_id == 0) && + (session->peer_session_id == 0)) { + err = -EBADF; + tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock); + if (tunnel == NULL) + goto end_put_sess; + + err = pppol2tp_tunnel_setsockopt(sk, tunnel, optname, val); + sock_put(ps->tunnel_sock); + } else + err = pppol2tp_session_setsockopt(sk, session, optname, val); + + err = 0; + +end_put_sess: + sock_put(sk); +end: + return err; +} + +/* Tunnel getsockopt helper. Called with sock locked. 
+ */ +static int pppol2tp_tunnel_getsockopt(struct sock *sk, + struct l2tp_tunnel *tunnel, + int optname, int *val) +{ + int err = 0; + + switch (optname) { + case PPPOL2TP_SO_DEBUG: + *val = tunnel->debug; + PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: get debug=%x\n", tunnel->name, tunnel->debug); + break; + + default: + err = -ENOPROTOOPT; + break; + } + + return err; +} + +/* Session getsockopt helper. Called with sock locked. + */ +static int pppol2tp_session_getsockopt(struct sock *sk, + struct l2tp_session *session, + int optname, int *val) +{ + int err = 0; + + switch (optname) { + case PPPOL2TP_SO_RECVSEQ: + *val = session->recv_seq; + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: get recv_seq=%d\n", session->name, *val); + break; + + case PPPOL2TP_SO_SENDSEQ: + *val = session->send_seq; + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: get send_seq=%d\n", session->name, *val); + break; + + case PPPOL2TP_SO_LNSMODE: + *val = session->lns_mode; + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: get lns_mode=%d\n", session->name, *val); + break; + + case PPPOL2TP_SO_DEBUG: + *val = session->debug; + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: get debug=%d\n", session->name, *val); + break; + + case PPPOL2TP_SO_REORDERTO: + *val = (int) jiffies_to_msecs(session->reorder_timeout); + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: get reorder_timeout=%d\n", session->name, *val); + break; + + default: + err = -ENOPROTOOPT; + } + + return err; +} + +/* Main getsockopt() entry point. + * Does API checks, then calls either the tunnel or session getsockopt + * handler, according to whether the PPPoX socket is a for a regular session + * or the special tunnel type. 
+ */ +static int pppol2tp_getsockopt(struct socket *sock, int level, + int optname, char __user *optval, int __user *optlen) +{ + struct sock *sk = sock->sk; + struct l2tp_session *session; + struct l2tp_tunnel *tunnel; + int val, len; + int err; + struct pppol2tp_session *ps; + + if (level != SOL_PPPOL2TP) + return udp_prot.getsockopt(sk, level, optname, optval, optlen); + + if (get_user(len, (int __user *) optlen)) + return -EFAULT; + + len = min_t(unsigned int, len, sizeof(int)); + + if (len < 0) + return -EINVAL; + + err = -ENOTCONN; + if (sk->sk_user_data == NULL) + goto end; + + /* Get the session context */ + err = -EBADF; + session = pppol2tp_sock_to_session(sk); + if (session == NULL) + goto end; + + /* Special case: if session_id == 0x0000, treat as operation on tunnel */ + ps = l2tp_session_priv(session); + if ((session->session_id == 0) && + (session->peer_session_id == 0)) { + err = -EBADF; + tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock); + if (tunnel == NULL) + goto end_put_sess; + + err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val); + sock_put(ps->tunnel_sock); + } else + err = pppol2tp_session_getsockopt(sk, session, optname, &val); + + err = -EFAULT; + if (put_user(len, (int __user *) optlen)) + goto end_put_sess; + + if (copy_to_user((void __user *) optval, &val, len)) + goto end_put_sess; + + err = 0; + +end_put_sess: + sock_put(sk); +end: + return err; +} + +/***************************************************************************** + * /proc filesystem for debug + *****************************************************************************/ + +static unsigned int pppol2tp_net_id; + +#ifdef CONFIG_PROC_FS + +struct pppol2tp_seq_data { + struct seq_net_private p; + int tunnel_idx; /* current tunnel */ + int session_idx; /* index of session within current tunnel */ + struct l2tp_tunnel *tunnel; + struct l2tp_session *session; /* NULL means get next tunnel */ +}; + +static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd) +{ + pd->tunnel = l2tp_tunnel_find_nth(net, pd->tunnel_idx); + pd->tunnel_idx++; +} + +static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd) +{ + pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx); + pd->session_idx++; + if (pd->session == NULL) { + pd->session_idx = 0; + pppol2tp_next_tunnel(net, pd); + } +} + +static void *pppol2tp_seq_start(struct seq_file *m, loff_t *offs) +{ + struct pppol2tp_seq_data *pd = SEQ_START_TOKEN; + loff_t pos = *offs; + struct net *net; + + if (!pos) + goto out; + + BUG_ON(m->private == NULL); + pd = m->private; + net = seq_file_net(m); + + if (pd->tunnel == NULL) + pppol2tp_next_tunnel(net, pd); + else + pppol2tp_next_session(net, pd); + + /* NULL tunnel and session indicates end of list */ + if ((pd->tunnel == NULL) && (pd->session == NULL)) + pd = NULL; + +out: + return pd; +} + +static void *pppol2tp_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + (*pos)++; + return NULL; +} + +static void pppol2tp_seq_stop(struct seq_file *p, void *v) +{ + /* nothing to do */ +} + +static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v) +{ + struct l2tp_tunnel *tunnel = v; + + seq_printf(m, "\nTUNNEL '%s', %c %d\n", + tunnel->name, + (tunnel == tunnel->sock->sk_user_data) ? 
'Y' : 'N', + atomic_read(&tunnel->ref_count) - 1); + seq_printf(m, " %08x %llu/%llu/%llu %llu/%llu/%llu\n", + tunnel->debug, + (unsigned long long)tunnel->stats.tx_packets, + (unsigned long long)tunnel->stats.tx_bytes, + (unsigned long long)tunnel->stats.tx_errors, + (unsigned long long)tunnel->stats.rx_packets, + (unsigned long long)tunnel->stats.rx_bytes, + (unsigned long long)tunnel->stats.rx_errors); +} + +static void pppol2tp_seq_session_show(struct seq_file *m, void *v) +{ + struct l2tp_session *session = v; + struct l2tp_tunnel *tunnel = session->tunnel; + struct pppol2tp_session *ps = l2tp_session_priv(session); + u32 ip = 0; + u16 port = 0; + + if (tunnel->sock) { + struct inet_sock *inet = inet_sk(tunnel->sock); + ip = ntohl(inet->inet_saddr); + port = ntohs(inet->inet_sport); + } + + seq_printf(m, " SESSION '%s' %08X/%d %04X/%04X -> " + "%04X/%04X %d %c\n", + session->name, ip, port, + tunnel->tunnel_id, + session->session_id, + tunnel->peer_tunnel_id, + session->peer_session_id, + ps->sock->sk_state, + (session == ps->sock->sk_user_data) ? + 'Y' : 'N'); + seq_printf(m, " %d/%d/%c/%c/%s %08x %u\n", + session->mtu, session->mru, + session->recv_seq ? 'R' : '-', + session->send_seq ? 'S' : '-', + session->lns_mode ? "LNS" : "LAC", + session->debug, + jiffies_to_msecs(session->reorder_timeout)); + seq_printf(m, " %hu/%hu %llu/%llu/%llu %llu/%llu/%llu\n", + session->nr, session->ns, + (unsigned long long)session->stats.tx_packets, + (unsigned long long)session->stats.tx_bytes, + (unsigned long long)session->stats.tx_errors, + (unsigned long long)session->stats.rx_packets, + (unsigned long long)session->stats.rx_bytes, + (unsigned long long)session->stats.rx_errors); +} + +static int pppol2tp_seq_show(struct seq_file *m, void *v) +{ + struct pppol2tp_seq_data *pd = v; + + /* display header on line 1 */ + if (v == SEQ_START_TOKEN) { + seq_puts(m, "PPPoL2TP driver info, " PPPOL2TP_DRV_VERSION "\n"); + seq_puts(m, "TUNNEL name, user-data-ok session-count\n"); + seq_puts(m, " debug tx-pkts/bytes/errs rx-pkts/bytes/errs\n"); + seq_puts(m, " SESSION name, addr/port src-tid/sid " + "dest-tid/sid state user-data-ok\n"); + seq_puts(m, " mtu/mru/rcvseq/sendseq/lns debug reorderto\n"); + seq_puts(m, " nr/ns tx-pkts/bytes/errs rx-pkts/bytes/errs\n"); + goto out; + } + + /* Show the tunnel or session context. + */ + if (pd->session == NULL) + pppol2tp_seq_tunnel_show(m, pd->tunnel); + else + pppol2tp_seq_session_show(m, pd->session); + +out: + return 0; +} + +static const struct seq_operations pppol2tp_seq_ops = { + .start = pppol2tp_seq_start, + .next = pppol2tp_seq_next, + .stop = pppol2tp_seq_stop, + .show = pppol2tp_seq_show, +}; + +/* Called when our /proc file is opened. We allocate data for use when + * iterating our tunnel / session contexts and store it in the private + * data of the seq_file. 
+ */ +static int pppol2tp_proc_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &pppol2tp_seq_ops, + sizeof(struct pppol2tp_seq_data)); +} + +static const struct file_operations pppol2tp_proc_fops = { + .owner = THIS_MODULE, + .open = pppol2tp_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + +#endif /* CONFIG_PROC_FS */ + +/***************************************************************************** + * Network namespace + *****************************************************************************/ + +static __net_init int pppol2tp_init_net(struct net *net) +{ + struct proc_dir_entry *pde; + int err = 0; + + pde = proc_net_fops_create(net, "pppol2tp", S_IRUGO, &pppol2tp_proc_fops); + if (!pde) { + err = -ENOMEM; + goto out; + } + +out: + return err; +} + +static __net_exit void pppol2tp_exit_net(struct net *net) +{ + proc_net_remove(net, "pppol2tp"); +} + +static struct pernet_operations pppol2tp_net_ops = { + .init = pppol2tp_init_net, + .exit = pppol2tp_exit_net, + .id = &pppol2tp_net_id, +}; + +/***************************************************************************** + * Init and cleanup + *****************************************************************************/ + +static const struct proto_ops pppol2tp_ops = { + .family = AF_PPPOX, + .owner = THIS_MODULE, + .release = pppol2tp_release, + .bind = sock_no_bind, + .connect = pppol2tp_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = pppol2tp_getname, + .poll = datagram_poll, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = pppol2tp_setsockopt, + .getsockopt = pppol2tp_getsockopt, + .sendmsg = pppol2tp_sendmsg, + .recvmsg = pppol2tp_recvmsg, + .mmap = sock_no_mmap, + .ioctl = pppox_ioctl, +}; + +static struct pppox_proto pppol2tp_proto = { + .create = pppol2tp_create, + .ioctl = pppol2tp_ioctl +}; + +static int __init pppol2tp_init(void) +{ + int err; + + err = register_pernet_device(&pppol2tp_net_ops); + if (err) + goto out; + + err = proto_register(&pppol2tp_sk_proto, 0); + if (err) + goto out_unregister_pppol2tp_pernet; + + err = register_pppox_proto(PX_PROTO_OL2TP, &pppol2tp_proto); + if (err) + goto out_unregister_pppol2tp_proto; + + printk(KERN_INFO "PPPoL2TP kernel driver, %s\n", + PPPOL2TP_DRV_VERSION); + +out: + return err; +out_unregister_pppol2tp_proto: + proto_unregister(&pppol2tp_sk_proto); +out_unregister_pppol2tp_pernet: + unregister_pernet_device(&pppol2tp_net_ops); + goto out; +} + +static void __exit pppol2tp_exit(void) +{ + unregister_pppox_proto(PX_PROTO_OL2TP); + proto_unregister(&pppol2tp_sk_proto); + unregister_pernet_device(&pppol2tp_net_ops); +} + +module_init(pppol2tp_init); +module_exit(pppol2tp_exit); + +MODULE_AUTHOR("James Chapman "); +MODULE_DESCRIPTION("PPP over L2TP over UDP"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(PPPOL2TP_DRV_VERSION); diff --git a/net/l2tp/pppol2tp.c b/net/l2tp/pppol2tp.c deleted file mode 100644 index 449a9825200d..000000000000 --- a/net/l2tp/pppol2tp.c +++ /dev/null @@ -1,2680 +0,0 @@ -/***************************************************************************** - * Linux PPP over L2TP (PPPoX/PPPoL2TP) Sockets - * - * PPPoX --- Generic PPP encapsulation socket family - * PPPoL2TP --- PPP over L2TP (RFC 2661) - * - * Version: 1.0.0 - * - * Authors: Martijn van Oosterhout - * James Chapman (jchapman@katalix.com) - * Contributors: - * Michal Ostrowski - * Arnaldo Carvalho de Melo - * David S. 
Miller (davem@redhat.com) - * - * License: - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -/* This driver handles only L2TP data frames; control frames are handled by a - * userspace application. - * - * To send data in an L2TP session, userspace opens a PPPoL2TP socket and - * attaches it to a bound UDP socket with local tunnel_id / session_id and - * peer tunnel_id / session_id set. Data can then be sent or received using - * regular socket sendmsg() / recvmsg() calls. Kernel parameters of the socket - * can be read or modified using ioctl() or [gs]etsockopt() calls. - * - * When a PPPoL2TP socket is connected with local and peer session_id values - * zero, the socket is treated as a special tunnel management socket. - * - * Here's example userspace code to create a socket for sending/receiving data - * over an L2TP session:- - * - * struct sockaddr_pppol2tp sax; - * int fd; - * int session_fd; - * - * fd = socket(AF_PPPOX, SOCK_DGRAM, PX_PROTO_OL2TP); - * - * sax.sa_family = AF_PPPOX; - * sax.sa_protocol = PX_PROTO_OL2TP; - * sax.pppol2tp.fd = tunnel_fd; // bound UDP socket - * sax.pppol2tp.addr.sin_addr.s_addr = addr->sin_addr.s_addr; - * sax.pppol2tp.addr.sin_port = addr->sin_port; - * sax.pppol2tp.addr.sin_family = AF_INET; - * sax.pppol2tp.s_tunnel = tunnel_id; - * sax.pppol2tp.s_session = session_id; - * sax.pppol2tp.d_tunnel = peer_tunnel_id; - * sax.pppol2tp.d_session = peer_session_id; - * - * session_fd = connect(fd, (struct sockaddr *)&sax, sizeof(sax)); - * - * A pppd plugin that allows PPP traffic to be carried over L2TP using - * this driver is available from the OpenL2TP project at - * http://openl2tp.sourceforge.net. - */ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - - -#define PPPOL2TP_DRV_VERSION "V1.0" - -/* L2TP header constants */ -#define L2TP_HDRFLAG_T 0x8000 -#define L2TP_HDRFLAG_L 0x4000 -#define L2TP_HDRFLAG_S 0x0800 -#define L2TP_HDRFLAG_O 0x0200 -#define L2TP_HDRFLAG_P 0x0100 - -#define L2TP_HDR_VER_MASK 0x000F -#define L2TP_HDR_VER 0x0002 - -/* Space for UDP, L2TP and PPP headers */ -#define PPPOL2TP_HEADER_OVERHEAD 40 - -/* Just some random numbers */ -#define L2TP_TUNNEL_MAGIC 0x42114DDA -#define L2TP_SESSION_MAGIC 0x0C04EB7D - -#define PPPOL2TP_HASH_BITS 4 -#define PPPOL2TP_HASH_SIZE (1 << PPPOL2TP_HASH_BITS) - -/* Default trace flags */ -#define PPPOL2TP_DEFAULT_DEBUG_FLAGS 0 - -#define PRINTK(_mask, _type, _lvl, _fmt, args...) \ - do { \ - if ((_mask) & (_type)) \ - printk(_lvl "PPPOL2TP: " _fmt, ##args); \ - } while(0) - -/* Number of bytes to build transmit L2TP headers. - * Unfortunately the size is different depending on whether sequence numbers - * are enabled. - */ -#define PPPOL2TP_L2TP_HDR_SIZE_SEQ 10 -#define PPPOL2TP_L2TP_HDR_SIZE_NOSEQ 6 - -struct pppol2tp_tunnel; - -/* Describes a session. It is the sk_user_data field in the PPPoL2TP - * socket. Contains information to determine incoming packets and transmit - * outgoing ones. 
- */ -struct pppol2tp_session -{ - int magic; /* should be - * L2TP_SESSION_MAGIC */ - int owner; /* pid that opened the socket */ - - struct sock *sock; /* Pointer to the session - * PPPoX socket */ - struct sock *tunnel_sock; /* Pointer to the tunnel UDP - * socket */ - - struct pppol2tp_addr tunnel_addr; /* Description of tunnel */ - - struct pppol2tp_tunnel *tunnel; /* back pointer to tunnel - * context */ - - char name[20]; /* "sess xxxxx/yyyyy", where - * x=tunnel_id, y=session_id */ - int mtu; - int mru; - int flags; /* accessed by PPPIOCGFLAGS. - * Unused. */ - unsigned recv_seq:1; /* expect receive packets with - * sequence numbers? */ - unsigned send_seq:1; /* send packets with sequence - * numbers? */ - unsigned lns_mode:1; /* behave as LNS? LAC enables - * sequence numbers under - * control of LNS. */ - int debug; /* bitmask of debug message - * categories */ - int reorder_timeout; /* configured reorder timeout - * (in jiffies) */ - u16 nr; /* session NR state (receive) */ - u16 ns; /* session NR state (send) */ - struct sk_buff_head reorder_q; /* receive reorder queue */ - struct pppol2tp_ioc_stats stats; - struct hlist_node hlist; /* Hash list node */ -}; - -/* The sk_user_data field of the tunnel's UDP socket. It contains info to track - * all the associated sessions so incoming packets can be sorted out - */ -struct pppol2tp_tunnel -{ - int magic; /* Should be L2TP_TUNNEL_MAGIC */ - rwlock_t hlist_lock; /* protect session_hlist */ - struct hlist_head session_hlist[PPPOL2TP_HASH_SIZE]; - /* hashed list of sessions, - * hashed by id */ - int debug; /* bitmask of debug message - * categories */ - char name[12]; /* "tunl xxxxx" */ - struct pppol2tp_ioc_stats stats; - - void (*old_sk_destruct)(struct sock *); - - struct sock *sock; /* Parent socket */ - struct list_head list; /* Keep a list of all open - * prepared sockets */ - struct net *pppol2tp_net; /* the net we belong to */ - - atomic_t ref_count; -}; - -/* Private data stored for received packets in the skb. - */ -struct pppol2tp_skb_cb { - u16 ns; - u16 nr; - u16 has_seq; - u16 length; - unsigned long expires; -}; - -#define PPPOL2TP_SKB_CB(skb) ((struct pppol2tp_skb_cb *) &skb->cb[sizeof(struct inet_skb_parm)]) - -static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb); -static void pppol2tp_tunnel_free(struct pppol2tp_tunnel *tunnel); - -static atomic_t pppol2tp_tunnel_count; -static atomic_t pppol2tp_session_count; -static struct ppp_channel_ops pppol2tp_chan_ops = { pppol2tp_xmit , NULL }; -static const struct proto_ops pppol2tp_ops; - -/* per-net private data for this module */ -static int pppol2tp_net_id __read_mostly; -struct pppol2tp_net { - struct list_head pppol2tp_tunnel_list; - rwlock_t pppol2tp_tunnel_list_lock; -}; - -static inline struct pppol2tp_net *pppol2tp_pernet(struct net *net) -{ - BUG_ON(!net); - - return net_generic(net, pppol2tp_net_id); -} - -/* Helpers to obtain tunnel/session contexts from sockets. 
- */ -static inline struct pppol2tp_session *pppol2tp_sock_to_session(struct sock *sk) -{ - struct pppol2tp_session *session; - - if (sk == NULL) - return NULL; - - sock_hold(sk); - session = (struct pppol2tp_session *)(sk->sk_user_data); - if (session == NULL) { - sock_put(sk); - goto out; - } - - BUG_ON(session->magic != L2TP_SESSION_MAGIC); -out: - return session; -} - -static inline struct pppol2tp_tunnel *pppol2tp_sock_to_tunnel(struct sock *sk) -{ - struct pppol2tp_tunnel *tunnel; - - if (sk == NULL) - return NULL; - - sock_hold(sk); - tunnel = (struct pppol2tp_tunnel *)(sk->sk_user_data); - if (tunnel == NULL) { - sock_put(sk); - goto out; - } - - BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC); -out: - return tunnel; -} - -/* Tunnel reference counts. Incremented per session that is added to - * the tunnel. - */ -static inline void pppol2tp_tunnel_inc_refcount(struct pppol2tp_tunnel *tunnel) -{ - atomic_inc(&tunnel->ref_count); -} - -static inline void pppol2tp_tunnel_dec_refcount(struct pppol2tp_tunnel *tunnel) -{ - if (atomic_dec_and_test(&tunnel->ref_count)) - pppol2tp_tunnel_free(tunnel); -} - -/* Session hash list. - * The session_id SHOULD be random according to RFC2661, but several - * L2TP implementations (Cisco and Microsoft) use incrementing - * session_ids. So we do a real hash on the session_id, rather than a - * simple bitmask. - */ -static inline struct hlist_head * -pppol2tp_session_id_hash(struct pppol2tp_tunnel *tunnel, u16 session_id) -{ - unsigned long hash_val = (unsigned long) session_id; - return &tunnel->session_hlist[hash_long(hash_val, PPPOL2TP_HASH_BITS)]; -} - -/* Lookup a session by id - */ -static struct pppol2tp_session * -pppol2tp_session_find(struct pppol2tp_tunnel *tunnel, u16 session_id) -{ - struct hlist_head *session_list = - pppol2tp_session_id_hash(tunnel, session_id); - struct pppol2tp_session *session; - struct hlist_node *walk; - - read_lock_bh(&tunnel->hlist_lock); - hlist_for_each_entry(session, walk, session_list, hlist) { - if (session->tunnel_addr.s_session == session_id) { - read_unlock_bh(&tunnel->hlist_lock); - return session; - } - } - read_unlock_bh(&tunnel->hlist_lock); - - return NULL; -} - -/* Lookup a tunnel by id - */ -static struct pppol2tp_tunnel *pppol2tp_tunnel_find(struct net *net, u16 tunnel_id) -{ - struct pppol2tp_tunnel *tunnel; - struct pppol2tp_net *pn = pppol2tp_pernet(net); - - read_lock_bh(&pn->pppol2tp_tunnel_list_lock); - list_for_each_entry(tunnel, &pn->pppol2tp_tunnel_list, list) { - if (tunnel->stats.tunnel_id == tunnel_id) { - read_unlock_bh(&pn->pppol2tp_tunnel_list_lock); - return tunnel; - } - } - read_unlock_bh(&pn->pppol2tp_tunnel_list_lock); - - return NULL; -} - -/***************************************************************************** - * Receive data handling - *****************************************************************************/ - -/* Queue a skb in order. We come here only if the skb has an L2TP sequence - * number. 
- */ -static void pppol2tp_recv_queue_skb(struct pppol2tp_session *session, struct sk_buff *skb) -{ - struct sk_buff *skbp; - struct sk_buff *tmp; - u16 ns = PPPOL2TP_SKB_CB(skb)->ns; - - spin_lock_bh(&session->reorder_q.lock); - skb_queue_walk_safe(&session->reorder_q, skbp, tmp) { - if (PPPOL2TP_SKB_CB(skbp)->ns > ns) { - __skb_queue_before(&session->reorder_q, skbp, skb); - PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG, - "%s: pkt %hu, inserted before %hu, reorder_q len=%d\n", - session->name, ns, PPPOL2TP_SKB_CB(skbp)->ns, - skb_queue_len(&session->reorder_q)); - session->stats.rx_oos_packets++; - goto out; - } - } - - __skb_queue_tail(&session->reorder_q, skb); - -out: - spin_unlock_bh(&session->reorder_q.lock); -} - -/* Dequeue a single skb. - */ -static void pppol2tp_recv_dequeue_skb(struct pppol2tp_session *session, struct sk_buff *skb) -{ - struct pppol2tp_tunnel *tunnel = session->tunnel; - int length = PPPOL2TP_SKB_CB(skb)->length; - struct sock *session_sock = NULL; - - /* We're about to requeue the skb, so return resources - * to its current owner (a socket receive buffer). - */ - skb_orphan(skb); - - tunnel->stats.rx_packets++; - tunnel->stats.rx_bytes += length; - session->stats.rx_packets++; - session->stats.rx_bytes += length; - - if (PPPOL2TP_SKB_CB(skb)->has_seq) { - /* Bump our Nr */ - session->nr++; - PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG, - "%s: updated nr to %hu\n", session->name, session->nr); - } - - /* If the socket is bound, send it in to PPP's input queue. Otherwise - * queue it on the session socket. - */ - session_sock = session->sock; - if (session_sock->sk_state & PPPOX_BOUND) { - struct pppox_sock *po; - PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG, - "%s: recv %d byte data frame, passing to ppp\n", - session->name, length); - - /* We need to forget all info related to the L2TP packet - * gathered in the skb as we are going to reuse the same - * skb for the inner packet. - * Namely we need to: - * - reset xfrm (IPSec) information as it applies to - * the outer L2TP packet and not to the inner one - * - release the dst to force a route lookup on the inner - * IP packet since skb->dst currently points to the dst - * of the UDP tunnel - * - reset netfilter information as it doesn't apply - * to the inner packet either - */ - secpath_reset(skb); - skb_dst_drop(skb); - nf_reset(skb); - - po = pppox_sk(session_sock); - ppp_input(&po->chan, skb); - } else { - PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_INFO, - "%s: socket not bound\n", session->name); - - /* Not bound. Nothing we can do, so discard. */ - session->stats.rx_errors++; - kfree_skb(skb); - } - - sock_put(session->sock); -} - -/* Dequeue skbs from the session's reorder_q, subject to packet order. - * Skbs that have been in the queue for too long are simply discarded. - */ -static void pppol2tp_recv_dequeue(struct pppol2tp_session *session) -{ - struct sk_buff *skb; - struct sk_buff *tmp; - - /* If the pkt at the head of the queue has the nr that we - * expect to send up next, dequeue it and any other - * in-sequence packets behind it. 
- */ - spin_lock_bh(&session->reorder_q.lock); - skb_queue_walk_safe(&session->reorder_q, skb, tmp) { - if (time_after(jiffies, PPPOL2TP_SKB_CB(skb)->expires)) { - session->stats.rx_seq_discards++; - session->stats.rx_errors++; - PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG, - "%s: oos pkt %hu len %d discarded (too old), " - "waiting for %hu, reorder_q_len=%d\n", - session->name, PPPOL2TP_SKB_CB(skb)->ns, - PPPOL2TP_SKB_CB(skb)->length, session->nr, - skb_queue_len(&session->reorder_q)); - __skb_unlink(skb, &session->reorder_q); - kfree_skb(skb); - sock_put(session->sock); - continue; - } - - if (PPPOL2TP_SKB_CB(skb)->has_seq) { - if (PPPOL2TP_SKB_CB(skb)->ns != session->nr) { - PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG, - "%s: holding oos pkt %hu len %d, " - "waiting for %hu, reorder_q_len=%d\n", - session->name, PPPOL2TP_SKB_CB(skb)->ns, - PPPOL2TP_SKB_CB(skb)->length, session->nr, - skb_queue_len(&session->reorder_q)); - goto out; - } - } - __skb_unlink(skb, &session->reorder_q); - - /* Process the skb. We release the queue lock while we - * do so to let other contexts process the queue. - */ - spin_unlock_bh(&session->reorder_q.lock); - pppol2tp_recv_dequeue_skb(session, skb); - spin_lock_bh(&session->reorder_q.lock); - } - -out: - spin_unlock_bh(&session->reorder_q.lock); -} - -static inline int pppol2tp_verify_udp_checksum(struct sock *sk, - struct sk_buff *skb) -{ - struct udphdr *uh = udp_hdr(skb); - u16 ulen = ntohs(uh->len); - struct inet_sock *inet; - __wsum psum; - - if (sk->sk_no_check || skb_csum_unnecessary(skb) || !uh->check) - return 0; - - inet = inet_sk(sk); - psum = csum_tcpudp_nofold(inet->inet_saddr, inet->inet_daddr, ulen, - IPPROTO_UDP, 0); - - if ((skb->ip_summed == CHECKSUM_COMPLETE) && - !csum_fold(csum_add(psum, skb->csum))) - return 0; - - skb->csum = psum; - - return __skb_checksum_complete(skb); -} - -/* Internal receive frame. Do the real work of receiving an L2TP data frame - * here. The skb is not on a list when we get here. - * Returns 0 if the packet was a data packet and was successfully passed on. - * Returns 1 if the packet was not a good data packet and could not be - * forwarded. All such packets are passed up to userspace to deal with. - */ -static int pppol2tp_recv_core(struct sock *sock, struct sk_buff *skb) -{ - struct pppol2tp_session *session = NULL; - struct pppol2tp_tunnel *tunnel; - unsigned char *ptr, *optr; - u16 hdrflags; - u16 tunnel_id, session_id; - int length; - int offset; - - tunnel = pppol2tp_sock_to_tunnel(sock); - if (tunnel == NULL) - goto no_tunnel; - - if (tunnel->sock && pppol2tp_verify_udp_checksum(tunnel->sock, skb)) - goto discard_bad_csum; - - /* UDP always verifies the packet length. */ - __skb_pull(skb, sizeof(struct udphdr)); - - /* Short packet? */ - if (!pskb_may_pull(skb, 12)) { - PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_INFO, - "%s: recv short packet (len=%d)\n", tunnel->name, skb->len); - goto error; - } - - /* Point to L2TP header */ - optr = ptr = skb->data; - - /* Get L2TP header flags */ - hdrflags = ntohs(*(__be16*)ptr); - - /* Trace packet contents, if enabled */ - if (tunnel->debug & PPPOL2TP_MSG_DATA) { - length = min(16u, skb->len); - if (!pskb_may_pull(skb, length)) - goto error; - - printk(KERN_DEBUG "%s: recv: ", tunnel->name); - - offset = 0; - do { - printk(" %02X", ptr[offset]); - } while (++offset < length); - - printk("\n"); - } - - /* Get length of L2TP packet */ - length = skb->len; - - /* If type is control packet, it is handled by userspace. 
*/ - if (hdrflags & L2TP_HDRFLAG_T) { - PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG, - "%s: recv control packet, len=%d\n", tunnel->name, length); - goto error; - } - - /* Skip flags */ - ptr += 2; - - /* If length is present, skip it */ - if (hdrflags & L2TP_HDRFLAG_L) - ptr += 2; - - /* Extract tunnel and session ID */ - tunnel_id = ntohs(*(__be16 *) ptr); - ptr += 2; - session_id = ntohs(*(__be16 *) ptr); - ptr += 2; - - /* Find the session context */ - session = pppol2tp_session_find(tunnel, session_id); - if (!session) { - /* Not found? Pass to userspace to deal with */ - PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_INFO, - "%s: no socket found (%hu/%hu). Passing up.\n", - tunnel->name, tunnel_id, session_id); - goto error; - } - sock_hold(session->sock); - - /* The ref count on the socket was increased by the above call since - * we now hold a pointer to the session. Take care to do sock_put() - * when exiting this function from now on... - */ - - /* Handle the optional sequence numbers. If we are the LAC, - * enable/disable sequence numbers under the control of the LNS. If - * no sequence numbers present but we were expecting them, discard - * frame. - */ - if (hdrflags & L2TP_HDRFLAG_S) { - u16 ns, nr; - ns = ntohs(*(__be16 *) ptr); - ptr += 2; - nr = ntohs(*(__be16 *) ptr); - ptr += 2; - - /* Received a packet with sequence numbers. If we're the LNS, - * check if we sre sending sequence numbers and if not, - * configure it so. - */ - if ((!session->lns_mode) && (!session->send_seq)) { - PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_INFO, - "%s: requested to enable seq numbers by LNS\n", - session->name); - session->send_seq = -1; - } - - /* Store L2TP info in the skb */ - PPPOL2TP_SKB_CB(skb)->ns = ns; - PPPOL2TP_SKB_CB(skb)->nr = nr; - PPPOL2TP_SKB_CB(skb)->has_seq = 1; - - PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG, - "%s: recv data ns=%hu, nr=%hu, session nr=%hu\n", - session->name, ns, nr, session->nr); - } else { - /* No sequence numbers. - * If user has configured mandatory sequence numbers, discard. - */ - if (session->recv_seq) { - PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_WARNING, - "%s: recv data has no seq numbers when required. " - "Discarding\n", session->name); - session->stats.rx_seq_discards++; - goto discard; - } - - /* If we're the LAC and we're sending sequence numbers, the - * LNS has requested that we no longer send sequence numbers. - * If we're the LNS and we're sending sequence numbers, the - * LAC is broken. Discard the frame. - */ - if ((!session->lns_mode) && (session->send_seq)) { - PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_INFO, - "%s: requested to disable seq numbers by LNS\n", - session->name); - session->send_seq = 0; - } else if (session->send_seq) { - PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_WARNING, - "%s: recv data has no seq numbers when required. " - "Discarding\n", session->name); - session->stats.rx_seq_discards++; - goto discard; - } - - /* Store L2TP info in the skb */ - PPPOL2TP_SKB_CB(skb)->has_seq = 0; - } - - /* If offset bit set, skip it. */ - if (hdrflags & L2TP_HDRFLAG_O) { - offset = ntohs(*(__be16 *)ptr); - ptr += 2 + offset; - } - - offset = ptr - optr; - if (!pskb_may_pull(skb, offset)) - goto discard; - - __skb_pull(skb, offset); - - /* Skip PPP header, if present. In testing, Microsoft L2TP clients - * don't send the PPP header (PPP header compression enabled), but - * other clients can include the header. So we cope with both cases - * here. The PPP header is always FF03 when using L2TP. 
- * - * Note that skb->data[] isn't dereferenced from a u16 ptr here since - * the field may be unaligned. - */ - if (!pskb_may_pull(skb, 2)) - goto discard; - - if ((skb->data[0] == 0xff) && (skb->data[1] == 0x03)) - skb_pull(skb, 2); - - /* Prepare skb for adding to the session's reorder_q. Hold - * packets for max reorder_timeout or 1 second if not - * reordering. - */ - PPPOL2TP_SKB_CB(skb)->length = length; - PPPOL2TP_SKB_CB(skb)->expires = jiffies + - (session->reorder_timeout ? session->reorder_timeout : HZ); - - /* Add packet to the session's receive queue. Reordering is done here, if - * enabled. Saved L2TP protocol info is stored in skb->sb[]. - */ - if (PPPOL2TP_SKB_CB(skb)->has_seq) { - if (session->reorder_timeout != 0) { - /* Packet reordering enabled. Add skb to session's - * reorder queue, in order of ns. - */ - pppol2tp_recv_queue_skb(session, skb); - } else { - /* Packet reordering disabled. Discard out-of-sequence - * packets - */ - if (PPPOL2TP_SKB_CB(skb)->ns != session->nr) { - session->stats.rx_seq_discards++; - PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG, - "%s: oos pkt %hu len %d discarded, " - "waiting for %hu, reorder_q_len=%d\n", - session->name, PPPOL2TP_SKB_CB(skb)->ns, - PPPOL2TP_SKB_CB(skb)->length, session->nr, - skb_queue_len(&session->reorder_q)); - goto discard; - } - skb_queue_tail(&session->reorder_q, skb); - } - } else { - /* No sequence numbers. Add the skb to the tail of the - * reorder queue. This ensures that it will be - * delivered after all previous sequenced skbs. - */ - skb_queue_tail(&session->reorder_q, skb); - } - - /* Try to dequeue as many skbs from reorder_q as we can. */ - pppol2tp_recv_dequeue(session); - sock_put(sock); - - return 0; - -discard: - session->stats.rx_errors++; - kfree_skb(skb); - sock_put(session->sock); - sock_put(sock); - - return 0; - -discard_bad_csum: - LIMIT_NETDEBUG("%s: UDP: bad checksum\n", tunnel->name); - UDP_INC_STATS_USER(&init_net, UDP_MIB_INERRORS, 0); - tunnel->stats.rx_errors++; - kfree_skb(skb); - sock_put(sock); - - return 0; - -error: - /* Put UDP header back */ - __skb_push(skb, sizeof(struct udphdr)); - sock_put(sock); - -no_tunnel: - return 1; -} - -/* UDP encapsulation receive handler. See net/ipv4/udp.c. - * Return codes: - * 0 : success. - * <0: error - * >0: skb should be passed up to userspace as UDP. - */ -static int pppol2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) -{ - struct pppol2tp_tunnel *tunnel; - - tunnel = pppol2tp_sock_to_tunnel(sk); - if (tunnel == NULL) - goto pass_up; - - PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG, - "%s: received %d bytes\n", tunnel->name, skb->len); - - if (pppol2tp_recv_core(sk, skb)) - goto pass_up_put; - - sock_put(sk); - return 0; - -pass_up_put: - sock_put(sk); -pass_up: - return 1; -} - -/* Receive message. This is the recvmsg for the PPPoL2TP socket. 
- */ -static int pppol2tp_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, - int flags) -{ - int err; - struct sk_buff *skb; - struct sock *sk = sock->sk; - - err = -EIO; - if (sk->sk_state & PPPOX_BOUND) - goto end; - - msg->msg_namelen = 0; - - err = 0; - skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, - flags & MSG_DONTWAIT, &err); - if (!skb) - goto end; - - if (len > skb->len) - len = skb->len; - else if (len < skb->len) - msg->msg_flags |= MSG_TRUNC; - - err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len); - if (likely(err == 0)) - err = len; - - kfree_skb(skb); -end: - return err; -} - -/************************************************************************ - * Transmit handling - ***********************************************************************/ - -/* Tell how big L2TP headers are for a particular session. This - * depends on whether sequence numbers are being used. - */ -static inline int pppol2tp_l2tp_header_len(struct pppol2tp_session *session) -{ - if (session->send_seq) - return PPPOL2TP_L2TP_HDR_SIZE_SEQ; - - return PPPOL2TP_L2TP_HDR_SIZE_NOSEQ; -} - -/* Build an L2TP header for the session into the buffer provided. - */ -static void pppol2tp_build_l2tp_header(struct pppol2tp_session *session, - void *buf) -{ - __be16 *bufp = buf; - u16 flags = L2TP_HDR_VER; - - if (session->send_seq) - flags |= L2TP_HDRFLAG_S; - - /* Setup L2TP header. - * FIXME: Can this ever be unaligned? Is direct dereferencing of - * 16-bit header fields safe here for all architectures? - */ - *bufp++ = htons(flags); - *bufp++ = htons(session->tunnel_addr.d_tunnel); - *bufp++ = htons(session->tunnel_addr.d_session); - if (session->send_seq) { - *bufp++ = htons(session->ns); - *bufp++ = 0; - session->ns++; - PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG, - "%s: updated ns to %hu\n", session->name, session->ns); - } -} - -/* This is the sendmsg for the PPPoL2TP pppol2tp_session socket. We come here - * when a user application does a sendmsg() on the session socket. L2TP and - * PPP headers must be inserted into the user's data. - */ -static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, - size_t total_len) -{ - static const unsigned char ppph[2] = { 0xff, 0x03 }; - struct sock *sk = sock->sk; - struct inet_sock *inet; - __wsum csum; - struct sk_buff *skb; - int error; - int hdr_len; - struct pppol2tp_session *session; - struct pppol2tp_tunnel *tunnel; - struct udphdr *uh; - unsigned int len; - struct sock *sk_tun; - u16 udp_len; - - error = -ENOTCONN; - if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) - goto error; - - /* Get session and tunnel contexts */ - error = -EBADF; - session = pppol2tp_sock_to_session(sk); - if (session == NULL) - goto error; - - sk_tun = session->tunnel_sock; - tunnel = pppol2tp_sock_to_tunnel(sk_tun); - if (tunnel == NULL) - goto error_put_sess; - - /* What header length is configured for this session? */ - hdr_len = pppol2tp_l2tp_header_len(session); - - /* Allocate a socket buffer */ - error = -ENOMEM; - skb = sock_wmalloc(sk, NET_SKB_PAD + sizeof(struct iphdr) + - sizeof(struct udphdr) + hdr_len + - sizeof(ppph) + total_len, - 0, GFP_KERNEL); - if (!skb) - goto error_put_sess_tun; - - /* Reserve space for headers. 
*/ - skb_reserve(skb, NET_SKB_PAD); - skb_reset_network_header(skb); - skb_reserve(skb, sizeof(struct iphdr)); - skb_reset_transport_header(skb); - - /* Build UDP header */ - inet = inet_sk(sk_tun); - udp_len = hdr_len + sizeof(ppph) + total_len; - uh = (struct udphdr *) skb->data; - uh->source = inet->inet_sport; - uh->dest = inet->inet_dport; - uh->len = htons(udp_len); - uh->check = 0; - skb_put(skb, sizeof(struct udphdr)); - - /* Build L2TP header */ - pppol2tp_build_l2tp_header(session, skb->data); - skb_put(skb, hdr_len); - - /* Add PPP header */ - skb->data[0] = ppph[0]; - skb->data[1] = ppph[1]; - skb_put(skb, 2); - - /* Copy user data into skb */ - error = memcpy_fromiovec(skb->data, m->msg_iov, total_len); - if (error < 0) { - kfree_skb(skb); - goto error_put_sess_tun; - } - skb_put(skb, total_len); - - /* Calculate UDP checksum if configured to do so */ - if (sk_tun->sk_no_check == UDP_CSUM_NOXMIT) - skb->ip_summed = CHECKSUM_NONE; - else if (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) { - skb->ip_summed = CHECKSUM_COMPLETE; - csum = skb_checksum(skb, 0, udp_len, 0); - uh->check = csum_tcpudp_magic(inet->inet_saddr, - inet->inet_daddr, - udp_len, IPPROTO_UDP, csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - } else { - skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - uh->check = ~csum_tcpudp_magic(inet->inet_saddr, - inet->inet_daddr, - udp_len, IPPROTO_UDP, 0); - } - - /* Debug */ - if (session->send_seq) - PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG, - "%s: send %Zd bytes, ns=%hu\n", session->name, - total_len, session->ns - 1); - else - PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG, - "%s: send %Zd bytes\n", session->name, total_len); - - if (session->debug & PPPOL2TP_MSG_DATA) { - int i; - unsigned char *datap = skb->data; - - printk(KERN_DEBUG "%s: xmit:", session->name); - for (i = 0; i < total_len; i++) { - printk(" %02X", *datap++); - if (i == 15) { - printk(" ..."); - break; - } - } - printk("\n"); - } - - /* Queue the packet to IP for output */ - len = skb->len; - error = ip_queue_xmit(skb, 1); - - /* Update stats */ - if (error >= 0) { - tunnel->stats.tx_packets++; - tunnel->stats.tx_bytes += len; - session->stats.tx_packets++; - session->stats.tx_bytes += len; - } else { - tunnel->stats.tx_errors++; - session->stats.tx_errors++; - } - - return error; - -error_put_sess_tun: - sock_put(session->tunnel_sock); -error_put_sess: - sock_put(sk); -error: - return error; -} - -/* Automatically called when the skb is freed. - */ -static void pppol2tp_sock_wfree(struct sk_buff *skb) -{ - sock_put(skb->sk); -} - -/* For data skbs that we transmit, we associate with the tunnel socket - * but don't do accounting. - */ -static inline void pppol2tp_skb_set_owner_w(struct sk_buff *skb, struct sock *sk) -{ - sock_hold(sk); - skb->sk = sk; - skb->destructor = pppol2tp_sock_wfree; -} - -/* Transmit function called by generic PPP driver. Sends PPP frame - * over PPPoL2TP socket. - * - * This is almost the same as pppol2tp_sendmsg(), but rather than - * being called with a msghdr from userspace, it is called with a skb - * from the kernel. - * - * The supplied skb from ppp doesn't have enough headroom for the - * insertion of L2TP, UDP and IP headers so we need to allocate more - * headroom in the skb. This will create a cloned skb. 
But we must be - * careful in the error case because the caller will expect to free - * the skb it supplied, not our cloned skb. So we take care to always - * leave the original skb unfreed if we return an error. - */ -static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) -{ - static const u8 ppph[2] = { 0xff, 0x03 }; - struct sock *sk = (struct sock *) chan->private; - struct sock *sk_tun; - int hdr_len; - u16 udp_len; - struct pppol2tp_session *session; - struct pppol2tp_tunnel *tunnel; - int rc; - int headroom; - int data_len = skb->len; - struct inet_sock *inet; - __wsum csum; - struct udphdr *uh; - unsigned int len; - int old_headroom; - int new_headroom; - - if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) - goto abort; - - /* Get session and tunnel contexts from the socket */ - session = pppol2tp_sock_to_session(sk); - if (session == NULL) - goto abort; - - sk_tun = session->tunnel_sock; - if (sk_tun == NULL) - goto abort_put_sess; - tunnel = pppol2tp_sock_to_tunnel(sk_tun); - if (tunnel == NULL) - goto abort_put_sess; - - /* What header length is configured for this session? */ - hdr_len = pppol2tp_l2tp_header_len(session); - - /* Check that there's enough headroom in the skb to insert IP, - * UDP and L2TP and PPP headers. If not enough, expand it to - * make room. Adjust truesize. - */ - headroom = NET_SKB_PAD + sizeof(struct iphdr) + - sizeof(struct udphdr) + hdr_len + sizeof(ppph); - old_headroom = skb_headroom(skb); - if (skb_cow_head(skb, headroom)) - goto abort_put_sess_tun; - - new_headroom = skb_headroom(skb); - skb_orphan(skb); - skb->truesize += new_headroom - old_headroom; - - /* Setup PPP header */ - __skb_push(skb, sizeof(ppph)); - skb->data[0] = ppph[0]; - skb->data[1] = ppph[1]; - - /* Setup L2TP header */ - pppol2tp_build_l2tp_header(session, __skb_push(skb, hdr_len)); - - udp_len = sizeof(struct udphdr) + hdr_len + sizeof(ppph) + data_len; - - /* Setup UDP header */ - inet = inet_sk(sk_tun); - __skb_push(skb, sizeof(*uh)); - skb_reset_transport_header(skb); - uh = udp_hdr(skb); - uh->source = inet->inet_sport; - uh->dest = inet->inet_dport; - uh->len = htons(udp_len); - uh->check = 0; - - /* Debug */ - if (session->send_seq) - PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG, - "%s: send %d bytes, ns=%hu\n", session->name, - data_len, session->ns - 1); - else - PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG, - "%s: send %d bytes\n", session->name, data_len); - - if (session->debug & PPPOL2TP_MSG_DATA) { - int i; - unsigned char *datap = skb->data; - - printk(KERN_DEBUG "%s: xmit:", session->name); - for (i = 0; i < data_len; i++) { - printk(" %02X", *datap++); - if (i == 31) { - printk(" ..."); - break; - } - } - printk("\n"); - } - - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | - IPSKB_REROUTED); - nf_reset(skb); - - /* Get routing info from the tunnel socket */ - skb_dst_drop(skb); - skb_dst_set(skb, dst_clone(__sk_dst_get(sk_tun))); - pppol2tp_skb_set_owner_w(skb, sk_tun); - - /* Calculate UDP checksum if configured to do so */ - if (sk_tun->sk_no_check == UDP_CSUM_NOXMIT) - skb->ip_summed = CHECKSUM_NONE; - else if ((skb_dst(skb) && skb_dst(skb)->dev) && - (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) { - skb->ip_summed = CHECKSUM_COMPLETE; - csum = skb_checksum(skb, 0, udp_len, 0); - uh->check = csum_tcpudp_magic(inet->inet_saddr, - inet->inet_daddr, - udp_len, IPPROTO_UDP, csum); - if (uh->check == 0) - uh->check = 
CSUM_MANGLED_0; - } else { - skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - uh->check = ~csum_tcpudp_magic(inet->inet_saddr, - inet->inet_daddr, - udp_len, IPPROTO_UDP, 0); - } - - /* Queue the packet to IP for output */ - len = skb->len; - rc = ip_queue_xmit(skb, 1); - - /* Update stats */ - if (rc >= 0) { - tunnel->stats.tx_packets++; - tunnel->stats.tx_bytes += len; - session->stats.tx_packets++; - session->stats.tx_bytes += len; - } else { - tunnel->stats.tx_errors++; - session->stats.tx_errors++; - } - - sock_put(sk_tun); - sock_put(sk); - return 1; - -abort_put_sess_tun: - sock_put(sk_tun); -abort_put_sess: - sock_put(sk); -abort: - /* Free the original skb */ - kfree_skb(skb); - return 1; -} - -/***************************************************************************** - * Session (and tunnel control) socket create/destroy. - *****************************************************************************/ - -/* When the tunnel UDP socket is closed, all the attached sockets need to go - * too. - */ -static void pppol2tp_tunnel_closeall(struct pppol2tp_tunnel *tunnel) -{ - int hash; - struct hlist_node *walk; - struct hlist_node *tmp; - struct pppol2tp_session *session; - struct sock *sk; - - BUG_ON(tunnel == NULL); - - PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, - "%s: closing all sessions...\n", tunnel->name); - - write_lock_bh(&tunnel->hlist_lock); - for (hash = 0; hash < PPPOL2TP_HASH_SIZE; hash++) { -again: - hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[hash]) { - struct sk_buff *skb; - - session = hlist_entry(walk, struct pppol2tp_session, hlist); - - sk = session->sock; - - PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, - "%s: closing session\n", session->name); - - hlist_del_init(&session->hlist); - - /* Since we should hold the sock lock while - * doing any unbinding, we need to release the - * lock we're holding before taking that lock. - * Hold a reference to the sock so it doesn't - * disappear as we're jumping between locks. - */ - sock_hold(sk); - write_unlock_bh(&tunnel->hlist_lock); - lock_sock(sk); - - if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) { - pppox_unbind_sock(sk); - sk->sk_state = PPPOX_DEAD; - sk->sk_state_change(sk); - } - - /* Purge any queued data */ - skb_queue_purge(&sk->sk_receive_queue); - skb_queue_purge(&sk->sk_write_queue); - while ((skb = skb_dequeue(&session->reorder_q))) { - kfree_skb(skb); - sock_put(sk); - } - - release_sock(sk); - sock_put(sk); - - /* Now restart from the beginning of this hash - * chain. We always remove a session from the - * list so we are guaranteed to make forward - * progress. - */ - write_lock_bh(&tunnel->hlist_lock); - goto again; - } - } - write_unlock_bh(&tunnel->hlist_lock); -} - -/* Really kill the tunnel. - * Come here only when all sessions have been cleared from the tunnel. - */ -static void pppol2tp_tunnel_free(struct pppol2tp_tunnel *tunnel) -{ - struct pppol2tp_net *pn = pppol2tp_pernet(tunnel->pppol2tp_net); - - /* Remove from socket list */ - write_lock_bh(&pn->pppol2tp_tunnel_list_lock); - list_del_init(&tunnel->list); - write_unlock_bh(&pn->pppol2tp_tunnel_list_lock); - - atomic_dec(&pppol2tp_tunnel_count); - kfree(tunnel); -} - -/* Tunnel UDP socket destruct hook. - * The tunnel context is deleted only when all session sockets have been - * closed. 
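A note on the UDP checksum handling above: RFC 768 reserves a transmitted checksum of zero to mean "no checksum", so a computed value of zero must be sent as all-ones, which is what the CSUM_MANGLED_0 substitution expresses. A standalone sketch of the fold-and-substitute rule in plain C (illustrative only, not the kernel's csum helpers):

#include <stdint.h>
#include <stdio.h>

static uint16_t csum_fold(uint32_t sum)
{
    while (sum >> 16)
        sum = (sum & 0xffff) + (sum >> 16);
    return (uint16_t)~sum;
}

int main(void)
{
    /* payload chosen so the one's-complement sum is 0xffff */
    uint8_t data[] = { 0xff, 0xfe, 0x00, 0x01 };
    uint32_t sum = 0;

    for (unsigned int i = 0; i < sizeof(data); i += 2)
        sum += (data[i] << 8) | data[i + 1];

    uint16_t check = csum_fold(sum);   /* folds to zero for this payload */
    if (check == 0)
        check = 0xffff;                /* zero is reserved for "no checksum" */

    printf("udp check field on the wire: 0x%04x\n", check);
    return 0;
}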
- */ -static void pppol2tp_tunnel_destruct(struct sock *sk) -{ - struct pppol2tp_tunnel *tunnel; - - tunnel = sk->sk_user_data; - if (tunnel == NULL) - goto end; - - PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, - "%s: closing...\n", tunnel->name); - - /* Close all sessions */ - pppol2tp_tunnel_closeall(tunnel); - - /* No longer an encapsulation socket. See net/ipv4/udp.c */ - (udp_sk(sk))->encap_type = 0; - (udp_sk(sk))->encap_rcv = NULL; - - /* Remove hooks into tunnel socket */ - tunnel->sock = NULL; - sk->sk_destruct = tunnel->old_sk_destruct; - sk->sk_user_data = NULL; - - /* Call original (UDP) socket descructor */ - if (sk->sk_destruct != NULL) - (*sk->sk_destruct)(sk); - - pppol2tp_tunnel_dec_refcount(tunnel); - -end: - return; -} - -/* Really kill the session socket. (Called from sock_put() if - * refcnt == 0.) - */ -static void pppol2tp_session_destruct(struct sock *sk) -{ - struct pppol2tp_session *session = NULL; - - if (sk->sk_user_data != NULL) { - struct pppol2tp_tunnel *tunnel; - - session = sk->sk_user_data; - if (session == NULL) - goto out; - - BUG_ON(session->magic != L2TP_SESSION_MAGIC); - - /* Don't use pppol2tp_sock_to_tunnel() here to - * get the tunnel context because the tunnel - * socket might have already been closed (its - * sk->sk_user_data will be NULL) so use the - * session's private tunnel ptr instead. - */ - tunnel = session->tunnel; - if (tunnel != NULL) { - BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC); - - /* If session_id is zero, this is a null - * session context, which was created for a - * socket that is being used only to manage - * tunnels. - */ - if (session->tunnel_addr.s_session != 0) { - /* Delete the session socket from the - * hash - */ - write_lock_bh(&tunnel->hlist_lock); - hlist_del_init(&session->hlist); - write_unlock_bh(&tunnel->hlist_lock); - - atomic_dec(&pppol2tp_session_count); - } - - /* This will delete the tunnel context if this - * is the last session on the tunnel. - */ - session->tunnel = NULL; - session->tunnel_sock = NULL; - pppol2tp_tunnel_dec_refcount(tunnel); - } - } - - kfree(session); -out: - return; -} - -/* Called when the PPPoX socket (session) is closed. - */ -static int pppol2tp_release(struct socket *sock) -{ - struct sock *sk = sock->sk; - struct pppol2tp_session *session; - int error; - - if (!sk) - return 0; - - error = -EBADF; - lock_sock(sk); - if (sock_flag(sk, SOCK_DEAD) != 0) - goto error; - - pppox_unbind_sock(sk); - - /* Signal the death of the socket. */ - sk->sk_state = PPPOX_DEAD; - sock_orphan(sk); - sock->sk = NULL; - - session = pppol2tp_sock_to_session(sk); - - /* Purge any queued data */ - skb_queue_purge(&sk->sk_receive_queue); - skb_queue_purge(&sk->sk_write_queue); - if (session != NULL) { - struct sk_buff *skb; - while ((skb = skb_dequeue(&session->reorder_q))) { - kfree_skb(skb); - sock_put(sk); - } - sock_put(sk); - } - - release_sock(sk); - - /* This will delete the session context via - * pppol2tp_session_destruct() if the socket's refcnt drops to - * zero. - */ - sock_put(sk); - - return 0; - -error: - release_sock(sk); - return error; -} - -/* Internal function to prepare a tunnel (UDP) socket to have PPPoX - * sockets attached to it. 
- */ -static struct sock *pppol2tp_prepare_tunnel_socket(struct net *net, - int fd, u16 tunnel_id, int *error) -{ - int err; - struct socket *sock = NULL; - struct sock *sk; - struct pppol2tp_tunnel *tunnel; - struct pppol2tp_net *pn; - struct sock *ret = NULL; - - /* Get the tunnel UDP socket from the fd, which was opened by - * the userspace L2TP daemon. - */ - err = -EBADF; - sock = sockfd_lookup(fd, &err); - if (!sock) { - PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_ERR, - "tunl %hu: sockfd_lookup(fd=%d) returned %d\n", - tunnel_id, fd, err); - goto err; - } - - sk = sock->sk; - - /* Quick sanity checks */ - err = -EPROTONOSUPPORT; - if (sk->sk_protocol != IPPROTO_UDP) { - PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_ERR, - "tunl %hu: fd %d wrong protocol, got %d, expected %d\n", - tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP); - goto err; - } - err = -EAFNOSUPPORT; - if (sock->ops->family != AF_INET) { - PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_ERR, - "tunl %hu: fd %d wrong family, got %d, expected %d\n", - tunnel_id, fd, sock->ops->family, AF_INET); - goto err; - } - - err = -ENOTCONN; - - /* Check if this socket has already been prepped */ - tunnel = (struct pppol2tp_tunnel *)sk->sk_user_data; - if (tunnel != NULL) { - /* User-data field already set */ - err = -EBUSY; - BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC); - - /* This socket has already been prepped */ - ret = tunnel->sock; - goto out; - } - - /* This socket is available and needs prepping. Create a new tunnel - * context and init it. - */ - sk->sk_user_data = tunnel = kzalloc(sizeof(struct pppol2tp_tunnel), GFP_KERNEL); - if (sk->sk_user_data == NULL) { - err = -ENOMEM; - goto err; - } - - tunnel->magic = L2TP_TUNNEL_MAGIC; - sprintf(&tunnel->name[0], "tunl %hu", tunnel_id); - - tunnel->stats.tunnel_id = tunnel_id; - tunnel->debug = PPPOL2TP_DEFAULT_DEBUG_FLAGS; - - /* Hook on the tunnel socket destructor so that we can cleanup - * if the tunnel socket goes away. - */ - tunnel->old_sk_destruct = sk->sk_destruct; - sk->sk_destruct = pppol2tp_tunnel_destruct; - - tunnel->sock = sk; - sk->sk_allocation = GFP_ATOMIC; - - /* Misc init */ - rwlock_init(&tunnel->hlist_lock); - - /* The net we belong to */ - tunnel->pppol2tp_net = net; - pn = pppol2tp_pernet(net); - - /* Add tunnel to our list */ - INIT_LIST_HEAD(&tunnel->list); - write_lock_bh(&pn->pppol2tp_tunnel_list_lock); - list_add(&tunnel->list, &pn->pppol2tp_tunnel_list); - write_unlock_bh(&pn->pppol2tp_tunnel_list_lock); - atomic_inc(&pppol2tp_tunnel_count); - - /* Bump the reference count. The tunnel context is deleted - * only when this drops to zero. - */ - pppol2tp_tunnel_inc_refcount(tunnel); - - /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ - (udp_sk(sk))->encap_type = UDP_ENCAP_L2TPINUDP; - (udp_sk(sk))->encap_rcv = pppol2tp_udp_encap_recv; - - ret = tunnel->sock; - - *error = 0; -out: - if (sock) - sockfd_put(sock); - - return ret; - -err: - *error = err; - goto out; -} - -static struct proto pppol2tp_sk_proto = { - .name = "PPPOL2TP", - .owner = THIS_MODULE, - .obj_size = sizeof(struct pppox_sock), -}; - -/* socket() handler. Initialize a new struct sock. 
- */ -static int pppol2tp_create(struct net *net, struct socket *sock) -{ - int error = -ENOMEM; - struct sock *sk; - - sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppol2tp_sk_proto); - if (!sk) - goto out; - - sock_init_data(sock, sk); - - sock->state = SS_UNCONNECTED; - sock->ops = &pppol2tp_ops; - - sk->sk_backlog_rcv = pppol2tp_recv_core; - sk->sk_protocol = PX_PROTO_OL2TP; - sk->sk_family = PF_PPPOX; - sk->sk_state = PPPOX_NONE; - sk->sk_type = SOCK_STREAM; - sk->sk_destruct = pppol2tp_session_destruct; - - error = 0; - -out: - return error; -} - -/* connect() handler. Attach a PPPoX socket to a tunnel UDP socket - */ -static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, - int sockaddr_len, int flags) -{ - struct sock *sk = sock->sk; - struct sockaddr_pppol2tp *sp = (struct sockaddr_pppol2tp *) uservaddr; - struct pppox_sock *po = pppox_sk(sk); - struct sock *tunnel_sock = NULL; - struct pppol2tp_session *session = NULL; - struct pppol2tp_tunnel *tunnel; - struct dst_entry *dst; - int error = 0; - - lock_sock(sk); - - error = -EINVAL; - if (sp->sa_protocol != PX_PROTO_OL2TP) - goto end; - - /* Check for already bound sockets */ - error = -EBUSY; - if (sk->sk_state & PPPOX_CONNECTED) - goto end; - - /* We don't supporting rebinding anyway */ - error = -EALREADY; - if (sk->sk_user_data) - goto end; /* socket is already attached */ - - /* Don't bind if s_tunnel is 0 */ - error = -EINVAL; - if (sp->pppol2tp.s_tunnel == 0) - goto end; - - /* Special case: prepare tunnel socket if s_session and - * d_session is 0. Otherwise look up tunnel using supplied - * tunnel id. - */ - if ((sp->pppol2tp.s_session == 0) && (sp->pppol2tp.d_session == 0)) { - tunnel_sock = pppol2tp_prepare_tunnel_socket(sock_net(sk), - sp->pppol2tp.fd, - sp->pppol2tp.s_tunnel, - &error); - if (tunnel_sock == NULL) - goto end; - - sock_hold(tunnel_sock); - tunnel = tunnel_sock->sk_user_data; - } else { - tunnel = pppol2tp_tunnel_find(sock_net(sk), sp->pppol2tp.s_tunnel); - - /* Error if we can't find the tunnel */ - error = -ENOENT; - if (tunnel == NULL) - goto end; - - tunnel_sock = tunnel->sock; - } - - /* Check that this session doesn't already exist */ - error = -EEXIST; - session = pppol2tp_session_find(tunnel, sp->pppol2tp.s_session); - if (session != NULL) - goto end; - - /* Allocate and initialize a new session context. */ - session = kzalloc(sizeof(struct pppol2tp_session), GFP_KERNEL); - if (session == NULL) { - error = -ENOMEM; - goto end; - } - - skb_queue_head_init(&session->reorder_q); - - session->magic = L2TP_SESSION_MAGIC; - session->owner = current->pid; - session->sock = sk; - session->tunnel = tunnel; - session->tunnel_sock = tunnel_sock; - session->tunnel_addr = sp->pppol2tp; - sprintf(&session->name[0], "sess %hu/%hu", - session->tunnel_addr.s_tunnel, - session->tunnel_addr.s_session); - - session->stats.tunnel_id = session->tunnel_addr.s_tunnel; - session->stats.session_id = session->tunnel_addr.s_session; - - INIT_HLIST_NODE(&session->hlist); - - /* Inherit debug options from tunnel */ - session->debug = tunnel->debug; - - /* Default MTU must allow space for UDP/L2TP/PPP - * headers. 
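The connect() handler above treats s_session == d_session == 0 as a request to prepare a tunnel-management socket rather than a PPP session. A hypothetical userspace sketch of that call, assuming the struct and constant names exported through <linux/if_pppox.h> and <linux/if_pppol2tp.h>, with tunnel_fd an already-created AF_PPPOX/PX_PROTO_OL2TP socket and udp_fd the tunnel's UDP socket:

#include <string.h>
#include <sys/socket.h>
#include <linux/if_pppox.h>
#include <linux/if_pppol2tp.h>

int attach_tunnel_socket(int tunnel_fd, int udp_fd,
                         unsigned short tid, unsigned short peer_tid)
{
    struct sockaddr_pppol2tp sax;

    memset(&sax, 0, sizeof(sax));
    sax.sa_family = AF_PPPOX;
    sax.sa_protocol = PX_PROTO_OL2TP;
    sax.pppol2tp.fd = udp_fd;          /* tunnel UDP socket fd */
    sax.pppol2tp.s_tunnel = tid;
    sax.pppol2tp.d_tunnel = peer_tid;
    sax.pppol2tp.s_session = 0;        /* 0/0 marks a tunnel socket */
    sax.pppol2tp.d_session = 0;

    return connect(tunnel_fd, (struct sockaddr *)&sax, sizeof(sax));
}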
- */ - session->mtu = session->mru = 1500 - PPPOL2TP_HEADER_OVERHEAD; - - /* If PMTU discovery was enabled, use the MTU that was discovered */ - dst = sk_dst_get(sk); - if (dst != NULL) { - u32 pmtu = dst_mtu(__sk_dst_get(sk)); - if (pmtu != 0) - session->mtu = session->mru = pmtu - - PPPOL2TP_HEADER_OVERHEAD; - dst_release(dst); - } - - /* Special case: if source & dest session_id == 0x0000, this socket is - * being created to manage the tunnel. Don't add the session to the - * session hash list, just set up the internal context for use by - * ioctl() and sockopt() handlers. - */ - if ((session->tunnel_addr.s_session == 0) && - (session->tunnel_addr.d_session == 0)) { - error = 0; - sk->sk_user_data = session; - goto out_no_ppp; - } - - /* Get tunnel context from the tunnel socket */ - tunnel = pppol2tp_sock_to_tunnel(tunnel_sock); - if (tunnel == NULL) { - error = -EBADF; - goto end; - } - - /* Right now, because we don't have a way to push the incoming skb's - * straight through the UDP layer, the only header we need to worry - * about is the L2TP header. This size is different depending on - * whether sequence numbers are enabled for the data channel. - */ - po->chan.hdrlen = PPPOL2TP_L2TP_HDR_SIZE_NOSEQ; - - po->chan.private = sk; - po->chan.ops = &pppol2tp_chan_ops; - po->chan.mtu = session->mtu; - - error = ppp_register_net_channel(sock_net(sk), &po->chan); - if (error) - goto end_put_tun; - - /* This is how we get the session context from the socket. */ - sk->sk_user_data = session; - - /* Add session to the tunnel's hash list */ - write_lock_bh(&tunnel->hlist_lock); - hlist_add_head(&session->hlist, - pppol2tp_session_id_hash(tunnel, - session->tunnel_addr.s_session)); - write_unlock_bh(&tunnel->hlist_lock); - - atomic_inc(&pppol2tp_session_count); - -out_no_ppp: - pppol2tp_tunnel_inc_refcount(tunnel); - sk->sk_state = PPPOX_CONNECTED; - PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, - "%s: created\n", session->name); - -end_put_tun: - sock_put(tunnel_sock); -end: - release_sock(sk); - - if (error != 0) { - if (session) - PRINTK(session->debug, - PPPOL2TP_MSG_CONTROL, KERN_WARNING, - "%s: connect failed: %d\n", - session->name, error); - else - PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_WARNING, - "connect failed: %d\n", error); - } - - return error; -} - -/* getname() support. - */ -static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, - int *usockaddr_len, int peer) -{ - int len = sizeof(struct sockaddr_pppol2tp); - struct sockaddr_pppol2tp sp; - int error = 0; - struct pppol2tp_session *session; - - error = -ENOTCONN; - if (sock->sk->sk_state != PPPOX_CONNECTED) - goto end; - - session = pppol2tp_sock_to_session(sock->sk); - if (session == NULL) { - error = -EBADF; - goto end; - } - - sp.sa_family = AF_PPPOX; - sp.sa_protocol = PX_PROTO_OL2TP; - memcpy(&sp.pppol2tp, &session->tunnel_addr, - sizeof(struct pppol2tp_addr)); - - memcpy(uaddr, &sp, len); - - *usockaddr_len = len; - - error = 0; - sock_put(sock->sk); - -end: - return error; -} - -/**************************************************************************** - * ioctl() handlers. - * - * The PPPoX socket is created for L2TP sessions: tunnels have their own UDP - * sockets. However, in order to control kernel tunnel features, we allow - * userspace to create a special "tunnel" PPPoX socket which is used for - * control only. Tunnel PPPoX sockets have session_id == 0 and simply allow - * the user application to issue L2TP setsockopt(), getsockopt() and ioctl() - * calls. 
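For the MTU default chosen in connect() above, the arithmetic is simply the path MTU (or 1500) minus the encapsulation overhead hidden behind PPPOL2TP_HEADER_OVERHEAD. A sketch of that calculation, assuming IPv4 without options and the 10-byte L2TPv2 header with sequence numbers (20 + 8 + 10 + 2 = 40 bytes):

#include <stdio.h>

static int pppol2tp_session_mtu(int path_mtu)
{
    const int overhead = 20 + 8 + 10 + 2;  /* IPv4 + UDP + L2TPv2(seq) + PPP */

    return (path_mtu > 0 ? path_mtu : 1500) - overhead;
}

int main(void)
{
    printf("no PMTU information:  %d\n", pppol2tp_session_mtu(0));
    printf("discovered PMTU 1400: %d\n", pppol2tp_session_mtu(1400));
    return 0;
}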
- ****************************************************************************/ - -/* Session ioctl helper. - */ -static int pppol2tp_session_ioctl(struct pppol2tp_session *session, - unsigned int cmd, unsigned long arg) -{ - struct ifreq ifr; - int err = 0; - struct sock *sk = session->sock; - int val = (int) arg; - - PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_DEBUG, - "%s: pppol2tp_session_ioctl(cmd=%#x, arg=%#lx)\n", - session->name, cmd, arg); - - sock_hold(sk); - - switch (cmd) { - case SIOCGIFMTU: - err = -ENXIO; - if (!(sk->sk_state & PPPOX_CONNECTED)) - break; - - err = -EFAULT; - if (copy_from_user(&ifr, (void __user *) arg, sizeof(struct ifreq))) - break; - ifr.ifr_mtu = session->mtu; - if (copy_to_user((void __user *) arg, &ifr, sizeof(struct ifreq))) - break; - - PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, - "%s: get mtu=%d\n", session->name, session->mtu); - err = 0; - break; - - case SIOCSIFMTU: - err = -ENXIO; - if (!(sk->sk_state & PPPOX_CONNECTED)) - break; - - err = -EFAULT; - if (copy_from_user(&ifr, (void __user *) arg, sizeof(struct ifreq))) - break; - - session->mtu = ifr.ifr_mtu; - - PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, - "%s: set mtu=%d\n", session->name, session->mtu); - err = 0; - break; - - case PPPIOCGMRU: - err = -ENXIO; - if (!(sk->sk_state & PPPOX_CONNECTED)) - break; - - err = -EFAULT; - if (put_user(session->mru, (int __user *) arg)) - break; - - PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, - "%s: get mru=%d\n", session->name, session->mru); - err = 0; - break; - - case PPPIOCSMRU: - err = -ENXIO; - if (!(sk->sk_state & PPPOX_CONNECTED)) - break; - - err = -EFAULT; - if (get_user(val,(int __user *) arg)) - break; - - session->mru = val; - PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, - "%s: set mru=%d\n", session->name, session->mru); - err = 0; - break; - - case PPPIOCGFLAGS: - err = -EFAULT; - if (put_user(session->flags, (int __user *) arg)) - break; - - PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, - "%s: get flags=%d\n", session->name, session->flags); - err = 0; - break; - - case PPPIOCSFLAGS: - err = -EFAULT; - if (get_user(val, (int __user *) arg)) - break; - session->flags = val; - PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, - "%s: set flags=%d\n", session->name, session->flags); - err = 0; - break; - - case PPPIOCGL2TPSTATS: - err = -ENXIO; - if (!(sk->sk_state & PPPOX_CONNECTED)) - break; - - if (copy_to_user((void __user *) arg, &session->stats, - sizeof(session->stats))) - break; - PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, - "%s: get L2TP stats\n", session->name); - err = 0; - break; - - default: - err = -ENOSYS; - break; - } - - sock_put(sk); - - return err; -} - -/* Tunnel ioctl helper. - * - * Note the special handling for PPPIOCGL2TPSTATS below. If the ioctl data - * specifies a session_id, the session ioctl handler is called. This allows an - * application to retrieve session stats via a tunnel socket. 
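A hypothetical userspace sketch of that stats retrieval path, assuming PPPIOCGL2TPSTATS and struct pppol2tp_ioc_stats as exported through <linux/if_ppp.h> and <linux/if_pppol2tp.h> (error handling trimmed): the caller fills in session_id and issues the ioctl on the tunnel socket.

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/if_ppp.h>
#include <linux/if_pppol2tp.h>

int get_session_stats(int tunnel_fd, unsigned short session_id)
{
    struct pppol2tp_ioc_stats stats;

    memset(&stats, 0, sizeof(stats));
    stats.session_id = session_id;    /* non-zero: forwarded to the session */

    if (ioctl(tunnel_fd, PPPIOCGL2TPSTATS, &stats) < 0)
        return -1;

    printf("tx %llu pkts / rx %llu pkts\n",
           (unsigned long long)stats.tx_packets,
           (unsigned long long)stats.rx_packets);
    return 0;
}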
- */ -static int pppol2tp_tunnel_ioctl(struct pppol2tp_tunnel *tunnel, - unsigned int cmd, unsigned long arg) -{ - int err = 0; - struct sock *sk = tunnel->sock; - struct pppol2tp_ioc_stats stats_req; - - PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_DEBUG, - "%s: pppol2tp_tunnel_ioctl(cmd=%#x, arg=%#lx)\n", tunnel->name, - cmd, arg); - - sock_hold(sk); - - switch (cmd) { - case PPPIOCGL2TPSTATS: - err = -ENXIO; - if (!(sk->sk_state & PPPOX_CONNECTED)) - break; - - if (copy_from_user(&stats_req, (void __user *) arg, - sizeof(stats_req))) { - err = -EFAULT; - break; - } - if (stats_req.session_id != 0) { - /* resend to session ioctl handler */ - struct pppol2tp_session *session = - pppol2tp_session_find(tunnel, stats_req.session_id); - if (session != NULL) - err = pppol2tp_session_ioctl(session, cmd, arg); - else - err = -EBADR; - break; - } -#ifdef CONFIG_XFRM - tunnel->stats.using_ipsec = (sk->sk_policy[0] || sk->sk_policy[1]) ? 1 : 0; -#endif - if (copy_to_user((void __user *) arg, &tunnel->stats, - sizeof(tunnel->stats))) { - err = -EFAULT; - break; - } - PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, - "%s: get L2TP stats\n", tunnel->name); - err = 0; - break; - - default: - err = -ENOSYS; - break; - } - - sock_put(sk); - - return err; -} - -/* Main ioctl() handler. - * Dispatch to tunnel or session helpers depending on the socket. - */ -static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd, - unsigned long arg) -{ - struct sock *sk = sock->sk; - struct pppol2tp_session *session; - struct pppol2tp_tunnel *tunnel; - int err; - - if (!sk) - return 0; - - err = -EBADF; - if (sock_flag(sk, SOCK_DEAD) != 0) - goto end; - - err = -ENOTCONN; - if ((sk->sk_user_data == NULL) || - (!(sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)))) - goto end; - - /* Get session context from the socket */ - err = -EBADF; - session = pppol2tp_sock_to_session(sk); - if (session == NULL) - goto end; - - /* Special case: if session's session_id is zero, treat ioctl as a - * tunnel ioctl - */ - if ((session->tunnel_addr.s_session == 0) && - (session->tunnel_addr.d_session == 0)) { - err = -EBADF; - tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock); - if (tunnel == NULL) - goto end_put_sess; - - err = pppol2tp_tunnel_ioctl(tunnel, cmd, arg); - sock_put(session->tunnel_sock); - goto end_put_sess; - } - - err = pppol2tp_session_ioctl(session, cmd, arg); - -end_put_sess: - sock_put(sk); -end: - return err; -} - -/***************************************************************************** - * setsockopt() / getsockopt() support. - * - * The PPPoX socket is created for L2TP sessions: tunnels have their own UDP - * sockets. In order to control kernel tunnel features, we allow userspace to - * create a special "tunnel" PPPoX socket which is used for control only. - * Tunnel PPPoX sockets have session_id == 0 and simply allow the user - * application to issue L2TP setsockopt(), getsockopt() and ioctl() calls. - *****************************************************************************/ - -/* Tunnel setsockopt() helper. - */ -static int pppol2tp_tunnel_setsockopt(struct sock *sk, - struct pppol2tp_tunnel *tunnel, - int optname, int val) -{ - int err = 0; - - switch (optname) { - case PPPOL2TP_SO_DEBUG: - tunnel->debug = val; - PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, - "%s: set debug=%x\n", tunnel->name, tunnel->debug); - break; - - default: - err = -ENOPROTOOPT; - break; - } - - return err; -} - -/* Session setsockopt helper. 
- */ -static int pppol2tp_session_setsockopt(struct sock *sk, - struct pppol2tp_session *session, - int optname, int val) -{ - int err = 0; - - switch (optname) { - case PPPOL2TP_SO_RECVSEQ: - if ((val != 0) && (val != 1)) { - err = -EINVAL; - break; - } - session->recv_seq = val ? -1 : 0; - PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, - "%s: set recv_seq=%d\n", session->name, - session->recv_seq); - break; - - case PPPOL2TP_SO_SENDSEQ: - if ((val != 0) && (val != 1)) { - err = -EINVAL; - break; - } - session->send_seq = val ? -1 : 0; - { - struct sock *ssk = session->sock; - struct pppox_sock *po = pppox_sk(ssk); - po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ : - PPPOL2TP_L2TP_HDR_SIZE_NOSEQ; - } - PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, - "%s: set send_seq=%d\n", session->name, session->send_seq); - break; - - case PPPOL2TP_SO_LNSMODE: - if ((val != 0) && (val != 1)) { - err = -EINVAL; - break; - } - session->lns_mode = val ? -1 : 0; - PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, - "%s: set lns_mode=%d\n", session->name, - session->lns_mode); - break; - - case PPPOL2TP_SO_DEBUG: - session->debug = val; - PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, - "%s: set debug=%x\n", session->name, session->debug); - break; - - case PPPOL2TP_SO_REORDERTO: - session->reorder_timeout = msecs_to_jiffies(val); - PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, - "%s: set reorder_timeout=%d\n", session->name, - session->reorder_timeout); - break; - - default: - err = -ENOPROTOOPT; - break; - } - - return err; -} - -/* Main setsockopt() entry point. - * Does API checks, then calls either the tunnel or session setsockopt - * handler, according to whether the PPPoL2TP socket is a for a regular - * session or the special tunnel type. - */ -static int pppol2tp_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) -{ - struct sock *sk = sock->sk; - struct pppol2tp_session *session = sk->sk_user_data; - struct pppol2tp_tunnel *tunnel; - int val; - int err; - - if (level != SOL_PPPOL2TP) - return udp_prot.setsockopt(sk, level, optname, optval, optlen); - - if (optlen < sizeof(int)) - return -EINVAL; - - if (get_user(val, (int __user *)optval)) - return -EFAULT; - - err = -ENOTCONN; - if (sk->sk_user_data == NULL) - goto end; - - /* Get session context from the socket */ - err = -EBADF; - session = pppol2tp_sock_to_session(sk); - if (session == NULL) - goto end; - - /* Special case: if session_id == 0x0000, treat as operation on tunnel - */ - if ((session->tunnel_addr.s_session == 0) && - (session->tunnel_addr.d_session == 0)) { - err = -EBADF; - tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock); - if (tunnel == NULL) - goto end_put_sess; - - err = pppol2tp_tunnel_setsockopt(sk, tunnel, optname, val); - sock_put(session->tunnel_sock); - } else - err = pppol2tp_session_setsockopt(sk, session, optname, val); - - err = 0; - -end_put_sess: - sock_put(sk); -end: - return err; -} - -/* Tunnel getsockopt helper. Called with sock locked. - */ -static int pppol2tp_tunnel_getsockopt(struct sock *sk, - struct pppol2tp_tunnel *tunnel, - int optname, int *val) -{ - int err = 0; - - switch (optname) { - case PPPOL2TP_SO_DEBUG: - *val = tunnel->debug; - PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, - "%s: get debug=%x\n", tunnel->name, tunnel->debug); - break; - - default: - err = -ENOPROTOOPT; - break; - } - - return err; -} - -/* Session getsockopt helper. Called with sock locked. 
- */ -static int pppol2tp_session_getsockopt(struct sock *sk, - struct pppol2tp_session *session, - int optname, int *val) -{ - int err = 0; - - switch (optname) { - case PPPOL2TP_SO_RECVSEQ: - *val = session->recv_seq; - PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, - "%s: get recv_seq=%d\n", session->name, *val); - break; - - case PPPOL2TP_SO_SENDSEQ: - *val = session->send_seq; - PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, - "%s: get send_seq=%d\n", session->name, *val); - break; - - case PPPOL2TP_SO_LNSMODE: - *val = session->lns_mode; - PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, - "%s: get lns_mode=%d\n", session->name, *val); - break; - - case PPPOL2TP_SO_DEBUG: - *val = session->debug; - PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, - "%s: get debug=%d\n", session->name, *val); - break; - - case PPPOL2TP_SO_REORDERTO: - *val = (int) jiffies_to_msecs(session->reorder_timeout); - PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, - "%s: get reorder_timeout=%d\n", session->name, *val); - break; - - default: - err = -ENOPROTOOPT; - } - - return err; -} - -/* Main getsockopt() entry point. - * Does API checks, then calls either the tunnel or session getsockopt - * handler, according to whether the PPPoX socket is a for a regular session - * or the special tunnel type. - */ -static int pppol2tp_getsockopt(struct socket *sock, int level, - int optname, char __user *optval, int __user *optlen) -{ - struct sock *sk = sock->sk; - struct pppol2tp_session *session = sk->sk_user_data; - struct pppol2tp_tunnel *tunnel; - int val, len; - int err; - - if (level != SOL_PPPOL2TP) - return udp_prot.getsockopt(sk, level, optname, optval, optlen); - - if (get_user(len, (int __user *) optlen)) - return -EFAULT; - - len = min_t(unsigned int, len, sizeof(int)); - - if (len < 0) - return -EINVAL; - - err = -ENOTCONN; - if (sk->sk_user_data == NULL) - goto end; - - /* Get the session context */ - err = -EBADF; - session = pppol2tp_sock_to_session(sk); - if (session == NULL) - goto end; - - /* Special case: if session_id == 0x0000, treat as operation on tunnel */ - if ((session->tunnel_addr.s_session == 0) && - (session->tunnel_addr.d_session == 0)) { - err = -EBADF; - tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock); - if (tunnel == NULL) - goto end_put_sess; - - err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val); - sock_put(session->tunnel_sock); - } else - err = pppol2tp_session_getsockopt(sk, session, optname, &val); - - err = -EFAULT; - if (put_user(len, (int __user *) optlen)) - goto end_put_sess; - - if (copy_to_user((void __user *) optval, &val, len)) - goto end_put_sess; - - err = 0; - -end_put_sess: - sock_put(sk); -end: - return err; -} - -/***************************************************************************** - * /proc filesystem for debug - *****************************************************************************/ - -#ifdef CONFIG_PROC_FS - -#include - -struct pppol2tp_seq_data { - struct seq_net_private p; - struct pppol2tp_tunnel *tunnel; /* current tunnel */ - struct pppol2tp_session *session; /* NULL means get first session in tunnel */ -}; - -static struct pppol2tp_session *next_session(struct pppol2tp_tunnel *tunnel, struct pppol2tp_session *curr) -{ - struct pppol2tp_session *session = NULL; - struct hlist_node *walk; - int found = 0; - int next = 0; - int i; - - read_lock_bh(&tunnel->hlist_lock); - for (i = 0; i < PPPOL2TP_HASH_SIZE; i++) { - hlist_for_each_entry(session, walk, &tunnel->session_hlist[i], hlist) { 
- if (curr == NULL) { - found = 1; - goto out; - } - if (session == curr) { - next = 1; - continue; - } - if (next) { - found = 1; - goto out; - } - } - } -out: - read_unlock_bh(&tunnel->hlist_lock); - if (!found) - session = NULL; - - return session; -} - -static struct pppol2tp_tunnel *next_tunnel(struct pppol2tp_net *pn, - struct pppol2tp_tunnel *curr) -{ - struct pppol2tp_tunnel *tunnel = NULL; - - read_lock_bh(&pn->pppol2tp_tunnel_list_lock); - if (list_is_last(&curr->list, &pn->pppol2tp_tunnel_list)) { - goto out; - } - tunnel = list_entry(curr->list.next, struct pppol2tp_tunnel, list); -out: - read_unlock_bh(&pn->pppol2tp_tunnel_list_lock); - - return tunnel; -} - -static void *pppol2tp_seq_start(struct seq_file *m, loff_t *offs) -{ - struct pppol2tp_seq_data *pd = SEQ_START_TOKEN; - struct pppol2tp_net *pn; - loff_t pos = *offs; - - if (!pos) - goto out; - - BUG_ON(m->private == NULL); - pd = m->private; - pn = pppol2tp_pernet(seq_file_net(m)); - - if (pd->tunnel == NULL) { - if (!list_empty(&pn->pppol2tp_tunnel_list)) - pd->tunnel = list_entry(pn->pppol2tp_tunnel_list.next, struct pppol2tp_tunnel, list); - } else { - pd->session = next_session(pd->tunnel, pd->session); - if (pd->session == NULL) { - pd->tunnel = next_tunnel(pn, pd->tunnel); - } - } - - /* NULL tunnel and session indicates end of list */ - if ((pd->tunnel == NULL) && (pd->session == NULL)) - pd = NULL; - -out: - return pd; -} - -static void *pppol2tp_seq_next(struct seq_file *m, void *v, loff_t *pos) -{ - (*pos)++; - return NULL; -} - -static void pppol2tp_seq_stop(struct seq_file *p, void *v) -{ - /* nothing to do */ -} - -static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v) -{ - struct pppol2tp_tunnel *tunnel = v; - - seq_printf(m, "\nTUNNEL '%s', %c %d\n", - tunnel->name, - (tunnel == tunnel->sock->sk_user_data) ? 'Y':'N', - atomic_read(&tunnel->ref_count) - 1); - seq_printf(m, " %08x %llu/%llu/%llu %llu/%llu/%llu\n", - tunnel->debug, - (unsigned long long)tunnel->stats.tx_packets, - (unsigned long long)tunnel->stats.tx_bytes, - (unsigned long long)tunnel->stats.tx_errors, - (unsigned long long)tunnel->stats.rx_packets, - (unsigned long long)tunnel->stats.rx_bytes, - (unsigned long long)tunnel->stats.rx_errors); -} - -static void pppol2tp_seq_session_show(struct seq_file *m, void *v) -{ - struct pppol2tp_session *session = v; - - seq_printf(m, " SESSION '%s' %08X/%d %04X/%04X -> " - "%04X/%04X %d %c\n", - session->name, - ntohl(session->tunnel_addr.addr.sin_addr.s_addr), - ntohs(session->tunnel_addr.addr.sin_port), - session->tunnel_addr.s_tunnel, - session->tunnel_addr.s_session, - session->tunnel_addr.d_tunnel, - session->tunnel_addr.d_session, - session->sock->sk_state, - (session == session->sock->sk_user_data) ? - 'Y' : 'N'); - seq_printf(m, " %d/%d/%c/%c/%s %08x %u\n", - session->mtu, session->mru, - session->recv_seq ? 'R' : '-', - session->send_seq ? 'S' : '-', - session->lns_mode ? 
"LNS" : "LAC", - session->debug, - jiffies_to_msecs(session->reorder_timeout)); - seq_printf(m, " %hu/%hu %llu/%llu/%llu %llu/%llu/%llu\n", - session->nr, session->ns, - (unsigned long long)session->stats.tx_packets, - (unsigned long long)session->stats.tx_bytes, - (unsigned long long)session->stats.tx_errors, - (unsigned long long)session->stats.rx_packets, - (unsigned long long)session->stats.rx_bytes, - (unsigned long long)session->stats.rx_errors); -} - -static int pppol2tp_seq_show(struct seq_file *m, void *v) -{ - struct pppol2tp_seq_data *pd = v; - - /* display header on line 1 */ - if (v == SEQ_START_TOKEN) { - seq_puts(m, "PPPoL2TP driver info, " PPPOL2TP_DRV_VERSION "\n"); - seq_puts(m, "TUNNEL name, user-data-ok session-count\n"); - seq_puts(m, " debug tx-pkts/bytes/errs rx-pkts/bytes/errs\n"); - seq_puts(m, " SESSION name, addr/port src-tid/sid " - "dest-tid/sid state user-data-ok\n"); - seq_puts(m, " mtu/mru/rcvseq/sendseq/lns debug reorderto\n"); - seq_puts(m, " nr/ns tx-pkts/bytes/errs rx-pkts/bytes/errs\n"); - goto out; - } - - /* Show the tunnel or session context. - */ - if (pd->session == NULL) - pppol2tp_seq_tunnel_show(m, pd->tunnel); - else - pppol2tp_seq_session_show(m, pd->session); - -out: - return 0; -} - -static const struct seq_operations pppol2tp_seq_ops = { - .start = pppol2tp_seq_start, - .next = pppol2tp_seq_next, - .stop = pppol2tp_seq_stop, - .show = pppol2tp_seq_show, -}; - -/* Called when our /proc file is opened. We allocate data for use when - * iterating our tunnel / session contexts and store it in the private - * data of the seq_file. - */ -static int pppol2tp_proc_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &pppol2tp_seq_ops, - sizeof(struct pppol2tp_seq_data)); -} - -static const struct file_operations pppol2tp_proc_fops = { - .owner = THIS_MODULE, - .open = pppol2tp_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -#endif /* CONFIG_PROC_FS */ - -/***************************************************************************** - * Init and cleanup - *****************************************************************************/ - -static const struct proto_ops pppol2tp_ops = { - .family = AF_PPPOX, - .owner = THIS_MODULE, - .release = pppol2tp_release, - .bind = sock_no_bind, - .connect = pppol2tp_connect, - .socketpair = sock_no_socketpair, - .accept = sock_no_accept, - .getname = pppol2tp_getname, - .poll = datagram_poll, - .listen = sock_no_listen, - .shutdown = sock_no_shutdown, - .setsockopt = pppol2tp_setsockopt, - .getsockopt = pppol2tp_getsockopt, - .sendmsg = pppol2tp_sendmsg, - .recvmsg = pppol2tp_recvmsg, - .mmap = sock_no_mmap, - .ioctl = pppox_ioctl, -}; - -static struct pppox_proto pppol2tp_proto = { - .create = pppol2tp_create, - .ioctl = pppol2tp_ioctl -}; - -static __net_init int pppol2tp_init_net(struct net *net) -{ - struct pppol2tp_net *pn = pppol2tp_pernet(net); - struct proc_dir_entry *pde; - - INIT_LIST_HEAD(&pn->pppol2tp_tunnel_list); - rwlock_init(&pn->pppol2tp_tunnel_list_lock); - - pde = proc_net_fops_create(net, "pppol2tp", S_IRUGO, &pppol2tp_proc_fops); -#ifdef CONFIG_PROC_FS - if (!pde) - return -ENOMEM; -#endif - - return 0; -} - -static __net_exit void pppol2tp_exit_net(struct net *net) -{ - proc_net_remove(net, "pppol2tp"); -} - -static struct pernet_operations pppol2tp_net_ops = { - .init = pppol2tp_init_net, - .exit = pppol2tp_exit_net, - .id = &pppol2tp_net_id, - .size = sizeof(struct pppol2tp_net), -}; - -static int __init 
pppol2tp_init(void) -{ - int err; - - err = proto_register(&pppol2tp_sk_proto, 0); - if (err) - goto out; - err = register_pppox_proto(PX_PROTO_OL2TP, &pppol2tp_proto); - if (err) - goto out_unregister_pppol2tp_proto; - - err = register_pernet_device(&pppol2tp_net_ops); - if (err) - goto out_unregister_pppox_proto; - - printk(KERN_INFO "PPPoL2TP kernel driver, %s\n", - PPPOL2TP_DRV_VERSION); - -out: - return err; -out_unregister_pppox_proto: - unregister_pppox_proto(PX_PROTO_OL2TP); -out_unregister_pppol2tp_proto: - proto_unregister(&pppol2tp_sk_proto); - goto out; -} - -static void __exit pppol2tp_exit(void) -{ - unregister_pppox_proto(PX_PROTO_OL2TP); - unregister_pernet_device(&pppol2tp_net_ops); - proto_unregister(&pppol2tp_sk_proto); -} - -module_init(pppol2tp_init); -module_exit(pppol2tp_exit); - -MODULE_AUTHOR("Martijn van Oosterhout , " - "James Chapman "); -MODULE_DESCRIPTION("PPP over L2TP over UDP"); -MODULE_LICENSE("GPL"); -MODULE_VERSION(PPPOL2TP_DRV_VERSION); -- cgit v1.2.2 From 9345471bca96d00d4196b3dcc4a5625f1bfae247 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Fri, 2 Apr 2010 06:18:44 +0000 Subject: l2tp: Add ppp device name to L2TP ppp session data When dumping L2TP PPP sessions using /proc/net/pppol2tp, get the assigned PPP device name from PPP using ppp_dev_name(). Signed-off-by: James Chapman Reviewed-by: Randy Dunlap Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index baac072761aa..3ad290dd830a 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1465,6 +1465,7 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v) struct l2tp_session *session = v; struct l2tp_tunnel *tunnel = session->tunnel; struct pppol2tp_session *ps = l2tp_session_priv(session); + struct pppox_sock *po = pppox_sk(ps->sock); u32 ip = 0; u16 port = 0; @@ -1499,6 +1500,9 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v) (unsigned long long)session->stats.rx_packets, (unsigned long long)session->stats.rx_bytes, (unsigned long long)session->stats.rx_errors); + + if (po) + seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan)); } static int pppol2tp_seq_show(struct seq_file *m, void *v) -- cgit v1.2.2 From f7faffa3ff8ef6ae712ef16312b8a2aa7a1c95fe Mon Sep 17 00:00:00 2001 From: James Chapman Date: Fri, 2 Apr 2010 06:18:49 +0000 Subject: l2tp: Add L2TPv3 protocol support The L2TPv3 protocol changes the layout of the L2TP packet header. Tunnel and session ids change from 16-bit to 32-bit values, data sequence numbers change from 16-bit to 24-bit values and PPP-specific fields are moved into protocol-specific subheaders. Although this patch introduces L2TPv3 protocol support, there are no userspace interfaces to create L2TPv3 sessions yet. Signed-off-by: James Chapman Reviewed-by: Randy Dunlap Signed-off-by: David S. Miller --- net/l2tp/Kconfig | 25 +++ net/l2tp/l2tp_core.c | 532 ++++++++++++++++++++++++++++++++++++++------------- net/l2tp/l2tp_core.h | 54 +++++- net/l2tp/l2tp_ppp.c | 21 +- 4 files changed, 484 insertions(+), 148 deletions(-) (limited to 'net') diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig index ec88c5cdc397..d60758d60478 100644 --- a/net/l2tp/Kconfig +++ b/net/l2tp/Kconfig @@ -19,6 +19,10 @@ menuconfig L2TP connections. L2TP is also used as a VPN protocol, popular with home workers to connect to their offices. + L2TPv3 allows other protocols as well as PPP to be carried + over L2TP tunnels. 
L2TPv3 is defined in RFC 3931 + . + The kernel component handles only L2TP data packets: a userland daemon handles L2TP the control protocol (tunnel and session setup). One such daemon is OpenL2TP @@ -26,3 +30,24 @@ menuconfig L2TP If you don't need L2TP, say N. To compile all L2TP code as modules, choose M here. + +config L2TP_V3 + bool "L2TPv3 support (EXPERIMENTAL)" + depends on EXPERIMENTAL && L2TP + help + Layer Two Tunneling Protocol Version 3 + + From RFC 3931 . + + The Layer Two Tunneling Protocol (L2TP) provides a dynamic + mechanism for tunneling Layer 2 (L2) "circuits" across a + packet-oriented data network (e.g., over IP). L2TP, as + originally defined in RFC 2661, is a standard method for + tunneling Point-to-Point Protocol (PPP) [RFC1661] sessions. + L2TP has since been adopted for tunneling a number of other + L2 protocols, including ATM, Frame Relay, HDLC and even raw + ethernet frames. + + If you are connecting to L2TPv3 equipment, or you want to + tunnel raw ethernet frames using L2TP, say Y here. If + unsure, say N. diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 4b6da3689893..0eee1a65f1b1 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -65,6 +65,7 @@ #define L2TP_HDR_VER_MASK 0x000F #define L2TP_HDR_VER_2 0x0002 +#define L2TP_HDR_VER_3 0x0003 /* L2TPv3 default L2-specific sublayer */ #define L2TP_SLFLAG_S 0x40000000 @@ -85,7 +86,7 @@ /* Private data stored for received packets in the skb. */ struct l2tp_skb_cb { - u16 ns; + u32 ns; u16 has_seq; u16 length; unsigned long expires; @@ -101,6 +102,8 @@ static unsigned int l2tp_net_id; struct l2tp_net { struct list_head l2tp_tunnel_list; rwlock_t l2tp_tunnel_list_lock; + struct hlist_head l2tp_session_hlist[L2TP_HASH_SIZE_2]; + rwlock_t l2tp_session_hlist_lock; }; static inline struct l2tp_net *l2tp_pernet(struct net *net) @@ -110,6 +113,40 @@ static inline struct l2tp_net *l2tp_pernet(struct net *net) return net_generic(net, l2tp_net_id); } +/* Session hash global list for L2TPv3. + * The session_id SHOULD be random according to RFC3931, but several + * L2TP implementations use incrementing session_ids. So we do a real + * hash on the session_id, rather than a simple bitmask. + */ +static inline struct hlist_head * +l2tp_session_id_hash_2(struct l2tp_net *pn, u32 session_id) +{ + return &pn->l2tp_session_hlist[hash_32(session_id, L2TP_HASH_BITS_2)]; + +} + +/* Lookup a session by id in the global session list + */ +static struct l2tp_session *l2tp_session_find_2(struct net *net, u32 session_id) +{ + struct l2tp_net *pn = l2tp_pernet(net); + struct hlist_head *session_list = + l2tp_session_id_hash_2(pn, session_id); + struct l2tp_session *session; + struct hlist_node *walk; + + read_lock_bh(&pn->l2tp_session_hlist_lock); + hlist_for_each_entry(session, walk, session_list, global_hlist) { + if (session->session_id == session_id) { + read_unlock_bh(&pn->l2tp_session_hlist_lock); + return session; + } + } + read_unlock_bh(&pn->l2tp_session_hlist_lock); + + return NULL; +} + /* Session hash list. 
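The global L2TPv3 lookup above hashes the full 32-bit session_id with hash_32() rather than masking off low bits, so peers that hand out incrementing ids still spread across buckets. A standalone sketch of the multiplicative-hash idea (the golden-ratio constant here is illustrative; the kernel's hash_32() uses its own constant):

#include <stdint.h>
#include <stdio.h>

static unsigned int hash32(uint32_t val, unsigned int bits)
{
    return (uint32_t)(val * 0x9e3779b9u) >> (32 - bits);
}

int main(void)
{
    /* incrementing session ids still land in well-spread buckets */
    for (uint32_t id = 1; id <= 4; id++)
        printf("session %u -> bucket %u of %u\n",
               (unsigned int)id, hash32(id, 4), 1u << 4);
    return 0;
}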
* The session_id SHOULD be random according to RFC2661, but several * L2TP implementations (Cisco and Microsoft) use incrementing @@ -124,13 +161,20 @@ l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id) /* Lookup a session by id */ -struct l2tp_session *l2tp_session_find(struct l2tp_tunnel *tunnel, u32 session_id) +struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id) { - struct hlist_head *session_list = - l2tp_session_id_hash(tunnel, session_id); + struct hlist_head *session_list; struct l2tp_session *session; struct hlist_node *walk; + /* In L2TPv3, session_ids are unique over all tunnels and we + * sometimes need to look them up before we know the + * tunnel. + */ + if (tunnel == NULL) + return l2tp_session_find_2(net, session_id); + + session_list = l2tp_session_id_hash(tunnel, session_id); read_lock_bh(&tunnel->hlist_lock); hlist_for_each_entry(session, walk, session_list, hlist) { if (session->session_id == session_id) { @@ -218,7 +262,7 @@ static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *sk { struct sk_buff *skbp; struct sk_buff *tmp; - u16 ns = L2TP_SKB_CB(skb)->ns; + u32 ns = L2TP_SKB_CB(skb)->ns; spin_lock_bh(&session->reorder_q.lock); skb_queue_walk_safe(&session->reorder_q, skbp, tmp) { @@ -259,6 +303,11 @@ static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff * if (L2TP_SKB_CB(skb)->has_seq) { /* Bump our Nr */ session->nr++; + if (tunnel->version == L2TP_HDR_VER_2) + session->nr &= 0xffff; + else + session->nr &= 0xffffff; + PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG, "%s: updated nr to %hu\n", session->name, session->nr); } @@ -291,8 +340,8 @@ static void l2tp_recv_dequeue(struct l2tp_session *session) session->stats.rx_seq_discards++; session->stats.rx_errors++; PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG, - "%s: oos pkt %hu len %d discarded (too old), " - "waiting for %hu, reorder_q_len=%d\n", + "%s: oos pkt %u len %d discarded (too old), " + "waiting for %u, reorder_q_len=%d\n", session->name, L2TP_SKB_CB(skb)->ns, L2TP_SKB_CB(skb)->length, session->nr, skb_queue_len(&session->reorder_q)); @@ -306,8 +355,8 @@ static void l2tp_recv_dequeue(struct l2tp_session *session) if (L2TP_SKB_CB(skb)->has_seq) { if (L2TP_SKB_CB(skb)->ns != session->nr) { PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG, - "%s: holding oos pkt %hu len %d, " - "waiting for %hu, reorder_q_len=%d\n", + "%s: holding oos pkt %u len %d, " + "waiting for %u, reorder_q_len=%d\n", session->name, L2TP_SKB_CB(skb)->ns, L2TP_SKB_CB(skb)->length, session->nr, skb_queue_len(&session->reorder_q)); @@ -352,100 +401,73 @@ static inline int l2tp_verify_udp_checksum(struct sock *sk, return __skb_checksum_complete(skb); } -/* Internal UDP receive frame. Do the real work of receiving an L2TP data frame - * here. The skb is not on a list when we get here. - * Returns 0 if the packet was a data packet and was successfully passed on. - * Returns 1 if the packet was not a good data packet and could not be - * forwarded. All such packets are passed up to userspace to deal with. +/* Do receive processing of L2TP data frames. We handle both L2TPv2 + * and L2TPv3 data frames here. 
+ * + * L2TPv2 Data Message Header + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |T|L|x|x|S|x|O|P|x|x|x|x| Ver | Length (opt) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Tunnel ID | Session ID | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Ns (opt) | Nr (opt) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Offset Size (opt) | Offset pad... (opt) + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Data frames are marked by T=0. All other fields are the same as + * those in L2TP control frames. + * + * L2TPv3 Data Message Header + * + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | L2TP Session Header | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | L2-Specific Sublayer | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Tunnel Payload ... + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * L2TPv3 Session Header Over IP + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Session ID | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Cookie (optional, maximum 64 bits)... + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * L2TPv3 L2-Specific Sublayer Format + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |x|S|x|x|x|x|x|x| Sequence Number | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Cookie value, sublayer format and offset (pad) are negotiated with + * the peer when the session is set up. Unlike L2TPv2, we do not need + * to parse the packet header to determine if optional fields are + * present. + * + * Caller must already have parsed the frame and determined that it is + * a data (not control) frame before coming here. Fields up to the + * session-id have already been parsed and ptr points to the data + * after the session-id. */ -int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, - int (*payload_hook)(struct sk_buff *skb)) +void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, + unsigned char *ptr, unsigned char *optr, u16 hdrflags, + int length, int (*payload_hook)(struct sk_buff *skb)) { - struct l2tp_session *session = NULL; - unsigned char *ptr, *optr; - u16 hdrflags; - u32 tunnel_id, session_id; - int length; + struct l2tp_tunnel *tunnel = session->tunnel; int offset; - u16 version; - u16 ns, nr; - - if (tunnel->sock && l2tp_verify_udp_checksum(tunnel->sock, skb)) - goto discard_bad_csum; - - /* UDP always verifies the packet length. */ - __skb_pull(skb, sizeof(struct udphdr)); - - /* Short packet? 
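As a companion to the L2TPv2 diagram above, a standalone sketch of decoding the first 16 bits of a data message (bit positions per RFC 2661; the driver uses its L2TP_HDRFLAG_* and L2TP_HDR_VER_MASK defines for the same values):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint16_t hdrflags = 0x4802;              /* L and S bits set, Ver = 2 */

    int is_control = !!(hdrflags & 0x8000);  /* T: control vs. data */
    int has_length = !!(hdrflags & 0x4000);  /* L: length field present */
    int has_seq    = !!(hdrflags & 0x0800);  /* S: Ns/Nr present */
    int has_offset = !!(hdrflags & 0x0200);  /* O: offset field present */
    int version    = hdrflags & 0x000f;

    printf("control=%d length=%d seq=%d offset=%d ver=%d\n",
           is_control, has_length, has_seq, has_offset, version);
    return 0;
}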
*/ - if (!pskb_may_pull(skb, L2TP_HDR_SIZE_SEQ)) { - PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO, - "%s: recv short packet (len=%d)\n", tunnel->name, skb->len); - goto error; - } - - /* Point to L2TP header */ - optr = ptr = skb->data; - - /* Trace packet contents, if enabled */ - if (tunnel->debug & L2TP_MSG_DATA) { - length = min(32u, skb->len); - if (!pskb_may_pull(skb, length)) - goto error; - - printk(KERN_DEBUG "%s: recv: ", tunnel->name); - - offset = 0; - do { - printk(" %02X", ptr[offset]); - } while (++offset < length); - - printk("\n"); - } - - /* Get L2TP header flags */ - hdrflags = ntohs(*(__be16 *)ptr); - - /* Check protocol version */ - version = hdrflags & L2TP_HDR_VER_MASK; - if (version != tunnel->version) { - PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO, - "%s: recv protocol version mismatch: got %d expected %d\n", - tunnel->name, version, tunnel->version); - goto error; - } - - /* Get length of L2TP packet */ - length = skb->len; - - /* If type is control packet, it is handled by userspace. */ - if (hdrflags & L2TP_HDRFLAG_T) { - PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_DEBUG, - "%s: recv control packet, len=%d\n", tunnel->name, length); - goto error; - } - - /* Skip flags */ - ptr += 2; - - /* If length is present, skip it */ - if (hdrflags & L2TP_HDRFLAG_L) - ptr += 2; - - /* Extract tunnel and session ID */ - tunnel_id = ntohs(*(__be16 *) ptr); - ptr += 2; - session_id = ntohs(*(__be16 *) ptr); - ptr += 2; - - /* Find the session context */ - session = l2tp_session_find(tunnel, session_id); - if (!session) { - /* Not found? Pass to userspace to deal with */ - PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO, - "%s: no session found (%hu/%hu). Passing up.\n", - tunnel->name, tunnel_id, session_id); - goto error; - } + u32 ns, nr; /* The ref count is increased since we now hold a pointer to * the session. Take care to decrement the refcnt when exiting @@ -455,6 +477,18 @@ int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, if (session->ref) (*session->ref)(session); + /* Parse and check optional cookie */ + if (session->peer_cookie_len > 0) { + if (memcmp(ptr, &session->peer_cookie[0], session->peer_cookie_len)) { + PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO, + "%s: cookie mismatch (%u/%u). Discarding.\n", + tunnel->name, tunnel->tunnel_id, session->session_id); + session->stats.rx_cookie_discards++; + goto discard; + } + ptr += session->peer_cookie_len; + } + /* Handle the optional sequence numbers. Sequence numbers are * in different places for L2TPv2 and L2TPv3. 
* @@ -464,21 +498,40 @@ int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, */ ns = nr = 0; L2TP_SKB_CB(skb)->has_seq = 0; - if (hdrflags & L2TP_HDRFLAG_S) { - ns = (u16) ntohs(*(__be16 *) ptr); - ptr += 2; - nr = ntohs(*(__be16 *) ptr); - ptr += 2; + if (tunnel->version == L2TP_HDR_VER_2) { + if (hdrflags & L2TP_HDRFLAG_S) { + ns = ntohs(*(__be16 *) ptr); + ptr += 2; + nr = ntohs(*(__be16 *) ptr); + ptr += 2; - /* Store L2TP info in the skb */ - L2TP_SKB_CB(skb)->ns = ns; - L2TP_SKB_CB(skb)->has_seq = 1; + /* Store L2TP info in the skb */ + L2TP_SKB_CB(skb)->ns = ns; + L2TP_SKB_CB(skb)->has_seq = 1; - PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG, - "%s: recv data ns=%hu, nr=%hu, session nr=%hu\n", - session->name, ns, nr, session->nr); + PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG, + "%s: recv data ns=%u, nr=%u, session nr=%u\n", + session->name, ns, nr, session->nr); + } + } else if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) { + u32 l2h = ntohl(*(__be32 *) ptr); + + if (l2h & 0x40000000) { + ns = l2h & 0x00ffffff; + + /* Store L2TP info in the skb */ + L2TP_SKB_CB(skb)->ns = ns; + L2TP_SKB_CB(skb)->has_seq = 1; + + PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG, + "%s: recv data ns=%u, session nr=%u\n", + session->name, ns, session->nr); + } } + /* Advance past L2-specific header, if present */ + ptr += session->l2specific_len; + if (L2TP_SKB_CB(skb)->has_seq) { /* Received a packet with sequence numbers. If we're the LNS, * check if we sre sending sequence numbers and if not, @@ -489,6 +542,7 @@ int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, "%s: requested to enable seq numbers by LNS\n", session->name); session->send_seq = -1; + l2tp_session_set_header_len(session, tunnel->version); } } else { /* No sequence numbers. @@ -512,6 +566,7 @@ int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, "%s: requested to disable seq numbers by LNS\n", session->name); session->send_seq = 0; + l2tp_session_set_header_len(session, tunnel->version); } else if (session->send_seq) { PRINTK(session->debug, L2TP_MSG_SEQ, KERN_WARNING, "%s: recv data has no seq numbers when required. " @@ -521,11 +576,19 @@ int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, } } - /* If offset bit set, skip it. */ - if (hdrflags & L2TP_HDRFLAG_O) { - offset = ntohs(*(__be16 *)ptr); - ptr += 2 + offset; - } + /* Session data offset is handled differently for L2TPv2 and + * L2TPv3. For L2TPv2, there is an optional 16-bit value in + * the header. For L2TPv3, the offset is negotiated using AVPs + * in the session setup control protocol. + */ + if (tunnel->version == L2TP_HDR_VER_2) { + /* If offset bit set, skip it. 
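The L2TPv3 branch above reads the default L2-specific sublayer as one 32-bit word: the S bit lives at 0x40000000 and the sequence number occupies the low 24 bits. A standalone sketch of that decode using the same masks:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t l2h = 0x40000000u | 0x000102u;   /* S bit set, ns = 0x102 */

    if (l2h & 0x40000000u)
        printf("sequenced frame, ns = %u\n", (unsigned int)(l2h & 0x00ffffffu));
    else
        printf("no sequence number present\n");
    return 0;
}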
*/ + if (hdrflags & L2TP_HDRFLAG_O) { + offset = ntohs(*(__be16 *)ptr); + ptr += 2 + offset; + } + } else + ptr += session->offset; offset = ptr - optr; if (!pskb_may_pull(skb, offset)) @@ -564,8 +627,8 @@ int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, if (L2TP_SKB_CB(skb)->ns != session->nr) { session->stats.rx_seq_discards++; PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG, - "%s: oos pkt %hu len %d discarded, " - "waiting for %hu, reorder_q_len=%d\n", + "%s: oos pkt %u len %d discarded, " + "waiting for %u, reorder_q_len=%d\n", session->name, L2TP_SKB_CB(skb)->ns, L2TP_SKB_CB(skb)->length, session->nr, skb_queue_len(&session->reorder_q)); @@ -586,7 +649,7 @@ int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, l2tp_session_dec_refcount(session); - return 0; + return; discard: session->stats.rx_errors++; @@ -596,6 +659,111 @@ discard: (*session->deref)(session); l2tp_session_dec_refcount(session); +} +EXPORT_SYMBOL(l2tp_recv_common); + +/* Internal UDP receive frame. Do the real work of receiving an L2TP data frame + * here. The skb is not on a list when we get here. + * Returns 0 if the packet was a data packet and was successfully passed on. + * Returns 1 if the packet was not a good data packet and could not be + * forwarded. All such packets are passed up to userspace to deal with. + */ +int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, + int (*payload_hook)(struct sk_buff *skb)) +{ + struct l2tp_session *session = NULL; + unsigned char *ptr, *optr; + u16 hdrflags; + u32 tunnel_id, session_id; + int offset; + u16 version; + int length; + + if (tunnel->sock && l2tp_verify_udp_checksum(tunnel->sock, skb)) + goto discard_bad_csum; + + /* UDP always verifies the packet length. */ + __skb_pull(skb, sizeof(struct udphdr)); + + /* Short packet? */ + if (!pskb_may_pull(skb, L2TP_HDR_SIZE_SEQ)) { + PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO, + "%s: recv short packet (len=%d)\n", tunnel->name, skb->len); + goto error; + } + + /* Point to L2TP header */ + optr = ptr = skb->data; + + /* Trace packet contents, if enabled */ + if (tunnel->debug & L2TP_MSG_DATA) { + length = min(32u, skb->len); + if (!pskb_may_pull(skb, length)) + goto error; + + printk(KERN_DEBUG "%s: recv: ", tunnel->name); + + offset = 0; + do { + printk(" %02X", ptr[offset]); + } while (++offset < length); + + printk("\n"); + } + + /* Get L2TP header flags */ + hdrflags = ntohs(*(__be16 *) ptr); + + /* Check protocol version */ + version = hdrflags & L2TP_HDR_VER_MASK; + if (version != tunnel->version) { + PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO, + "%s: recv protocol version mismatch: got %d expected %d\n", + tunnel->name, version, tunnel->version); + goto error; + } + + /* Get length of L2TP packet */ + length = skb->len; + + /* If type is control packet, it is handled by userspace. 
*/ + if (hdrflags & L2TP_HDRFLAG_T) { + PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_DEBUG, + "%s: recv control packet, len=%d\n", tunnel->name, length); + goto error; + } + + /* Skip flags */ + ptr += 2; + + if (tunnel->version == L2TP_HDR_VER_2) { + /* If length is present, skip it */ + if (hdrflags & L2TP_HDRFLAG_L) + ptr += 2; + + /* Extract tunnel and session ID */ + tunnel_id = ntohs(*(__be16 *) ptr); + ptr += 2; + session_id = ntohs(*(__be16 *) ptr); + ptr += 2; + } else { + ptr += 2; /* skip reserved bits */ + tunnel_id = tunnel->tunnel_id; + session_id = ntohl(*(__be32 *) ptr); + ptr += 4; + } + + /* Find the session context */ + session = l2tp_session_find(tunnel->l2tp_net, tunnel, session_id); + if (!session) { + /* Not found? Pass to userspace to deal with */ + PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO, + "%s: no session found (%u/%u). Passing up.\n", + tunnel->name, tunnel_id, session_id); + goto error; + } + + l2tp_recv_common(session, skb, ptr, optr, hdrflags, length, payload_hook); return 0; @@ -651,11 +819,11 @@ EXPORT_SYMBOL_GPL(l2tp_udp_encap_recv); /* Build an L2TP header for the session into the buffer provided. */ -static void l2tp_build_l2tpv2_header(struct l2tp_tunnel *tunnel, - struct l2tp_session *session, - void *buf) +static int l2tp_build_l2tpv2_header(struct l2tp_session *session, void *buf) { + struct l2tp_tunnel *tunnel = session->tunnel; __be16 *bufp = buf; + __be16 *optr = buf; u16 flags = L2TP_HDR_VER_2; u32 tunnel_id = tunnel->peer_tunnel_id; u32 session_id = session->peer_session_id; @@ -671,19 +839,51 @@ static void l2tp_build_l2tpv2_header(struct l2tp_tunnel *tunnel, *bufp++ = htons(session->ns); *bufp++ = 0; session->ns++; + session->ns &= 0xffff; PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG, - "%s: updated ns to %hu\n", session->name, session->ns); + "%s: updated ns to %u\n", session->name, session->ns); } + + return bufp - optr; } -void l2tp_build_l2tp_header(struct l2tp_session *session, void *buf) +static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf) { - struct l2tp_tunnel *tunnel = session->tunnel; + char *bufp = buf; + char *optr = bufp; + u16 flags = L2TP_HDR_VER_3; + + /* Setup L2TP header. 
*/ + *((__be16 *) bufp) = htons(flags); + bufp += 2; + *((__be16 *) bufp) = 0; + bufp += 2; + *((__be32 *) bufp) = htonl(session->peer_session_id); + bufp += 4; + if (session->cookie_len) { + memcpy(bufp, &session->cookie[0], session->cookie_len); + bufp += session->cookie_len; + } + if (session->l2specific_len) { + if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) { + u32 l2h = 0; + if (session->send_seq) { + l2h = 0x40000000 | session->ns; + session->ns++; + session->ns &= 0xffffff; + PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG, + "%s: updated ns to %u\n", session->name, session->ns); + } + + *((__be32 *) bufp) = htonl(l2h); + } + bufp += session->l2specific_len; + } + if (session->offset) + bufp += session->offset; - BUG_ON(tunnel->version != L2TP_HDR_VER_2); - l2tp_build_l2tpv2_header(tunnel, session, buf); + return bufp - optr; } -EXPORT_SYMBOL_GPL(l2tp_build_l2tp_header); int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t data_len) { @@ -694,7 +894,7 @@ int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t dat /* Debug */ if (session->send_seq) PRINTK(session->debug, L2TP_MSG_DATA, KERN_DEBUG, - "%s: send %Zd bytes, ns=%hu\n", session->name, + "%s: send %Zd bytes, ns=%u\n", session->name, data_len, session->ns - 1); else PRINTK(session->debug, L2TP_MSG_DATA, KERN_DEBUG, @@ -780,7 +980,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len skb->truesize += new_headroom - old_headroom; /* Setup L2TP header */ - l2tp_build_l2tp_header(session, __skb_push(skb, hdr_len)); + session->build_header(session, __skb_push(skb, hdr_len)); udp_len = sizeof(struct udphdr) + hdr_len + data_len; /* Setup UDP header */ @@ -791,7 +991,6 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len uh->source = inet->inet_sport; uh->dest = inet->inet_dport; uh->len = htons(udp_len); - uh->check = 0; memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); @@ -911,6 +1110,14 @@ again: write_unlock_bh(&tunnel->hlist_lock); + if (tunnel->version != L2TP_HDR_VER_2) { + struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); + + write_lock_bh(&pn->l2tp_session_hlist_lock); + hlist_del_init(&session->global_hlist); + write_unlock_bh(&pn->l2tp_session_hlist_lock); + } + if (session->session_close != NULL) (*session->session_close)(session); @@ -997,9 +1204,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 goto err; } - if (version != L2TP_HDR_VER_2) - goto err; - tunnel = kzalloc(sizeof(struct l2tp_tunnel), GFP_KERNEL); if (tunnel == NULL) { err = -ENOMEM; @@ -1077,6 +1281,15 @@ void l2tp_session_free(struct l2tp_session *session) hlist_del_init(&session->hlist); write_unlock_bh(&tunnel->hlist_lock); + /* Unlink from the global hash if not L2TPv2 */ + if (tunnel->version != L2TP_HDR_VER_2) { + struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); + + write_lock_bh(&pn->l2tp_session_hlist_lock); + hlist_del_init(&session->global_hlist); + write_unlock_bh(&pn->l2tp_session_hlist_lock); + } + if (session->session_id != 0) atomic_dec(&l2tp_session_count); @@ -1095,6 +1308,22 @@ void l2tp_session_free(struct l2tp_session *session) } EXPORT_SYMBOL_GPL(l2tp_session_free); +/* We come here whenever a session's send_seq, cookie_len or + * l2specific_len parameters are set. 
+ */ +void l2tp_session_set_header_len(struct l2tp_session *session, int version) +{ + if (version == L2TP_HDR_VER_2) { + session->hdr_len = 6; + if (session->send_seq) + session->hdr_len += 4; + } else { + session->hdr_len = 8 + session->cookie_len + session->l2specific_len + session->offset; + } + +} +EXPORT_SYMBOL_GPL(l2tp_session_set_header_len); + struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) { struct l2tp_session *session; @@ -1106,6 +1335,7 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn session->session_id = session_id; session->peer_session_id = peer_session_id; + session->nr = 1; sprintf(&session->name[0], "sess %u/%u", tunnel->tunnel_id, session->session_id); @@ -1113,20 +1343,36 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn skb_queue_head_init(&session->reorder_q); INIT_HLIST_NODE(&session->hlist); + INIT_HLIST_NODE(&session->global_hlist); /* Inherit debug options from tunnel */ session->debug = tunnel->debug; if (cfg) { + session->pwtype = cfg->pw_type; session->debug = cfg->debug; - session->hdr_len = cfg->hdr_len; session->mtu = cfg->mtu; session->mru = cfg->mru; session->send_seq = cfg->send_seq; session->recv_seq = cfg->recv_seq; session->lns_mode = cfg->lns_mode; + session->reorder_timeout = cfg->reorder_timeout; + session->offset = cfg->offset; + session->l2specific_type = cfg->l2specific_type; + session->l2specific_len = cfg->l2specific_len; + session->cookie_len = cfg->cookie_len; + memcpy(&session->cookie[0], &cfg->cookie[0], cfg->cookie_len); + session->peer_cookie_len = cfg->peer_cookie_len; + memcpy(&session->peer_cookie[0], &cfg->peer_cookie[0], cfg->peer_cookie_len); } + if (tunnel->version == L2TP_HDR_VER_2) + session->build_header = l2tp_build_l2tpv2_header; + else + session->build_header = l2tp_build_l2tpv3_header; + + l2tp_session_set_header_len(session, tunnel->version); + /* Bump the reference count. The session context is deleted * only when this drops to zero. 
*/ @@ -1142,6 +1388,16 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn l2tp_session_id_hash(tunnel, session_id)); write_unlock_bh(&tunnel->hlist_lock); + /* And to the global session list if L2TPv3 */ + if (tunnel->version != L2TP_HDR_VER_2) { + struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); + + write_lock_bh(&pn->l2tp_session_hlist_lock); + hlist_add_head(&session->global_hlist, + l2tp_session_id_hash_2(pn, session_id)); + write_unlock_bh(&pn->l2tp_session_hlist_lock); + } + /* Ignore management session in session count value */ if (session->session_id != 0) atomic_inc(&l2tp_session_count); @@ -1159,6 +1415,7 @@ static __net_init int l2tp_init_net(struct net *net) { struct l2tp_net *pn; int err; + int hash; pn = kzalloc(sizeof(*pn), GFP_KERNEL); if (!pn) @@ -1167,6 +1424,11 @@ static __net_init int l2tp_init_net(struct net *net) INIT_LIST_HEAD(&pn->l2tp_tunnel_list); rwlock_init(&pn->l2tp_tunnel_list_lock); + for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) + INIT_HLIST_HEAD(&pn->l2tp_session_hlist[hash]); + + rwlock_init(&pn->l2tp_session_hlist_lock); + err = net_assign_generic(net, l2tp_net_id, pn); if (err) goto out; diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 2efe1a3ada98..5c53eb2a8ad9 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -15,9 +15,14 @@ #define L2TP_TUNNEL_MAGIC 0x42114DDA #define L2TP_SESSION_MAGIC 0x0C04EB7D +/* Per tunnel, session hash table size */ #define L2TP_HASH_BITS 4 #define L2TP_HASH_SIZE (1 << L2TP_HASH_BITS) +/* System-wide, session hash table size */ +#define L2TP_HASH_BITS_2 8 +#define L2TP_HASH_SIZE_2 (1 << L2TP_HASH_BITS_2) + /* Debug message categories for the DEBUG socket option */ enum { L2TP_MSG_DEBUG = (1 << 0), /* verbose debug (if @@ -28,6 +33,21 @@ enum { L2TP_MSG_DATA = (1 << 3), /* data packets */ }; +enum l2tp_pwtype { + L2TP_PWTYPE_NONE = 0x0000, + L2TP_PWTYPE_ETH_VLAN = 0x0004, + L2TP_PWTYPE_ETH = 0x0005, + L2TP_PWTYPE_PPP = 0x0007, + L2TP_PWTYPE_PPP_AC = 0x0008, + L2TP_PWTYPE_IP = 0x000b, + __L2TP_PWTYPE_MAX +}; + +enum l2tp_l2spec_type { + L2TP_L2SPECTYPE_NONE, + L2TP_L2SPECTYPE_DEFAULT, +}; + struct sk_buff; struct l2tp_stats { @@ -39,6 +59,7 @@ struct l2tp_stats { u64 rx_seq_discards; u64 rx_oos_packets; u64 rx_errors; + u64 rx_cookie_discards; }; struct l2tp_tunnel; @@ -47,6 +68,7 @@ struct l2tp_tunnel; * packets and transmit outgoing ones. */ struct l2tp_session_cfg { + enum l2tp_pwtype pw_type; unsigned data_seq:2; /* data sequencing level * 0 => none, 1 => IP only, * 2 => all @@ -60,12 +82,17 @@ struct l2tp_session_cfg { * control of LNS. 
*/ int debug; /* bitmask of debug message * categories */ - int offset; /* offset to payload */ + u16 offset; /* offset to payload */ + u16 l2specific_len; /* Layer 2 specific length */ + u16 l2specific_type; /* Layer 2 specific type */ + u8 cookie[8]; /* optional cookie */ + int cookie_len; /* 0, 4 or 8 bytes */ + u8 peer_cookie[8]; /* peer's cookie */ + int peer_cookie_len; /* 0, 4 or 8 bytes */ int reorder_timeout; /* configured reorder timeout * (in jiffies) */ int mtu; int mru; - int hdr_len; }; struct l2tp_session { @@ -76,8 +103,17 @@ struct l2tp_session { * context */ u32 session_id; u32 peer_session_id; - u16 nr; /* session NR state (receive) */ - u16 ns; /* session NR state (send) */ + u8 cookie[8]; + int cookie_len; + u8 peer_cookie[8]; + int peer_cookie_len; + u16 offset; /* offset from end of L2TP header + to beginning of data */ + u16 l2specific_len; + u16 l2specific_type; + u16 hdr_len; + u32 nr; /* session NR state (receive) */ + u32 ns; /* session NR state (send) */ struct sk_buff_head reorder_q; /* receive reorder queue */ struct hlist_node hlist; /* Hash list node */ atomic_t ref_count; @@ -100,9 +136,11 @@ struct l2tp_session { * (in jiffies) */ int mtu; int mru; - int hdr_len; + enum l2tp_pwtype pwtype; struct l2tp_stats stats; + struct hlist_node global_hlist; /* Global hash list node */ + int (*build_header)(struct l2tp_session *session, void *buf); void (*recv_skb)(struct l2tp_session *session, struct sk_buff *skb, int data_len); void (*session_close)(struct l2tp_session *session); void (*ref)(struct l2tp_session *session); @@ -132,7 +170,6 @@ struct l2tp_tunnel { char name[20]; /* for logging */ int debug; /* bitmask of debug message * categories */ - int hdr_len; struct l2tp_stats stats; struct list_head list; /* Keep a list of all tunnels */ @@ -178,7 +215,7 @@ out: return tunnel; } -extern struct l2tp_session *l2tp_session_find(struct l2tp_tunnel *tunnel, u32 session_id); +extern struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id); extern struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth); extern struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id); extern struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth); @@ -187,14 +224,15 @@ extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_i extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); extern void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); extern void l2tp_session_free(struct l2tp_session *session); +extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb)); extern int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, int (*payload_hook)(struct sk_buff *skb)); extern int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb); -extern void l2tp_build_l2tp_header(struct l2tp_session *session, void *buf); extern int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t data_len); extern int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len); extern void l2tp_tunnel_destruct(struct sock *sk); extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); +extern void l2tp_session_set_header_len(struct l2tp_session *session, int version); /* Tunnel reference counts. 
Incremented per session that is added to * the tunnel. diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 3ad290dd830a..bee5b1413ec0 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -670,7 +670,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, /* Check that this session doesn't already exist */ error = -EEXIST; - session = l2tp_session_find(tunnel, sp->pppol2tp.s_session); + session = l2tp_session_find(sock_net(sk), tunnel, sp->pppol2tp.s_session); if (session != NULL) goto end; @@ -678,7 +678,6 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, * headers. */ cfg.mtu = cfg.mru = 1500 - PPPOL2TP_HEADER_OVERHEAD; - cfg.hdr_len = PPPOL2TP_L2TP_HDR_SIZE_NOSEQ; cfg.debug = tunnel->debug; /* Allocate and initialize a new session context. */ @@ -999,7 +998,7 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel, if (stats.session_id != 0) { /* resend to session ioctl handler */ struct l2tp_session *session = - l2tp_session_find(tunnel, stats.session_id); + l2tp_session_find(sock_net(sk), tunnel, stats.session_id); if (session != NULL) err = pppol2tp_session_ioctl(session, cmd, arg); else @@ -1375,6 +1374,8 @@ end: /***************************************************************************** * /proc filesystem for debug + * Since the original pppol2tp driver provided /proc/net/pppol2tp for + * L2TPv2, we dump only L2TPv2 tunnels and sessions here. *****************************************************************************/ static unsigned int pppol2tp_net_id; @@ -1391,14 +1392,24 @@ struct pppol2tp_seq_data { static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd) { - pd->tunnel = l2tp_tunnel_find_nth(net, pd->tunnel_idx); - pd->tunnel_idx++; + for (;;) { + pd->tunnel = l2tp_tunnel_find_nth(net, pd->tunnel_idx); + pd->tunnel_idx++; + + if (pd->tunnel == NULL) + break; + + /* Ignore L2TPv3 tunnels */ + if (pd->tunnel->version < 3) + break; + } } static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd) { pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx); pd->session_idx++; + if (pd->session == NULL) { pd->session_idx = 0; pppol2tp_next_tunnel(net, pd); -- cgit v1.2.2 From e0d4435f93905f517003cfa7328a36ea19788147 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Fri, 2 Apr 2010 06:18:54 +0000 Subject: l2tp: Update PPP-over-L2TP driver to work over L2TPv3 This patch makes changes to the L2TP PPP code for L2TPv3. The existing code has some assumptions about the L2TP header which are broken by L2TPv3. Also the sockaddr_pppol2tp structure of the original code is too small to support the increased size of the L2TPv3 tunnel and session id, so a new sockaddr_pppol2tpv3 structure is needed. In the socket calls, the size of this structure is used to tell if the operation is for L2TPv2 or L2TPv3. Signed-off-by: James Chapman Reviewed-by: Randy Dunlap Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 120 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 74 insertions(+), 46 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index bee5b1413ec0..e5b531266541 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -291,17 +291,6 @@ static void pppol2tp_session_sock_put(struct l2tp_session *session) * Transmit handling ***********************************************************************/ -/* Tell how big L2TP headers are for a particular session. 
This - * depends on whether sequence numbers are being used. - */ -static inline int pppol2tp_l2tp_header_len(struct l2tp_session *session) -{ - if (session->send_seq) - return PPPOL2TP_L2TP_HDR_SIZE_SEQ; - - return PPPOL2TP_L2TP_HDR_SIZE_NOSEQ; -} - /* This is the sendmsg for the PPPoL2TP pppol2tp_session socket. We come here * when a user application does a sendmsg() on the session socket. L2TP and * PPP headers must be inserted into the user's data. @@ -394,7 +383,6 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) static const u8 ppph[2] = { 0xff, 0x03 }; struct sock *sk = (struct sock *) chan->private; struct sock *sk_tun; - int hdr_len; struct l2tp_session *session; struct l2tp_tunnel *tunnel; struct pppol2tp_session *ps; @@ -417,9 +405,6 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) if (tunnel == NULL) goto abort_put_sess; - /* What header length is configured for this session? */ - hdr_len = pppol2tp_l2tp_header_len(session); - old_headroom = skb_headroom(skb); if (skb_cow_head(skb, sizeof(ppph))) goto abort_put_sess_tun; @@ -432,7 +417,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) skb->data[0] = ppph[0]; skb->data[1] = ppph[1]; - l2tp_xmit_skb(session, skb, hdr_len); + l2tp_xmit_skb(session, skb, session->hdr_len); sock_put(sk_tun); sock_put(sk); @@ -615,6 +600,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, { struct sock *sk = sock->sk; struct sockaddr_pppol2tp *sp = (struct sockaddr_pppol2tp *) uservaddr; + struct sockaddr_pppol2tpv3 *sp3 = (struct sockaddr_pppol2tpv3 *) uservaddr; struct pppox_sock *po = pppox_sk(sk); struct l2tp_session *session = NULL; struct l2tp_tunnel *tunnel; @@ -622,6 +608,10 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, struct dst_entry *dst; struct l2tp_session_cfg cfg = { 0, }; int error = 0; + u32 tunnel_id, peer_tunnel_id; + u32 session_id, peer_session_id; + int ver = 2; + int fd; lock_sock(sk); @@ -639,21 +629,40 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, if (sk->sk_user_data) goto end; /* socket is already attached */ - /* Don't bind if s_tunnel is 0 */ + /* Get params from socket address. Handle L2TPv2 and L2TPv3 */ + if (sockaddr_len == sizeof(struct sockaddr_pppol2tp)) { + fd = sp->pppol2tp.fd; + tunnel_id = sp->pppol2tp.s_tunnel; + peer_tunnel_id = sp->pppol2tp.d_tunnel; + session_id = sp->pppol2tp.s_session; + peer_session_id = sp->pppol2tp.d_session; + } else if (sockaddr_len == sizeof(struct sockaddr_pppol2tpv3)) { + ver = 3; + fd = sp3->pppol2tp.fd; + tunnel_id = sp3->pppol2tp.s_tunnel; + peer_tunnel_id = sp3->pppol2tp.d_tunnel; + session_id = sp3->pppol2tp.s_session; + peer_session_id = sp3->pppol2tp.d_session; + } else { + error = -EINVAL; + goto end; /* bad socket address */ + } + + /* Don't bind if tunnel_id is 0 */ error = -EINVAL; - if (sp->pppol2tp.s_tunnel == 0) + if (tunnel_id == 0) goto end; - /* Special case: create tunnel context if s_session and - * d_session is 0. Otherwise look up tunnel using supplied + /* Special case: create tunnel context if session_id and + * peer_session_id is 0. Otherwise look up tunnel using supplied * tunnel id. 
*/ - if ((sp->pppol2tp.s_session == 0) && (sp->pppol2tp.d_session == 0)) { - error = l2tp_tunnel_create(sock_net(sk), sp->pppol2tp.fd, 2, sp->pppol2tp.s_tunnel, sp->pppol2tp.d_tunnel, NULL, &tunnel); + if ((session_id == 0) && (peer_session_id == 0)) { + error = l2tp_tunnel_create(sock_net(sk), fd, ver, tunnel_id, peer_tunnel_id, NULL, &tunnel); if (error < 0) goto end; } else { - tunnel = l2tp_tunnel_find(sock_net(sk), sp->pppol2tp.s_tunnel); + tunnel = l2tp_tunnel_find(sock_net(sk), tunnel_id); /* Error if we can't find the tunnel */ error = -ENOENT; @@ -670,20 +679,21 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, /* Check that this session doesn't already exist */ error = -EEXIST; - session = l2tp_session_find(sock_net(sk), tunnel, sp->pppol2tp.s_session); + session = l2tp_session_find(sock_net(sk), tunnel, session_id); if (session != NULL) goto end; - /* Default MTU must allow space for UDP/L2TP/PPP - * headers. - */ - cfg.mtu = cfg.mru = 1500 - PPPOL2TP_HEADER_OVERHEAD; + /* Default MTU values. */ + if (cfg.mtu == 0) + cfg.mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD; + if (cfg.mru == 0) + cfg.mru = cfg.mtu; cfg.debug = tunnel->debug; /* Allocate and initialize a new session context. */ session = l2tp_session_create(sizeof(struct pppol2tp_session), - tunnel, sp->pppol2tp.s_session, - sp->pppol2tp.d_session, &cfg); + tunnel, session_id, + peer_session_id, &cfg); if (session == NULL) { error = -ENOMEM; goto end; @@ -756,8 +766,7 @@ end: static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, int *usockaddr_len, int peer) { - int len = sizeof(struct sockaddr_pppol2tp); - struct sockaddr_pppol2tp sp; + int len = 0; int error = 0; struct l2tp_session *session; struct l2tp_tunnel *tunnel; @@ -783,21 +792,40 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, goto end_put_sess; } - memset(&sp, 0, len); - sp.sa_family = AF_PPPOX; - sp.sa_protocol = PX_PROTO_OL2TP; - sp.pppol2tp.fd = tunnel->fd; - sp.pppol2tp.pid = pls->owner; - sp.pppol2tp.s_tunnel = tunnel->tunnel_id; - sp.pppol2tp.d_tunnel = tunnel->peer_tunnel_id; - sp.pppol2tp.s_session = session->session_id; - sp.pppol2tp.d_session = session->peer_session_id; inet = inet_sk(sk); - sp.pppol2tp.addr.sin_family = AF_INET; - sp.pppol2tp.addr.sin_port = inet->inet_dport; - sp.pppol2tp.addr.sin_addr.s_addr = inet->inet_daddr; - - memcpy(uaddr, &sp, len); + if (tunnel->version == 2) { + struct sockaddr_pppol2tp sp; + len = sizeof(sp); + memset(&sp, 0, len); + sp.sa_family = AF_PPPOX; + sp.sa_protocol = PX_PROTO_OL2TP; + sp.pppol2tp.fd = tunnel->fd; + sp.pppol2tp.pid = pls->owner; + sp.pppol2tp.s_tunnel = tunnel->tunnel_id; + sp.pppol2tp.d_tunnel = tunnel->peer_tunnel_id; + sp.pppol2tp.s_session = session->session_id; + sp.pppol2tp.d_session = session->peer_session_id; + sp.pppol2tp.addr.sin_family = AF_INET; + sp.pppol2tp.addr.sin_port = inet->inet_dport; + sp.pppol2tp.addr.sin_addr.s_addr = inet->inet_daddr; + memcpy(uaddr, &sp, len); + } else if (tunnel->version == 3) { + struct sockaddr_pppol2tpv3 sp; + len = sizeof(sp); + memset(&sp, 0, len); + sp.sa_family = AF_PPPOX; + sp.sa_protocol = PX_PROTO_OL2TP; + sp.pppol2tp.fd = tunnel->fd; + sp.pppol2tp.pid = pls->owner; + sp.pppol2tp.s_tunnel = tunnel->tunnel_id; + sp.pppol2tp.d_tunnel = tunnel->peer_tunnel_id; + sp.pppol2tp.s_session = session->session_id; + sp.pppol2tp.d_session = session->peer_session_id; + sp.pppol2tp.addr.sin_family = AF_INET; + sp.pppol2tp.addr.sin_port = inet->inet_dport; + 
sp.pppol2tp.addr.sin_addr.s_addr = inet->inet_daddr; + memcpy(uaddr, &sp, len); + } *usockaddr_len = len; -- cgit v1.2.2 From 0d76751fad7739014485ba5bd388d4f1b4fd4143 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Fri, 2 Apr 2010 06:19:00 +0000 Subject: l2tp: Add L2TPv3 IP encapsulation (no UDP) support This patch adds a new L2TPIP socket family and modifies the core to handle the case where there is no UDP header in the L2TP packet. L2TP/IP uses IP protocol 115. Since L2TP/UDP and L2TP/IP packets differ in layout, the datapath packet handling code needs changes too. Userspace uses an L2TPIP socket instead of a UDP socket when IP encapsulation is required. We can't use raw sockets for this because the semantics of raw sockets don't lend themselves to the socket-per-tunnel model - we need to Signed-off-by: David S. Miller --- net/l2tp/Kconfig | 17 ++ net/l2tp/Makefile | 1 + net/l2tp/l2tp_core.c | 163 ++++++++----- net/l2tp/l2tp_core.h | 7 + net/l2tp/l2tp_ip.c | 679 +++++++++++++++++++++++++++++++++++++++++++++++++++ net/l2tp/l2tp_ppp.c | 7 +- 6 files changed, 812 insertions(+), 62 deletions(-) create mode 100644 net/l2tp/l2tp_ip.c (limited to 'net') diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig index d60758d60478..0a11ccf2d95b 100644 --- a/net/l2tp/Kconfig +++ b/net/l2tp/Kconfig @@ -51,3 +51,20 @@ config L2TP_V3 If you are connecting to L2TPv3 equipment, or you want to tunnel raw ethernet frames using L2TP, say Y here. If unsure, say N. + +config L2TP_IP + tristate "L2TP IP encapsulation for L2TPv3" + depends on L2TP_V3 + help + Support for L2TP-over-IP socket family. + + The L2TPv3 protocol defines two possible encapsulations for + L2TP frames, namely UDP and plain IP (without UDP). This + driver provides a new L2TPIP socket family with which + userspace L2TPv3 daemons may create L2TP/IP tunnel sockets + when UDP encapsulation is not required. When L2TP is carried + in IP packets, it used IP protocol number 115, so this port + must be enabled in firewalls. + + To compile this driver as a module, choose M here. The module + will be called l2tp_ip. diff --git a/net/l2tp/Makefile b/net/l2tp/Makefile index c91f208b1693..ef28b16f7d6a 100644 --- a/net/l2tp/Makefile +++ b/net/l2tp/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_L2TP) += l2tp_core.o # Build l2tp as modules if L2TP is M obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_PPPOL2TP)) += l2tp_ppp.o +obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_IP)) += l2tp_ip.o diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 0eee1a65f1b1..1739d04367e4 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -36,8 +36,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -48,6 +50,7 @@ #include #include #include +#include #include #include @@ -849,15 +852,21 @@ static int l2tp_build_l2tpv2_header(struct l2tp_session *session, void *buf) static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf) { + struct l2tp_tunnel *tunnel = session->tunnel; char *bufp = buf; char *optr = bufp; - u16 flags = L2TP_HDR_VER_3; - /* Setup L2TP header. */ - *((__be16 *) bufp) = htons(flags); - bufp += 2; - *((__be16 *) bufp) = 0; - bufp += 2; + /* Setup L2TP header. The header differs slightly for UDP and + * IP encapsulations. For UDP, there is 4 bytes of flags. 
+ */ + if (tunnel->encap == L2TP_ENCAPTYPE_UDP) { + u16 flags = L2TP_HDR_VER_3; + *((__be16 *) bufp) = htons(flags); + bufp += 2; + *((__be16 *) bufp) = 0; + bufp += 2; + } + *((__be32 *) bufp) = htonl(session->peer_session_id); bufp += 4; if (session->cookie_len) { @@ -902,10 +911,11 @@ int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t dat if (session->debug & L2TP_MSG_DATA) { int i; - unsigned char *datap = skb->data + sizeof(struct udphdr); + int uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0; + unsigned char *datap = skb->data + uhlen; printk(KERN_DEBUG "%s: xmit:", session->name); - for (i = 0; i < (len - sizeof(struct udphdr)); i++) { + for (i = 0; i < (len - uhlen); i++) { printk(" %02X", *datap++); if (i == 31) { printk(" ..."); @@ -956,21 +966,23 @@ static inline void l2tp_skb_set_owner_w(struct sk_buff *skb, struct sock *sk) int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len) { int data_len = skb->len; - struct sock *sk = session->tunnel->sock; + struct l2tp_tunnel *tunnel = session->tunnel; + struct sock *sk = tunnel->sock; struct udphdr *uh; - unsigned int udp_len; struct inet_sock *inet; __wsum csum; int old_headroom; int new_headroom; int headroom; + int uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0; + int udp_len; /* Check that there's enough headroom in the skb to insert IP, * UDP and L2TP headers. If not enough, expand it to * make room. Adjust truesize. */ headroom = NET_SKB_PAD + sizeof(struct iphdr) + - sizeof(struct udphdr) + hdr_len; + uhlen + hdr_len; old_headroom = skb_headroom(skb); if (skb_cow_head(skb, headroom)) goto abort; @@ -981,18 +993,8 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len /* Setup L2TP header */ session->build_header(session, __skb_push(skb, hdr_len)); - udp_len = sizeof(struct udphdr) + hdr_len + data_len; - - /* Setup UDP header */ - inet = inet_sk(sk); - __skb_push(skb, sizeof(*uh)); - skb_reset_transport_header(skb); - uh = udp_hdr(skb); - uh->source = inet->inet_sport; - uh->dest = inet->inet_dport; - uh->len = htons(udp_len); - uh->check = 0; + /* Reset skb netfilter state */ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); @@ -1001,29 +1003,48 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len /* Get routing info from the tunnel socket */ skb_dst_drop(skb); skb_dst_set(skb, dst_clone(__sk_dst_get(sk))); - l2tp_skb_set_owner_w(skb, sk); - /* Calculate UDP checksum if configured to do so */ - if (sk->sk_no_check == UDP_CSUM_NOXMIT) - skb->ip_summed = CHECKSUM_NONE; - else if ((skb_dst(skb) && skb_dst(skb)->dev) && - (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) { - skb->ip_summed = CHECKSUM_COMPLETE; - csum = skb_checksum(skb, 0, udp_len, 0); - uh->check = csum_tcpudp_magic(inet->inet_saddr, - inet->inet_daddr, - udp_len, IPPROTO_UDP, csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - } else { - skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - uh->check = ~csum_tcpudp_magic(inet->inet_saddr, - inet->inet_daddr, - udp_len, IPPROTO_UDP, 0); + switch (tunnel->encap) { + case L2TP_ENCAPTYPE_UDP: + /* Setup UDP header */ + inet = inet_sk(sk); + __skb_push(skb, sizeof(*uh)); + skb_reset_transport_header(skb); + uh = udp_hdr(skb); + uh->source = inet->inet_sport; 
+ uh->dest = inet->inet_dport; + udp_len = uhlen + hdr_len + data_len; + uh->len = htons(udp_len); + uh->check = 0; + + /* Calculate UDP checksum if configured to do so */ + if (sk->sk_no_check == UDP_CSUM_NOXMIT) + skb->ip_summed = CHECKSUM_NONE; + else if ((skb_dst(skb) && skb_dst(skb)->dev) && + (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) { + skb->ip_summed = CHECKSUM_COMPLETE; + csum = skb_checksum(skb, 0, udp_len, 0); + uh->check = csum_tcpudp_magic(inet->inet_saddr, + inet->inet_daddr, + udp_len, IPPROTO_UDP, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + } else { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + uh->check = ~csum_tcpudp_magic(inet->inet_saddr, + inet->inet_daddr, + udp_len, IPPROTO_UDP, 0); + } + break; + + case L2TP_ENCAPTYPE_IP: + break; } + l2tp_skb_set_owner_w(skb, sk); + l2tp_xmit_core(session, skb, data_len); abort: @@ -1053,9 +1074,15 @@ void l2tp_tunnel_destruct(struct sock *sk) /* Close all sessions */ l2tp_tunnel_closeall(tunnel); - /* No longer an encapsulation socket. See net/ipv4/udp.c */ - (udp_sk(sk))->encap_type = 0; - (udp_sk(sk))->encap_rcv = NULL; + switch (tunnel->encap) { + case L2TP_ENCAPTYPE_UDP: + /* No longer an encapsulation socket. See net/ipv4/udp.c */ + (udp_sk(sk))->encap_type = 0; + (udp_sk(sk))->encap_rcv = NULL; + break; + case L2TP_ENCAPTYPE_IP: + break; + } /* Remove hooks into tunnel socket */ tunnel->sock = NULL; @@ -1168,6 +1195,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 struct socket *sock = NULL; struct sock *sk = NULL; struct l2tp_net *pn; + enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP; /* Get the tunnel socket from the fd, which was opened by * the userspace L2TP daemon. @@ -1182,18 +1210,27 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 sk = sock->sk; + if (cfg != NULL) + encap = cfg->encap; + /* Quick sanity checks */ - err = -EPROTONOSUPPORT; - if (sk->sk_protocol != IPPROTO_UDP) { - printk(KERN_ERR "tunl %hu: fd %d wrong protocol, got %d, expected %d\n", - tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP); - goto err; - } - err = -EAFNOSUPPORT; - if (sock->ops->family != AF_INET) { - printk(KERN_ERR "tunl %hu: fd %d wrong family, got %d, expected %d\n", - tunnel_id, fd, sock->ops->family, AF_INET); - goto err; + switch (encap) { + case L2TP_ENCAPTYPE_UDP: + err = -EPROTONOSUPPORT; + if (sk->sk_protocol != IPPROTO_UDP) { + printk(KERN_ERR "tunl %hu: fd %d wrong protocol, got %d, expected %d\n", + tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP); + goto err; + } + break; + case L2TP_ENCAPTYPE_IP: + err = -EPROTONOSUPPORT; + if (sk->sk_protocol != IPPROTO_L2TP) { + printk(KERN_ERR "tunl %hu: fd %d wrong protocol, got %d, expected %d\n", + tunnel_id, fd, sk->sk_protocol, IPPROTO_L2TP); + goto err; + } + break; } /* Check if this socket has already been prepped */ @@ -1223,12 +1260,16 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 tunnel->l2tp_net = net; pn = l2tp_pernet(net); - if (cfg) + if (cfg != NULL) tunnel->debug = cfg->debug; /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ - udp_sk(sk)->encap_type = UDP_ENCAP_L2TPINUDP; - udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv; + tunnel->encap = encap; + if (encap == L2TP_ENCAPTYPE_UDP) { + /* Mark socket as an encapsulation socket. 
See net/ipv4/udp.c */ + udp_sk(sk)->encap_type = UDP_ENCAP_L2TPINUDP; + udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv; + } sk->sk_user_data = tunnel; @@ -1318,7 +1359,9 @@ void l2tp_session_set_header_len(struct l2tp_session *session, int version) if (session->send_seq) session->hdr_len += 4; } else { - session->hdr_len = 8 + session->cookie_len + session->l2specific_len + session->offset; + session->hdr_len = 4 + session->cookie_len + session->l2specific_len + session->offset; + if (session->tunnel->encap == L2TP_ENCAPTYPE_UDP) + session->hdr_len += 4; } } diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 5c53eb2a8ad9..d2395984645e 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -48,6 +48,11 @@ enum l2tp_l2spec_type { L2TP_L2SPECTYPE_DEFAULT, }; +enum l2tp_encap_type { + L2TP_ENCAPTYPE_UDP, + L2TP_ENCAPTYPE_IP, +}; + struct sk_buff; struct l2tp_stats { @@ -155,6 +160,7 @@ struct l2tp_session { struct l2tp_tunnel_cfg { int debug; /* bitmask of debug message * categories */ + enum l2tp_encap_type encap; }; struct l2tp_tunnel { @@ -170,6 +176,7 @@ struct l2tp_tunnel { char name[20]; /* for logging */ int debug; /* bitmask of debug message * categories */ + enum l2tp_encap_type encap; struct l2tp_stats stats; struct list_head list; /* Keep a list of all tunnels */ diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c new file mode 100644 index 000000000000..75bf784ba18d --- /dev/null +++ b/net/l2tp/l2tp_ip.c @@ -0,0 +1,679 @@ +/* + * L2TPv3 IP encapsulation support + * + * Copyright (c) 2008,2009,2010 Katalix Systems Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "l2tp_core.h" + +struct l2tp_ip_sock { + /* inet_sock has to be the first member of l2tp_ip_sock */ + struct inet_sock inet; + + __u32 conn_id; + __u32 peer_conn_id; + + __u64 tx_packets; + __u64 tx_bytes; + __u64 tx_errors; + __u64 rx_packets; + __u64 rx_bytes; + __u64 rx_errors; +}; + +static DEFINE_RWLOCK(l2tp_ip_lock); +static struct hlist_head l2tp_ip_table; +static struct hlist_head l2tp_ip_bind_table; + +static inline struct l2tp_ip_sock *l2tp_ip_sk(const struct sock *sk) +{ + return (struct l2tp_ip_sock *)sk; +} + +static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id) +{ + struct hlist_node *node; + struct sock *sk; + + sk_for_each_bound(sk, node, &l2tp_ip_bind_table) { + struct inet_sock *inet = inet_sk(sk); + struct l2tp_ip_sock *l2tp = l2tp_ip_sk(sk); + + if (l2tp == NULL) + continue; + + if ((l2tp->conn_id == tunnel_id) && +#ifdef CONFIG_NET_NS + (sk->sk_net == net) && +#endif + !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) && + !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) + goto found; + } + + sk = NULL; +found: + return sk; +} + +static inline struct sock *l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id) +{ + struct sock *sk = __l2tp_ip_bind_lookup(net, laddr, dif, tunnel_id); + if (sk) + sock_hold(sk); + + return sk; +} + +/* When processing receive frames, there are two cases to + * consider. Data frames consist of a non-zero session-id and an + * optional cookie. 
Control frames consist of a regular L2TP header + * preceded by 32-bits of zeros. + * + * L2TPv3 Session Header Over IP + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Session ID | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Cookie (optional, maximum 64 bits)... + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * L2TPv3 Control Message Header Over IP + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | (32 bits of zeros) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |T|L|x|x|S|x|x|x|x|x|x|x| Ver | Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Control Connection ID | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Ns | Nr | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * All control frames are passed to userspace. + */ +static int l2tp_ip_recv(struct sk_buff *skb) +{ + struct sock *sk; + u32 session_id; + u32 tunnel_id; + unsigned char *ptr, *optr; + struct l2tp_session *session; + struct l2tp_tunnel *tunnel = NULL; + int length; + int offset; + + /* Point to L2TP header */ + optr = ptr = skb->data; + + if (!pskb_may_pull(skb, 4)) + goto discard; + + session_id = ntohl(*((__be32 *) ptr)); + ptr += 4; + + /* RFC3931: L2TP/IP packets have the first 4 bytes containing + * the session_id. If it is 0, the packet is a L2TP control + * frame and the session_id value can be discarded. + */ + if (session_id == 0) { + __skb_pull(skb, 4); + goto pass_up; + } + + /* Ok, this is a data packet. Lookup the session. */ + session = l2tp_session_find(&init_net, NULL, session_id); + if (session == NULL) + goto discard; + + tunnel = session->tunnel; + if (tunnel == NULL) + goto discard; + + /* Trace packet contents, if enabled */ + if (tunnel->debug & L2TP_MSG_DATA) { + length = min(32u, skb->len); + if (!pskb_may_pull(skb, length)) + goto discard; + + printk(KERN_DEBUG "%s: ip recv: ", tunnel->name); + + offset = 0; + do { + printk(" %02X", ptr[offset]); + } while (++offset < length); + + printk("\n"); + } + + l2tp_recv_common(session, skb, ptr, optr, 0, skb->len, tunnel->recv_payload_hook); + + return 0; + +pass_up: + /* Get the tunnel_id from the L2TP header */ + if (!pskb_may_pull(skb, 12)) + goto discard; + + if ((skb->data[0] & 0xc0) != 0xc0) + goto discard; + + tunnel_id = ntohl(*(__be32 *) &skb->data[4]); + tunnel = l2tp_tunnel_find(&init_net, tunnel_id); + if (tunnel != NULL) + sk = tunnel->sock; + else { + struct iphdr *iph = (struct iphdr *) skb_network_header(skb); + + read_lock_bh(&l2tp_ip_lock); + sk = __l2tp_ip_bind_lookup(&init_net, iph->daddr, 0, tunnel_id); + read_unlock_bh(&l2tp_ip_lock); + } + + if (sk == NULL) + goto discard; + + sock_hold(sk); + + if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) + goto discard_put; + + nf_reset(skb); + + return sk_receive_skb(sk, skb, 1); + +discard_put: + sock_put(sk); + +discard: + kfree_skb(skb); + return 0; +} + +static int l2tp_ip_open(struct sock *sk) +{ + /* Prevent autobind. We don't have ports. 
*/ + inet_sk(sk)->inet_num = IPPROTO_L2TP; + + write_lock_bh(&l2tp_ip_lock); + sk_add_node(sk, &l2tp_ip_table); + write_unlock_bh(&l2tp_ip_lock); + + return 0; +} + +static void l2tp_ip_close(struct sock *sk, long timeout) +{ + write_lock_bh(&l2tp_ip_lock); + hlist_del_init(&sk->sk_bind_node); + hlist_del_init(&sk->sk_node); + write_unlock_bh(&l2tp_ip_lock); + sk_common_release(sk); +} + +static void l2tp_ip_destroy_sock(struct sock *sk) +{ + struct sk_buff *skb; + + while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) + kfree_skb(skb); + + sk_refcnt_debug_dec(sk); +} + +static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) +{ + struct inet_sock *inet = inet_sk(sk); + struct sockaddr_l2tpip *addr = (struct sockaddr_l2tpip *) uaddr; + int ret = -EINVAL; + int chk_addr_ret; + + ret = -EADDRINUSE; + read_lock_bh(&l2tp_ip_lock); + if (__l2tp_ip_bind_lookup(&init_net, addr->l2tp_addr.s_addr, sk->sk_bound_dev_if, addr->l2tp_conn_id)) + goto out_in_use; + + read_unlock_bh(&l2tp_ip_lock); + + lock_sock(sk); + if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_l2tpip)) + goto out; + + chk_addr_ret = inet_addr_type(&init_net, addr->l2tp_addr.s_addr); + ret = -EADDRNOTAVAIL; + if (addr->l2tp_addr.s_addr && chk_addr_ret != RTN_LOCAL && + chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) + goto out; + + inet->inet_rcv_saddr = inet->inet_saddr = addr->l2tp_addr.s_addr; + if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) + inet->inet_saddr = 0; /* Use device */ + sk_dst_reset(sk); + + l2tp_ip_sk(sk)->conn_id = addr->l2tp_conn_id; + + write_lock_bh(&l2tp_ip_lock); + sk_add_bind_node(sk, &l2tp_ip_bind_table); + sk_del_node_init(sk); + write_unlock_bh(&l2tp_ip_lock); + ret = 0; +out: + release_sock(sk); + + return ret; + +out_in_use: + read_unlock_bh(&l2tp_ip_lock); + + return ret; +} + +static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +{ + int rc; + struct inet_sock *inet = inet_sk(sk); + struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *) uaddr; + struct rtable *rt; + __be32 saddr; + int oif; + + rc = -EINVAL; + if (addr_len < sizeof(*lsa)) + goto out; + + rc = -EAFNOSUPPORT; + if (lsa->l2tp_family != AF_INET) + goto out; + + sk_dst_reset(sk); + + oif = sk->sk_bound_dev_if; + saddr = inet->inet_saddr; + + rc = -EINVAL; + if (ipv4_is_multicast(lsa->l2tp_addr.s_addr)) + goto out; + + rc = ip_route_connect(&rt, lsa->l2tp_addr.s_addr, saddr, + RT_CONN_FLAGS(sk), oif, + IPPROTO_L2TP, + 0, 0, sk, 1); + if (rc) { + if (rc == -ENETUNREACH) + IP_INC_STATS_BH(&init_net, IPSTATS_MIB_OUTNOROUTES); + goto out; + } + + rc = -ENETUNREACH; + if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { + ip_rt_put(rt); + goto out; + } + + l2tp_ip_sk(sk)->peer_conn_id = lsa->l2tp_conn_id; + + if (!inet->inet_saddr) + inet->inet_saddr = rt->rt_src; + if (!inet->inet_rcv_saddr) + inet->inet_rcv_saddr = rt->rt_src; + inet->inet_daddr = rt->rt_dst; + sk->sk_state = TCP_ESTABLISHED; + inet->inet_id = jiffies; + + sk_dst_set(sk, &rt->u.dst); + + write_lock_bh(&l2tp_ip_lock); + hlist_del_init(&sk->sk_bind_node); + sk_add_bind_node(sk, &l2tp_ip_bind_table); + write_unlock_bh(&l2tp_ip_lock); + + rc = 0; +out: + return rc; +} + +static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr, + int *uaddr_len, int peer) +{ + struct sock *sk = sock->sk; + struct inet_sock *inet = inet_sk(sk); + struct l2tp_ip_sock *lsk = l2tp_ip_sk(sk); + struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *)uaddr; + + 
memset(lsa, 0, sizeof(*lsa)); + lsa->l2tp_family = AF_INET; + if (peer) { + if (!inet->inet_dport) + return -ENOTCONN; + lsa->l2tp_conn_id = lsk->peer_conn_id; + lsa->l2tp_addr.s_addr = inet->inet_daddr; + } else { + __be32 addr = inet->inet_rcv_saddr; + if (!addr) + addr = inet->inet_saddr; + lsa->l2tp_conn_id = lsk->conn_id; + lsa->l2tp_addr.s_addr = addr; + } + *uaddr_len = sizeof(*lsa); + return 0; +} + +static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb) +{ + int rc; + + if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) + goto drop; + + nf_reset(skb); + + /* Charge it to the socket, dropping if the queue is full. */ + rc = sock_queue_rcv_skb(sk, skb); + if (rc < 0) + goto drop; + + return 0; + +drop: + IP_INC_STATS(&init_net, IPSTATS_MIB_INDISCARDS); + kfree_skb(skb); + return -1; +} + +/* Userspace will call sendmsg() on the tunnel socket to send L2TP + * control frames. + */ +static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) +{ + struct sk_buff *skb; + int rc; + struct l2tp_ip_sock *lsa = l2tp_ip_sk(sk); + struct inet_sock *inet = inet_sk(sk); + struct ip_options *opt = inet->opt; + struct rtable *rt = NULL; + int connected = 0; + __be32 daddr; + + if (sock_flag(sk, SOCK_DEAD)) + return -ENOTCONN; + + /* Get and verify the address. */ + if (msg->msg_name) { + struct sockaddr_l2tpip *lip = (struct sockaddr_l2tpip *) msg->msg_name; + if (msg->msg_namelen < sizeof(*lip)) + return -EINVAL; + + if (lip->l2tp_family != AF_INET) { + if (lip->l2tp_family != AF_UNSPEC) + return -EAFNOSUPPORT; + } + + daddr = lip->l2tp_addr.s_addr; + } else { + if (sk->sk_state != TCP_ESTABLISHED) + return -EDESTADDRREQ; + + daddr = inet->inet_daddr; + connected = 1; + } + + /* Allocate a socket buffer */ + rc = -ENOMEM; + skb = sock_wmalloc(sk, 2 + NET_SKB_PAD + sizeof(struct iphdr) + + 4 + len, 0, GFP_KERNEL); + if (!skb) + goto error; + + /* Reserve space for headers, putting IP header on 4-byte boundary. */ + skb_reserve(skb, 2 + NET_SKB_PAD); + skb_reset_network_header(skb); + skb_reserve(skb, sizeof(struct iphdr)); + skb_reset_transport_header(skb); + + /* Insert 0 session_id */ + *((__be32 *) skb_put(skb, 4)) = 0; + + /* Copy user data into skb */ + rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); + if (rc < 0) { + kfree_skb(skb); + goto error; + } + + if (connected) + rt = (struct rtable *) __sk_dst_check(sk, 0); + + if (rt == NULL) { + /* Use correct destination address if we have options. */ + if (opt && opt->srr) + daddr = opt->faddr; + + { + struct flowi fl = { .oif = sk->sk_bound_dev_if, + .nl_u = { .ip4_u = { + .daddr = daddr, + .saddr = inet->inet_saddr, + .tos = RT_CONN_FLAGS(sk) } }, + .proto = sk->sk_protocol, + .flags = inet_sk_flowi_flags(sk), + .uli_u = { .ports = { + .sport = inet->inet_sport, + .dport = inet->inet_dport } } }; + + /* If this fails, retransmit mechanism of transport layer will + * keep trying until route appears or the connection times + * itself out. 
+ */ + security_sk_classify_flow(sk, &fl); + if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) + goto no_route; + } + sk_setup_caps(sk, &rt->u.dst); + } + skb_dst_set(skb, dst_clone(&rt->u.dst)); + + /* Queue the packet to IP for output */ + rc = ip_queue_xmit(skb, 0); + +error: + /* Update stats */ + if (rc >= 0) { + lsa->tx_packets++; + lsa->tx_bytes += len; + rc = len; + } else { + lsa->tx_errors++; + } + + return rc; + +no_route: + IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); + kfree_skb(skb); + return -EHOSTUNREACH; +} + +static int l2tp_ip_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len) +{ + struct inet_sock *inet = inet_sk(sk); + struct l2tp_ip_sock *lsk = l2tp_ip_sk(sk); + size_t copied = 0; + int err = -EOPNOTSUPP; + struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; + struct sk_buff *skb; + + if (flags & MSG_OOB) + goto out; + + if (addr_len) + *addr_len = sizeof(*sin); + + skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) + goto out; + + copied = skb->len; + if (len < copied) { + msg->msg_flags |= MSG_TRUNC; + copied = len; + } + + err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + if (err) + goto done; + + sock_recv_timestamp(msg, sk, skb); + + /* Copy the address. */ + if (sin) { + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = ip_hdr(skb)->saddr; + sin->sin_port = 0; + memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); + } + if (inet->cmsg_flags) + ip_cmsg_recv(msg, skb); + if (flags & MSG_TRUNC) + copied = skb->len; +done: + skb_free_datagram(sk, skb); +out: + if (err) { + lsk->rx_errors++; + return err; + } + + lsk->rx_packets++; + lsk->rx_bytes += copied; + + return copied; +} + +struct proto l2tp_ip_prot = { + .name = "L2TP/IP", + .owner = THIS_MODULE, + .init = l2tp_ip_open, + .close = l2tp_ip_close, + .bind = l2tp_ip_bind, + .connect = l2tp_ip_connect, + .disconnect = udp_disconnect, + .ioctl = udp_ioctl, + .destroy = l2tp_ip_destroy_sock, + .setsockopt = ip_setsockopt, + .getsockopt = ip_getsockopt, + .sendmsg = l2tp_ip_sendmsg, + .recvmsg = l2tp_ip_recvmsg, + .backlog_rcv = l2tp_ip_backlog_recv, + .hash = inet_hash, + .unhash = inet_unhash, + .obj_size = sizeof(struct l2tp_ip_sock), +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_ip_setsockopt, + .compat_getsockopt = compat_ip_getsockopt, +#endif +}; + +static const struct proto_ops l2tp_ip_ops = { + .family = PF_INET, + .owner = THIS_MODULE, + .release = inet_release, + .bind = inet_bind, + .connect = inet_dgram_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = l2tp_ip_getname, + .poll = datagram_poll, + .ioctl = inet_ioctl, + .listen = sock_no_listen, + .shutdown = inet_shutdown, + .setsockopt = sock_common_setsockopt, + .getsockopt = sock_common_getsockopt, + .sendmsg = inet_sendmsg, + .recvmsg = sock_common_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_sock_common_setsockopt, + .compat_getsockopt = compat_sock_common_getsockopt, +#endif +}; + +static struct inet_protosw l2tp_ip_protosw = { + .type = SOCK_DGRAM, + .protocol = IPPROTO_L2TP, + .prot = &l2tp_ip_prot, + .ops = &l2tp_ip_ops, + .no_check = 0, +}; + +static struct net_protocol l2tp_ip_protocol __read_mostly = { + .handler = l2tp_ip_recv, +}; + +static int __init l2tp_ip_init(void) +{ + int err; + + printk(KERN_INFO "L2TP IP encapsulation support (L2TPv3)\n"); + + err = proto_register(&l2tp_ip_prot, 1); + if (err != 0) + 
goto out; + + err = inet_add_protocol(&l2tp_ip_protocol, IPPROTO_L2TP); + if (err) + goto out1; + + inet_register_protosw(&l2tp_ip_protosw); + return 0; + +out1: + proto_unregister(&l2tp_ip_prot); +out: + return err; +} + +static void __exit l2tp_ip_exit(void) +{ + inet_unregister_protosw(&l2tp_ip_protosw); + inet_del_protocol(&l2tp_ip_protocol, IPPROTO_L2TP); + proto_unregister(&l2tp_ip_prot); +} + +module_init(l2tp_ip_init); +module_exit(l2tp_ip_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("James Chapman "); +MODULE_DESCRIPTION("L2TP over IP"); +MODULE_VERSION("1.0"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, SOCK_DGRAM, IPPROTO_L2TP); diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index e5b531266541..63fc62baeeb9 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -305,6 +305,7 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh struct l2tp_session *session; struct l2tp_tunnel *tunnel; struct pppol2tp_session *ps; + int uhlen; error = -ENOTCONN; if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) @@ -321,10 +322,12 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh if (tunnel == NULL) goto error_put_sess; + uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0; + /* Allocate a socket buffer */ error = -ENOMEM; skb = sock_wmalloc(sk, NET_SKB_PAD + sizeof(struct iphdr) + - sizeof(struct udphdr) + session->hdr_len + + uhlen + session->hdr_len + sizeof(ppph) + total_len, 0, GFP_KERNEL); if (!skb) @@ -335,7 +338,7 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh skb_reset_network_header(skb); skb_reserve(skb, sizeof(struct iphdr)); skb_reset_transport_header(skb); - skb_reserve(skb, sizeof(struct udphdr)); + skb_reserve(skb, uhlen); /* Add PPP header */ skb->data[0] = ppph[0]; -- cgit v1.2.2 From f408e0ce40270559ef80f231843c93baa9947bc5 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Fri, 2 Apr 2010 06:19:05 +0000 Subject: netlink: Export genl_lock() API for use by modules This lets kernel modules which use genl netlink APIs serialize netlink processing. Signed-off-by: James Chapman Reviewed-by: Randy Dunlap Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index a4b6e148c5de..a28fda7420d9 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -20,15 +20,17 @@ static DEFINE_MUTEX(genl_mutex); /* serialization of message processing */ -static inline void genl_lock(void) +void genl_lock(void) { mutex_lock(&genl_mutex); } +EXPORT_SYMBOL(genl_lock); -static inline void genl_unlock(void) +void genl_unlock(void) { mutex_unlock(&genl_mutex); } +EXPORT_SYMBOL(genl_unlock); #define GENL_FAM_TAB_SIZE 16 #define GENL_FAM_TAB_MASK (GENL_FAM_TAB_SIZE - 1) -- cgit v1.2.2 From 309795f4bec2d69cd507a631f82065c2198a0825 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Fri, 2 Apr 2010 06:19:10 +0000 Subject: l2tp: Add netlink control API for L2TP In L2TPv3, we need to create/delete/modify/query L2TP tunnel and session contexts. The number of parameters is significant. So let's use netlink. Userspace uses this API to control L2TP tunnel/session contexts in the kernel. The previous pppol2tp driver was managed using [gs]etsockopt(). This API is retained for backwards compatibility. 
Unlike L2TPv2 which carries only PPP frames, L2TPv3 can carry raw ethernet frames or other frame types and these do not always have an associated socket family. Therefore, we need a way to use L2TP sessions that doesn't require a socket type for each supported frame type. Hence netlink is used. Signed-off-by: James Chapman Reviewed-by: Randy Dunlap Signed-off-by: David S. Miller --- net/l2tp/Makefile | 1 + net/l2tp/l2tp_core.c | 61 +++- net/l2tp/l2tp_core.h | 34 +- net/l2tp/l2tp_netlink.c | 830 ++++++++++++++++++++++++++++++++++++++++++++++++ net/l2tp/l2tp_ppp.c | 162 ++++++++-- 5 files changed, 1044 insertions(+), 44 deletions(-) create mode 100644 net/l2tp/l2tp_netlink.c (limited to 'net') diff --git a/net/l2tp/Makefile b/net/l2tp/Makefile index ef28b16f7d6a..2c4a14b673ab 100644 --- a/net/l2tp/Makefile +++ b/net/l2tp/Makefile @@ -7,3 +7,4 @@ obj-$(CONFIG_L2TP) += l2tp_core.o # Build l2tp as modules if L2TP is M obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_PPPOL2TP)) += l2tp_ppp.o obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_IP)) += l2tp_ip.o +obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_V3)) += l2tp_netlink.o diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 1739d04367e4..fbd1f2119fe9 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -214,6 +215,32 @@ struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) } EXPORT_SYMBOL_GPL(l2tp_session_find_nth); +/* Lookup a session by interface name. + * This is very inefficient but is only used by management interfaces. + */ +struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname) +{ + struct l2tp_net *pn = l2tp_pernet(net); + int hash; + struct hlist_node *walk; + struct l2tp_session *session; + + read_lock_bh(&pn->l2tp_session_hlist_lock); + for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) { + hlist_for_each_entry(session, walk, &pn->l2tp_session_hlist[hash], global_hlist) { + if (!strcmp(session->ifname, ifname)) { + read_unlock_bh(&pn->l2tp_session_hlist_lock); + return session; + } + } + } + + read_unlock_bh(&pn->l2tp_session_hlist_lock); + + return NULL; +} +EXPORT_SYMBOL_GPL(l2tp_session_find_by_ifname); + /* Lookup a tunnel by id */ struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id) @@ -758,7 +785,7 @@ int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, /* Find the session context */ session = l2tp_session_find(tunnel->l2tp_net, tunnel, session_id); - if (!session) { + if (!session || !session->recv_skb) { /* Not found? Pass to userspace to deal with */ PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO, "%s: no session found (%u/%u). Passing up.\n", @@ -1305,6 +1332,23 @@ err: } EXPORT_SYMBOL_GPL(l2tp_tunnel_create); +/* This function is used by the netlink TUNNEL_DELETE command. + */ +int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) +{ + int err = 0; + + /* Force the tunnel socket to close. This will eventually + * cause the tunnel to be deleted via the normal socket close + * mechanisms when userspace closes the tunnel socket. + */ + if ((tunnel->sock != NULL) && (tunnel->sock->sk_socket != NULL)) + err = inet_shutdown(tunnel->sock->sk_socket, 2); + + return err; +} +EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); + /* Really kill the session. 
*/ void l2tp_session_free(struct l2tp_session *session) @@ -1349,6 +1393,21 @@ void l2tp_session_free(struct l2tp_session *session) } EXPORT_SYMBOL_GPL(l2tp_session_free); +/* This function is used by the netlink SESSION_DELETE command and by + pseudowire modules. + */ +int l2tp_session_delete(struct l2tp_session *session) +{ + if (session->session_close != NULL) + (*session->session_close)(session); + + l2tp_session_dec_refcount(session); + + return 0; +} +EXPORT_SYMBOL_GPL(l2tp_session_delete); + + /* We come here whenever a session's send_seq, cookie_len or * l2specific_len parameters are set. */ diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index d2395984645e..2974d9ade167 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -33,26 +33,6 @@ enum { L2TP_MSG_DATA = (1 << 3), /* data packets */ }; -enum l2tp_pwtype { - L2TP_PWTYPE_NONE = 0x0000, - L2TP_PWTYPE_ETH_VLAN = 0x0004, - L2TP_PWTYPE_ETH = 0x0005, - L2TP_PWTYPE_PPP = 0x0007, - L2TP_PWTYPE_PPP_AC = 0x0008, - L2TP_PWTYPE_IP = 0x000b, - __L2TP_PWTYPE_MAX -}; - -enum l2tp_l2spec_type { - L2TP_L2SPECTYPE_NONE, - L2TP_L2SPECTYPE_DEFAULT, -}; - -enum l2tp_encap_type { - L2TP_ENCAPTYPE_UDP, - L2TP_ENCAPTYPE_IP, -}; - struct sk_buff; struct l2tp_stats { @@ -87,6 +67,7 @@ struct l2tp_session_cfg { * control of LNS. */ int debug; /* bitmask of debug message * categories */ + u16 vlan_id; /* VLAN pseudowire only */ u16 offset; /* offset to payload */ u16 l2specific_len; /* Layer 2 specific length */ u16 l2specific_type; /* Layer 2 specific type */ @@ -98,6 +79,7 @@ struct l2tp_session_cfg { * (in jiffies) */ int mtu; int mru; + char *ifname; }; struct l2tp_session { @@ -124,6 +106,7 @@ struct l2tp_session { atomic_t ref_count; char name[32]; /* for logging */ + char ifname[IFNAMSIZ]; unsigned data_seq:2; /* data sequencing level * 0 => none, 1 => IP only, * 2 => all @@ -192,6 +175,11 @@ struct l2tp_tunnel { uint8_t priv[0]; /* private data */ }; +struct l2tp_nl_cmd_ops { + int (*session_create)(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); + int (*session_delete)(struct l2tp_session *session); +}; + static inline void *l2tp_tunnel_priv(struct l2tp_tunnel *tunnel) { return &tunnel->priv[0]; @@ -224,11 +212,14 @@ out: extern struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id); extern struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth); +extern struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname); extern struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id); extern struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth); extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp); +extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); +extern int l2tp_session_delete(struct l2tp_session *session); extern void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); extern void l2tp_session_free(struct l2tp_session *session); extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb)); @@ -241,6 +232,9 @@ extern void l2tp_tunnel_destruct(struct sock *sk); extern void 
l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); extern void l2tp_session_set_header_len(struct l2tp_session *session, int version); +extern int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops); +extern void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type); + /* Tunnel reference counts. Incremented per session that is added to * the tunnel. */ diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c new file mode 100644 index 000000000000..3d0f7f6f7488 --- /dev/null +++ b/net/l2tp/l2tp_netlink.c @@ -0,0 +1,830 @@ +/* + * L2TP netlink layer, for management + * + * Copyright (c) 2008,2009,2010 Katalix Systems Ltd + * + * Partly based on the IrDA nelink implementation + * (see net/irda/irnetlink.c) which is: + * Copyright (c) 2007 Samuel Ortiz + * which is in turn partly based on the wireless netlink code: + * Copyright 2006 Johannes Berg + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "l2tp_core.h" + + +static struct genl_family l2tp_nl_family = { + .id = GENL_ID_GENERATE, + .name = L2TP_GENL_NAME, + .version = L2TP_GENL_VERSION, + .hdrsize = 0, + .maxattr = L2TP_ATTR_MAX, +}; + +/* Accessed under genl lock */ +static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX]; + +static struct l2tp_session *l2tp_nl_session_find(struct genl_info *info) +{ + u32 tunnel_id; + u32 session_id; + char *ifname; + struct l2tp_tunnel *tunnel; + struct l2tp_session *session = NULL; + struct net *net = genl_info_net(info); + + if (info->attrs[L2TP_ATTR_IFNAME]) { + ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]); + session = l2tp_session_find_by_ifname(net, ifname); + } else if ((info->attrs[L2TP_ATTR_SESSION_ID]) && + (info->attrs[L2TP_ATTR_CONN_ID])) { + tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); + session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]); + tunnel = l2tp_tunnel_find(net, tunnel_id); + if (tunnel) + session = l2tp_session_find(net, tunnel, session_id); + } + + return session; +} + +static int l2tp_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *msg; + void *hdr; + int ret = -ENOBUFS; + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) { + ret = -ENOMEM; + goto out; + } + + hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, + &l2tp_nl_family, 0, L2TP_CMD_NOOP); + if (IS_ERR(hdr)) { + ret = PTR_ERR(hdr); + goto err_out; + } + + genlmsg_end(msg, hdr); + + return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid); + +err_out: + nlmsg_free(msg); + +out: + return ret; +} + +static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info) +{ + u32 tunnel_id; + u32 peer_tunnel_id; + int proto_version; + int fd; + int ret = 0; + struct l2tp_tunnel_cfg cfg = { 0, }; + struct l2tp_tunnel *tunnel; + struct net *net = genl_info_net(info); + + if (!info->attrs[L2TP_ATTR_CONN_ID]) { + ret = -EINVAL; + goto out; + } + tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); + + if (!info->attrs[L2TP_ATTR_PEER_CONN_ID]) { + ret = -EINVAL; + goto out; + } + peer_tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_PEER_CONN_ID]); + + if (!info->attrs[L2TP_ATTR_PROTO_VERSION]) { + ret = -EINVAL; + goto out; + } + proto_version = nla_get_u8(info->attrs[L2TP_ATTR_PROTO_VERSION]); + + if (!info->attrs[L2TP_ATTR_ENCAP_TYPE]) { 
+ ret = -EINVAL; + goto out; + } + cfg.encap = nla_get_u16(info->attrs[L2TP_ATTR_ENCAP_TYPE]); + + if (!info->attrs[L2TP_ATTR_FD]) { + ret = -EINVAL; + goto out; + } + fd = nla_get_u32(info->attrs[L2TP_ATTR_FD]); + + if (info->attrs[L2TP_ATTR_DEBUG]) + cfg.debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]); + + tunnel = l2tp_tunnel_find(net, tunnel_id); + if (tunnel != NULL) { + ret = -EEXIST; + goto out; + } + + ret = -EINVAL; + switch (cfg.encap) { + case L2TP_ENCAPTYPE_UDP: + case L2TP_ENCAPTYPE_IP: + ret = l2tp_tunnel_create(net, fd, proto_version, tunnel_id, + peer_tunnel_id, &cfg, &tunnel); + break; + } + +out: + return ret; +} + +static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info) +{ + struct l2tp_tunnel *tunnel; + u32 tunnel_id; + int ret = 0; + struct net *net = genl_info_net(info); + + if (!info->attrs[L2TP_ATTR_CONN_ID]) { + ret = -EINVAL; + goto out; + } + tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); + + tunnel = l2tp_tunnel_find(net, tunnel_id); + if (tunnel == NULL) { + ret = -ENODEV; + goto out; + } + + (void) l2tp_tunnel_delete(tunnel); + +out: + return ret; +} + +static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info) +{ + struct l2tp_tunnel *tunnel; + u32 tunnel_id; + int ret = 0; + struct net *net = genl_info_net(info); + + if (!info->attrs[L2TP_ATTR_CONN_ID]) { + ret = -EINVAL; + goto out; + } + tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); + + tunnel = l2tp_tunnel_find(net, tunnel_id); + if (tunnel == NULL) { + ret = -ENODEV; + goto out; + } + + if (info->attrs[L2TP_ATTR_DEBUG]) + tunnel->debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]); + +out: + return ret; +} + +static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 pid, u32 seq, int flags, + struct l2tp_tunnel *tunnel) +{ + void *hdr; + struct nlattr *nest; + struct sock *sk = NULL; + struct inet_sock *inet; + + hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, + L2TP_CMD_TUNNEL_GET); + if (IS_ERR(hdr)) + return PTR_ERR(hdr); + + NLA_PUT_U8(skb, L2TP_ATTR_PROTO_VERSION, tunnel->version); + NLA_PUT_U32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id); + NLA_PUT_U32(skb, L2TP_ATTR_PEER_CONN_ID, tunnel->peer_tunnel_id); + NLA_PUT_U32(skb, L2TP_ATTR_DEBUG, tunnel->debug); + NLA_PUT_U16(skb, L2TP_ATTR_ENCAP_TYPE, tunnel->encap); + + nest = nla_nest_start(skb, L2TP_ATTR_STATS); + if (nest == NULL) + goto nla_put_failure; + + NLA_PUT_U64(skb, L2TP_ATTR_TX_PACKETS, tunnel->stats.tx_packets); + NLA_PUT_U64(skb, L2TP_ATTR_TX_BYTES, tunnel->stats.tx_bytes); + NLA_PUT_U64(skb, L2TP_ATTR_TX_ERRORS, tunnel->stats.tx_errors); + NLA_PUT_U64(skb, L2TP_ATTR_RX_PACKETS, tunnel->stats.rx_packets); + NLA_PUT_U64(skb, L2TP_ATTR_RX_BYTES, tunnel->stats.rx_bytes); + NLA_PUT_U64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, tunnel->stats.rx_seq_discards); + NLA_PUT_U64(skb, L2TP_ATTR_RX_OOS_PACKETS, tunnel->stats.rx_oos_packets); + NLA_PUT_U64(skb, L2TP_ATTR_RX_ERRORS, tunnel->stats.rx_errors); + nla_nest_end(skb, nest); + + sk = tunnel->sock; + if (!sk) + goto out; + + inet = inet_sk(sk); + + switch (tunnel->encap) { + case L2TP_ENCAPTYPE_UDP: + NLA_PUT_U16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)); + NLA_PUT_U16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport)); + NLA_PUT_U8(skb, L2TP_ATTR_UDP_CSUM, (sk->sk_no_check != UDP_CSUM_NOXMIT)); + /* NOBREAK */ + case L2TP_ENCAPTYPE_IP: + NLA_PUT_BE32(skb, L2TP_ATTR_IP_SADDR, inet->inet_saddr); + NLA_PUT_BE32(skb, L2TP_ATTR_IP_DADDR, inet->inet_daddr); + break; + } + +out: + return genlmsg_end(skb, hdr); + 
+nla_put_failure: + genlmsg_cancel(skb, hdr); + return -1; +} + +static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info) +{ + struct l2tp_tunnel *tunnel; + struct sk_buff *msg; + u32 tunnel_id; + int ret = -ENOBUFS; + struct net *net = genl_info_net(info); + + if (!info->attrs[L2TP_ATTR_CONN_ID]) { + ret = -EINVAL; + goto out; + } + + tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); + + tunnel = l2tp_tunnel_find(net, tunnel_id); + if (tunnel == NULL) { + ret = -ENODEV; + goto out; + } + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) { + ret = -ENOMEM; + goto out; + } + + ret = l2tp_nl_tunnel_send(msg, info->snd_pid, info->snd_seq, + NLM_F_ACK, tunnel); + if (ret < 0) + goto err_out; + + return genlmsg_unicast(net, msg, info->snd_pid); + +err_out: + nlmsg_free(msg); + +out: + return ret; +} + +static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int ti = cb->args[0]; + struct l2tp_tunnel *tunnel; + struct net *net = sock_net(skb->sk); + + for (;;) { + tunnel = l2tp_tunnel_find_nth(net, ti); + if (tunnel == NULL) + goto out; + + if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + tunnel) <= 0) + goto out; + + ti++; + } + +out: + cb->args[0] = ti; + + return skb->len; +} + +static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *info) +{ + u32 tunnel_id = 0; + u32 session_id; + u32 peer_session_id; + int ret = 0; + struct l2tp_tunnel *tunnel; + struct l2tp_session *session; + struct l2tp_session_cfg cfg = { 0, }; + struct net *net = genl_info_net(info); + + if (!info->attrs[L2TP_ATTR_CONN_ID]) { + ret = -EINVAL; + goto out; + } + tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); + tunnel = l2tp_tunnel_find(net, tunnel_id); + if (!tunnel) { + ret = -ENODEV; + goto out; + } + + if (!info->attrs[L2TP_ATTR_SESSION_ID]) { + ret = -EINVAL; + goto out; + } + session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]); + session = l2tp_session_find(net, tunnel, session_id); + if (session) { + ret = -EEXIST; + goto out; + } + + if (!info->attrs[L2TP_ATTR_PEER_SESSION_ID]) { + ret = -EINVAL; + goto out; + } + peer_session_id = nla_get_u32(info->attrs[L2TP_ATTR_PEER_SESSION_ID]); + + if (!info->attrs[L2TP_ATTR_PW_TYPE]) { + ret = -EINVAL; + goto out; + } + cfg.pw_type = nla_get_u16(info->attrs[L2TP_ATTR_PW_TYPE]); + if (cfg.pw_type >= __L2TP_PWTYPE_MAX) { + ret = -EINVAL; + goto out; + } + + if (tunnel->version > 2) { + if (info->attrs[L2TP_ATTR_OFFSET]) + cfg.offset = nla_get_u16(info->attrs[L2TP_ATTR_OFFSET]); + + if (info->attrs[L2TP_ATTR_DATA_SEQ]) + cfg.data_seq = nla_get_u8(info->attrs[L2TP_ATTR_DATA_SEQ]); + + cfg.l2specific_type = L2TP_L2SPECTYPE_DEFAULT; + if (info->attrs[L2TP_ATTR_L2SPEC_TYPE]) + cfg.l2specific_type = nla_get_u8(info->attrs[L2TP_ATTR_L2SPEC_TYPE]); + + cfg.l2specific_len = 4; + if (info->attrs[L2TP_ATTR_L2SPEC_LEN]) + cfg.l2specific_len = nla_get_u8(info->attrs[L2TP_ATTR_L2SPEC_LEN]); + + if (info->attrs[L2TP_ATTR_COOKIE]) { + u16 len = nla_len(info->attrs[L2TP_ATTR_COOKIE]); + if (len > 8) { + ret = -EINVAL; + goto out; + } + cfg.cookie_len = len; + memcpy(&cfg.cookie[0], nla_data(info->attrs[L2TP_ATTR_COOKIE]), len); + } + if (info->attrs[L2TP_ATTR_PEER_COOKIE]) { + u16 len = nla_len(info->attrs[L2TP_ATTR_PEER_COOKIE]); + if (len > 8) { + ret = -EINVAL; + goto out; + } + cfg.peer_cookie_len = len; + memcpy(&cfg.peer_cookie[0], nla_data(info->attrs[L2TP_ATTR_PEER_COOKIE]), len); + } + if (info->attrs[L2TP_ATTR_IFNAME]) + cfg.ifname 
= nla_data(info->attrs[L2TP_ATTR_IFNAME]); + + if (info->attrs[L2TP_ATTR_VLAN_ID]) + cfg.vlan_id = nla_get_u16(info->attrs[L2TP_ATTR_VLAN_ID]); + } + + if (info->attrs[L2TP_ATTR_DEBUG]) + cfg.debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]); + + if (info->attrs[L2TP_ATTR_RECV_SEQ]) + cfg.recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]); + + if (info->attrs[L2TP_ATTR_SEND_SEQ]) + cfg.send_seq = nla_get_u8(info->attrs[L2TP_ATTR_SEND_SEQ]); + + if (info->attrs[L2TP_ATTR_LNS_MODE]) + cfg.lns_mode = nla_get_u8(info->attrs[L2TP_ATTR_LNS_MODE]); + + if (info->attrs[L2TP_ATTR_RECV_TIMEOUT]) + cfg.reorder_timeout = nla_get_msecs(info->attrs[L2TP_ATTR_RECV_TIMEOUT]); + + if (info->attrs[L2TP_ATTR_MTU]) + cfg.mtu = nla_get_u16(info->attrs[L2TP_ATTR_MTU]); + + if (info->attrs[L2TP_ATTR_MRU]) + cfg.mru = nla_get_u16(info->attrs[L2TP_ATTR_MRU]); + + if ((l2tp_nl_cmd_ops[cfg.pw_type] == NULL) || + (l2tp_nl_cmd_ops[cfg.pw_type]->session_create == NULL)) { + ret = -EPROTONOSUPPORT; + goto out; + } + + /* Check that pseudowire-specific params are present */ + switch (cfg.pw_type) { + case L2TP_PWTYPE_NONE: + break; + case L2TP_PWTYPE_ETH_VLAN: + if (!info->attrs[L2TP_ATTR_VLAN_ID]) { + ret = -EINVAL; + goto out; + } + break; + case L2TP_PWTYPE_ETH: + break; + case L2TP_PWTYPE_PPP: + case L2TP_PWTYPE_PPP_AC: + break; + case L2TP_PWTYPE_IP: + default: + ret = -EPROTONOSUPPORT; + break; + } + + ret = -EPROTONOSUPPORT; + if (l2tp_nl_cmd_ops[cfg.pw_type]->session_create) + ret = (*l2tp_nl_cmd_ops[cfg.pw_type]->session_create)(net, tunnel_id, + session_id, peer_session_id, &cfg); + +out: + return ret; +} + +static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *info) +{ + int ret = 0; + struct l2tp_session *session; + u16 pw_type; + + session = l2tp_nl_session_find(info); + if (session == NULL) { + ret = -ENODEV; + goto out; + } + + pw_type = session->pwtype; + if (pw_type < __L2TP_PWTYPE_MAX) + if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete) + ret = (*l2tp_nl_cmd_ops[pw_type]->session_delete)(session); + +out: + return ret; +} + +static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *info) +{ + int ret = 0; + struct l2tp_session *session; + + session = l2tp_nl_session_find(info); + if (session == NULL) { + ret = -ENODEV; + goto out; + } + + if (info->attrs[L2TP_ATTR_DEBUG]) + session->debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]); + + if (info->attrs[L2TP_ATTR_DATA_SEQ]) + session->data_seq = nla_get_u8(info->attrs[L2TP_ATTR_DATA_SEQ]); + + if (info->attrs[L2TP_ATTR_RECV_SEQ]) + session->recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]); + + if (info->attrs[L2TP_ATTR_SEND_SEQ]) + session->send_seq = nla_get_u8(info->attrs[L2TP_ATTR_SEND_SEQ]); + + if (info->attrs[L2TP_ATTR_LNS_MODE]) + session->lns_mode = nla_get_u8(info->attrs[L2TP_ATTR_LNS_MODE]); + + if (info->attrs[L2TP_ATTR_RECV_TIMEOUT]) + session->reorder_timeout = nla_get_msecs(info->attrs[L2TP_ATTR_RECV_TIMEOUT]); + + if (info->attrs[L2TP_ATTR_MTU]) + session->mtu = nla_get_u16(info->attrs[L2TP_ATTR_MTU]); + + if (info->attrs[L2TP_ATTR_MRU]) + session->mru = nla_get_u16(info->attrs[L2TP_ATTR_MRU]); + +out: + return ret; +} + +static int l2tp_nl_session_send(struct sk_buff *skb, u32 pid, u32 seq, int flags, + struct l2tp_session *session) +{ + void *hdr; + struct nlattr *nest; + struct l2tp_tunnel *tunnel = session->tunnel; + struct sock *sk = NULL; + + sk = tunnel->sock; + + hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET); + if 
(IS_ERR(hdr)) + return PTR_ERR(hdr); + + NLA_PUT_U32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id); + NLA_PUT_U32(skb, L2TP_ATTR_SESSION_ID, session->session_id); + NLA_PUT_U32(skb, L2TP_ATTR_PEER_CONN_ID, tunnel->peer_tunnel_id); + NLA_PUT_U32(skb, L2TP_ATTR_PEER_SESSION_ID, session->peer_session_id); + NLA_PUT_U32(skb, L2TP_ATTR_DEBUG, session->debug); + NLA_PUT_U16(skb, L2TP_ATTR_PW_TYPE, session->pwtype); + NLA_PUT_U16(skb, L2TP_ATTR_MTU, session->mtu); + if (session->mru) + NLA_PUT_U16(skb, L2TP_ATTR_MRU, session->mru); + + if (session->ifname && session->ifname[0]) + NLA_PUT_STRING(skb, L2TP_ATTR_IFNAME, session->ifname); + if (session->cookie_len) + NLA_PUT(skb, L2TP_ATTR_COOKIE, session->cookie_len, &session->cookie[0]); + if (session->peer_cookie_len) + NLA_PUT(skb, L2TP_ATTR_PEER_COOKIE, session->peer_cookie_len, &session->peer_cookie[0]); + NLA_PUT_U8(skb, L2TP_ATTR_RECV_SEQ, session->recv_seq); + NLA_PUT_U8(skb, L2TP_ATTR_SEND_SEQ, session->send_seq); + NLA_PUT_U8(skb, L2TP_ATTR_LNS_MODE, session->lns_mode); +#ifdef CONFIG_XFRM + if ((sk) && (sk->sk_policy[0] || sk->sk_policy[1])) + NLA_PUT_U8(skb, L2TP_ATTR_USING_IPSEC, 1); +#endif + if (session->reorder_timeout) + NLA_PUT_MSECS(skb, L2TP_ATTR_RECV_TIMEOUT, session->reorder_timeout); + + nest = nla_nest_start(skb, L2TP_ATTR_STATS); + if (nest == NULL) + goto nla_put_failure; + NLA_PUT_U64(skb, L2TP_ATTR_TX_PACKETS, session->stats.tx_packets); + NLA_PUT_U64(skb, L2TP_ATTR_TX_BYTES, session->stats.tx_bytes); + NLA_PUT_U64(skb, L2TP_ATTR_TX_ERRORS, session->stats.tx_errors); + NLA_PUT_U64(skb, L2TP_ATTR_RX_PACKETS, session->stats.rx_packets); + NLA_PUT_U64(skb, L2TP_ATTR_RX_BYTES, session->stats.rx_bytes); + NLA_PUT_U64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, session->stats.rx_seq_discards); + NLA_PUT_U64(skb, L2TP_ATTR_RX_OOS_PACKETS, session->stats.rx_oos_packets); + NLA_PUT_U64(skb, L2TP_ATTR_RX_ERRORS, session->stats.rx_errors); + nla_nest_end(skb, nest); + + return genlmsg_end(skb, hdr); + + nla_put_failure: + genlmsg_cancel(skb, hdr); + return -1; +} + +static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info) +{ + struct l2tp_session *session; + struct sk_buff *msg; + int ret; + + session = l2tp_nl_session_find(info); + if (session == NULL) { + ret = -ENODEV; + goto out; + } + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) { + ret = -ENOMEM; + goto out; + } + + ret = l2tp_nl_session_send(msg, info->snd_pid, info->snd_seq, + 0, session); + if (ret < 0) + goto err_out; + + return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid); + +err_out: + nlmsg_free(msg); + +out: + return ret; +} + +static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct l2tp_session *session; + struct l2tp_tunnel *tunnel = NULL; + int ti = cb->args[0]; + int si = cb->args[1]; + + for (;;) { + if (tunnel == NULL) { + tunnel = l2tp_tunnel_find_nth(net, ti); + if (tunnel == NULL) + goto out; + } + + session = l2tp_session_find_nth(tunnel, si); + if (session == NULL) { + ti++; + tunnel = NULL; + si = 0; + continue; + } + + if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + session) <= 0) + break; + + si++; + } + +out: + cb->args[0] = ti; + cb->args[1] = si; + + return skb->len; +} + +static struct nla_policy l2tp_nl_policy[L2TP_ATTR_MAX + 1] = { + [L2TP_ATTR_NONE] = { .type = NLA_UNSPEC, }, + [L2TP_ATTR_PW_TYPE] = { .type = NLA_U16, }, + [L2TP_ATTR_ENCAP_TYPE] = { .type = NLA_U16, }, + [L2TP_ATTR_OFFSET] = 
{ .type = NLA_U16, }, + [L2TP_ATTR_DATA_SEQ] = { .type = NLA_U8, }, + [L2TP_ATTR_L2SPEC_TYPE] = { .type = NLA_U8, }, + [L2TP_ATTR_L2SPEC_LEN] = { .type = NLA_U8, }, + [L2TP_ATTR_PROTO_VERSION] = { .type = NLA_U8, }, + [L2TP_ATTR_CONN_ID] = { .type = NLA_U32, }, + [L2TP_ATTR_PEER_CONN_ID] = { .type = NLA_U32, }, + [L2TP_ATTR_SESSION_ID] = { .type = NLA_U32, }, + [L2TP_ATTR_PEER_SESSION_ID] = { .type = NLA_U32, }, + [L2TP_ATTR_UDP_CSUM] = { .type = NLA_U8, }, + [L2TP_ATTR_VLAN_ID] = { .type = NLA_U16, }, + [L2TP_ATTR_DEBUG] = { .type = NLA_U32, }, + [L2TP_ATTR_RECV_SEQ] = { .type = NLA_U8, }, + [L2TP_ATTR_SEND_SEQ] = { .type = NLA_U8, }, + [L2TP_ATTR_LNS_MODE] = { .type = NLA_U8, }, + [L2TP_ATTR_USING_IPSEC] = { .type = NLA_U8, }, + [L2TP_ATTR_RECV_TIMEOUT] = { .type = NLA_MSECS, }, + [L2TP_ATTR_FD] = { .type = NLA_U32, }, + [L2TP_ATTR_IP_SADDR] = { .type = NLA_U32, }, + [L2TP_ATTR_IP_DADDR] = { .type = NLA_U32, }, + [L2TP_ATTR_UDP_SPORT] = { .type = NLA_U16, }, + [L2TP_ATTR_UDP_DPORT] = { .type = NLA_U16, }, + [L2TP_ATTR_MTU] = { .type = NLA_U16, }, + [L2TP_ATTR_MRU] = { .type = NLA_U16, }, + [L2TP_ATTR_STATS] = { .type = NLA_NESTED, }, + [L2TP_ATTR_IFNAME] = { + .type = NLA_NUL_STRING, + .len = IFNAMSIZ - 1, + }, + [L2TP_ATTR_COOKIE] = { + .type = NLA_BINARY, + .len = 8, + }, + [L2TP_ATTR_PEER_COOKIE] = { + .type = NLA_BINARY, + .len = 8, + }, +}; + +static struct genl_ops l2tp_nl_ops[] = { + { + .cmd = L2TP_CMD_NOOP, + .doit = l2tp_nl_cmd_noop, + .policy = l2tp_nl_policy, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = L2TP_CMD_TUNNEL_CREATE, + .doit = l2tp_nl_cmd_tunnel_create, + .policy = l2tp_nl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = L2TP_CMD_TUNNEL_DELETE, + .doit = l2tp_nl_cmd_tunnel_delete, + .policy = l2tp_nl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = L2TP_CMD_TUNNEL_MODIFY, + .doit = l2tp_nl_cmd_tunnel_modify, + .policy = l2tp_nl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = L2TP_CMD_TUNNEL_GET, + .doit = l2tp_nl_cmd_tunnel_get, + .dumpit = l2tp_nl_cmd_tunnel_dump, + .policy = l2tp_nl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = L2TP_CMD_SESSION_CREATE, + .doit = l2tp_nl_cmd_session_create, + .policy = l2tp_nl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = L2TP_CMD_SESSION_DELETE, + .doit = l2tp_nl_cmd_session_delete, + .policy = l2tp_nl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = L2TP_CMD_SESSION_MODIFY, + .doit = l2tp_nl_cmd_session_modify, + .policy = l2tp_nl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = L2TP_CMD_SESSION_GET, + .doit = l2tp_nl_cmd_session_get, + .dumpit = l2tp_nl_cmd_session_dump, + .policy = l2tp_nl_policy, + .flags = GENL_ADMIN_PERM, + }, +}; + +int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops) +{ + int ret; + + ret = -EINVAL; + if (pw_type >= __L2TP_PWTYPE_MAX) + goto err; + + genl_lock(); + ret = -EBUSY; + if (l2tp_nl_cmd_ops[pw_type]) + goto out; + + l2tp_nl_cmd_ops[pw_type] = ops; + +out: + genl_unlock(); +err: + return 0; +} +EXPORT_SYMBOL_GPL(l2tp_nl_register_ops); + +void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type) +{ + if (pw_type < __L2TP_PWTYPE_MAX) { + genl_lock(); + l2tp_nl_cmd_ops[pw_type] = NULL; + genl_unlock(); + } +} +EXPORT_SYMBOL_GPL(l2tp_nl_unregister_ops); + +static int l2tp_nl_init(void) +{ + int err; + + printk(KERN_INFO "L2TP netlink interface\n"); + err = genl_register_family_with_ops(&l2tp_nl_family, l2tp_nl_ops, + ARRAY_SIZE(l2tp_nl_ops)); + + return err; +} + +static void l2tp_nl_cleanup(void) +{ + 
genl_unregister_family(&l2tp_nl_family); +} + +module_init(l2tp_nl_init); +module_exit(l2tp_nl_cleanup); + +MODULE_AUTHOR("James Chapman "); +MODULE_DESCRIPTION("L2TP netlink"); +MODULE_LICENSE("GPL"); +MODULE_VERSION("1.0"); +MODULE_ALIAS("net-pf-" __stringify(PF_NETLINK) "-proto-" \ + __stringify(NETLINK_GENERIC) "-type-" "l2tp") diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 63fc62baeeb9..d64f081f2b1c 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -87,6 +87,7 @@ #include #include #include +#include #include #include #include @@ -656,17 +657,23 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, if (tunnel_id == 0) goto end; + tunnel = l2tp_tunnel_find(sock_net(sk), tunnel_id); + /* Special case: create tunnel context if session_id and * peer_session_id is 0. Otherwise look up tunnel using supplied * tunnel id. */ if ((session_id == 0) && (peer_session_id == 0)) { - error = l2tp_tunnel_create(sock_net(sk), fd, ver, tunnel_id, peer_tunnel_id, NULL, &tunnel); - if (error < 0) - goto end; + if (tunnel == NULL) { + struct l2tp_tunnel_cfg tcfg = { + .encap = L2TP_ENCAPTYPE_UDP, + .debug = 0, + }; + error = l2tp_tunnel_create(sock_net(sk), fd, ver, tunnel_id, peer_tunnel_id, &tcfg, &tunnel); + if (error < 0) + goto end; + } } else { - tunnel = l2tp_tunnel_find(sock_net(sk), tunnel_id); - /* Error if we can't find the tunnel */ error = -ENOENT; if (tunnel == NULL) @@ -680,28 +687,46 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, if (tunnel->recv_payload_hook == NULL) tunnel->recv_payload_hook = pppol2tp_recv_payload_hook; - /* Check that this session doesn't already exist */ - error = -EEXIST; - session = l2tp_session_find(sock_net(sk), tunnel, session_id); - if (session != NULL) - goto end; - - /* Default MTU values. */ - if (cfg.mtu == 0) - cfg.mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD; - if (cfg.mru == 0) - cfg.mru = cfg.mtu; - cfg.debug = tunnel->debug; + if (tunnel->peer_tunnel_id == 0) { + if (ver == 2) + tunnel->peer_tunnel_id = sp->pppol2tp.d_tunnel; + else + tunnel->peer_tunnel_id = sp3->pppol2tp.d_tunnel; + } - /* Allocate and initialize a new session context. */ - session = l2tp_session_create(sizeof(struct pppol2tp_session), - tunnel, session_id, - peer_session_id, &cfg); + /* Create session if it doesn't already exist. We handle the + * case where a session was previously created by the netlink + * interface by checking that the session doesn't already have + * a socket and its tunnel socket are what we expect. If any + * of those checks fail, return EEXIST to the caller. + */ + session = l2tp_session_find(sock_net(sk), tunnel, session_id); if (session == NULL) { - error = -ENOMEM; - goto end; + /* Default MTU must allow space for UDP/L2TP/PPP + * headers. + */ + cfg.mtu = cfg.mru = 1500 - PPPOL2TP_HEADER_OVERHEAD; + + /* Allocate and initialize a new session context. */ + session = l2tp_session_create(sizeof(struct pppol2tp_session), + tunnel, session_id, + peer_session_id, &cfg); + if (session == NULL) { + error = -ENOMEM; + goto end; + } + } else { + ps = l2tp_session_priv(session); + error = -EEXIST; + if (ps->sock != NULL) + goto end; + + /* consistency checks */ + if (ps->tunnel_sock != tunnel->sock) + goto end; } + /* Associate session with its PPPoL2TP socket */ ps = l2tp_session_priv(session); ps->owner = current->pid; ps->sock = sk; @@ -764,6 +789,74 @@ end: return error; } +#ifdef CONFIG_L2TP_V3 + +/* Called when creating sessions via the netlink interface. 
+ */ +static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) +{ + int error; + struct l2tp_tunnel *tunnel; + struct l2tp_session *session; + struct pppol2tp_session *ps; + + tunnel = l2tp_tunnel_find(net, tunnel_id); + + /* Error if we can't find the tunnel */ + error = -ENOENT; + if (tunnel == NULL) + goto out; + + /* Error if tunnel socket is not prepped */ + if (tunnel->sock == NULL) + goto out; + + /* Check that this session doesn't already exist */ + error = -EEXIST; + session = l2tp_session_find(net, tunnel, session_id); + if (session != NULL) + goto out; + + /* Default MTU values. */ + if (cfg->mtu == 0) + cfg->mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD; + if (cfg->mru == 0) + cfg->mru = cfg->mtu; + + /* Allocate and initialize a new session context. */ + error = -ENOMEM; + session = l2tp_session_create(sizeof(struct pppol2tp_session), + tunnel, session_id, + peer_session_id, cfg); + if (session == NULL) + goto out; + + ps = l2tp_session_priv(session); + ps->tunnel_sock = tunnel->sock; + + PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, + "%s: created\n", session->name); + + error = 0; + +out: + return error; +} + +/* Called when deleting sessions via the netlink interface. + */ +static int pppol2tp_session_delete(struct l2tp_session *session) +{ + struct pppol2tp_session *ps = l2tp_session_priv(session); + + if (ps->sock == NULL) + l2tp_session_dec_refcount(session); + + return 0; +} + +#endif /* CONFIG_L2TP_V3 */ + /* getname() support. */ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, @@ -1660,6 +1753,15 @@ static struct pppox_proto pppol2tp_proto = { .ioctl = pppol2tp_ioctl }; +#ifdef CONFIG_L2TP_V3 + +static const struct l2tp_nl_cmd_ops pppol2tp_nl_cmd_ops = { + .session_create = pppol2tp_session_create, + .session_delete = pppol2tp_session_delete, +}; + +#endif /* CONFIG_L2TP_V3 */ + static int __init pppol2tp_init(void) { int err; @@ -1676,11 +1778,22 @@ static int __init pppol2tp_init(void) if (err) goto out_unregister_pppol2tp_proto; +#ifdef CONFIG_L2TP_V3 + err = l2tp_nl_register_ops(L2TP_PWTYPE_PPP, &pppol2tp_nl_cmd_ops); + if (err) + goto out_unregister_pppox; +#endif + printk(KERN_INFO "PPPoL2TP kernel driver, %s\n", PPPOL2TP_DRV_VERSION); out: return err; + +#ifdef CONFIG_L2TP_V3 +out_unregister_pppox: + unregister_pppox_proto(PX_PROTO_OL2TP); +#endif out_unregister_pppol2tp_proto: proto_unregister(&pppol2tp_sk_proto); out_unregister_pppol2tp_pernet: @@ -1690,6 +1803,9 @@ out_unregister_pppol2tp_pernet: static void __exit pppol2tp_exit(void) { +#ifdef CONFIG_L2TP_V3 + l2tp_nl_unregister_ops(L2TP_PWTYPE_PPP); +#endif unregister_pppox_proto(PX_PROTO_OL2TP); proto_unregister(&pppol2tp_sk_proto); unregister_pernet_device(&pppol2tp_net_ops); -- cgit v1.2.2 From e02d494d2c60746ee6583132904ac1791f5bc9a6 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Fri, 2 Apr 2010 06:19:16 +0000 Subject: l2tp: Convert rwlock to RCU Reader/write locks are discouraged because they are slower than spin locks. So this patch converts the rwlocks used in the per_net structs to rcu. Signed-off-by: James Chapman Signed-off-by: David S. 
Miller --- net/l2tp/l2tp_core.c | 78 ++++++++++++++++++++++++++++------------------------ 1 file changed, 42 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index fbd1f2119fe9..473cf2d63905 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -105,9 +106,9 @@ static atomic_t l2tp_session_count; static unsigned int l2tp_net_id; struct l2tp_net { struct list_head l2tp_tunnel_list; - rwlock_t l2tp_tunnel_list_lock; + spinlock_t l2tp_tunnel_list_lock; struct hlist_head l2tp_session_hlist[L2TP_HASH_SIZE_2]; - rwlock_t l2tp_session_hlist_lock; + spinlock_t l2tp_session_hlist_lock; }; static inline struct l2tp_net *l2tp_pernet(struct net *net) @@ -139,14 +140,14 @@ static struct l2tp_session *l2tp_session_find_2(struct net *net, u32 session_id) struct l2tp_session *session; struct hlist_node *walk; - read_lock_bh(&pn->l2tp_session_hlist_lock); - hlist_for_each_entry(session, walk, session_list, global_hlist) { + rcu_read_lock_bh(); + hlist_for_each_entry_rcu(session, walk, session_list, global_hlist) { if (session->session_id == session_id) { - read_unlock_bh(&pn->l2tp_session_hlist_lock); + rcu_read_unlock_bh(); return session; } } - read_unlock_bh(&pn->l2tp_session_hlist_lock); + rcu_read_unlock_bh(); return NULL; } @@ -225,17 +226,17 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname) struct hlist_node *walk; struct l2tp_session *session; - read_lock_bh(&pn->l2tp_session_hlist_lock); + rcu_read_lock_bh(); for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) { - hlist_for_each_entry(session, walk, &pn->l2tp_session_hlist[hash], global_hlist) { + hlist_for_each_entry_rcu(session, walk, &pn->l2tp_session_hlist[hash], global_hlist) { if (!strcmp(session->ifname, ifname)) { - read_unlock_bh(&pn->l2tp_session_hlist_lock); + rcu_read_unlock_bh(); return session; } } } - read_unlock_bh(&pn->l2tp_session_hlist_lock); + rcu_read_unlock_bh(); return NULL; } @@ -248,14 +249,14 @@ struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id) struct l2tp_tunnel *tunnel; struct l2tp_net *pn = l2tp_pernet(net); - read_lock_bh(&pn->l2tp_tunnel_list_lock); - list_for_each_entry(tunnel, &pn->l2tp_tunnel_list, list) { + rcu_read_lock_bh(); + list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { if (tunnel->tunnel_id == tunnel_id) { - read_unlock_bh(&pn->l2tp_tunnel_list_lock); + rcu_read_unlock_bh(); return tunnel; } } - read_unlock_bh(&pn->l2tp_tunnel_list_lock); + rcu_read_unlock_bh(); return NULL; } @@ -267,15 +268,15 @@ struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth) struct l2tp_tunnel *tunnel; int count = 0; - read_lock_bh(&pn->l2tp_tunnel_list_lock); - list_for_each_entry(tunnel, &pn->l2tp_tunnel_list, list) { + rcu_read_lock_bh(); + list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { if (++count > nth) { - read_unlock_bh(&pn->l2tp_tunnel_list_lock); + rcu_read_unlock_bh(); return tunnel; } } - read_unlock_bh(&pn->l2tp_tunnel_list_lock); + rcu_read_unlock_bh(); return NULL; } @@ -1167,9 +1168,10 @@ again: if (tunnel->version != L2TP_HDR_VER_2) { struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - write_lock_bh(&pn->l2tp_session_hlist_lock); - hlist_del_init(&session->global_hlist); - write_unlock_bh(&pn->l2tp_session_hlist_lock); + spin_lock_bh(&pn->l2tp_session_hlist_lock); + hlist_del_init_rcu(&session->global_hlist); + spin_unlock_bh(&pn->l2tp_session_hlist_lock); + synchronize_rcu(); } if 
(session->session_close != NULL) @@ -1206,9 +1208,10 @@ void l2tp_tunnel_free(struct l2tp_tunnel *tunnel) "%s: free...\n", tunnel->name); /* Remove from tunnel list */ - write_lock_bh(&pn->l2tp_tunnel_list_lock); - list_del_init(&tunnel->list); - write_unlock_bh(&pn->l2tp_tunnel_list_lock); + spin_lock_bh(&pn->l2tp_tunnel_list_lock); + list_del_rcu(&tunnel->list); + spin_unlock_bh(&pn->l2tp_tunnel_list_lock); + synchronize_rcu(); atomic_dec(&l2tp_tunnel_count); kfree(tunnel); @@ -1310,9 +1313,10 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 /* Add tunnel to our list */ INIT_LIST_HEAD(&tunnel->list); - write_lock_bh(&pn->l2tp_tunnel_list_lock); - list_add(&tunnel->list, &pn->l2tp_tunnel_list); - write_unlock_bh(&pn->l2tp_tunnel_list_lock); + spin_lock_bh(&pn->l2tp_tunnel_list_lock); + list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list); + spin_unlock_bh(&pn->l2tp_tunnel_list_lock); + synchronize_rcu(); atomic_inc(&l2tp_tunnel_count); /* Bump the reference count. The tunnel context is deleted @@ -1370,9 +1374,10 @@ void l2tp_session_free(struct l2tp_session *session) if (tunnel->version != L2TP_HDR_VER_2) { struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - write_lock_bh(&pn->l2tp_session_hlist_lock); - hlist_del_init(&session->global_hlist); - write_unlock_bh(&pn->l2tp_session_hlist_lock); + spin_lock_bh(&pn->l2tp_session_hlist_lock); + hlist_del_init_rcu(&session->global_hlist); + spin_unlock_bh(&pn->l2tp_session_hlist_lock); + synchronize_rcu(); } if (session->session_id != 0) @@ -1494,10 +1499,11 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn if (tunnel->version != L2TP_HDR_VER_2) { struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - write_lock_bh(&pn->l2tp_session_hlist_lock); - hlist_add_head(&session->global_hlist, - l2tp_session_id_hash_2(pn, session_id)); - write_unlock_bh(&pn->l2tp_session_hlist_lock); + spin_lock_bh(&pn->l2tp_session_hlist_lock); + hlist_add_head_rcu(&session->global_hlist, + l2tp_session_id_hash_2(pn, session_id)); + spin_unlock_bh(&pn->l2tp_session_hlist_lock); + synchronize_rcu(); } /* Ignore management session in session count value */ @@ -1524,12 +1530,12 @@ static __net_init int l2tp_init_net(struct net *net) return -ENOMEM; INIT_LIST_HEAD(&pn->l2tp_tunnel_list); - rwlock_init(&pn->l2tp_tunnel_list_lock); + spin_lock_init(&pn->l2tp_tunnel_list_lock); for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) INIT_HLIST_HEAD(&pn->l2tp_session_hlist[hash]); - rwlock_init(&pn->l2tp_session_hlist_lock); + spin_lock_init(&pn->l2tp_session_hlist_lock); err = net_assign_generic(net, l2tp_net_id, pn); if (err) -- cgit v1.2.2 From d9e31d17ceba5f0736f5a34bbc236239cd42b420 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Fri, 2 Apr 2010 06:19:26 +0000 Subject: l2tp: Add L2TP ethernet pseudowire support This driver presents a regular net_device for each L2TP ethernet pseudowire instance. These interfaces are named l2tpethN by default, though userspace can specify an alternative name when the L2TP session is created, if preferred. When the pseudowire is established, regular Linux networking utilities may be used to configure the interface, i.e. give it IP address info or add it to a bridge. Any data passed over the interface is carried over an L2TP tunnel. Signed-off-by: James Chapman Reviewed-by: Randy Dunlap Signed-off-by: David S. 
Miller --- net/l2tp/Kconfig | 24 ++++ net/l2tp/Makefile | 1 + net/l2tp/l2tp_eth.c | 347 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 372 insertions(+) create mode 100644 net/l2tp/l2tp_eth.c (limited to 'net') diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig index 0a11ccf2d95b..a292270c0abe 100644 --- a/net/l2tp/Kconfig +++ b/net/l2tp/Kconfig @@ -68,3 +68,27 @@ config L2TP_IP To compile this driver as a module, choose M here. The module will be called l2tp_ip. + +config L2TP_ETH + tristate "L2TP ethernet pseudowire support for L2TPv3" + depends on L2TP_V3 + help + Support for carrying raw ethernet frames over L2TPv3. + + From RFC 4719 . + + The Layer 2 Tunneling Protocol, Version 3 (L2TPv3) can be + used as a control protocol and for data encapsulation to set + up Pseudowires for transporting layer 2 Packet Data Units + across an IP network [RFC3931]. + + This driver provides an ethernet virtual interface for each + L2TP ethernet pseudowire instance. Standard Linux tools may + be used to assign an IP address to the local virtual + interface, or add the interface to a bridge. + + If you are using L2TPv3, you will almost certainly want to + enable this option. + + To compile this driver as a module, choose M here. The module + will be called l2tp_eth. diff --git a/net/l2tp/Makefile b/net/l2tp/Makefile index 2c4a14b673ab..bddbf04f0ed3 100644 --- a/net/l2tp/Makefile +++ b/net/l2tp/Makefile @@ -8,3 +8,4 @@ obj-$(CONFIG_L2TP) += l2tp_core.o obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_PPPOL2TP)) += l2tp_ppp.o obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_IP)) += l2tp_ip.o obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_V3)) += l2tp_netlink.o +obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_ETH)) += l2tp_eth.o diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c new file mode 100644 index 000000000000..755c29729b6f --- /dev/null +++ b/net/l2tp/l2tp_eth.c @@ -0,0 +1,347 @@ +/* + * L2TPv3 ethernet pseudowire driver + * + * Copyright (c) 2008,2009,2010 Katalix Systems Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "l2tp_core.h" + +/* Default device name. 
May be overridden by name specified by user */ +#define L2TP_ETH_DEV_NAME "l2tpeth%d" + +/* via netdev_priv() */ +struct l2tp_eth { + struct net_device *dev; + struct sock *tunnel_sock; + struct l2tp_session *session; + struct list_head list; +}; + +/* via l2tp_session_priv() */ +struct l2tp_eth_sess { + struct net_device *dev; +}; + +/* per-net private data for this module */ +static unsigned int l2tp_eth_net_id; +struct l2tp_eth_net { + struct list_head l2tp_eth_dev_list; + spinlock_t l2tp_eth_lock; +}; + +static inline struct l2tp_eth_net *l2tp_eth_pernet(struct net *net) +{ + return net_generic(net, l2tp_eth_net_id); +} + +static int l2tp_eth_dev_init(struct net_device *dev) +{ + struct l2tp_eth *priv = netdev_priv(dev); + + priv->dev = dev; + random_ether_addr(dev->dev_addr); + memset(&dev->broadcast[0], 0xff, 6); + + return 0; +} + +static void l2tp_eth_dev_uninit(struct net_device *dev) +{ + struct l2tp_eth *priv = netdev_priv(dev); + struct l2tp_eth_net *pn = l2tp_eth_pernet(dev_net(dev)); + + spin_lock(&pn->l2tp_eth_lock); + list_del_init(&priv->list); + spin_unlock(&pn->l2tp_eth_lock); + dev_put(dev); +} + +static int l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct l2tp_eth *priv = netdev_priv(dev); + struct l2tp_session *session = priv->session; + + l2tp_xmit_skb(session, skb, session->hdr_len); + + dev->stats.tx_bytes += skb->len; + dev->stats.tx_packets++; + + return 0; +} + +static struct net_device_ops l2tp_eth_netdev_ops = { + .ndo_init = l2tp_eth_dev_init, + .ndo_uninit = l2tp_eth_dev_uninit, + .ndo_start_xmit = l2tp_eth_dev_xmit, +}; + +static void l2tp_eth_dev_setup(struct net_device *dev) +{ + ether_setup(dev); + + dev->netdev_ops = &l2tp_eth_netdev_ops; + dev->destructor = free_netdev; +} + +static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len) +{ + struct l2tp_eth_sess *spriv = l2tp_session_priv(session); + struct net_device *dev = spriv->dev; + + if (session->debug & L2TP_MSG_DATA) { + unsigned int length; + int offset; + u8 *ptr = skb->data; + + length = min(32u, skb->len); + if (!pskb_may_pull(skb, length)) + goto error; + + printk(KERN_DEBUG "%s: eth recv: ", session->name); + + offset = 0; + do { + printk(" %02X", ptr[offset]); + } while (++offset < length); + + printk("\n"); + } + + if (data_len < ETH_HLEN) + goto error; + + secpath_reset(skb); + + /* checksums verified by L2TP */ + skb->ip_summed = CHECKSUM_NONE; + + skb_dst_drop(skb); + nf_reset(skb); + + if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) { + dev->last_rx = jiffies; + dev->stats.rx_packets++; + dev->stats.rx_bytes += data_len; + } else + dev->stats.rx_errors++; + + return; + +error: + dev->stats.rx_errors++; + kfree_skb(skb); +} + +static void l2tp_eth_delete(struct l2tp_session *session) +{ + struct l2tp_eth_sess *spriv; + struct net_device *dev; + + if (session) { + spriv = l2tp_session_priv(session); + dev = spriv->dev; + if (dev) { + unregister_netdev(dev); + spriv->dev = NULL; + } + } +} + +static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) +{ + struct net_device *dev; + char name[IFNAMSIZ]; + struct l2tp_tunnel *tunnel; + struct l2tp_session *session; + struct l2tp_eth *priv; + struct l2tp_eth_sess *spriv; + int rc; + struct l2tp_eth_net *pn; + + tunnel = l2tp_tunnel_find(net, tunnel_id); + if (!tunnel) { + rc = -ENODEV; + goto out; + } + + session = l2tp_session_find(net, tunnel, session_id); + if (session) { + rc = -EEXIST; + goto out; + } + + if 
(cfg->ifname) { + dev = dev_get_by_name(net, cfg->ifname); + if (dev) { + dev_put(dev); + rc = -EEXIST; + goto out; + } + strlcpy(name, cfg->ifname, IFNAMSIZ); + } else + strcpy(name, L2TP_ETH_DEV_NAME); + + session = l2tp_session_create(sizeof(*spriv), tunnel, session_id, + peer_session_id, cfg); + if (!session) { + rc = -ENOMEM; + goto out; + } + + dev = alloc_netdev(sizeof(*priv), name, l2tp_eth_dev_setup); + if (!dev) { + rc = -ENOMEM; + goto out_del_session; + } + + dev_net_set(dev, net); + if (session->mtu == 0) + session->mtu = dev->mtu - session->hdr_len; + dev->mtu = session->mtu; + dev->needed_headroom += session->hdr_len; + + priv = netdev_priv(dev); + priv->dev = dev; + priv->session = session; + INIT_LIST_HEAD(&priv->list); + + priv->tunnel_sock = tunnel->sock; + session->recv_skb = l2tp_eth_dev_recv; + session->session_close = l2tp_eth_delete; + + spriv = l2tp_session_priv(session); + spriv->dev = dev; + + rc = register_netdev(dev); + if (rc < 0) + goto out_del_dev; + + /* Must be done after register_netdev() */ + strlcpy(session->ifname, dev->name, IFNAMSIZ); + + dev_hold(dev); + pn = l2tp_eth_pernet(dev_net(dev)); + spin_lock(&pn->l2tp_eth_lock); + list_add(&priv->list, &pn->l2tp_eth_dev_list); + spin_unlock(&pn->l2tp_eth_lock); + + return 0; + +out_del_dev: + free_netdev(dev); +out_del_session: + l2tp_session_delete(session); +out: + return rc; +} + +static __net_init int l2tp_eth_init_net(struct net *net) +{ + struct l2tp_eth_net *pn; + int err; + + pn = kzalloc(sizeof(*pn), GFP_KERNEL); + if (!pn) + return -ENOMEM; + + INIT_LIST_HEAD(&pn->l2tp_eth_dev_list); + spin_lock_init(&pn->l2tp_eth_lock); + + err = net_assign_generic(net, l2tp_eth_net_id, pn); + if (err) + goto out; + + return 0; + +out: + kfree(pn); + return err; +} + +static __net_exit void l2tp_eth_exit_net(struct net *net) +{ + struct l2tp_eth_net *pn; + + pn = net_generic(net, l2tp_eth_net_id); + /* + * if someone has cached our net then + * further net_generic call will return NULL + */ + net_assign_generic(net, l2tp_eth_net_id, NULL); + kfree(pn); +} + +static __net_initdata struct pernet_operations l2tp_eth_net_ops = { + .init = l2tp_eth_init_net, + .exit = l2tp_eth_exit_net, + .id = &l2tp_eth_net_id, + .size = sizeof(struct l2tp_eth_net), +}; + + +static const struct l2tp_nl_cmd_ops l2tp_eth_nl_cmd_ops = { + .session_create = l2tp_eth_create, + .session_delete = l2tp_session_delete, +}; + + +static int __init l2tp_eth_init(void) +{ + int err = 0; + + err = l2tp_nl_register_ops(L2TP_PWTYPE_ETH, &l2tp_eth_nl_cmd_ops); + if (err) + goto out; + + err = register_pernet_device(&l2tp_eth_net_ops); + if (err) + goto out_unreg; + + printk(KERN_INFO "L2TP ethernet pseudowire support (L2TPv3)\n"); + + return 0; + +out_unreg: + l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH); +out: + return err; +} + +static void __exit l2tp_eth_exit(void) +{ + unregister_pernet_device(&l2tp_eth_net_ops); + l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH); +} + +module_init(l2tp_eth_init); +module_exit(l2tp_eth_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("James Chapman "); +MODULE_DESCRIPTION("L2TP ethernet pseudowire driver"); +MODULE_VERSION("1.0"); -- cgit v1.2.2 From 0ad6614048cf722e4d27909665b4846805357f1b Mon Sep 17 00:00:00 2001 From: James Chapman Date: Fri, 2 Apr 2010 06:19:33 +0000 Subject: l2tp: Add debugfs files for dumping l2tp debug info The existing pppol2tp driver exports debug info to /proc/net/pppol2tp. 
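For reference, the new l2tp/tunnels file described below is consumed like any other debugfs seq_file; a minimal usage sketch (assuming debugfs is mounted at its conventional /sys/kernel/debug location) is:

/* Hypothetical usage sketch -- dump the l2tp/tunnels debugfs file added by
 * this patch.  Assumes debugfs is mounted at /sys/kernel/debug.
 */
#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/sys/kernel/debug/l2tp/tunnels", "r");

	if (!f) {
		perror("l2tp/tunnels");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);    /* one block per tunnel, followed by its sessions */
	fclose(f);
	return 0;
}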
Rather than adding info to that file for the new functionality added in this patch series, we add new files in debugfs, leaving the old /proc file for backwards compatibility (L2TPv2 only). Currently only one file is provided: l2tp/tunnels, which lists internal debug info for all l2tp tunnels and sessions. More files may be added later. The info is for debug and problem analysis only - userspace apps should use netlink to obtain status about l2tp tunnels and sessions. Although debugfs does not support net namespaces, the tunnels and sessions dumped in l2tp/tunnels are only those in the net namespace of the process reading the file. Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/Kconfig | 13 ++ net/l2tp/Makefile | 1 + net/l2tp/l2tp_core.h | 8 +- net/l2tp/l2tp_debugfs.c | 341 ++++++++++++++++++++++++++++++++++++++++++++++++ net/l2tp/l2tp_eth.c | 14 ++ net/l2tp/l2tp_ppp.c | 17 +++ 6 files changed, 392 insertions(+), 2 deletions(-) create mode 100644 net/l2tp/l2tp_debugfs.c (limited to 'net') diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig index a292270c0abe..4b1e71751e10 100644 --- a/net/l2tp/Kconfig +++ b/net/l2tp/Kconfig @@ -31,6 +31,19 @@ menuconfig L2TP If you don't need L2TP, say N. To compile all L2TP code as modules, choose M here. +config L2TP_DEBUGFS + tristate "L2TP debugfs support" + depends on L2TP && DEBUG_FS + help + Support for l2tp directory in debugfs filesystem. This may be + used to dump internal state of the l2tp drivers for problem + analysis. + + If unsure, say 'Y'. + + To compile this driver as a module, choose M here. The module + will be called l2tp_debugfs. + config L2TP_V3 bool "L2TPv3 support (EXPERIMENTAL)" depends on EXPERIMENTAL && L2TP diff --git a/net/l2tp/Makefile b/net/l2tp/Makefile index bddbf04f0ed3..110e7bc2de5e 100644 --- a/net/l2tp/Makefile +++ b/net/l2tp/Makefile @@ -9,3 +9,4 @@ obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_PPPOL2TP)) += l2tp_ppp.o obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_IP)) += l2tp_ip.o obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_V3)) += l2tp_netlink.o obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_ETH)) += l2tp_eth.o +obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_DEBUGFS)) += l2tp_debugfs.o diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 2974d9ade167..571335530c6f 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -133,7 +133,9 @@ struct l2tp_session { void (*session_close)(struct l2tp_session *session); void (*ref)(struct l2tp_session *session); void (*deref)(struct l2tp_session *session); - +#ifdef CONFIG_L2TP_DEBUGFS + void (*show)(struct seq_file *m, void *priv); +#endif uint8_t priv[0]; /* private data */ }; @@ -166,7 +168,9 @@ struct l2tp_tunnel { struct net *l2tp_net; /* the net we belong to */ atomic_t ref_count; - +#ifdef CONFIG_DEBUG_FS + void (*show)(struct seq_file *m, void *arg); +#endif int (*recv_payload_hook)(struct sk_buff *skb); void (*old_sk_destruct)(struct sock *); struct sock *sock; /* Parent socket */ diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c new file mode 100644 index 000000000000..908f10f9720e --- /dev/null +++ b/net/l2tp/l2tp_debugfs.c @@ -0,0 +1,341 @@ +/* + * L2TP subsystem debugfs + * + * Copyright (c) 2010 Katalix Systems Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "l2tp_core.h" + +static struct dentry *rootdir; +static struct dentry *tunnels; + +struct l2tp_dfs_seq_data { + struct net *net; + int tunnel_idx; /* current tunnel */ + int session_idx; /* index of session within current tunnel */ + struct l2tp_tunnel *tunnel; + struct l2tp_session *session; /* NULL means get next tunnel */ +}; + +static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd) +{ + pd->tunnel = l2tp_tunnel_find_nth(pd->net, pd->tunnel_idx); + pd->tunnel_idx++; +} + +static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd) +{ + pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx); + pd->session_idx++; + + if (pd->session == NULL) { + pd->session_idx = 0; + l2tp_dfs_next_tunnel(pd); + } + +} + +static void *l2tp_dfs_seq_start(struct seq_file *m, loff_t *offs) +{ + struct l2tp_dfs_seq_data *pd = SEQ_START_TOKEN; + loff_t pos = *offs; + + if (!pos) + goto out; + + BUG_ON(m->private == NULL); + pd = m->private; + + if (pd->tunnel == NULL) + l2tp_dfs_next_tunnel(pd); + else + l2tp_dfs_next_session(pd); + + /* NULL tunnel and session indicates end of list */ + if ((pd->tunnel == NULL) && (pd->session == NULL)) + pd = NULL; + +out: + return pd; +} + + +static void *l2tp_dfs_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + (*pos)++; + return NULL; +} + +static void l2tp_dfs_seq_stop(struct seq_file *p, void *v) +{ + /* nothing to do */ +} + +static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v) +{ + struct l2tp_tunnel *tunnel = v; + int session_count = 0; + int hash; + struct hlist_node *walk; + struct hlist_node *tmp; + + read_lock_bh(&tunnel->hlist_lock); + for (hash = 0; hash < L2TP_HASH_SIZE; hash++) { + hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[hash]) { + struct l2tp_session *session; + + session = hlist_entry(walk, struct l2tp_session, hlist); + if (session->session_id == 0) + continue; + + session_count++; + } + } + read_unlock_bh(&tunnel->hlist_lock); + + seq_printf(m, "\nTUNNEL %u peer %u", tunnel->tunnel_id, tunnel->peer_tunnel_id); + if (tunnel->sock) { + struct inet_sock *inet = inet_sk(tunnel->sock); + seq_printf(m, " from " NIPQUAD_FMT " to " NIPQUAD_FMT "\n", + NIPQUAD(inet->inet_saddr), NIPQUAD(inet->inet_daddr)); + if (tunnel->encap == L2TP_ENCAPTYPE_UDP) + seq_printf(m, " source port %hu, dest port %hu\n", + ntohs(inet->inet_sport), ntohs(inet->inet_dport)); + } + seq_printf(m, " L2TPv%d, %s\n", tunnel->version, + tunnel->encap == L2TP_ENCAPTYPE_UDP ? "UDP" : + tunnel->encap == L2TP_ENCAPTYPE_IP ? "IP" : + ""); + seq_printf(m, " %d sessions, refcnt %d/%d\n", session_count, + tunnel->sock ? 
atomic_read(&tunnel->sock->sk_refcnt) : 0, + atomic_read(&tunnel->ref_count)); + + seq_printf(m, " %08x rx %llu/%llu/%llu rx %llu/%llu/%llu\n", + tunnel->debug, + (unsigned long long)tunnel->stats.tx_packets, + (unsigned long long)tunnel->stats.tx_bytes, + (unsigned long long)tunnel->stats.tx_errors, + (unsigned long long)tunnel->stats.rx_packets, + (unsigned long long)tunnel->stats.rx_bytes, + (unsigned long long)tunnel->stats.rx_errors); + + if (tunnel->show != NULL) + tunnel->show(m, tunnel); +} + +static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v) +{ + struct l2tp_session *session = v; + + seq_printf(m, " SESSION %u, peer %u, %s\n", session->session_id, + session->peer_session_id, + session->pwtype == L2TP_PWTYPE_ETH ? "ETH" : + session->pwtype == L2TP_PWTYPE_PPP ? "PPP" : + ""); + if (session->send_seq || session->recv_seq) + seq_printf(m, " nr %hu, ns %hu\n", session->nr, session->ns); + seq_printf(m, " refcnt %d\n", atomic_read(&session->ref_count)); + seq_printf(m, " config %d/%d/%c/%c/%s/%s %08x %u\n", + session->mtu, session->mru, + session->recv_seq ? 'R' : '-', + session->send_seq ? 'S' : '-', + session->data_seq == 1 ? "IPSEQ" : + session->data_seq == 2 ? "DATASEQ" : "-", + session->lns_mode ? "LNS" : "LAC", + session->debug, + jiffies_to_msecs(session->reorder_timeout)); + seq_printf(m, " offset %hu l2specific %hu/%hu\n", + session->offset, session->l2specific_type, session->l2specific_len); + if (session->cookie_len) { + seq_printf(m, " cookie %02x%02x%02x%02x", + session->cookie[0], session->cookie[1], + session->cookie[2], session->cookie[3]); + if (session->cookie_len == 8) + seq_printf(m, "%02x%02x%02x%02x", + session->cookie[4], session->cookie[5], + session->cookie[6], session->cookie[7]); + seq_printf(m, "\n"); + } + if (session->peer_cookie_len) { + seq_printf(m, " peer cookie %02x%02x%02x%02x", + session->peer_cookie[0], session->peer_cookie[1], + session->peer_cookie[2], session->peer_cookie[3]); + if (session->peer_cookie_len == 8) + seq_printf(m, "%02x%02x%02x%02x", + session->peer_cookie[4], session->peer_cookie[5], + session->peer_cookie[6], session->peer_cookie[7]); + seq_printf(m, "\n"); + } + + seq_printf(m, " %hu/%hu tx %llu/%llu/%llu rx %llu/%llu/%llu\n", + session->nr, session->ns, + (unsigned long long)session->stats.tx_packets, + (unsigned long long)session->stats.tx_bytes, + (unsigned long long)session->stats.tx_errors, + (unsigned long long)session->stats.rx_packets, + (unsigned long long)session->stats.rx_bytes, + (unsigned long long)session->stats.rx_errors); + + if (session->show != NULL) + session->show(m, session); +} + +static int l2tp_dfs_seq_show(struct seq_file *m, void *v) +{ + struct l2tp_dfs_seq_data *pd = v; + + /* display header on line 1 */ + if (v == SEQ_START_TOKEN) { + seq_puts(m, "TUNNEL ID, peer ID from IP to IP\n"); + seq_puts(m, " L2TPv2/L2TPv3, UDP/IP\n"); + seq_puts(m, " sessions session-count, refcnt refcnt/sk->refcnt\n"); + seq_puts(m, " debug tx-pkts/bytes/errs rx-pkts/bytes/errs\n"); + seq_puts(m, " SESSION ID, peer ID, PWTYPE\n"); + seq_puts(m, " refcnt cnt\n"); + seq_puts(m, " offset OFFSET l2specific TYPE/LEN\n"); + seq_puts(m, " [ cookie ]\n"); + seq_puts(m, " [ peer cookie ]\n"); + seq_puts(m, " config mtu/mru/rcvseq/sendseq/dataseq/lns debug reorderto\n"); + seq_puts(m, " nr/ns tx-pkts/bytes/errs rx-pkts/bytes/errs\n"); + goto out; + } + + /* Show the tunnel or session context */ + if (pd->session == NULL) + l2tp_dfs_seq_tunnel_show(m, pd->tunnel); + else + l2tp_dfs_seq_session_show(m, pd->session); + 
+out: + return 0; +} + +static const struct seq_operations l2tp_dfs_seq_ops = { + .start = l2tp_dfs_seq_start, + .next = l2tp_dfs_seq_next, + .stop = l2tp_dfs_seq_stop, + .show = l2tp_dfs_seq_show, +}; + +static int l2tp_dfs_seq_open(struct inode *inode, struct file *file) +{ + struct l2tp_dfs_seq_data *pd; + struct seq_file *seq; + int rc = -ENOMEM; + + pd = kzalloc(GFP_KERNEL, sizeof(*pd)); + if (pd == NULL) + goto out; + + /* Derive the network namespace from the pid opening the + * file. + */ + pd->net = get_net_ns_by_pid(current->pid); + if (IS_ERR(pd->net)) { + rc = -PTR_ERR(pd->net); + goto err_free_pd; + } + + rc = seq_open(file, &l2tp_dfs_seq_ops); + if (rc) + goto err_free_net; + + seq = file->private_data; + seq->private = pd; + +out: + return rc; + +err_free_net: + put_net(pd->net); +err_free_pd: + kfree(pd); + goto out; +} + +static int l2tp_dfs_seq_release(struct inode *inode, struct file *file) +{ + struct l2tp_dfs_seq_data *pd; + struct seq_file *seq; + + seq = file->private_data; + pd = seq->private; + if (pd->net) + put_net(pd->net); + kfree(pd); + seq_release(inode, file); + + return 0; +} + +static const struct file_operations l2tp_dfs_fops = { + .owner = THIS_MODULE, + .open = l2tp_dfs_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = l2tp_dfs_seq_release, +}; + +static int __init l2tp_debugfs_init(void) +{ + int rc = 0; + + rootdir = debugfs_create_dir("l2tp", NULL); + if (IS_ERR(rootdir)) { + rc = PTR_ERR(rootdir); + rootdir = NULL; + goto out; + } + + tunnels = debugfs_create_file("tunnels", 0600, rootdir, NULL, &l2tp_dfs_fops); + if (tunnels == NULL) + rc = -EIO; + + printk(KERN_INFO "L2TP debugfs support\n"); + +out: + if (rc) + printk(KERN_WARNING "l2tp debugfs: unable to init\n"); + + return rc; +} + +static void __exit l2tp_debugfs_exit(void) +{ + debugfs_remove(tunnels); + debugfs_remove(rootdir); +} + +module_init(l2tp_debugfs_init); +module_exit(l2tp_debugfs_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("James Chapman "); +MODULE_DESCRIPTION("L2TP debugfs driver"); +MODULE_VERSION("1.0"); diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 755c29729b6f..9848faa3d163 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -172,6 +172,17 @@ static void l2tp_eth_delete(struct l2tp_session *session) } } +#ifdef CONFIG_L2TP_DEBUGFS +static void l2tp_eth_show(struct seq_file *m, void *arg) +{ + struct l2tp_session *session = arg; + struct l2tp_eth_sess *spriv = l2tp_session_priv(session); + struct net_device *dev = spriv->dev; + + seq_printf(m, " interface %s\n", dev->name); +} +#endif + static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) { struct net_device *dev; @@ -233,6 +244,9 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p priv->tunnel_sock = tunnel->sock; session->recv_skb = l2tp_eth_dev_recv; session->session_close = l2tp_eth_delete; +#ifdef CONFIG_L2TP_DEBUGFS + session->show = l2tp_eth_show; +#endif spriv = l2tp_session_priv(session); spriv->dev = dev; diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index d64f081f2b1c..1ef10e4118d2 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -597,6 +597,20 @@ out: return error; } +#ifdef CONFIG_L2TP_DEBUGFS +static void pppol2tp_show(struct seq_file *m, void *arg) +{ + struct l2tp_session *session = arg; + struct pppol2tp_session *ps = l2tp_session_priv(session); + + if (ps) { + struct pppox_sock *po = pppox_sk(ps->sock); + if (po) + seq_printf(m, " 
interface %s\n", ppp_dev_name(&po->chan)); + } +} +#endif + /* connect() handler. Attach a PPPoX socket to a tunnel UDP socket */ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, @@ -734,6 +748,9 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, session->recv_skb = pppol2tp_recv; session->session_close = pppol2tp_session_close; +#ifdef CONFIG_L2TP_DEBUGFS + session->show = pppol2tp_show; +#endif /* We need to know each time a skb is dropped from the reorder * queue. -- cgit v1.2.2 From 789a4a2c61d843df67988d69e7c3f3a4bca97e8e Mon Sep 17 00:00:00 2001 From: James Chapman Date: Fri, 2 Apr 2010 06:19:40 +0000 Subject: l2tp: Add support for static unmanaged L2TPv3 tunnels This patch adds support for static (unmanaged) L2TPv3 tunnels, where the tunnel socket is created by the kernel rather than being created by userspace. This means L2TP tunnels and sessions can be created manually, without needing an L2TP control protocol implemented in userspace. This might be useful where the user wants a simple ethernet over IP tunnel. A patch to iproute2 adds a new command set under "ip l2tp" to make use of this feature. This will be submitted separately. Signed-off-by: James Chapman Reviewed-by: Randy Dunlap Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 115 +++++++++++++++++++++++++++++++++++++++++++----- net/l2tp/l2tp_core.h | 7 +++ net/l2tp/l2tp_netlink.c | 18 ++++++-- 3 files changed, 126 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 473cf2d63905..13ed85baf4e9 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1218,6 +1218,82 @@ void l2tp_tunnel_free(struct l2tp_tunnel *tunnel) } EXPORT_SYMBOL_GPL(l2tp_tunnel_free); +/* Create a socket for the tunnel, if one isn't set up by + * userspace. This is used for static tunnels where there is no + * managing L2TP daemon. 
+ */ +static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct socket **sockp) +{ + int err = -EINVAL; + struct sockaddr_in udp_addr; + struct sockaddr_l2tpip ip_addr; + struct socket *sock; + + switch (cfg->encap) { + case L2TP_ENCAPTYPE_UDP: + err = sock_create(AF_INET, SOCK_DGRAM, 0, sockp); + if (err < 0) + goto out; + + sock = *sockp; + + memset(&udp_addr, 0, sizeof(udp_addr)); + udp_addr.sin_family = AF_INET; + udp_addr.sin_addr = cfg->local_ip; + udp_addr.sin_port = htons(cfg->local_udp_port); + err = kernel_bind(sock, (struct sockaddr *) &udp_addr, sizeof(udp_addr)); + if (err < 0) + goto out; + + udp_addr.sin_family = AF_INET; + udp_addr.sin_addr = cfg->peer_ip; + udp_addr.sin_port = htons(cfg->peer_udp_port); + err = kernel_connect(sock, (struct sockaddr *) &udp_addr, sizeof(udp_addr), 0); + if (err < 0) + goto out; + + if (!cfg->use_udp_checksums) + sock->sk->sk_no_check = UDP_CSUM_NOXMIT; + + break; + + case L2TP_ENCAPTYPE_IP: + err = sock_create(AF_INET, SOCK_DGRAM, IPPROTO_L2TP, sockp); + if (err < 0) + goto out; + + sock = *sockp; + + memset(&ip_addr, 0, sizeof(ip_addr)); + ip_addr.l2tp_family = AF_INET; + ip_addr.l2tp_addr = cfg->local_ip; + ip_addr.l2tp_conn_id = tunnel_id; + err = kernel_bind(sock, (struct sockaddr *) &ip_addr, sizeof(ip_addr)); + if (err < 0) + goto out; + + ip_addr.l2tp_family = AF_INET; + ip_addr.l2tp_addr = cfg->peer_ip; + ip_addr.l2tp_conn_id = peer_tunnel_id; + err = kernel_connect(sock, (struct sockaddr *) &ip_addr, sizeof(ip_addr), 0); + if (err < 0) + goto out; + + break; + + default: + goto out; + } + +out: + if ((err < 0) && sock) { + sock_release(sock); + *sockp = NULL; + } + + return err; +} + int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp) { struct l2tp_tunnel *tunnel = NULL; @@ -1228,14 +1304,21 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP; /* Get the tunnel socket from the fd, which was opened by - * the userspace L2TP daemon. + * the userspace L2TP daemon. If not specified, create a + * kernel socket. */ - err = -EBADF; - sock = sockfd_lookup(fd, &err); - if (!sock) { - printk(KERN_ERR "tunl %hu: sockfd_lookup(fd=%d) returned %d\n", - tunnel_id, fd, err); - goto err; + if (fd < 0) { + err = l2tp_tunnel_sock_create(tunnel_id, peer_tunnel_id, cfg, &sock); + if (err < 0) + goto err; + } else { + err = -EBADF; + sock = sockfd_lookup(fd, &err); + if (!sock) { + printk(KERN_ERR "tunl %hu: sockfd_lookup(fd=%d) returned %d\n", + tunnel_id, fd, err); + goto err; + } } sk = sock->sk; @@ -1329,7 +1412,10 @@ err: if (tunnelp) *tunnelp = tunnel; - if (sock) + /* If tunnel's socket was created by the kernel, it doesn't + * have a file. + */ + if (sock && sock->file) sockfd_put(sock); return err; @@ -1341,13 +1427,22 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create); int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) { int err = 0; + struct socket *sock = tunnel->sock ? tunnel->sock->sk_socket : NULL; /* Force the tunnel socket to close. This will eventually * cause the tunnel to be deleted via the normal socket close * mechanisms when userspace closes the tunnel socket. 
*/ - if ((tunnel->sock != NULL) && (tunnel->sock->sk_socket != NULL)) - err = inet_shutdown(tunnel->sock->sk_socket, 2); + if (sock != NULL) { + err = inet_shutdown(sock, 2); + + /* If the tunnel's socket was created by the kernel, + * close the socket here since the socket was not + * created by userspace. + */ + if (sock->file == NULL) + err = inet_release(sock); + } return err; } diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 571335530c6f..a961c77e0867 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -146,6 +146,13 @@ struct l2tp_tunnel_cfg { int debug; /* bitmask of debug message * categories */ enum l2tp_encap_type encap; + + /* Used only for kernel-created sockets */ + struct in_addr local_ip; + struct in_addr peer_ip; + u16 local_udp_port; + u16 peer_udp_port; + int use_udp_checksums:1; }; struct l2tp_tunnel { diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 3d0f7f6f7488..12341a6cc70e 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -129,11 +129,21 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info } cfg.encap = nla_get_u16(info->attrs[L2TP_ATTR_ENCAP_TYPE]); - if (!info->attrs[L2TP_ATTR_FD]) { - ret = -EINVAL; - goto out; + fd = -1; + if (info->attrs[L2TP_ATTR_FD]) { + fd = nla_get_u32(info->attrs[L2TP_ATTR_FD]); + } else { + if (info->attrs[L2TP_ATTR_IP_SADDR]) + cfg.local_ip.s_addr = nla_get_be32(info->attrs[L2TP_ATTR_IP_SADDR]); + if (info->attrs[L2TP_ATTR_IP_DADDR]) + cfg.peer_ip.s_addr = nla_get_be32(info->attrs[L2TP_ATTR_IP_DADDR]); + if (info->attrs[L2TP_ATTR_UDP_SPORT]) + cfg.local_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_SPORT]); + if (info->attrs[L2TP_ATTR_UDP_DPORT]) + cfg.peer_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_DPORT]); + if (info->attrs[L2TP_ATTR_UDP_CSUM]) + cfg.use_udp_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_CSUM]); } - fd = nla_get_u32(info->attrs[L2TP_ATTR_FD]); if (info->attrs[L2TP_ATTR_DEBUG]) cfg.debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]); -- cgit v1.2.2 From f481c0d86227156fb1691b166a11c3f0058e1cb2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 3 Apr 2010 14:58:07 -0700 Subject: l2tp: Add missing semicolon to MODULE_ALIAS() in l2tp_netlink.c Signed-off-by: David S. Miller --- net/l2tp/l2tp_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 12341a6cc70e..4c1e540732d7 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -837,4 +837,4 @@ MODULE_DESCRIPTION("L2TP netlink"); MODULE_LICENSE("GPL"); MODULE_VERSION("1.0"); MODULE_ALIAS("net-pf-" __stringify(PF_NETLINK) "-proto-" \ - __stringify(NETLINK_GENERIC) "-type-" "l2tp") + __stringify(NETLINK_GENERIC) "-type-" "l2tp"); -- cgit v1.2.2 From f66ef2d0649b220874532dfb83b6b5b368f83591 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 3 Apr 2010 15:01:37 -0700 Subject: l2tp: Fix L2TP_DEBUGFS ifdef tests. We have to check CONFIG_L2TP_DEBUGFS_MODULE as well as CONFIG_L2TP_DEBUGFS. Signed-off-by: David S. 
Miller --- net/l2tp/l2tp_core.h | 2 +- net/l2tp/l2tp_eth.c | 4 ++-- net/l2tp/l2tp_ppp.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index a961c77e0867..91b1b9ca5cfa 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -133,7 +133,7 @@ struct l2tp_session { void (*session_close)(struct l2tp_session *session); void (*ref)(struct l2tp_session *session); void (*deref)(struct l2tp_session *session); -#ifdef CONFIG_L2TP_DEBUGFS +#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE) void (*show)(struct seq_file *m, void *priv); #endif uint8_t priv[0]; /* private data */ diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 9848faa3d163..ca1164afeb74 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -172,7 +172,7 @@ static void l2tp_eth_delete(struct l2tp_session *session) } } -#ifdef CONFIG_L2TP_DEBUGFS +#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE) static void l2tp_eth_show(struct seq_file *m, void *arg) { struct l2tp_session *session = arg; @@ -244,7 +244,7 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p priv->tunnel_sock = tunnel->sock; session->recv_skb = l2tp_eth_dev_recv; session->session_close = l2tp_eth_delete; -#ifdef CONFIG_L2TP_DEBUGFS +#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE) session->show = l2tp_eth_show; #endif diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 1ef10e4118d2..90d82b3f2889 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -597,7 +597,7 @@ out: return error; } -#ifdef CONFIG_L2TP_DEBUGFS +#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE) static void pppol2tp_show(struct seq_file *m, void *arg) { struct l2tp_session *session = arg; @@ -748,7 +748,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, session->recv_skb = pppol2tp_recv; session->session_close = pppol2tp_session_close; -#ifdef CONFIG_L2TP_DEBUGFS +#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE) session->show = pppol2tp_show; #endif -- cgit v1.2.2 From 1f8438a853667d48055ad38384c63e94b32c6578 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 3 Apr 2010 15:09:04 -0700 Subject: icmp: Account for ICMP out errors When ip_append() fails because of socket limit or memory shortage, increment ICMP_MIB_OUTERRORS counter, so that "netstat -s" can report these errors. LANG=C netstat -s | grep "ICMP messages failed" 0 ICMP messages failed For IPV6, implement ICMP6_MIB_OUTERRORS counter as well. # grep Icmp6OutErrors /proc/net/dev_snmp6/* /proc/net/dev_snmp6/eth0:Icmp6OutErrors 0 /proc/net/dev_snmp6/lo:Icmp6OutErrors 0 Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/icmp.c | 5 +++-- net/ipv6/icmp.c | 2 ++ net/ipv6/proc.c | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 4b4c2bcd15db..d2aa7438c523 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -330,9 +330,10 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, if (ip_append_data(sk, icmp_glue_bits, icmp_param, icmp_param->data_len+icmp_param->head_len, icmp_param->head_len, - ipc, rt, MSG_DONTWAIT) < 0) + ipc, rt, MSG_DONTWAIT) < 0) { + ICMP_INC_STATS_BH(sock_net(sk), ICMP_MIB_OUTERRORS); ip_flush_pending_frames(sk); - else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { + } else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { struct icmphdr *icmph = icmp_hdr(skb); __wsum csum = 0; struct sk_buff *skb1; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index eb9abe24bdf0..a00c18aa6c8d 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -482,6 +482,7 @@ route_done: np->tclass, NULL, &fl, (struct rt6_info*)dst, MSG_DONTWAIT); if (err) { + ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); ip6_flush_pending_frames(sk); goto out_put; } @@ -562,6 +563,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) (struct rt6_info*)dst, MSG_DONTWAIT); if (err) { + ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); ip6_flush_pending_frames(sk); goto out_put; } diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 58344c0fbd13..458eabfbe130 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -97,6 +97,7 @@ static const struct snmp_mib snmp6_icmp6_list[] = { SNMP_MIB_ITEM("Icmp6InMsgs", ICMP6_MIB_INMSGS), SNMP_MIB_ITEM("Icmp6InErrors", ICMP6_MIB_INERRORS), SNMP_MIB_ITEM("Icmp6OutMsgs", ICMP6_MIB_OUTMSGS), + SNMP_MIB_ITEM("Icmp6OutErrors", ICMP6_MIB_OUTERRORS), SNMP_MIB_SENTINEL }; -- cgit v1.2.2 From 486f50ca796a2572c42c34dd4378cdc8eeb0b137 Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Sat, 3 Apr 2010 15:10:21 -0700 Subject: SCTP: Change to use ipv6_addr_copy() Change SCTP IPv6 code to use ipv6_addr_copy() Signed-off-by: Brian Haley Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 216d88f27236..db1c767e509e 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -364,7 +364,7 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, if (addr) { addr->a.v6.sin6_family = AF_INET6; addr->a.v6.sin6_port = 0; - addr->a.v6.sin6_addr = ifp->addr; + ipv6_addr_copy(&addr->a.v6.sin6_addr, &ifp->addr); addr->a.v6.sin6_scope_id = dev->ifindex; addr->valid = 1; INIT_LIST_HEAD(&addr->list); @@ -405,7 +405,7 @@ static void sctp_v6_from_sk(union sctp_addr *addr, struct sock *sk) { addr->v6.sin6_family = AF_INET6; addr->v6.sin6_port = 0; - addr->v6.sin6_addr = inet6_sk(sk)->rcv_saddr; + ipv6_addr_copy(&addr->v6.sin6_addr, &inet6_sk(sk)->rcv_saddr); } /* Initialize sk->sk_rcv_saddr from sctp_addr. 
*/ @@ -418,7 +418,7 @@ static void sctp_v6_to_sk_saddr(union sctp_addr *addr, struct sock *sk) inet6_sk(sk)->rcv_saddr.s6_addr32[3] = addr->v4.sin_addr.s_addr; } else { - inet6_sk(sk)->rcv_saddr = addr->v6.sin6_addr; + ipv6_addr_copy(&inet6_sk(sk)->rcv_saddr, &addr->v6.sin6_addr); } } @@ -431,7 +431,7 @@ static void sctp_v6_to_sk_daddr(union sctp_addr *addr, struct sock *sk) inet6_sk(sk)->daddr.s6_addr32[2] = htonl(0x0000ffff); inet6_sk(sk)->daddr.s6_addr32[3] = addr->v4.sin_addr.s_addr; } else { - inet6_sk(sk)->daddr = addr->v6.sin6_addr; + ipv6_addr_copy(&inet6_sk(sk)->daddr, &addr->v6.sin6_addr); } } -- cgit v1.2.2 From 7bddd0db6248d92adb1f547fd45507af4368d6fa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 4 Apr 2010 01:02:46 -0700 Subject: l2tp: unmanaged L2TPv3 tunnels fixes Followup to commit 789a4a2c (l2tp: Add support for static unmanaged L2TPv3 tunnels) One missing init in l2tp_tunnel_sock_create() could access random kernel memory, and a bit field should be unsigned. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 2 +- net/l2tp/l2tp_core.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 13ed85baf4e9..98dfcce1a5fc 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1227,7 +1227,7 @@ static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2t int err = -EINVAL; struct sockaddr_in udp_addr; struct sockaddr_l2tpip ip_addr; - struct socket *sock; + struct socket *sock = NULL; switch (cfg->encap) { case L2TP_ENCAPTYPE_UDP: diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 91b1b9ca5cfa..f0f318edd3f1 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -152,7 +152,7 @@ struct l2tp_tunnel_cfg { struct in_addr peer_ip; u16 local_udp_port; u16 peer_udp_port; - int use_udp_checksums:1; + unsigned int use_udp_checksums:1; }; struct l2tp_tunnel { -- cgit v1.2.2 From 6d96d3ab7aea5f0e75205a0c97f8d1fdf82c5287 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 29 Mar 2010 18:13:59 -0500 Subject: 9p: Make sure we are able to clunk the cached fid on umount dcache prune happen on umount. So we cannot mark the client satus disconnect. That will prevent a 9p call to the server Signed-off-by: Aneesh Kumar K.V Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index e3e5bf4469ce..a037a29f3f0e 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -533,7 +533,12 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) 
P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type); - if (c->status != Connected) + /* we allow for any status other than disconnected */ + if (c->status == Disconnected) + return ERR_PTR(-EIO); + + /* if status is begin_disconnected we allow only clunk request */ + if ((c->status == BeginDisconnect) && (type != P9_TCLUNK)) return ERR_PTR(-EIO); if (signal_pending(current)) { @@ -799,8 +804,10 @@ void p9_client_destroy(struct p9_client *clnt) v9fs_put_trans(clnt->trans_mod); - list_for_each_entry_safe(fid, fidptr, &clnt->fidlist, flist) + list_for_each_entry_safe(fid, fidptr, &clnt->fidlist, flist) { + printk(KERN_INFO "Found fid %d not clunked\n", fid->fid); p9_fid_destroy(fid); + } if (clnt->fidpool) p9_idpool_destroy(clnt->fidpool); @@ -818,6 +825,13 @@ void p9_client_disconnect(struct p9_client *clnt) } EXPORT_SYMBOL(p9_client_disconnect); +void p9_client_begin_disconnect(struct p9_client *clnt) +{ + P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt); + clnt->status = BeginDisconnect; +} +EXPORT_SYMBOL(p9_client_begin_disconnect); + struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, char *uname, u32 n_uname, char *aname) { -- cgit v1.2.2 From bade732a2848e640482c1e99629a90e085d574dd Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Sat, 3 Apr 2010 08:27:29 -0500 Subject: svcrdma: RDMA support not yet compatible with RPC6 RPC6 requires that it be possible to create endpoints that listen exclusively for IPv4 or IPv6 connection requests. This is not currently supported by the RDMA API. This fixes a server RDMA regression introduced by 37498292a "NFSD: Create PF_INET6 listener in write_ports". Signed-off-by: Tom Tucker Tested-by: Steve Wise Reviewed-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_transport.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 3fa5751af0ec..4e6bbf91a570 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -678,7 +678,10 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, int ret; dprintk("svcrdma: Creating RDMA socket\n"); - + if (sa->sa_family != AF_INET) { + dprintk("svcrdma: Address family %d is not supported.\n", sa->sa_family); + return ERR_PTR(-EAFNOSUPPORT); + } cma_xprt = rdma_create_xprt(serv, 1); if (!cma_xprt) return ERR_PTR(-ENOMEM); -- cgit v1.2.2 From 3dc9fef67f6292692dba181a6d0fd0211bd0a607 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 5 Apr 2010 14:37:28 -0500 Subject: 9p: saving negative to unsigned char Saving -EINVAL as unsigned char truncates the high bits and changes it into 234 instead of -22. This breaks the test for "if (ret == -EINVAL)" in parse_opts(). 
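For illustration only (not part of the patch), here is a minimal standalone C sketch of the truncation described above; it assumes the usual Linux value EINVAL == 22 and simply mirrors the old "unsigned char" declaration outside the kernel:

#include <stdio.h>

#define EINVAL 22	/* assumed value, matches <errno.h> on Linux */

int main(void)
{
	/* old get_protocol_version() stored its error in an unsigned char */
	unsigned char version = -EINVAL;	/* -22 wraps to 234 */
	int ret = version;			/* comes back as 234, not -22 */

	if (ret == -EINVAL)
		printf("matched -EINVAL\n");	/* never taken */
	else
		printf("ret = %d (expected -22)\n", ret);	/* prints 234 */

	return 0;
}

Changing the return type (and the local) to plain int, as the patch does, keeps the negative errno intact so the comparison in parse_opts() works again.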
Signed-off-by: Dan Carpenter Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index a037a29f3f0e..20a33194d66e 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -71,9 +71,10 @@ inline int p9_is_proto_dotu(struct p9_client *clnt) EXPORT_SYMBOL(p9_is_proto_dotu); /* Interpret mount option for protocol version */ -static unsigned char get_protocol_version(const substring_t *name) +static int get_protocol_version(const substring_t *name) { - unsigned char version = -EINVAL; + int version = -EINVAL; + if (!strncmp("9p2000", name->from, name->to-name->from)) { version = p9_proto_legacy; P9_DPRINTK(P9_DEBUG_9P, "Protocol version: Legacy\n"); -- cgit v1.2.2 From 5a6d234e73d7d021c74e1aa349b3b37b81372c66 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 5 Apr 2010 14:37:19 -0700 Subject: rps: fixed missed rps_unlock Fix spin_unlock_irq which needs to be rps_unlock. Signed-off-by: Tom Herbert Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 2a9b7dd0bb6e..74f77ca03349 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3120,7 +3120,7 @@ static int process_backlog(struct napi_struct *napi, int quota) skb = __skb_dequeue(&queue->input_pkt_queue); if (!skb) { __napi_complete(napi); - spin_unlock_irq(&queue->input_pkt_queue.lock); + rps_unlock(queue); break; } rps_unlock(queue); -- cgit v1.2.2 From e4008276fddd10445ff06707694a938cb7f35ed4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Apr 2010 15:42:39 -0700 Subject: net: Add a missing local_irq_enable() As noticed by Changli Gao, we must call local_irq_enable() after rps_unlock() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 74f77ca03349..b98ddc62a55d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3121,6 +3121,7 @@ static int process_backlog(struct napi_struct *napi, int quota) if (!skb) { __napi_complete(napi); rps_unlock(queue); + local_irq_enable(); break; } rps_unlock(queue); -- cgit v1.2.2 From 2f787b0b76bf5de2eaa3ca3a29d89123ae03c856 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?YOSHIFUJI=20Hideaki=20/=20=E5=90=89=E8=97=A4=E8=8B=B1?= =?UTF-8?q?=E6=98=8E?= Date: Sun, 4 Apr 2010 17:59:30 +0000 Subject: mac80211: Ensure initializing private mc_list in prepare_multicast(). Fix kernel panic by NULL pointer dereference in the context of ieee80211_ops->prepare_multicast(). This bug was introduced by commit 22bedad3c.. ("net: convert multicast list to list_head"). Call __hw_addr_init() in ieee80211_alloc_hw() to initialize list_head of private device multicast list, like we do in bond_init(). Signed-off-by: YOSHIFUJI Hideaki Reviewed-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/mac80211/main.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 84ad249a4e2e..0b82cd2f781e 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -388,6 +388,9 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, local->uapsd_max_sp_len = IEEE80211_DEFAULT_MAX_SP_LEN; INIT_LIST_HEAD(&local->interfaces); + + __hw_addr_init(&local->mc_list); + mutex_init(&local->iflist_mtx); mutex_init(&local->scan_mtx); -- cgit v1.2.2 From 1cb561f83793191cf86a2db3948d28f5f42df9ff Mon Sep 17 00:00:00 2001 From: Javier Cardona Date: Mon, 29 Mar 2010 11:00:20 -0700 Subject: mac80211: Handle mesh action frames in ieee80211_rx_h_action This fixes the problem introduced in commit 8404080568613d93ad7cf0a16dfb68 which broke mesh peer link establishment. changes: v2 Added missing break (Johannes) v3 Broke original patch into two (Johannes) Signed-off-by: Javier Cardona Cc: stable@kernel.org Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/mesh.c | 3 --- net/mac80211/rx.c | 5 +++++ 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 61080c5fad50..7a6bebce7f2f 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -749,9 +749,6 @@ ieee80211_mesh_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) switch (fc & IEEE80211_FCTL_STYPE) { case IEEE80211_STYPE_ACTION: - if (skb->len < IEEE80211_MIN_ACTION_SIZE) - return RX_DROP_MONITOR; - /* fall through */ case IEEE80211_STYPE_PROBE_RESP: case IEEE80211_STYPE_BEACON: skb_queue_tail(&ifmsh->skb_queue, skb); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index b5c48de81d8b..13fcd2d17c6b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1973,6 +1973,11 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) goto handled; } break; + case MESH_PLINK_CATEGORY: + case MESH_PATH_SEL_CATEGORY: + if (ieee80211_vif_is_mesh(&sdata->vif)) + return ieee80211_mesh_rx_mgmt(sdata, rx->skb); + break; } /* -- cgit v1.2.2 From 0379185b6c0d1e8252023698cf1091da92a3dc03 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 Apr 2010 11:18:42 +0200 Subject: mac80211: annotate station rcu dereferences The new RCU lockdep support warns about these in some contexts -- make it aware of the locks used to protect all this. Different locks are used in different contexts which unfortunately means we can't get perfect checking. Also remove rcu_dereference() from two places that don't actually dereference the pointers. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/main.c | 4 ++-- net/mac80211/sta_info.c | 20 ++++++++++++++++---- 2 files changed, 18 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 06c33b68d8e5..b887e484ae04 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -225,11 +225,11 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, switch (sdata->vif.type) { case NL80211_IFTYPE_AP: sdata->vif.bss_conf.enable_beacon = - !!rcu_dereference(sdata->u.ap.beacon); + !!sdata->u.ap.beacon; break; case NL80211_IFTYPE_ADHOC: sdata->vif.bss_conf.enable_beacon = - !!rcu_dereference(sdata->u.ibss.presp); + !!sdata->u.ibss.presp; break; case NL80211_IFTYPE_MESH_POINT: sdata->vif.bss_conf.enable_beacon = true; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 56422d894351..fb12cec4d333 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -93,12 +93,18 @@ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sta_info *sta; - sta = rcu_dereference(local->sta_hash[STA_HASH(addr)]); + sta = rcu_dereference_check(local->sta_hash[STA_HASH(addr)], + rcu_read_lock_held() || + lockdep_is_held(&local->sta_lock) || + lockdep_is_held(&local->sta_mtx)); while (sta) { if (sta->sdata == sdata && memcmp(sta->sta.addr, addr, ETH_ALEN) == 0) break; - sta = rcu_dereference(sta->hnext); + sta = rcu_dereference_check(sta->hnext, + rcu_read_lock_held() || + lockdep_is_held(&local->sta_lock) || + lockdep_is_held(&local->sta_mtx)); } return sta; } @@ -113,13 +119,19 @@ struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sta_info *sta; - sta = rcu_dereference(local->sta_hash[STA_HASH(addr)]); + sta = rcu_dereference_check(local->sta_hash[STA_HASH(addr)], + rcu_read_lock_held() || + lockdep_is_held(&local->sta_lock) || + lockdep_is_held(&local->sta_mtx)); while (sta) { if ((sta->sdata == sdata || sta->sdata->bss == sdata->bss) && memcmp(sta->sta.addr, addr, ETH_ALEN) == 0) break; - sta = rcu_dereference(sta->hnext); + sta = rcu_dereference_check(sta->hnext, + rcu_read_lock_held() || + lockdep_is_held(&local->sta_lock) || + lockdep_is_held(&local->sta_mtx)); } return sta; } -- cgit v1.2.2 From d211e90e28a074447584729018a39910d691d1a8 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sun, 28 Mar 2010 22:29:52 -0700 Subject: mac80211: Fix robust management frame handling (MFP) Commit e34e09401ee9888dd662b2fca5d607794a56daf2 incorrectly removed use of ieee80211_has_protected() from the management frame case and in practice, made this validation drop all Action frames when MFP is enabled. This should have only been done for frames with Protected field set to zero. Signed-off-by: Jouni Malinen Cc: stable@kernel.org Signed-off-by: John W. 
Linville --- net/mac80211/rx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 14366d4afbed..b83d4db6ca6a 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1419,7 +1419,8 @@ ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) return 0; if (rx->sta && test_sta_flags(rx->sta, WLAN_STA_MFP)) { - if (unlikely(ieee80211_is_unicast_robust_mgmt_frame(rx->skb) && + if (unlikely(!ieee80211_has_protected(fc) && + ieee80211_is_unicast_robust_mgmt_frame(rx->skb) && rx->key)) return -EACCES; /* BIP does not use Protected field, so need to check MMIE */ -- cgit v1.2.2 From c6537d6742985da1fbf12ae26cde6a096fd35b5c Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Tue, 6 Apr 2010 11:40:52 +0000 Subject: TIPC: Updated topology subscription protocol according to latest spec This patch makes it explicit in the API that all fields in subscriptions and events exchanged with the Topology Server must be in network byte order. It also ensures that all fields of a subscription are compared when cancelling a subscription, in order to avoid inadvertent cancelling of the wrong subscription. Finally, the tipc module version is updated to 2.0.0, to reflect the API change. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/core.c | 2 +- net/tipc/subscr.c | 15 ++++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/tipc/core.c b/net/tipc/core.c index 52c571fedbe0..4e84c8431f32 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -49,7 +49,7 @@ #include "config.h" -#define TIPC_MOD_VER "1.6.4" +#define TIPC_MOD_VER "2.0.0" #ifndef CONFIG_TIPC_ZONES #define CONFIG_TIPC_ZONES 3 diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index ff123e56114a..ab6eab4c45e2 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -274,7 +274,7 @@ static void subscr_cancel(struct tipc_subscr *s, { struct subscription *sub; struct subscription *sub_temp; - __u32 type, lower, upper; + __u32 type, lower, upper, timeout, filter; int found = 0; /* Find first matching subscription, exit if not found */ @@ -282,12 +282,18 @@ static void subscr_cancel(struct tipc_subscr *s, type = ntohl(s->seq.type); lower = ntohl(s->seq.lower); upper = ntohl(s->seq.upper); + timeout = ntohl(s->timeout); + filter = ntohl(s->filter) & ~TIPC_SUB_CANCEL; list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list, subscription_list) { if ((type == sub->seq.type) && (lower == sub->seq.lower) && - (upper == sub->seq.upper)) { + (upper == sub->seq.upper) && + (timeout == sub->timeout) && + (filter == sub->filter) && + !memcmp(s->usr_handle,sub->evt.s.usr_handle, + sizeof(s->usr_handle)) ){ found = 1; break; } @@ -304,7 +310,7 @@ static void subscr_cancel(struct tipc_subscr *s, k_term_timer(&sub->timer); spin_lock_bh(subscriber->lock); } - dbg("Cancel: removing sub %u,%u,%u from subscriber %x list\n", + dbg("Cancel: removing sub %u,%u,%u from subscriber %p list\n", sub->seq.type, sub->seq.lower, sub->seq.upper, subscriber); subscr_del(sub); } @@ -352,8 +358,7 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s, sub->seq.upper = ntohl(s->seq.upper); sub->timeout = ntohl(s->timeout); sub->filter = ntohl(s->filter); - if ((!(sub->filter & TIPC_SUB_PORTS) == - !(sub->filter & TIPC_SUB_SERVICE)) || + if ((sub->filter && (sub->filter != TIPC_SUB_PORTS)) || (sub->seq.lower > sub->seq.upper)) { warn("Subscription rejected, illegal request\n"); kfree(sub); -- cgit v1.2.2 From 
842509b8591fd9a40f5532a5f049bd29804af6d6 Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Tue, 6 Apr 2010 05:39:52 +0000 Subject: socket: remove duplicate declaration of struct timespec struct timespec ts was alreay defined. Reuse the previously defined one and reduce the memory footprint on the stack by 16 bytes. Signed-off-by: Hagen Paul Pfeifer Signed-off-by: David S. Miller --- net/socket.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 769c386bd428..ae904b58d9f5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -619,10 +619,9 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv); } else { - struct timespec ts; - skb_get_timestampns(skb, &ts); + skb_get_timestampns(skb, &ts[0]); put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, - sizeof(ts), &ts); + sizeof(ts[0]), &ts[0]); } } -- cgit v1.2.2 From fe1a5f031e76bd8761a7803d75b95ee96e84a574 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Wed, 7 Apr 2010 00:30:04 +0000 Subject: flow: virtualize flow cache entry methods This allows to validate the cached object before returning it. It also allows to destruct object properly, if the last reference was held in flow cache. This is also a prepartion for caching bundles in the flow cache. In return for virtualizing the methods, we save on: - not having to regenerate the whole flow cache on policy removal: each flow matching a killed policy gets refreshed as the getter function notices it smartly. - we do not have to call flow_cache_flush from policy gc, since the flow cache now properly deletes the object if it had any references Signed-off-by: Timo Teras Acked-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/core/flow.c | 128 ++++++++++++++++++++++++++----------------------- net/xfrm/xfrm_policy.c | 112 ++++++++++++++++++++++++++++--------------- 2 files changed, 142 insertions(+), 98 deletions(-) (limited to 'net') diff --git a/net/core/flow.c b/net/core/flow.c index 1d27ca6b421d..521df52a77d2 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -26,17 +26,16 @@ #include struct flow_cache_entry { - struct flow_cache_entry *next; - u16 family; - u8 dir; - u32 genid; - struct flowi key; - void *object; - atomic_t *object_ref; + struct flow_cache_entry *next; + u16 family; + u8 dir; + u32 genid; + struct flowi key; + struct flow_cache_object *object; }; struct flow_cache_percpu { - struct flow_cache_entry ** hash_table; + struct flow_cache_entry **hash_table; int hash_count; u32 hash_rnd; int hash_rnd_recalc; @@ -44,7 +43,7 @@ struct flow_cache_percpu { }; struct flow_flush_info { - struct flow_cache * cache; + struct flow_cache *cache; atomic_t cpuleft; struct completion completion; }; @@ -52,7 +51,7 @@ struct flow_flush_info { struct flow_cache { u32 hash_shift; unsigned long order; - struct flow_cache_percpu * percpu; + struct flow_cache_percpu *percpu; struct notifier_block hotcpu_notifier; int low_watermark; int high_watermark; @@ -78,12 +77,21 @@ static void flow_cache_new_hashrnd(unsigned long arg) add_timer(&fc->rnd_timer); } +static int flow_entry_valid(struct flow_cache_entry *fle) +{ + if (atomic_read(&flow_cache_genid) != fle->genid) + return 0; + if (fle->object && !fle->object->ops->check(fle->object)) + return 0; + return 1; +} + static void flow_entry_kill(struct flow_cache *fc, struct flow_cache_percpu *fcp, struct flow_cache_entry *fle) { if (fle->object) - atomic_dec(fle->object_ref); + fle->object->ops->delete(fle->object); kmem_cache_free(flow_cachep, fle); fcp->hash_count--; } @@ -96,16 +104,18 @@ static void __flow_cache_shrink(struct flow_cache *fc, int i; for (i = 0; i < flow_cache_hash_size(fc); i++) { - int k = 0; + int saved = 0; flp = &fcp->hash_table[i]; - while ((fle = *flp) != NULL && k < shrink_to) { - k++; - flp = &fle->next; - } while ((fle = *flp) != NULL) { - *flp = fle->next; - flow_entry_kill(fc, fcp, fle); + if (saved < shrink_to && + flow_entry_valid(fle)) { + saved++; + flp = &fle->next; + } else { + *flp = fle->next; + flow_entry_kill(fc, fcp, fle); + } } } } @@ -166,18 +176,21 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) return 0; } -void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, - flow_resolve_t resolver) +struct flow_cache_object * +flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, + flow_resolve_t resolver, void *ctx) { struct flow_cache *fc = &flow_cache_global; struct flow_cache_percpu *fcp; struct flow_cache_entry *fle, **head; + struct flow_cache_object *flo; unsigned int hash; local_bh_disable(); fcp = per_cpu_ptr(fc->percpu, smp_processor_id()); fle = NULL; + flo = NULL; /* Packet really early in init? Making flow_cache_init a * pre-smp initcall would solve this. 
--RR */ if (!fcp->hash_table) @@ -185,27 +198,17 @@ void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, if (fcp->hash_rnd_recalc) flow_new_hash_rnd(fc, fcp); - hash = flow_hash_code(fc, fcp, key); + hash = flow_hash_code(fc, fcp, key); head = &fcp->hash_table[hash]; for (fle = *head; fle; fle = fle->next) { if (fle->family == family && fle->dir == dir && - flow_key_compare(key, &fle->key) == 0) { - if (fle->genid == atomic_read(&flow_cache_genid)) { - void *ret = fle->object; - - if (ret) - atomic_inc(fle->object_ref); - local_bh_enable(); - - return ret; - } + flow_key_compare(key, &fle->key) == 0) break; - } } - if (!fle) { + if (unlikely(!fle)) { if (fcp->hash_count > fc->high_watermark) flow_cache_shrink(fc, fcp); @@ -219,33 +222,39 @@ void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, fle->object = NULL; fcp->hash_count++; } + } else if (likely(fle->genid == atomic_read(&flow_cache_genid))) { + flo = fle->object; + if (!flo) + goto ret_object; + flo = flo->ops->get(flo); + if (flo) + goto ret_object; + } else if (fle->object) { + flo = fle->object; + flo->ops->delete(flo); + fle->object = NULL; } nocache: - { - int err; - void *obj; - atomic_t *obj_ref; - - err = resolver(net, key, family, dir, &obj, &obj_ref); - - if (fle && !err) { - fle->genid = atomic_read(&flow_cache_genid); - - if (fle->object) - atomic_dec(fle->object_ref); - - fle->object = obj; - fle->object_ref = obj_ref; - if (obj) - atomic_inc(fle->object_ref); - } - local_bh_enable(); - - if (err) - obj = ERR_PTR(err); - return obj; + flo = NULL; + if (fle) { + flo = fle->object; + fle->object = NULL; + } + flo = resolver(net, key, family, dir, flo, ctx); + if (fle) { + fle->genid = atomic_read(&flow_cache_genid); + if (!IS_ERR(flo)) + fle->object = flo; + else + fle->genid--; + } else { + if (flo && !IS_ERR(flo)) + flo->ops->delete(flo); } +ret_object: + local_bh_enable(); + return flo; } static void flow_cache_flush_tasklet(unsigned long data) @@ -261,13 +270,12 @@ static void flow_cache_flush_tasklet(unsigned long data) fle = fcp->hash_table[i]; for (; fle; fle = fle->next) { - unsigned genid = atomic_read(&flow_cache_genid); - - if (!fle->object || fle->genid == genid) + if (flow_entry_valid(fle)) continue; + if (fle->object) + fle->object->ops->delete(fle->object); fle->object = NULL; - atomic_dec(fle->object_ref); } } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 82789cf1c632..7722baeb140d 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -216,6 +216,35 @@ expired: xfrm_pol_put(xp); } +static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo) +{ + struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo); + + if (unlikely(pol->walk.dead)) + flo = NULL; + else + xfrm_pol_hold(pol); + + return flo; +} + +static int xfrm_policy_flo_check(struct flow_cache_object *flo) +{ + struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo); + + return !pol->walk.dead; +} + +static void xfrm_policy_flo_delete(struct flow_cache_object *flo) +{ + xfrm_pol_put(container_of(flo, struct xfrm_policy, flo)); +} + +static const struct flow_cache_ops xfrm_policy_fc_ops = { + .get = xfrm_policy_flo_get, + .check = xfrm_policy_flo_check, + .delete = xfrm_policy_flo_delete, +}; /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2 * SPD calls. 
@@ -236,6 +265,7 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) atomic_set(&policy->refcnt, 1); setup_timer(&policy->timer, xfrm_policy_timer, (unsigned long)policy); + policy->flo.ops = &xfrm_policy_fc_ops; } return policy; } @@ -269,9 +299,6 @@ static void xfrm_policy_gc_kill(struct xfrm_policy *policy) if (del_timer(&policy->timer)) atomic_dec(&policy->refcnt); - if (atomic_read(&policy->refcnt) > 1) - flow_cache_flush(); - xfrm_pol_put(policy); } @@ -661,10 +688,8 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, } write_unlock_bh(&xfrm_policy_lock); - if (ret && delete) { - atomic_inc(&flow_cache_genid); + if (ret && delete) xfrm_policy_kill(ret); - } return ret; } EXPORT_SYMBOL(xfrm_policy_bysel_ctx); @@ -703,10 +728,8 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, } write_unlock_bh(&xfrm_policy_lock); - if (ret && delete) { - atomic_inc(&flow_cache_genid); + if (ret && delete) xfrm_policy_kill(ret); - } return ret; } EXPORT_SYMBOL(xfrm_policy_byid); @@ -822,7 +845,6 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) } if (!cnt) err = -ESRCH; - atomic_inc(&flow_cache_genid); out: write_unlock_bh(&xfrm_policy_lock); return err; @@ -976,32 +998,35 @@ fail: return ret; } -static int xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, - u8 dir, void **objp, atomic_t **obj_refp) +static struct flow_cache_object * +xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, + u8 dir, struct flow_cache_object *old_obj, void *ctx) { struct xfrm_policy *pol; - int err = 0; + + if (old_obj) + xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo)); #ifdef CONFIG_XFRM_SUB_POLICY pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir); - if (IS_ERR(pol)) { - err = PTR_ERR(pol); - pol = NULL; - } - if (pol || err) - goto end; + if (IS_ERR(pol)) + return ERR_CAST(pol); + if (pol) + goto found; #endif pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); - if (IS_ERR(pol)) { - err = PTR_ERR(pol); - pol = NULL; - } -#ifdef CONFIG_XFRM_SUB_POLICY -end: -#endif - if ((*objp = (void *) pol) != NULL) - *obj_refp = &pol->refcnt; - return err; + if (IS_ERR(pol)) + return ERR_CAST(pol); + if (pol) + goto found; + return NULL; + +found: + /* Resolver returns two references: + * one for cache and one for caller of flow_cache_lookup() */ + xfrm_pol_hold(pol); + + return &pol->flo; } static inline int policy_to_flow_dir(int dir) @@ -1091,8 +1116,6 @@ int xfrm_policy_delete(struct xfrm_policy *pol, int dir) pol = __xfrm_policy_unlink(pol, dir); write_unlock_bh(&xfrm_policy_lock); if (pol) { - if (dir < XFRM_POLICY_MAX) - atomic_inc(&flow_cache_genid); xfrm_policy_kill(pol); return 0; } @@ -1578,18 +1601,24 @@ restart: } if (!policy) { + struct flow_cache_object *flo; + /* To accelerate a bit... 
*/ if ((dst_orig->flags & DST_NOXFRM) || !net->xfrm.policy_count[XFRM_POLICY_OUT]) goto nopol; - policy = flow_cache_lookup(net, fl, dst_orig->ops->family, - dir, xfrm_policy_lookup); - err = PTR_ERR(policy); - if (IS_ERR(policy)) { + flo = flow_cache_lookup(net, fl, dst_orig->ops->family, + dir, xfrm_policy_lookup, NULL); + err = PTR_ERR(flo); + if (IS_ERR(flo)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); goto dropdst; } + if (flo) + policy = container_of(flo, struct xfrm_policy, flo); + else + policy = NULL; } if (!policy) @@ -1939,9 +1968,16 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, } } - if (!pol) - pol = flow_cache_lookup(net, &fl, family, fl_dir, - xfrm_policy_lookup); + if (!pol) { + struct flow_cache_object *flo; + + flo = flow_cache_lookup(net, &fl, family, fl_dir, + xfrm_policy_lookup, NULL); + if (IS_ERR_OR_NULL(flo)) + pol = ERR_CAST(flo); + else + pol = container_of(flo, struct xfrm_policy, flo); + } if (IS_ERR(pol)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); -- cgit v1.2.2 From 80c802f3073e84c956846e921e8a0b02dfa3755f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Wed, 7 Apr 2010 00:30:05 +0000 Subject: xfrm: cache bundles instead of policies for outgoing flows __xfrm_lookup() is called for each packet transmitted out of system. The xfrm_find_bundle() does a linear search which can kill system performance depending on how many bundles are required per policy. This modifies __xfrm_lookup() to store bundles directly in the flow cache. If we did not get a hit, we just create a new bundle instead of doing slow search. This means that we can now get multiple xfrm_dst's for same flow (on per-cpu basis). Signed-off-by: Timo Teras Signed-off-by: David S. Miller --- net/ipv4/xfrm4_policy.c | 22 -- net/ipv6/xfrm6_policy.c | 31 --- net/xfrm/xfrm_policy.c | 711 +++++++++++++++++++++++++----------------------- 3 files changed, 376 insertions(+), 388 deletions(-) (limited to 'net') diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index e4a1483fba77..1705476670ef 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -59,27 +59,6 @@ static int xfrm4_get_saddr(struct net *net, return 0; } -static struct dst_entry * -__xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy) -{ - struct dst_entry *dst; - - read_lock_bh(&policy->lock); - for (dst = policy->bundles; dst; dst = dst->next) { - struct xfrm_dst *xdst = (struct xfrm_dst *)dst; - if (xdst->u.rt.fl.oif == fl->oif && /*XXX*/ - xdst->u.rt.fl.fl4_dst == fl->fl4_dst && - xdst->u.rt.fl.fl4_src == fl->fl4_src && - xdst->u.rt.fl.fl4_tos == fl->fl4_tos && - xfrm_bundle_ok(policy, xdst, fl, AF_INET, 0)) { - dst_clone(dst); - break; - } - } - read_unlock_bh(&policy->lock); - return dst; -} - static int xfrm4_get_tos(struct flowi *fl) { return fl->fl4_tos; @@ -259,7 +238,6 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { .dst_ops = &xfrm4_dst_ops, .dst_lookup = xfrm4_dst_lookup, .get_saddr = xfrm4_get_saddr, - .find_bundle = __xfrm4_find_bundle, .decode_session = _decode_session4, .get_tos = xfrm4_get_tos, .init_path = xfrm4_init_path, diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index ae181651c75a..8c452fd5ceae 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -67,36 +67,6 @@ static int xfrm6_get_saddr(struct net *net, return 0; } -static struct dst_entry * -__xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) -{ - struct dst_entry *dst; - - /* Still not clear if we should set 
fl->fl6_{src,dst}... */ - read_lock_bh(&policy->lock); - for (dst = policy->bundles; dst; dst = dst->next) { - struct xfrm_dst *xdst = (struct xfrm_dst*)dst; - struct in6_addr fl_dst_prefix, fl_src_prefix; - - ipv6_addr_prefix(&fl_dst_prefix, - &fl->fl6_dst, - xdst->u.rt6.rt6i_dst.plen); - ipv6_addr_prefix(&fl_src_prefix, - &fl->fl6_src, - xdst->u.rt6.rt6i_src.plen); - if (ipv6_addr_equal(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) && - ipv6_addr_equal(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) && - xfrm_bundle_ok(policy, xdst, fl, AF_INET6, - (xdst->u.rt6.rt6i_dst.plen != 128 || - xdst->u.rt6.rt6i_src.plen != 128))) { - dst_clone(dst); - break; - } - } - read_unlock_bh(&policy->lock); - return dst; -} - static int xfrm6_get_tos(struct flowi *fl) { return 0; @@ -291,7 +261,6 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .dst_ops = &xfrm6_dst_ops, .dst_lookup = xfrm6_dst_lookup, .get_saddr = xfrm6_get_saddr, - .find_bundle = __xfrm6_find_bundle, .decode_session = _decode_session6, .get_tos = xfrm6_get_tos, .init_path = xfrm6_init_path, diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7722baeb140d..06ccc71c871f 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -37,6 +37,8 @@ DEFINE_MUTEX(xfrm_cfg_mutex); EXPORT_SYMBOL(xfrm_cfg_mutex); +static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock); +static struct dst_entry *xfrm_policy_sk_bundles; static DEFINE_RWLOCK(xfrm_policy_lock); static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); @@ -50,6 +52,7 @@ static DEFINE_SPINLOCK(xfrm_policy_gc_lock); static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); static void xfrm_init_pmtu(struct dst_entry *dst); +static int stale_bundle(struct dst_entry *dst); static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, int dir); @@ -277,8 +280,6 @@ void xfrm_policy_destroy(struct xfrm_policy *policy) { BUG_ON(!policy->walk.dead); - BUG_ON(policy->bundles); - if (del_timer(&policy->timer)) BUG(); @@ -289,12 +290,7 @@ EXPORT_SYMBOL(xfrm_policy_destroy); static void xfrm_policy_gc_kill(struct xfrm_policy *policy) { - struct dst_entry *dst; - - while ((dst = policy->bundles) != NULL) { - policy->bundles = dst->next; - dst_free(dst); - } + atomic_inc(&policy->genid); if (del_timer(&policy->timer)) atomic_dec(&policy->refcnt); @@ -572,7 +568,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) struct xfrm_policy *delpol; struct hlist_head *chain; struct hlist_node *entry, *newpos; - struct dst_entry *gc_list; u32 mark = policy->mark.v & policy->mark.m; write_lock_bh(&xfrm_policy_lock); @@ -622,34 +617,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) else if (xfrm_bydst_should_resize(net, dir, NULL)) schedule_work(&net->xfrm.policy_hash_work); - read_lock_bh(&xfrm_policy_lock); - gc_list = NULL; - entry = &policy->bydst; - hlist_for_each_entry_continue(policy, entry, bydst) { - struct dst_entry *dst; - - write_lock(&policy->lock); - dst = policy->bundles; - if (dst) { - struct dst_entry *tail = dst; - while (tail->next) - tail = tail->next; - tail->next = gc_list; - gc_list = dst; - - policy->bundles = NULL; - } - write_unlock(&policy->lock); - } - read_unlock_bh(&xfrm_policy_lock); - - while (gc_list) { - struct dst_entry *dst = gc_list; - - gc_list = dst->next; - dst_free(dst); - } - return 0; } EXPORT_SYMBOL(xfrm_policy_insert); @@ -998,6 +965,19 @@ fail: return ret; } +static struct xfrm_policy * 
+__xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir) +{ +#ifdef CONFIG_XFRM_SUB_POLICY + struct xfrm_policy *pol; + + pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir); + if (pol != NULL) + return pol; +#endif + return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); +} + static struct flow_cache_object * xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir, struct flow_cache_object *old_obj, void *ctx) @@ -1007,21 +987,10 @@ xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, if (old_obj) xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo)); -#ifdef CONFIG_XFRM_SUB_POLICY - pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir); - if (IS_ERR(pol)) + pol = __xfrm_policy_lookup(net, fl, family, dir); + if (IS_ERR_OR_NULL(pol)) return ERR_CAST(pol); - if (pol) - goto found; -#endif - pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); - if (IS_ERR(pol)) - return ERR_CAST(pol); - if (pol) - goto found; - return NULL; -found: /* Resolver returns two references: * one for cache and one for caller of flow_cache_lookup() */ xfrm_pol_hold(pol); @@ -1313,18 +1282,6 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, * still valid. */ -static struct dst_entry * -xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family) -{ - struct dst_entry *x; - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return ERR_PTR(-EINVAL); - x = afinfo->find_bundle(fl, policy); - xfrm_policy_put_afinfo(afinfo); - return x; -} - static inline int xfrm_get_tos(struct flowi *fl, int family) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); @@ -1340,6 +1297,54 @@ static inline int xfrm_get_tos(struct flowi *fl, int family) return tos; } +static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo) +{ + struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); + struct dst_entry *dst = &xdst->u.dst; + + if (xdst->route == NULL) { + /* Dummy bundle - if it has xfrms we were not + * able to build bundle as template resolution failed. + * It means we need to try again resolving. 
*/ + if (xdst->num_xfrms > 0) + return NULL; + } else { + /* Real bundle */ + if (stale_bundle(dst)) + return NULL; + } + + dst_hold(dst); + return flo; +} + +static int xfrm_bundle_flo_check(struct flow_cache_object *flo) +{ + struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); + struct dst_entry *dst = &xdst->u.dst; + + if (!xdst->route) + return 0; + if (stale_bundle(dst)) + return 0; + + return 1; +} + +static void xfrm_bundle_flo_delete(struct flow_cache_object *flo) +{ + struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); + struct dst_entry *dst = &xdst->u.dst; + + dst_free(dst); +} + +static const struct flow_cache_ops xfrm_bundle_fc_ops = { + .get = xfrm_bundle_flo_get, + .check = xfrm_bundle_flo_check, + .delete = xfrm_bundle_flo_delete, +}; + static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); @@ -1362,9 +1367,10 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) BUG(); } xdst = dst_alloc(dst_ops) ?: ERR_PTR(-ENOBUFS); - xfrm_policy_put_afinfo(afinfo); + xdst->flo.ops = &xfrm_bundle_fc_ops; + return xdst; } @@ -1402,6 +1408,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, return err; } + /* Allocate chain of dst_entry's, attach known xfrm's, calculate * all the metrics... Shortly, bundle a bundle. */ @@ -1465,7 +1472,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, dst_hold(dst); dst1->xfrm = xfrm[i]; - xdst->genid = xfrm[i]->genid; + xdst->xfrm_genid = xfrm[i]->genid; dst1->obsolete = -1; dst1->flags |= DST_HOST; @@ -1558,7 +1565,186 @@ xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl) #endif } -static int stale_bundle(struct dst_entry *dst); +static int xfrm_expand_policies(struct flowi *fl, u16 family, + struct xfrm_policy **pols, + int *num_pols, int *num_xfrms) +{ + int i; + + if (*num_pols == 0 || !pols[0]) { + *num_pols = 0; + *num_xfrms = 0; + return 0; + } + if (IS_ERR(pols[0])) + return PTR_ERR(pols[0]); + + *num_xfrms = pols[0]->xfrm_nr; + +#ifdef CONFIG_XFRM_SUB_POLICY + if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW && + pols[0]->type != XFRM_POLICY_TYPE_MAIN) { + pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]), + XFRM_POLICY_TYPE_MAIN, + fl, family, + XFRM_POLICY_OUT); + if (pols[1]) { + if (IS_ERR(pols[1])) { + xfrm_pols_put(pols, *num_pols); + return PTR_ERR(pols[1]); + } + (*num_pols) ++; + (*num_xfrms) += pols[1]->xfrm_nr; + } + } +#endif + for (i = 0; i < *num_pols; i++) { + if (pols[i]->action != XFRM_POLICY_ALLOW) { + *num_xfrms = -1; + break; + } + } + + return 0; + +} + +static struct xfrm_dst * +xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, + struct flowi *fl, u16 family, + struct dst_entry *dst_orig) +{ + struct net *net = xp_net(pols[0]); + struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; + struct dst_entry *dst; + struct xfrm_dst *xdst; + int err; + + /* Try to instantiate a bundle */ + err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); + if (err < 0) { + if (err != -EAGAIN) + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); + return ERR_PTR(err); + } + + dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig); + if (IS_ERR(dst)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR); + return ERR_CAST(dst); + } + + xdst = (struct xfrm_dst *)dst; + xdst->num_xfrms = err; + if (num_pols > 1) + err = xfrm_dst_update_parent(dst, &pols[1]->selector); + else + err = xfrm_dst_update_origin(dst, fl); + 
if (unlikely(err)) { + dst_free(dst); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); + return ERR_PTR(err); + } + + xdst->num_pols = num_pols; + memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols); + xdst->policy_genid = atomic_read(&pols[0]->genid); + + return xdst; +} + +static struct flow_cache_object * +xfrm_bundle_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir, + struct flow_cache_object *oldflo, void *ctx) +{ + struct dst_entry *dst_orig = (struct dst_entry *)ctx; + struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; + struct xfrm_dst *xdst, *new_xdst; + int num_pols = 0, num_xfrms = 0, i, err, pol_dead; + + /* Check if the policies from old bundle are usable */ + xdst = NULL; + if (oldflo) { + xdst = container_of(oldflo, struct xfrm_dst, flo); + num_pols = xdst->num_pols; + num_xfrms = xdst->num_xfrms; + pol_dead = 0; + for (i = 0; i < num_pols; i++) { + pols[i] = xdst->pols[i]; + pol_dead |= pols[i]->walk.dead; + } + if (pol_dead) { + dst_free(&xdst->u.dst); + xdst = NULL; + num_pols = 0; + num_xfrms = 0; + oldflo = NULL; + } + } + + /* Resolve policies to use if we couldn't get them from + * previous cache entry */ + if (xdst == NULL) { + num_pols = 1; + pols[0] = __xfrm_policy_lookup(net, fl, family, dir); + err = xfrm_expand_policies(fl, family, pols, + &num_pols, &num_xfrms); + if (err < 0) + goto inc_error; + if (num_pols == 0) + return NULL; + if (num_xfrms <= 0) + goto make_dummy_bundle; + } + + new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig); + if (IS_ERR(new_xdst)) { + err = PTR_ERR(new_xdst); + if (err != -EAGAIN) + goto error; + if (oldflo == NULL) + goto make_dummy_bundle; + dst_hold(&xdst->u.dst); + return oldflo; + } + + /* Kill the previous bundle */ + if (xdst) { + /* The policies were stolen for newly generated bundle */ + xdst->num_pols = 0; + dst_free(&xdst->u.dst); + } + + /* Flow cache does not have reference, it dst_free()'s, + * but we do need to return one reference for original caller */ + dst_hold(&new_xdst->u.dst); + return &new_xdst->flo; + +make_dummy_bundle: + /* We found policies, but there's no bundles to instantiate: + * either because the policy blocks, has no transformations or + * we could not build template (no xfrm_states).*/ + xdst = xfrm_alloc_dst(net, family); + if (IS_ERR(xdst)) { + xfrm_pols_put(pols, num_pols); + return ERR_CAST(xdst); + } + xdst->num_pols = num_pols; + xdst->num_xfrms = num_xfrms; + memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols); + + dst_hold(&xdst->u.dst); + return &xdst->flo; + +inc_error: + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); +error: + if (xdst != NULL) + dst_free(&xdst->u.dst); + else + xfrm_pols_put(pols, num_pols); + return ERR_PTR(err); +} /* Main function: finds/creates a bundle for given flow. 
* @@ -1568,248 +1754,152 @@ static int stale_bundle(struct dst_entry *dst); int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, struct sock *sk, int flags) { - struct xfrm_policy *policy; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; - int npols; - int pol_dead; - int xfrm_nr; - int pi; - struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; - struct dst_entry *dst, *dst_orig = *dst_p; - int nx = 0; - int err; - u32 genid; - u16 family; + struct flow_cache_object *flo; + struct xfrm_dst *xdst; + struct dst_entry *dst, *dst_orig = *dst_p, *route; + u16 family = dst_orig->ops->family; u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); + int i, err, num_pols, num_xfrms, drop_pols = 0; restart: - genid = atomic_read(&flow_cache_genid); - policy = NULL; - for (pi = 0; pi < ARRAY_SIZE(pols); pi++) - pols[pi] = NULL; - npols = 0; - pol_dead = 0; - xfrm_nr = 0; + dst = NULL; + xdst = NULL; + route = NULL; if (sk && sk->sk_policy[XFRM_POLICY_OUT]) { - policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); - err = PTR_ERR(policy); - if (IS_ERR(policy)) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); + num_pols = 1; + pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); + err = xfrm_expand_policies(fl, family, pols, + &num_pols, &num_xfrms); + if (err < 0) goto dropdst; + + if (num_pols) { + if (num_xfrms <= 0) { + drop_pols = num_pols; + goto no_transform; + } + + xdst = xfrm_resolve_and_create_bundle( + pols, num_pols, fl, + family, dst_orig); + if (IS_ERR(xdst)) { + xfrm_pols_put(pols, num_pols); + err = PTR_ERR(xdst); + goto dropdst; + } + + spin_lock_bh(&xfrm_policy_sk_bundle_lock); + xdst->u.dst.next = xfrm_policy_sk_bundles; + xfrm_policy_sk_bundles = &xdst->u.dst; + spin_unlock_bh(&xfrm_policy_sk_bundle_lock); + + route = xdst->route; } } - if (!policy) { - struct flow_cache_object *flo; - + if (xdst == NULL) { /* To accelerate a bit... */ if ((dst_orig->flags & DST_NOXFRM) || !net->xfrm.policy_count[XFRM_POLICY_OUT]) goto nopol; - flo = flow_cache_lookup(net, fl, dst_orig->ops->family, - dir, xfrm_policy_lookup, NULL); - err = PTR_ERR(flo); + flo = flow_cache_lookup(net, fl, family, dir, + xfrm_bundle_lookup, dst_orig); + if (flo == NULL) + goto nopol; if (IS_ERR(flo)) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); + err = PTR_ERR(flo); goto dropdst; } - if (flo) - policy = container_of(flo, struct xfrm_policy, flo); - else - policy = NULL; + xdst = container_of(flo, struct xfrm_dst, flo); + + num_pols = xdst->num_pols; + num_xfrms = xdst->num_xfrms; + memcpy(pols, xdst->pols, sizeof(struct xfrm_policy*) * num_pols); + route = xdst->route; + } + + dst = &xdst->u.dst; + if (route == NULL && num_xfrms > 0) { + /* The only case when xfrm_bundle_lookup() returns a + * bundle with null route, is when the template could + * not be resolved. It means policies are there, but + * bundle could not be created, since we don't yet + * have the xfrm_state's. We need to wait for KM to + * negotiate new SA's or bail out with error.*/ + if (net->xfrm.sysctl_larval_drop) { + /* EREMOTE tells the caller to generate + * a one-shot blackhole route. 
*/ + dst_release(dst); + xfrm_pols_put(pols, num_pols); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); + return -EREMOTE; + } + if (flags & XFRM_LOOKUP_WAIT) { + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue(&net->xfrm.km_waitq, &wait); + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + set_current_state(TASK_RUNNING); + remove_wait_queue(&net->xfrm.km_waitq, &wait); + + if (!signal_pending(current)) { + dst_release(dst); + goto restart; + } + + err = -ERESTART; + } else + err = -EAGAIN; + + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); + goto error; } - if (!policy) +no_transform: + if (num_pols == 0) goto nopol; - family = dst_orig->ops->family; - pols[0] = policy; - npols ++; - xfrm_nr += pols[0]->xfrm_nr; - - err = -ENOENT; - if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP)) + if ((flags & XFRM_LOOKUP_ICMP) && + !(pols[0]->flags & XFRM_POLICY_ICMP)) { + err = -ENOENT; goto error; + } - policy->curlft.use_time = get_seconds(); + for (i = 0; i < num_pols; i++) + pols[i]->curlft.use_time = get_seconds(); - switch (policy->action) { - default: - case XFRM_POLICY_BLOCK: + if (num_xfrms < 0) { /* Prohibit the flow */ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK); err = -EPERM; goto error; - - case XFRM_POLICY_ALLOW: -#ifndef CONFIG_XFRM_SUB_POLICY - if (policy->xfrm_nr == 0) { - /* Flow passes not transformed. */ - xfrm_pol_put(policy); - return 0; - } -#endif - - /* Try to find matching bundle. - * - * LATER: help from flow cache. It is optional, this - * is required only for output policy. - */ - dst = xfrm_find_bundle(fl, policy, family); - if (IS_ERR(dst)) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); - err = PTR_ERR(dst); - goto error; - } - - if (dst) - break; - -#ifdef CONFIG_XFRM_SUB_POLICY - if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { - pols[1] = xfrm_policy_lookup_bytype(net, - XFRM_POLICY_TYPE_MAIN, - fl, family, - XFRM_POLICY_OUT); - if (pols[1]) { - if (IS_ERR(pols[1])) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); - err = PTR_ERR(pols[1]); - goto error; - } - if (pols[1]->action == XFRM_POLICY_BLOCK) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK); - err = -EPERM; - goto error; - } - npols ++; - xfrm_nr += pols[1]->xfrm_nr; - } - } - - /* - * Because neither flowi nor bundle information knows about - * transformation template size. On more than one policy usage - * we can realize whether all of them is bypass or not after - * they are searched. See above not-transformed bypass - * is surrounded by non-sub policy configuration, too. - */ - if (xfrm_nr == 0) { - /* Flow passes not transformed. */ - xfrm_pols_put(pols, npols); - return 0; - } - -#endif - nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); - - if (unlikely(nx<0)) { - err = nx; - if (err == -EAGAIN && net->xfrm.sysctl_larval_drop) { - /* EREMOTE tells the caller to generate - * a one-shot blackhole route. 
- */ - XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); - xfrm_pol_put(policy); - return -EREMOTE; - } - if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) { - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue(&net->xfrm.km_waitq, &wait); - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - set_current_state(TASK_RUNNING); - remove_wait_queue(&net->xfrm.km_waitq, &wait); - - nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); - - if (nx == -EAGAIN && signal_pending(current)) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); - err = -ERESTART; - goto error; - } - if (nx == -EAGAIN || - genid != atomic_read(&flow_cache_genid)) { - xfrm_pols_put(pols, npols); - goto restart; - } - err = nx; - } - if (err < 0) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); - goto error; - } - } - if (nx == 0) { - /* Flow passes not transformed. */ - xfrm_pols_put(pols, npols); - return 0; - } - - dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig); - err = PTR_ERR(dst); - if (IS_ERR(dst)) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR); - goto error; - } - - for (pi = 0; pi < npols; pi++) - pol_dead |= pols[pi]->walk.dead; - - write_lock_bh(&policy->lock); - if (unlikely(pol_dead || stale_bundle(dst))) { - /* Wow! While we worked on resolving, this - * policy has gone. Retry. It is not paranoia, - * we just cannot enlist new bundle to dead object. - * We can't enlist stable bundles either. - */ - write_unlock_bh(&policy->lock); - dst_free(dst); - - if (pol_dead) - XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLDEAD); - else - XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); - err = -EHOSTUNREACH; - goto error; - } - - if (npols > 1) - err = xfrm_dst_update_parent(dst, &pols[1]->selector); - else - err = xfrm_dst_update_origin(dst, fl); - if (unlikely(err)) { - write_unlock_bh(&policy->lock); - dst_free(dst); - XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); - goto error; - } - - dst->next = policy->bundles; - policy->bundles = dst; - dst_hold(dst); - write_unlock_bh(&policy->lock); + } else if (num_xfrms > 0) { + /* Flow transformed */ + *dst_p = dst; + dst_release(dst_orig); + } else { + /* Flow passes untransformed */ + dst_release(dst); } - *dst_p = dst; - dst_release(dst_orig); - xfrm_pols_put(pols, npols); +ok: + xfrm_pols_put(pols, drop_pols); return 0; +nopol: + if (!(flags & XFRM_LOOKUP_ICMP)) + goto ok; + err = -ENOENT; error: - xfrm_pols_put(pols, npols); + dst_release(dst); dropdst: dst_release(dst_orig); *dst_p = NULL; + xfrm_pols_put(pols, drop_pols); return err; - -nopol: - err = -ENOENT; - if (flags & XFRM_LOOKUP_ICMP) - goto dropdst; - return 0; } EXPORT_SYMBOL(__xfrm_lookup); @@ -2161,71 +2251,24 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) return dst; } -static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p) -{ - struct dst_entry *dst, **dstp; - - write_lock(&pol->lock); - dstp = &pol->bundles; - while ((dst=*dstp) != NULL) { - if (func(dst)) { - *dstp = dst->next; - dst->next = *gc_list_p; - *gc_list_p = dst; - } else { - dstp = &dst->next; - } - } - write_unlock(&pol->lock); -} - -static void xfrm_prune_bundles(struct net *net, int (*func)(struct dst_entry *)) +static void __xfrm_garbage_collect(struct net *net) { - struct dst_entry *gc_list = NULL; - int dir; + struct dst_entry *head, *next; - read_lock_bh(&xfrm_policy_lock); - for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { - struct xfrm_policy *pol; - struct hlist_node *entry; - struct hlist_head *table; - int 
i; + flow_cache_flush(); - hlist_for_each_entry(pol, entry, - &net->xfrm.policy_inexact[dir], bydst) - prune_one_bundle(pol, func, &gc_list); + spin_lock_bh(&xfrm_policy_sk_bundle_lock); + head = xfrm_policy_sk_bundles; + xfrm_policy_sk_bundles = NULL; + spin_unlock_bh(&xfrm_policy_sk_bundle_lock); - table = net->xfrm.policy_bydst[dir].table; - for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { - hlist_for_each_entry(pol, entry, table + i, bydst) - prune_one_bundle(pol, func, &gc_list); - } - } - read_unlock_bh(&xfrm_policy_lock); - - while (gc_list) { - struct dst_entry *dst = gc_list; - gc_list = dst->next; - dst_free(dst); + while (head) { + next = head->next; + dst_free(head); + head = next; } } -static int unused_bundle(struct dst_entry *dst) -{ - return !atomic_read(&dst->__refcnt); -} - -static void __xfrm_garbage_collect(struct net *net) -{ - xfrm_prune_bundles(net, unused_bundle); -} - -static int xfrm_flush_bundles(struct net *net) -{ - xfrm_prune_bundles(net, stale_bundle); - return 0; -} - static void xfrm_init_pmtu(struct dst_entry *dst) { do { @@ -2283,7 +2326,9 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, return 0; if (dst->xfrm->km.state != XFRM_STATE_VALID) return 0; - if (xdst->genid != dst->xfrm->genid) + if (xdst->xfrm_genid != dst->xfrm->genid) + return 0; + if (xdst->policy_genid != atomic_read(&xdst->pols[0]->genid)) return 0; if (strict && fl && @@ -2448,7 +2493,7 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void switch (event) { case NETDEV_DOWN: - xfrm_flush_bundles(dev_net(dev)); + __xfrm_garbage_collect(dev_net(dev)); } return NOTIFY_DONE; } @@ -2780,7 +2825,6 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol, struct xfrm_migrate *m, int num_migrate) { struct xfrm_migrate *mp; - struct dst_entry *dst; int i, j, n = 0; write_lock_bh(&pol->lock); @@ -2805,10 +2849,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol, sizeof(pol->xfrm_vec[i].saddr)); pol->xfrm_vec[i].encap_family = mp->new_family; /* flush bundles */ - while ((dst = pol->bundles) != NULL) { - pol->bundles = dst->next; - dst_free(dst); - } + atomic_inc(&pol->genid); } } -- cgit v1.2.2 From 285ead175c5dd5075cab5b6c94f35a3e6c0a3ae6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Wed, 7 Apr 2010 00:30:06 +0000 Subject: xfrm: remove policy garbage collection Policies are now properly reference counted and destroyed from all code paths. The delayed gc is just an overhead now and can be removed. Signed-off-by: Timo Teras Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_policy.c | 39 +++++---------------------------------- 1 file changed, 5 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 06ccc71c871f..7430ac26ec49 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -46,9 +46,6 @@ static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; static struct kmem_cache *xfrm_dst_cache __read_mostly; -static HLIST_HEAD(xfrm_policy_gc_list); -static DEFINE_SPINLOCK(xfrm_policy_gc_lock); - static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); static void xfrm_init_pmtu(struct dst_entry *dst); @@ -288,32 +285,6 @@ void xfrm_policy_destroy(struct xfrm_policy *policy) } EXPORT_SYMBOL(xfrm_policy_destroy); -static void xfrm_policy_gc_kill(struct xfrm_policy *policy) -{ - atomic_inc(&policy->genid); - - if (del_timer(&policy->timer)) - atomic_dec(&policy->refcnt); - - xfrm_pol_put(policy); -} - -static void xfrm_policy_gc_task(struct work_struct *work) -{ - struct xfrm_policy *policy; - struct hlist_node *entry, *tmp; - struct hlist_head gc_list; - - spin_lock_bh(&xfrm_policy_gc_lock); - gc_list.first = xfrm_policy_gc_list.first; - INIT_HLIST_HEAD(&xfrm_policy_gc_list); - spin_unlock_bh(&xfrm_policy_gc_lock); - - hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst) - xfrm_policy_gc_kill(policy); -} -static DECLARE_WORK(xfrm_policy_gc_work, xfrm_policy_gc_task); - /* Rule must be locked. Release descentant resources, announce * entry dead. The rule must be unlinked from lists to the moment. */ @@ -322,11 +293,12 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) { policy->walk.dead = 1; - spin_lock_bh(&xfrm_policy_gc_lock); - hlist_add_head(&policy->bydst, &xfrm_policy_gc_list); - spin_unlock_bh(&xfrm_policy_gc_lock); + atomic_inc(&policy->genid); - schedule_work(&xfrm_policy_gc_work); + if (del_timer(&policy->timer)) + xfrm_pol_put(policy); + + xfrm_pol_put(policy); } static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024; @@ -2599,7 +2571,6 @@ static void xfrm_policy_fini(struct net *net) audit_info.sessionid = -1; audit_info.secid = 0; xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); - flush_work(&xfrm_policy_gc_work); WARN_ON(!list_empty(&net->xfrm.policy_all)); -- cgit v1.2.2 From 8e4795605d1e1b39113818ad7c147b8a867a1f6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Wed, 7 Apr 2010 00:30:07 +0000 Subject: flow: delayed deletion of flow cache entries Speed up lookups by freeing flow cache entries later. After virtualizing flow cache entry operations, the flow cache may now end up calling policy or bundle destructor which can be slowish. As gc_list is more effective with double linked list, the flow cache is converted to use common hlist and list macroes where appropriate. Signed-off-by: Timo Teras Signed-off-by: David S. 
Miller --- net/core/flow.c | 100 ++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 69 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/core/flow.c b/net/core/flow.c index 521df52a77d2..161900674009 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -26,7 +26,10 @@ #include struct flow_cache_entry { - struct flow_cache_entry *next; + union { + struct hlist_node hlist; + struct list_head gc_list; + } u; u16 family; u8 dir; u32 genid; @@ -35,7 +38,7 @@ struct flow_cache_entry { }; struct flow_cache_percpu { - struct flow_cache_entry **hash_table; + struct hlist_head *hash_table; int hash_count; u32 hash_rnd; int hash_rnd_recalc; @@ -62,6 +65,9 @@ atomic_t flow_cache_genid = ATOMIC_INIT(0); static struct flow_cache flow_cache_global; static struct kmem_cache *flow_cachep; +static DEFINE_SPINLOCK(flow_cache_gc_lock); +static LIST_HEAD(flow_cache_gc_list); + #define flow_cache_hash_size(cache) (1 << (cache)->hash_shift) #define FLOW_HASH_RND_PERIOD (10 * 60 * HZ) @@ -86,38 +92,66 @@ static int flow_entry_valid(struct flow_cache_entry *fle) return 1; } -static void flow_entry_kill(struct flow_cache *fc, - struct flow_cache_percpu *fcp, - struct flow_cache_entry *fle) +static void flow_entry_kill(struct flow_cache_entry *fle) { if (fle->object) fle->object->ops->delete(fle->object); kmem_cache_free(flow_cachep, fle); - fcp->hash_count--; +} + +static void flow_cache_gc_task(struct work_struct *work) +{ + struct list_head gc_list; + struct flow_cache_entry *fce, *n; + + INIT_LIST_HEAD(&gc_list); + spin_lock_bh(&flow_cache_gc_lock); + list_splice_tail_init(&flow_cache_gc_list, &gc_list); + spin_unlock_bh(&flow_cache_gc_lock); + + list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) + flow_entry_kill(fce); +} +static DECLARE_WORK(flow_cache_gc_work, flow_cache_gc_task); + +static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp, + int deleted, struct list_head *gc_list) +{ + if (deleted) { + fcp->hash_count -= deleted; + spin_lock_bh(&flow_cache_gc_lock); + list_splice_tail(gc_list, &flow_cache_gc_list); + spin_unlock_bh(&flow_cache_gc_lock); + schedule_work(&flow_cache_gc_work); + } } static void __flow_cache_shrink(struct flow_cache *fc, struct flow_cache_percpu *fcp, int shrink_to) { - struct flow_cache_entry *fle, **flp; - int i; + struct flow_cache_entry *fle; + struct hlist_node *entry, *tmp; + LIST_HEAD(gc_list); + int i, deleted = 0; for (i = 0; i < flow_cache_hash_size(fc); i++) { int saved = 0; - flp = &fcp->hash_table[i]; - while ((fle = *flp) != NULL) { + hlist_for_each_entry_safe(fle, entry, tmp, + &fcp->hash_table[i], u.hlist) { if (saved < shrink_to && flow_entry_valid(fle)) { saved++; - flp = &fle->next; } else { - *flp = fle->next; - flow_entry_kill(fc, fcp, fle); + deleted++; + hlist_del(&fle->u.hlist); + list_add_tail(&fle->u.gc_list, &gc_list); } } } + + flow_cache_queue_garbage(fcp, deleted, &gc_list); } static void flow_cache_shrink(struct flow_cache *fc, @@ -182,7 +216,8 @@ flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, { struct flow_cache *fc = &flow_cache_global; struct flow_cache_percpu *fcp; - struct flow_cache_entry *fle, **head; + struct flow_cache_entry *fle, *tfle; + struct hlist_node *entry; struct flow_cache_object *flo; unsigned int hash; @@ -200,12 +235,13 @@ flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, flow_new_hash_rnd(fc, fcp); hash = flow_hash_code(fc, fcp, key); - head = &fcp->hash_table[hash]; - for (fle = *head; fle; fle = fle->next) { - if 
(fle->family == family && - fle->dir == dir && - flow_key_compare(key, &fle->key) == 0) + hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) { + if (tfle->family == family && + tfle->dir == dir && + flow_key_compare(key, &tfle->key) == 0) { + fle = tfle; break; + } } if (unlikely(!fle)) { @@ -214,12 +250,11 @@ flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); if (fle) { - fle->next = *head; - *head = fle; fle->family = family; fle->dir = dir; memcpy(&fle->key, key, sizeof(*key)); fle->object = NULL; + hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]); fcp->hash_count++; } } else if (likely(fle->genid == atomic_read(&flow_cache_genid))) { @@ -262,23 +297,26 @@ static void flow_cache_flush_tasklet(unsigned long data) struct flow_flush_info *info = (void *)data; struct flow_cache *fc = info->cache; struct flow_cache_percpu *fcp; - int i; + struct flow_cache_entry *fle; + struct hlist_node *entry, *tmp; + LIST_HEAD(gc_list); + int i, deleted = 0; fcp = per_cpu_ptr(fc->percpu, smp_processor_id()); for (i = 0; i < flow_cache_hash_size(fc); i++) { - struct flow_cache_entry *fle; - - fle = fcp->hash_table[i]; - for (; fle; fle = fle->next) { + hlist_for_each_entry_safe(fle, entry, tmp, + &fcp->hash_table[i], u.hlist) { if (flow_entry_valid(fle)) continue; - if (fle->object) - fle->object->ops->delete(fle->object); - fle->object = NULL; + deleted++; + hlist_del(&fle->u.hlist); + list_add_tail(&fle->u.gc_list, &gc_list); } } + flow_cache_queue_garbage(fcp, deleted, &gc_list); + if (atomic_dec_and_test(&info->cpuleft)) complete(&info->completion); } @@ -320,7 +358,7 @@ void flow_cache_flush(void) static void __init flow_cache_cpu_prepare(struct flow_cache *fc, struct flow_cache_percpu *fcp) { - fcp->hash_table = (struct flow_cache_entry **) + fcp->hash_table = (struct hlist_head *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, fc->order); if (!fcp->hash_table) panic("NET: failed to allocate flow cache order %lu\n", fc->order); @@ -354,7 +392,7 @@ static int flow_cache_init(struct flow_cache *fc) for (order = 0; (PAGE_SIZE << order) < - (sizeof(struct flow_cache_entry *)*flow_cache_hash_size(fc)); + (sizeof(struct hlist_head)*flow_cache_hash_size(fc)); order++) /* NOTHING */; fc->order = order; -- cgit v1.2.2 From d5cdfacb35ed886271d1ccfffbded98d3447da17 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sun, 4 Apr 2010 09:37:19 +0300 Subject: cfg80211: Add local-state-change-only auth/deauth/disassoc cfg80211 is quite strict on allowing authentication and association commands only in certain states. In order to meet these requirements, user space applications may need to clear authentication or association state in some cases. Currently, this can be done with deauth/disassoc command, but that ends up sending out Deauthentication or Disassociation frame unnecessarily. Add a new nl80211 attribute to allow this sending of the frame be skipped, but with all other deauth/disassoc operations being completed. Similar state change is also needed for IEEE 802.11r FT protocol in the FT-over-DS case which does not use Authentication frame exchange in a transition to another BSS. For this to work with cfg80211, an authentication entry needs to be created for the target BSS without sending out an Authentication frame. The nl80211 authentication command can be used for this purpose, too, with the new attribute to indicate that the command is only for changing local state. 
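A minimal userspace sketch (not part of this patch) of how a management tool might issue such a local-state-change-only deauthentication once the attribute exists. It assumes the libnl-3 generic netlink API; the interface name, BSSID and reason code are placeholders and error handling is omitted:

#include <net/if.h>
#include <linux/nl80211.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>

static const unsigned char bssid[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };

int main(void)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg = nlmsg_alloc();
	int family;

	genl_connect(sk);
	family = genl_ctrl_resolve(sk, "nl80211");

	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
		    NL80211_CMD_DEAUTHENTICATE, 0);
	nla_put_u32(msg, NL80211_ATTR_IFINDEX, if_nametoindex("wlan0"));
	nla_put(msg, NL80211_ATTR_MAC, 6, bssid);
	nla_put_u16(msg, NL80211_ATTR_REASON_CODE, 3 /* DEAUTH_LEAVING */);
	/* New attribute: only change local state, do not transmit a
	 * Deauthentication frame. */
	nla_put_flag(msg, NL80211_ATTR_LOCAL_STATE_CHANGE);

	nl_send_auto(sk, msg);
	nl_recvmsgs_default(sk);

	nlmsg_free(msg);
	nl_socket_free(sk);
	return 0;
}

With the flag set, cfg80211 updates its own state machines but nothing is transmitted over the air.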
This enables wpa_supplicant to complete FT-over-DS transition successfully. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 23 +++++++++++++++-------- net/wireless/core.h | 15 ++++++++++----- net/wireless/mlme.c | 39 ++++++++++++++++++++++++++++----------- net/wireless/nl80211.c | 19 ++++++++++++++++--- net/wireless/sme.c | 15 +++++++++------ 5 files changed, 78 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 57a3c62139e2..4c189d0be4a3 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -210,7 +210,7 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, const u8 *bssid, u16 stype, u16 reason, - void *cookie) + void *cookie, bool send_frame) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; @@ -247,7 +247,11 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, cfg80211_send_disassoc(sdata->dev, (u8 *)mgmt, skb->len); if (!(ifmgd->flags & IEEE80211_STA_MFP_ENABLED)) IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; - ieee80211_tx_skb(sdata, skb); + + if (send_frame) + ieee80211_tx_skb(sdata, skb); + else + kfree_skb(skb); } void ieee80211_send_pspoll(struct ieee80211_local *local, @@ -980,7 +984,7 @@ static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata) ieee80211_send_deauth_disassoc(sdata, bssid, IEEE80211_STYPE_DEAUTH, WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, - NULL); + NULL, true); } void ieee80211_beacon_connection_loss_work(struct work_struct *work) @@ -1724,7 +1728,7 @@ static void ieee80211_sta_work(struct work_struct *work) ieee80211_send_deauth_disassoc(sdata, bssid, IEEE80211_STYPE_DEAUTH, WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, - NULL); + NULL, true); mutex_lock(&ifmgd->mtx); } } @@ -1908,6 +1912,9 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, struct ieee80211_work *wk; u16 auth_alg; + if (req->local_state_change) + return 0; /* no need to update mac80211 state */ + switch (req->auth_type) { case NL80211_AUTHTYPE_OPEN_SYSTEM: auth_alg = WLAN_AUTH_OPEN; @@ -2163,9 +2170,9 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, printk(KERN_DEBUG "%s: deauthenticating from %pM by local choice (reason=%d)\n", sdata->name, bssid, req->reason_code); - ieee80211_send_deauth_disassoc(sdata, bssid, - IEEE80211_STYPE_DEAUTH, req->reason_code, - cookie); + ieee80211_send_deauth_disassoc(sdata, bssid, IEEE80211_STYPE_DEAUTH, + req->reason_code, cookie, + !req->local_state_change); ieee80211_recalc_idle(sdata->local); @@ -2202,7 +2209,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_send_deauth_disassoc(sdata, req->bss->bssid, IEEE80211_STYPE_DISASSOC, req->reason_code, - cookie); + cookie, !req->local_state_change); sta_info_destroy_addr(sdata, bssid); ieee80211_recalc_idle(sdata->local); diff --git a/net/wireless/core.h b/net/wireless/core.h index d52da913145a..b2234b436ead 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -293,13 +293,15 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, const u8 *bssid, const u8 *ssid, int ssid_len, const u8 *ie, int ie_len, - const u8 *key, int key_len, int key_idx); + const u8 *key, int key_len, int key_idx, + bool local_state_change); int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel 
*chan, enum nl80211_auth_type auth_type, const u8 *bssid, const u8 *ssid, int ssid_len, const u8 *ie, int ie_len, - const u8 *key, int key_len, int key_idx); + const u8 *key, int key_len, int key_idx, + bool local_state_change); int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, @@ -315,13 +317,16 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct cfg80211_crypto_settings *crypt); int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *bssid, - const u8 *ie, int ie_len, u16 reason); + const u8 *ie, int ie_len, u16 reason, + bool local_state_change); int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *bssid, - const u8 *ie, int ie_len, u16 reason); + const u8 *ie, int ie_len, u16 reason, + bool local_state_change); int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *bssid, - const u8 *ie, int ie_len, u16 reason); + const u8 *ie, int ie_len, u16 reason, + bool local_state_change); void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, struct net_device *dev); void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 0855f0d32349..387dd2a27d2f 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -377,7 +377,8 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, const u8 *bssid, const u8 *ssid, int ssid_len, const u8 *ie, int ie_len, - const u8 *key, int key_len, int key_idx) + const u8 *key, int key_len, int key_idx, + bool local_state_change) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_auth_request req; @@ -407,6 +408,7 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, memset(&req, 0, sizeof(req)); + req.local_state_change = local_state_change; req.ie = ie; req.ie_len = ie_len; req.auth_type = auth_type; @@ -433,12 +435,18 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, goto out; } - wdev->authtry_bsses[slot] = bss; + if (local_state_change) + wdev->auth_bsses[slot] = bss; + else + wdev->authtry_bsses[slot] = bss; cfg80211_hold_bss(bss); err = rdev->ops->auth(&rdev->wiphy, dev, &req); if (err) { - wdev->authtry_bsses[slot] = NULL; + if (local_state_change) + wdev->auth_bsses[slot] = NULL; + else + wdev->authtry_bsses[slot] = NULL; cfg80211_unhold_bss(bss); } @@ -453,14 +461,15 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, enum nl80211_auth_type auth_type, const u8 *bssid, const u8 *ssid, int ssid_len, const u8 *ie, int ie_len, - const u8 *key, int key_len, int key_idx) + const u8 *key, int key_len, int key_idx, + bool local_state_change) { int err; wdev_lock(dev->ieee80211_ptr); err = __cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, ssid, ssid_len, ie, ie_len, - key, key_len, key_idx); + key, key_len, key_idx, local_state_change); wdev_unlock(dev->ieee80211_ptr); return err; @@ -554,7 +563,8 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *bssid, - const u8 *ie, int ie_len, u16 reason) + const u8 *ie, int ie_len, u16 reason, + bool local_state_change) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_deauth_request req; @@ -564,6 +574,7 @@ int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, memset(&req, 0, 
sizeof(req)); req.reason_code = reason; + req.local_state_change = local_state_change; req.ie = ie; req.ie_len = ie_len; if (wdev->current_bss && @@ -590,13 +601,15 @@ int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *bssid, - const u8 *ie, int ie_len, u16 reason) + const u8 *ie, int ie_len, u16 reason, + bool local_state_change) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; wdev_lock(wdev); - err = __cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason); + err = __cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason, + local_state_change); wdev_unlock(wdev); return err; @@ -604,7 +617,8 @@ int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *bssid, - const u8 *ie, int ie_len, u16 reason) + const u8 *ie, int ie_len, u16 reason, + bool local_state_change) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_disassoc_request req; @@ -619,6 +633,7 @@ static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, memset(&req, 0, sizeof(req)); req.reason_code = reason; + req.local_state_change = local_state_change; req.ie = ie; req.ie_len = ie_len; if (memcmp(wdev->current_bss->pub.bssid, bssid, ETH_ALEN) == 0) @@ -631,13 +646,15 @@ static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *bssid, - const u8 *ie, int ie_len, u16 reason) + const u8 *ie, int ie_len, u16 reason, + bool local_state_change) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; wdev_lock(wdev); - err = __cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason); + err = __cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason, + local_state_change); wdev_unlock(wdev); return err; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 95149f303409..df5505b3930c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -150,6 +150,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_FRAME_MATCH] = { .type = NLA_BINARY, }, [NL80211_ATTR_PS_STATE] = { .type = NLA_U32 }, [NL80211_ATTR_CQM] = { .type = NLA_NESTED, }, + [NL80211_ATTR_LOCAL_STATE_CHANGE] = { .type = NLA_FLAG }, }; /* policy for the attributes */ @@ -3393,6 +3394,7 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) int err, ssid_len, ie_len = 0; enum nl80211_auth_type auth_type; struct key_parse key; + bool local_state_change; if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) return -EINVAL; @@ -3471,9 +3473,12 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) goto out; } + local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE]; + err = cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, ssid, ssid_len, ie, ie_len, - key.p.key, key.p.key_len, key.idx); + key.p.key, key.p.key_len, key.idx, + local_state_change); out: cfg80211_unlock_rdev(rdev); @@ -3650,6 +3655,7 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info) const u8 *ie = NULL, *bssid; int err, ie_len = 0; u16 reason_code; + bool local_state_change; if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) return -EINVAL; @@ -3695,7 +3701,10 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info) ie_len 
= nla_len(info->attrs[NL80211_ATTR_IE]); } - err = cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code); + local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE]; + + err = cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code, + local_state_change); out: cfg80211_unlock_rdev(rdev); @@ -3712,6 +3721,7 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) const u8 *ie = NULL, *bssid; int err, ie_len = 0; u16 reason_code; + bool local_state_change; if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) return -EINVAL; @@ -3757,7 +3767,10 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); } - err = cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code); + local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE]; + + err = cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code, + local_state_change); out: cfg80211_unlock_rdev(rdev); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 17fde0da1b08..17465777eb47 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -170,7 +170,7 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) params->ssid, params->ssid_len, NULL, 0, params->key, params->key_len, - params->key_idx); + params->key_idx, false); case CFG80211_CONN_ASSOCIATE_NEXT: BUG_ON(!rdev->ops->assoc); wdev->conn->state = CFG80211_CONN_ASSOCIATING; @@ -185,12 +185,13 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) if (err) __cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, NULL, 0, - WLAN_REASON_DEAUTH_LEAVING); + WLAN_REASON_DEAUTH_LEAVING, + false); return err; case CFG80211_CONN_DEAUTH_ASSOC_FAIL: __cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, NULL, 0, - WLAN_REASON_DEAUTH_LEAVING); + WLAN_REASON_DEAUTH_LEAVING, false); /* return an error so that we call __cfg80211_connect_result() */ return -EINVAL; default: @@ -675,7 +676,8 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, continue; bssid = wdev->auth_bsses[i]->pub.bssid; ret = __cfg80211_mlme_deauth(rdev, dev, bssid, NULL, 0, - WLAN_REASON_DEAUTH_LEAVING); + WLAN_REASON_DEAUTH_LEAVING, + false); WARN(ret, "deauth failed: %d\n", ret); } } @@ -934,7 +936,7 @@ int __cfg80211_disconnect(struct cfg80211_registered_device *rdev, /* wdev->conn->params.bssid must be set if > SCANNING */ err = __cfg80211_mlme_deauth(rdev, dev, wdev->conn->params.bssid, - NULL, 0, reason); + NULL, 0, reason, false); if (err) return err; } else { @@ -990,7 +992,8 @@ void cfg80211_sme_disassoc(struct net_device *dev, int idx) memcpy(bssid, wdev->auth_bsses[idx]->pub.bssid, ETH_ALEN); if (__cfg80211_mlme_deauth(rdev, dev, bssid, - NULL, 0, WLAN_REASON_DEAUTH_LEAVING)) { + NULL, 0, WLAN_REASON_DEAUTH_LEAVING, + false)) { /* whatever -- assume gone anyway */ cfg80211_unhold_bss(wdev->auth_bsses[idx]); cfg80211_put_bss(&wdev->auth_bsses[idx]->pub); -- cgit v1.2.2 From e64b379574d6c92c15b4239ee0a5173317176547 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 Apr 2010 11:18:43 +0200 Subject: mac80211: fix station destruction problem When a station w/o a key is destroyed, or when a driver submits work for a station and thereby references it again, it seems like potentially we could reference the station structure while it is being destroyed. 
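The fix relies on the standard RCU removal discipline: unpublish the object, wait for a grace period, then free it. A generic sketch of that pattern (illustrative only; the struct and helpers below are made up and this is not the mac80211 code):

#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct item {
	struct list_head list;
	int key;
};

static LIST_HEAD(items);
static DEFINE_SPINLOCK(items_lock);

/* Readers call this under rcu_read_lock() and must not use the
 * returned pointer after rcu_read_unlock(). */
static struct item *item_lookup(int key)
{
	struct item *it;

	list_for_each_entry_rcu(it, &items, list)
		if (it->key == key)
			return it;
	return NULL;
}

/* Writer: unpublish first, then wait for every reader that might
 * still hold a pointer, and only then free. Freeing before
 * synchronize_rcu() returns would let a concurrent reader
 * dereference freed memory. */
static void item_destroy(struct item *it)
{
	spin_lock(&items_lock);
	list_del_rcu(&it->list);
	spin_unlock(&items_lock);

	synchronize_rcu();
	kfree(it);
}

In the station case, removing the entry from the hash table and from the driver is the unpublish step.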
Wait for an RCU grace period to elapse before finishing destroying the station after we have removed the station from the driver and from the hash table etc., even in the case where no key is associated with the station. Also, there's no point in deleting the plink timer here since it'll be properly deleted just a bit later. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/sta_info.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 211c475f73c6..bd11753c1525 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -632,9 +632,6 @@ static int __must_check __sta_info_destroy(struct sta_info *sta) * may mean it is removed from hardware which requires that * the key->sta pointer is still valid, so flush the key todo * list here. - * - * ieee80211_key_todo() will synchronize_rcu() so after this - * nothing can reference this sta struct any more. */ ieee80211_key_todo(); @@ -666,11 +663,17 @@ static int __must_check __sta_info_destroy(struct sta_info *sta) sdata = sta->sdata; } + /* + * At this point, after we wait for an RCU grace period, + * neither mac80211 nor the driver can reference this + * sta struct any more except by still existing timers + * associated with this station that we clean up below. + */ + synchronize_rcu(); + #ifdef CONFIG_MAC80211_MESH - if (ieee80211_vif_is_mesh(&sdata->vif)) { + if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_accept_plinks_update(sdata); - del_timer(&sta->plink_timer); - } #endif #ifdef CONFIG_MAC80211_VERBOSE_DEBUG -- cgit v1.2.2 From 2b43ae6daf26f29cec49fa3a3f18025355495500 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 Apr 2010 11:18:44 +0200 Subject: mac80211: remove irq disabling for sta lock All other places except one in the TX path, which has BHs disabled, and it also cannot be locked from interrupts so disabling IRQs is not necessary. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/tx.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 350096afe79a..f7209d691c35 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1144,13 +1144,12 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, if (tx->sta && ieee80211_is_data_qos(hdr->frame_control) && (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION)) { - unsigned long flags; struct tid_ampdu_tx *tid_tx; qc = ieee80211_get_qos_ctl(hdr); tid = *qc & IEEE80211_QOS_CTL_TID_MASK; - spin_lock_irqsave(&tx->sta->lock, flags); + spin_lock(&tx->sta->lock); /* * XXX: This spinlock could be fairly expensive, but see the * comment in agg-tx.c:ieee80211_agg_tx_operational(). @@ -1175,7 +1174,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; __skb_queue_tail(&tid_tx->pending, skb); } - spin_unlock_irqrestore(&tx->sta->lock, flags); + spin_unlock(&tx->sta->lock); if (unlikely(queued)) return TX_QUEUED; -- cgit v1.2.2 From 66b0470aeef10a3b0f9a6a1c60d908b5a06c62ae Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 Apr 2010 11:18:45 +0200 Subject: mac80211: remove ieee80211_sta_stop_rx_ba_session All callers of ieee80211_sta_stop_rx_ba_session can just call __ieee80211_stop_rx_ba_session instead because they already have the station struct, so do that and remove ieee80211_sta_stop_rx_ba_session. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/agg-rx.c | 24 ++---------------------- net/mac80211/ht.c | 3 +-- net/mac80211/ieee80211_i.h | 2 -- net/mac80211/rx.c | 4 ++-- 4 files changed, 5 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index a978e666ed6f..cb9f80a94002 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -79,28 +79,9 @@ void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, spin_unlock_bh(&sta->lock); } -void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, - u16 initiator, u16 reason) -{ - struct sta_info *sta; - - rcu_read_lock(); - - sta = sta_info_get(sdata, ra); - if (!sta) { - rcu_read_unlock(); - return; - } - - __ieee80211_stop_rx_ba_session(sta, tid, initiator, reason); - - rcu_read_unlock(); -} - /* * After accepting the AddBA Request we activated a timer, * resetting it after each frame that arrives from the originator. - * if this timer expires ieee80211_sta_stop_rx_ba_session will be executed. */ static void sta_rx_agg_session_timer_expired(unsigned long data) { @@ -116,9 +97,8 @@ static void sta_rx_agg_session_timer_expired(unsigned long data) #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid); #endif - ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->sta.addr, - (u16)*ptid, WLAN_BACK_TIMER, - WLAN_REASON_QSTA_TIMEOUT); + __ieee80211_stop_rx_ba_session(sta, *ptid, WLAN_BACK_RECIPIENT, + WLAN_REASON_QSTA_TIMEOUT); } static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid, diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index bb677a73b7c9..2ab106a0a491 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -175,8 +175,7 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, #endif /* CONFIG_MAC80211_HT_DEBUG */ if (initiator == WLAN_BACK_INITIATOR) - ieee80211_sta_stop_rx_ba_session(sdata, sta->sta.addr, tid, - WLAN_BACK_INITIATOR, 0); + __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_INITIATOR, 0); else { /* WLAN_BACK_RECIPIENT */ spin_lock_bh(&sta->lock); if (sta->ampdu_mlme.tid_state_tx[tid] & HT_ADDBA_REQUESTED_MSK) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 741fb8bbc4a0..4e73660ebe99 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1098,8 +1098,6 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps, const u8 *da, const u8 *bssid); -void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *da, - u16 tid, u16 initiator, u16 reason); void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason); void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index b83d4db6ca6a..c02e43b50ac3 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -739,8 +739,8 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, /* if this mpdu is fragmented - terminate rx aggregation session */ sc = le16_to_cpu(hdr->seq_ctrl); if (sc & IEEE80211_SCTL_FRAG) { - ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->sta.addr, - tid, 0, WLAN_REASON_QSTA_REQUIRE_SETUP); + __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT, + WLAN_REASON_QSTA_REQUIRE_SETUP); dev_kfree_skb(skb); return; } -- cgit v1.2.2 From 618f356b95e37ca0c30b3b513898fda54abd52a6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 Apr 2010 11:18:46 +0200 
Subject: mac80211: rename WLAN_STA_SUSPEND to WLAN_STA_BLOCK_BA I want to use it during station destruction as well so rename it to WLAN_STA_BLOCK_BA which is also the only use of it now. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-rx.c | 2 +- net/mac80211/agg-tx.c | 2 +- net/mac80211/pm.c | 2 +- net/mac80211/sta_info.h | 6 +++--- net/mac80211/util.c | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index cb9f80a94002..7d87f446f030 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -173,7 +173,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, status = WLAN_STATUS_REQUEST_DECLINED; - if (test_sta_flags(sta, WLAN_STA_SUSPEND)) { + if (test_sta_flags(sta, WLAN_STA_BLOCK_BA)) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Suspend in progress. " "Denying ADDBA request\n"); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index bb4ac70fc97a..32d2148b5b98 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -245,7 +245,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) return -EINVAL; } - if (test_sta_flags(sta, WLAN_STA_SUSPEND)) { + if (test_sta_flags(sta, WLAN_STA_BLOCK_BA)) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Suspend in progress. " "Denying BA session request\n"); diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 0e64484e861c..75202b295a4e 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -46,7 +46,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { list_for_each_entry_rcu(sta, &local->sta_list, list) { - set_sta_flags(sta, WLAN_STA_SUSPEND); + set_sta_flags(sta, WLAN_STA_BLOCK_BA); ieee80211_sta_tear_down_BA_sessions(sta); } } diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 2b635909de5c..57e81758d6f7 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -35,8 +35,8 @@ * IEEE80211_TX_CTL_CLEAR_PS_FILT control flag) when the next * frame to this station is transmitted. * @WLAN_STA_MFP: Management frame protection is used with this STA. - * @WLAN_STA_SUSPEND: Set/cleared during a suspend/resume cycle. - * Used to deny ADDBA requests (both TX and RX). + * @WLAN_STA_BLOCK_BA: Used to deny ADDBA requests (both TX and RX) + * during suspend/resume. 
* @WLAN_STA_PS_DRIVER: driver requires keeping this station in * power-save mode logically to flush frames that might still * be in the queues @@ -57,7 +57,7 @@ enum ieee80211_sta_info_flags { WLAN_STA_WDS = 1<<7, WLAN_STA_CLEAR_PS_FILT = 1<<9, WLAN_STA_MFP = 1<<10, - WLAN_STA_SUSPEND = 1<<11, + WLAN_STA_BLOCK_BA = 1<<11, WLAN_STA_PS_DRIVER = 1<<12, WLAN_STA_PSPOLL = 1<<13, WLAN_STA_DISASSOC = 1<<14, diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 7b2c170af71c..7614821caed5 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1140,7 +1140,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { list_for_each_entry_rcu(sta, &local->sta_list, list) { - clear_sta_flags(sta, WLAN_STA_SUSPEND); + clear_sta_flags(sta, WLAN_STA_BLOCK_BA); } } -- cgit v1.2.2 From 098a607091426e79178b9a6c318d993fea131791 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 Apr 2010 11:18:47 +0200 Subject: mac80211: clean up/fix aggregation code The aggregation code has a number of quirks, like inventing an unneeded WLAN_BACK_TIMER value and leaking memory under certain circumstances during station destruction. Fix these issues by using the regular aggregation session teardown code and blocking new aggregation sessions, all before the station is really destructed. As a side effect, this gets rid of the long code block to destroy aggregation safely. Additionally, rename tid_state_rx which can only have the values IDLE and OPERATIONAL to tid_active_rx to make it easier to understand that there is no bitwise stuff going on on the RX side -- the TX side remains because it needs to keep track of the driver and peer states. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-rx.c | 48 +++++++++++++++++--------------------- net/mac80211/debugfs_sta.c | 10 ++++---- net/mac80211/rx.c | 5 ++-- net/mac80211/sta_info.c | 58 ++++++++-------------------------------------- net/mac80211/sta_info.h | 6 ++--- 5 files changed, 40 insertions(+), 87 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 7d87f446f030..53233ab50f65 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -22,19 +22,20 @@ void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason) { struct ieee80211_local *local = sta->local; + struct tid_ampdu_rx *tid_rx; int i; - /* check if TID is in operational state */ spin_lock_bh(&sta->lock); - if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_OPERATIONAL) { + + /* check if TID is in operational state */ + if (!sta->ampdu_mlme.tid_active_rx[tid]) { spin_unlock_bh(&sta->lock); return; } - sta->ampdu_mlme.tid_state_rx[tid] = - HT_AGG_STATE_REQ_STOP_BA_MSK | - (initiator << HT_AGG_STATE_INITIATOR_SHIFT); - spin_unlock_bh(&sta->lock); + sta->ampdu_mlme.tid_active_rx[tid] = false; + + tid_rx = sta->ampdu_mlme.tid_rx[tid]; #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Rx BA session stop requested for %pM tid %u\n", @@ -46,37 +47,30 @@ void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, printk(KERN_DEBUG "HW problem - can not stop rx " "aggregation for tid %d\n", tid); - /* shutdown timer has not expired */ - if (initiator != WLAN_BACK_TIMER) - del_timer_sync(&sta->ampdu_mlme.tid_rx[tid]->session_timer); - /* check if this is a self generated aggregation halt */ - if (initiator == WLAN_BACK_RECIPIENT || initiator == WLAN_BACK_TIMER) + if (initiator == WLAN_BACK_RECIPIENT) ieee80211_send_delba(sta->sdata, 
sta->sta.addr, tid, 0, reason); /* free the reordering buffer */ - for (i = 0; i < sta->ampdu_mlme.tid_rx[tid]->buf_size; i++) { - if (sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]) { + for (i = 0; i < tid_rx->buf_size; i++) { + if (tid_rx->reorder_buf[i]) { /* release the reordered frames */ - dev_kfree_skb(sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]); - sta->ampdu_mlme.tid_rx[tid]->stored_mpdu_num--; - sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i] = NULL; + dev_kfree_skb(tid_rx->reorder_buf[i]); + tid_rx->stored_mpdu_num--; + tid_rx->reorder_buf[i] = NULL; } } - spin_lock_bh(&sta->lock); /* free resources */ - kfree(sta->ampdu_mlme.tid_rx[tid]->reorder_buf); - kfree(sta->ampdu_mlme.tid_rx[tid]->reorder_time); - - if (!sta->ampdu_mlme.tid_rx[tid]->shutdown) { - kfree(sta->ampdu_mlme.tid_rx[tid]); - sta->ampdu_mlme.tid_rx[tid] = NULL; - } + kfree(tid_rx->reorder_buf); + kfree(tid_rx->reorder_time); + sta->ampdu_mlme.tid_rx[tid] = NULL; - sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_IDLE; spin_unlock_bh(&sta->lock); + + del_timer_sync(&tid_rx->session_timer); + kfree(tid_rx); } /* @@ -211,7 +205,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, /* examine state machine */ spin_lock_bh(&sta->lock); - if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_IDLE) { + if (sta->ampdu_mlme.tid_active_rx[tid]) { #ifdef CONFIG_MAC80211_HT_DEBUG if (net_ratelimit()) printk(KERN_DEBUG "unexpected AddBA Req from " @@ -273,7 +267,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, } /* change state and send addba resp */ - sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_OPERATIONAL; + sta->ampdu_mlme.tid_active_rx[tid] = true; tid_agg_rx->dialog_token = dialog_token; tid_agg_rx->ssn = start_seq_num; tid_agg_rx->head_seq_num = start_seq_num; diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 23e720034577..740ff6c5b92c 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -119,7 +119,7 @@ STA_OPS(last_seq_ctrl); static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { - char buf[64 + STA_TID_NUM * 40], *p = buf; + char buf[71 + STA_TID_NUM * 40], *p = buf; int i; struct sta_info *sta = file->private_data; @@ -127,16 +127,16 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, p += scnprintf(p, sizeof(buf) + buf - p, "next dialog_token: %#02x\n", sta->ampdu_mlme.dialog_token_allocator + 1); p += scnprintf(p, sizeof(buf) + buf - p, - "TID\t\tRX\tDTKN\tSSN\t\tTX\tDTKN\tSSN\tpending\n"); + "TID\t\tRX active\tDTKN\tSSN\t\tTX\tDTKN\tSSN\tpending\n"); for (i = 0; i < STA_TID_NUM; i++) { p += scnprintf(p, sizeof(buf) + buf - p, "%02d", i); p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", - sta->ampdu_mlme.tid_state_rx[i]); + sta->ampdu_mlme.tid_active_rx[i]); p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x", - sta->ampdu_mlme.tid_state_rx[i] ? + sta->ampdu_mlme.tid_active_rx[i] ? sta->ampdu_mlme.tid_rx[i]->dialog_token : 0); p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.3x", - sta->ampdu_mlme.tid_state_rx[i] ? + sta->ampdu_mlme.tid_active_rx[i] ? 
sta->ampdu_mlme.tid_rx[i]->ssn : 0); p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index c02e43b50ac3..62053fa711f3 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -720,7 +720,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; - if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_OPERATIONAL) + if (!sta->ampdu_mlme.tid_active_rx[tid]) goto dont_reorder; tid_agg_rx = sta->ampdu_mlme.tid_rx[tid]; @@ -1805,8 +1805,7 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) if (!rx->sta) return RX_DROP_MONITOR; tid = le16_to_cpu(bar->control) >> 12; - if (rx->sta->ampdu_mlme.tid_state_rx[tid] - != HT_AGG_STATE_OPERATIONAL) + if (!rx->sta->ampdu_mlme.tid_active_rx[tid]) return RX_DROP_MONITOR; tid_agg_rx = rx->sta->ampdu_mlme.tid_rx[tid]; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index bd11753c1525..5bf044b92dca 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -238,9 +238,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, * enable session_timer's data differentiation. refer to * sta_rx_agg_session_timer_expired for useage */ sta->timer_to_tid[i] = i; - /* rx */ - sta->ampdu_mlme.tid_state_rx[i] = HT_AGG_STATE_IDLE; - sta->ampdu_mlme.tid_rx[i] = NULL; /* tx */ sta->ampdu_mlme.tid_state_tx[i] = HT_AGG_STATE_IDLE; sta->ampdu_mlme.tid_tx[i] = NULL; @@ -606,7 +603,7 @@ static int __must_check __sta_info_destroy(struct sta_info *sta) struct ieee80211_sub_if_data *sdata; struct sk_buff *skb; unsigned long flags; - int ret, i; + int ret; might_sleep(); @@ -616,6 +613,15 @@ static int __must_check __sta_info_destroy(struct sta_info *sta) local = sta->local; sdata = sta->sdata; + /* + * Before removing the station from the driver and + * rate control, it might still start new aggregation + * sessions -- block that to make sure the tear-down + * will be sufficient. + */ + set_sta_flags(sta, WLAN_STA_BLOCK_BA); + ieee80211_sta_tear_down_BA_sessions(sta); + spin_lock_irqsave(&local->sta_lock, flags); ret = sta_info_hash_del(local, sta); /* this might still be the pending list ... which is fine */ @@ -700,50 +706,6 @@ static int __must_check __sta_info_destroy(struct sta_info *sta) while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) dev_kfree_skb_any(skb); - for (i = 0; i < STA_TID_NUM; i++) { - struct tid_ampdu_rx *tid_rx; - struct tid_ampdu_tx *tid_tx; - - spin_lock_bh(&sta->lock); - tid_rx = sta->ampdu_mlme.tid_rx[i]; - /* Make sure timer won't free the tid_rx struct, see below */ - if (tid_rx) - tid_rx->shutdown = true; - - spin_unlock_bh(&sta->lock); - - /* - * Outside spinlock - shutdown is true now so that the timer - * won't free tid_rx, we have to do that now. Can't let the - * timer do it because we have to sync the timer outside the - * lock that it takes itself. - */ - if (tid_rx) { - del_timer_sync(&tid_rx->session_timer); - kfree(tid_rx); - } - - /* - * No need to do such complications for TX agg sessions, the - * path leading to freeing the tid_tx struct goes via a call - * from the driver, and thus needs to look up the sta struct - * again, which cannot be found when we get here. Hence, we - * just need to delete the timer and free the aggregation - * info; we won't be telling the peer about it then but that - * doesn't matter if we're not talking to it again anyway. 
- */ - tid_tx = sta->ampdu_mlme.tid_tx[i]; - if (tid_tx) { - del_timer_sync(&tid_tx->addba_resp_timer); - /* - * STA removed while aggregation session being - * started? Bit odd, but purge frames anyway. - */ - skb_queue_purge(&tid_tx->pending); - kfree(tid_tx); - } - } - __sta_info_free(local, sta); return 0; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 57e81758d6f7..48a5e80957f0 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -36,7 +36,7 @@ * frame to this station is transmitted. * @WLAN_STA_MFP: Management frame protection is used with this STA. * @WLAN_STA_BLOCK_BA: Used to deny ADDBA requests (both TX and RX) - * during suspend/resume. + * during suspend/resume and station removal. * @WLAN_STA_PS_DRIVER: driver requires keeping this station in * power-save mode logically to flush frames that might still * be in the queues @@ -106,7 +106,6 @@ struct tid_ampdu_tx { * @buf_size: buffer size for incoming A-MPDUs * @timeout: reset timer value (in TUs). * @dialog_token: dialog token for aggregation session - * @shutdown: this session is being shut down due to STA removal */ struct tid_ampdu_rx { struct sk_buff **reorder_buf; @@ -118,7 +117,6 @@ struct tid_ampdu_rx { u16 buf_size; u16 timeout; u8 dialog_token; - bool shutdown; }; /** @@ -156,7 +154,7 @@ enum plink_state { */ struct sta_ampdu_mlme { /* rx */ - u8 tid_state_rx[STA_TID_NUM]; + bool tid_active_rx[STA_TID_NUM]; struct tid_ampdu_rx *tid_rx[STA_TID_NUM]; /* tx */ u8 tid_state_tx[STA_TID_NUM]; -- cgit v1.2.2 From 54297e4d60b74e602138594c131097347d128b5a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 Apr 2010 11:18:48 +0200 Subject: mac80211: fix some RX aggregation locking A few places in mac80211 do not currently acquire the sta lock for RX aggregation, but they should. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/rx.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 62053fa711f3..f42d5060a7bb 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -720,14 +720,16 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; + spin_lock(&sta->lock); + if (!sta->ampdu_mlme.tid_active_rx[tid]) - goto dont_reorder; + goto dont_reorder_unlock; tid_agg_rx = sta->ampdu_mlme.tid_rx[tid]; /* qos null data frames are excluded */ if (unlikely(hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_NULLFUNC))) - goto dont_reorder; + goto dont_reorder_unlock; /* new, potentially un-ordered, ampdu frame - process it */ @@ -739,15 +741,20 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, /* if this mpdu is fragmented - terminate rx aggregation session */ sc = le16_to_cpu(hdr->seq_ctrl); if (sc & IEEE80211_SCTL_FRAG) { + spin_unlock(&sta->lock); __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT, WLAN_REASON_QSTA_REQUIRE_SETUP); dev_kfree_skb(skb); return; } - if (ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb, frames)) + if (ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb, frames)) { + spin_unlock(&sta->lock); return; + } + dont_reorder_unlock: + spin_unlock(&sta->lock); dont_reorder: __skb_queue_tail(frames, skb); } @@ -1804,9 +1811,12 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) if (ieee80211_is_back_req(bar->frame_control)) { if (!rx->sta) return RX_DROP_MONITOR; + spin_lock(&rx->sta->lock); tid = le16_to_cpu(bar->control) >> 12; - if (!rx->sta->ampdu_mlme.tid_active_rx[tid]) + if (!rx->sta->ampdu_mlme.tid_active_rx[tid]) { + spin_unlock(&rx->sta->lock); return RX_DROP_MONITOR; + } tid_agg_rx = rx->sta->ampdu_mlme.tid_rx[tid]; start_seq_num = le16_to_cpu(bar->start_seq_num) >> 4; @@ -1820,6 +1830,7 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) ieee80211_release_reorder_frames(hw, tid_agg_rx, start_seq_num, frames); kfree_skb(skb); + spin_unlock(&rx->sta->lock); return RX_QUEUED; } -- cgit v1.2.2 From 8c11e4ab09ffb975a89802dde0e9aa52a53b8aa5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 7 Apr 2010 11:26:56 +0200 Subject: mac80211: fix paged RX crypto WEP crypto was broken, but upon finding the problem it is evident that other things were broken by the paged RX patch as well. To fix it, for now move the linearising in front. This means that we linearise all frames, which is not at all what we want, but at least it fixes the problem for now. Signed-off-by: Johannes Berg Acked-by: Zhu Yi Signed-off-by: John W. 
Linville --- net/mac80211/rx.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index f42d5060a7bb..a33f865807f9 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -820,7 +820,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) { struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + struct ieee80211_hdr *hdr; int keyidx; int hdrlen; ieee80211_rx_result result = RX_DROP_UNUSABLE; @@ -861,6 +861,11 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) if (!(rx->flags & IEEE80211_RX_RA_MATCH)) return RX_CONTINUE; + if (skb_linearize(rx->skb)) + return RX_DROP_UNUSABLE; + + hdr = (struct ieee80211_hdr *)skb->data; + /* start without a key */ rx->key = NULL; @@ -944,9 +949,6 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) return RX_DROP_MONITOR; } - if (skb_linearize(rx->skb)) - return RX_DROP_UNUSABLE; - /* Check for weak IVs if possible */ if (rx->sta && rx->key->conf.alg == ALG_WEP && ieee80211_is_data(hdr->frame_control) && -- cgit v1.2.2 From 298b9e44be9592e94c0e69a5d3893cd11f5484fa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 7 Apr 2010 16:46:36 -0700 Subject: net: include linux/proc_fs.h in dev_addr_lists.c As pointed by Randy Dunlap, we must include linux/proc_fs.h in net/core/dev_addr_lists.c, regardless of CONFIG_PROC_FS Reported-by: Randy Dunlap , Signed-off-by: Eric Dumazet Acked-by: Randy Dunlap Signed-off-by: David S. Miller --- net/core/dev_addr_lists.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 37d5975e18a3..508f9c18992f 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -14,6 +14,7 @@ #include #include #include +#include /* * General list handling functions @@ -667,7 +668,6 @@ void dev_mc_init(struct net_device *dev) EXPORT_SYMBOL(dev_mc_init); #ifdef CONFIG_PROC_FS -#include #include static int dev_mc_seq_show(struct seq_file *seq, void *v) -- cgit v1.2.2 From fd218cf9557b9bf7061365a8fe7020a56d3f767c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 7 Apr 2010 21:20:47 -0700 Subject: bridge: Fix IGMP3 report parsing The IGMP3 report parsing is looking at the wrong address for group records. This patch fixes it. Reported-by: Banyeer Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 6980625537ca..f29ada827a6a 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -723,7 +723,7 @@ static int br_multicast_igmp3_report(struct net_bridge *br, if (!pskb_may_pull(skb, len)) return -EINVAL; - grec = (void *)(skb->data + len); + grec = (void *)(skb->data + len - sizeof(*grec)); group = grec->grec_mca; type = grec->grec_type; -- cgit v1.2.2 From f5eb917b861828da18dc28854308068c66d1449a Mon Sep 17 00:00:00 2001 From: John Hughes Date: Wed, 7 Apr 2010 21:29:25 -0700 Subject: x25: Patch to fix bug 15678 - x25 accesses fields beyond end of packet. Here is a patch to stop X.25 examining fields beyond the end of the packet. For example, when a simple CALL ACCEPTED was received: 10 10 0f x25_parse_facilities was attempting to decode the FACILITIES field, but this packet contains no facilities field. Signed-off-by: John Hughes Signed-off-by: David S. 
Miller --- net/x25/af_x25.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++- net/x25/x25_facilities.c | 12 +++++++++++- net/x25/x25_in.c | 15 +++++++++++---- 3 files changed, 68 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 9796f3ed1edb..fe26c01ef3e6 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -82,6 +82,41 @@ struct compat_x25_subscrip_struct { }; #endif + +int x25_parse_address_block(struct sk_buff *skb, + struct x25_address *called_addr, + struct x25_address *calling_addr) +{ + unsigned char len; + int needed; + int rc; + + if (skb->len < 1) { + /* packet has no address block */ + rc = 0; + goto empty; + } + + len = *skb->data; + needed = 1 + (len >> 4) + (len & 0x0f); + + if (skb->len < needed) { + /* packet is too short to hold the addresses it claims + to hold */ + rc = -1; + goto empty; + } + + return x25_addr_ntoa(skb->data, called_addr, calling_addr); + +empty: + *called_addr->x25_addr = 0; + *calling_addr->x25_addr = 0; + + return rc; +} + + int x25_addr_ntoa(unsigned char *p, struct x25_address *called_addr, struct x25_address *calling_addr) { @@ -921,16 +956,26 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, /* * Extract the X.25 addresses and convert them to ASCII strings, * and remove them. + * + * Address block is mandatory in call request packets */ - addr_len = x25_addr_ntoa(skb->data, &source_addr, &dest_addr); + addr_len = x25_parse_address_block(skb, &source_addr, &dest_addr); + if (addr_len <= 0) + goto out_clear_request; skb_pull(skb, addr_len); /* * Get the length of the facilities, skip past them for the moment * get the call user data because this is needed to determine * the correct listener + * + * Facilities length is mandatory in call request packets */ + if (skb->len < 1) + goto out_clear_request; len = skb->data[0] + 1; + if (skb->len < len) + goto out_clear_request; skb_pull(skb,len); /* diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c index a21f6646eb3a..a2765c6b1f1a 100644 --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -35,7 +35,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, struct x25_dte_facilities *dte_facs, unsigned long *vc_fac_mask) { unsigned char *p = skb->data; - unsigned int len = *p++; + unsigned int len; *vc_fac_mask = 0; @@ -50,6 +50,14 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, memset(dte_facs->called_ae, '\0', sizeof(dte_facs->called_ae)); memset(dte_facs->calling_ae, '\0', sizeof(dte_facs->calling_ae)); + if (skb->len < 1) + return 0; + + len = *p++; + + if (len >= skb->len) + return -1; + while (len > 0) { switch (*p & X25_FAC_CLASS_MASK) { case X25_FAC_CLASS_A: @@ -247,6 +255,8 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk, memcpy(new, ours, sizeof(*new)); len = x25_parse_facilities(skb, &theirs, dte, &x25->vc_facil_mask); + if (len < 0) + return len; /* * They want reverse charging, we won't accept it. 
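The defensive pattern added to x25_parse_facilities above, namely read the length octet only after confirming it is present, then confirm the claimed length actually fits in the remaining buffer, can be exercised outside the kernel. The sketch below is a minimal userspace illustration of those bounds checks; the buffer layout and return conventions mirror the patch, but the function name and test values are hypothetical and this is not kernel code.

#include <stdio.h>
#include <stddef.h>

/* Illustrative only: mirrors the patch's checks on a plain byte buffer.
 * Returns 0 for "no facilities present", -1 for a malformed length octet,
 * otherwise the number of bytes consumed (length octet + facilities). */
static int parse_facilities_len(const unsigned char *buf, size_t buflen)
{
	unsigned int len;

	if (buflen < 1)		/* too short to hold even the length octet */
		return 0;

	len = buf[0];
	if (len >= buflen)	/* claimed length exceeds what was received */
		return -1;

	/* ... facility parsing would walk buf[1..len] here ... */
	return 1 + len;
}

int main(void)
{
	unsigned char ok[]  = { 0x02, 0x42, 0x07 };	/* len=2, 2 bytes follow */
	unsigned char bad[] = { 0x05, 0x42 };		/* claims 5, 1 byte follows */

	printf("well-formed: %d\n", parse_facilities_len(ok, sizeof(ok)));
	printf("truncated:   %d\n", parse_facilities_len(bad, sizeof(bad)));
	printf("empty:       %d\n", parse_facilities_len(ok, 0));
	return 0;
}
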
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index 96d922783547..b39072f3a297 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c @@ -89,6 +89,7 @@ static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more) static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametype) { struct x25_address source_addr, dest_addr; + int len; switch (frametype) { case X25_CALL_ACCEPTED: { @@ -106,11 +107,17 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp * Parse the data in the frame. */ skb_pull(skb, X25_STD_MIN_LEN); - skb_pull(skb, x25_addr_ntoa(skb->data, &source_addr, &dest_addr)); - skb_pull(skb, - x25_parse_facilities(skb, &x25->facilities, + + len = x25_parse_address_block(skb, &source_addr, + &dest_addr); + if (len > 0) + skb_pull(skb, len); + + len = x25_parse_facilities(skb, &x25->facilities, &x25->dte_facilities, - &x25->vc_facil_mask)); + &x25->vc_facil_mask); + if (len > 0) + skb_pull(skb, len); /* * Copy any Call User Data. */ -- cgit v1.2.2 From ddd0451fc8dbf94446c81500ff0dcee06c4057cb Mon Sep 17 00:00:00 2001 From: John Hughes Date: Sun, 4 Apr 2010 06:48:10 +0000 Subject: x.25 attempts to negotiate invalid throughput The current X.25 code has some bugs in throughput negotiation: 1. It does negotiation in all cases, usually there is no need 2. It incorrectly attempts to negotiate the throughput class in one direction only. There are separate throughput classes for input and output and if either is negotiated, both must be negotiated. This is bug https://bugzilla.kernel.org/show_bug.cgi?id=15681 This bug was first reported by Daniel Ferenci to the linux-x25 mailing list on 6/8/2004, but is still present. The current (2.6.34) x.25 code doesn't seem to know that the X.25 throughput facility includes two values, one for the required throughput outbound, one for inbound. This causes it to attempt to negotiate throughput 0x0A, which is throughput 9600 inbound and the illegal value "0" for outbound throughput. Because of this some X.25 devices (e.g. Cisco 1600) refuse to connect to Linux X.25. The following patch fixes this behaviour. Unless the user specifies a required throughput it does not attempt to negotiate. If the user does not specify a throughput it accepts the suggestion of the remote X.25 system. If the user requests a throughput then it validates both the input and output throughputs and correctly negotiates them with the remote end. Signed-off-by: John Hughes Tested-by: Andrew Hendry Signed-off-by: David S.
Miller --- net/x25/af_x25.c | 20 ++++++++++++++++---- net/x25/x25_facilities.c | 15 ++++++++++++--- 2 files changed, 28 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index fe26c01ef3e6..8ed51c926c50 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -588,7 +588,8 @@ static int x25_create(struct net *net, struct socket *sock, int protocol, x25->facilities.winsize_out = X25_DEFAULT_WINDOW_SIZE; x25->facilities.pacsize_in = X25_DEFAULT_PACKET_SIZE; x25->facilities.pacsize_out = X25_DEFAULT_PACKET_SIZE; - x25->facilities.throughput = X25_DEFAULT_THROUGHPUT; + x25->facilities.throughput = 0; /* by default don't negotiate + throughput */ x25->facilities.reverse = X25_DEFAULT_REVERSE; x25->dte_facilities.calling_len = 0; x25->dte_facilities.called_len = 0; @@ -1459,9 +1460,20 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) if (facilities.winsize_in < 1 || facilities.winsize_in > 127) break; - if (facilities.throughput < 0x03 || - facilities.throughput > 0xDD) - break; + if (facilities.throughput) { + int out = facilities.throughput & 0xf0; + int in = facilities.throughput & 0x0f; + if (!out) + facilities.throughput |= + X25_DEFAULT_THROUGHPUT << 4; + else if (out < 0x30 || out > 0xD0) + break; + if (!in) + facilities.throughput |= + X25_DEFAULT_THROUGHPUT; + else if (in < 0x03 || in > 0x0D) + break; + } if (facilities.reverse && (facilities.reverse & 0x81) != 0x81) break; diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c index a2765c6b1f1a..771bab00754b 100644 --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -269,9 +269,18 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk, new->reverse = theirs.reverse; if (theirs.throughput) { - if (theirs.throughput < ours->throughput) { - SOCK_DEBUG(sk, "X.25: throughput negotiated down\n"); - new->throughput = theirs.throughput; + int theirs_in = theirs.throughput & 0x0f; + int theirs_out = theirs.throughput & 0xf0; + int ours_in = ours->throughput & 0x0f; + int ours_out = ours->throughput & 0xf0; + if (!ours_in || theirs_in < ours_in) { + SOCK_DEBUG(sk, "X.25: inbound throughput negotiated\n"); + new->throughput = (new->throughput & 0xf0) | theirs_in; + } + if (!ours_out || theirs_out < ours_out) { + SOCK_DEBUG(sk, + "X.25: outbound throughput negotiated\n"); + new->throughput = (new->throughput & 0x0f) | theirs_out; } } -- cgit v1.2.2 From 97f8aefbbfb5aa5c9944e5fa8149f1fdaf71c7b6 Mon Sep 17 00:00:00 2001 From: chavey Date: Wed, 7 Apr 2010 21:54:42 -0700 Subject: net: fix ethtool coding style errors and warnings Fix coding style errors and warnings output while running checkpatch.pl on the files net/core/ethtool.c and include/linux/ethtool.h Signed-off-by: chavey Signed-off-by: David S. Miller --- net/core/ethtool.c | 141 ++++++++++++++++++++++++++++------------------------- 1 file changed, 74 insertions(+), 67 deletions(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 73c81edde8d9..99e9f850ea07 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include /* * Some useful ethtool_ops methods that're device independent. @@ -30,6 +30,7 @@ u32 ethtool_op_get_link(struct net_device *dev) { return netif_carrier_ok(dev) ? 
1 : 0; } +EXPORT_SYMBOL(ethtool_op_get_link); u32 ethtool_op_get_rx_csum(struct net_device *dev) { @@ -62,6 +63,7 @@ int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data) return 0; } +EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum); int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data) { @@ -72,11 +74,13 @@ int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data) return 0; } +EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum); u32 ethtool_op_get_sg(struct net_device *dev) { return (dev->features & NETIF_F_SG) != 0; } +EXPORT_SYMBOL(ethtool_op_get_sg); int ethtool_op_set_sg(struct net_device *dev, u32 data) { @@ -87,11 +91,13 @@ int ethtool_op_set_sg(struct net_device *dev, u32 data) return 0; } +EXPORT_SYMBOL(ethtool_op_set_sg); u32 ethtool_op_get_tso(struct net_device *dev) { return (dev->features & NETIF_F_TSO) != 0; } +EXPORT_SYMBOL(ethtool_op_get_tso); int ethtool_op_set_tso(struct net_device *dev, u32 data) { @@ -102,11 +108,13 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data) return 0; } +EXPORT_SYMBOL(ethtool_op_set_tso); u32 ethtool_op_get_ufo(struct net_device *dev) { return (dev->features & NETIF_F_UFO) != 0; } +EXPORT_SYMBOL(ethtool_op_get_ufo); int ethtool_op_set_ufo(struct net_device *dev, u32 data) { @@ -116,6 +124,7 @@ int ethtool_op_set_ufo(struct net_device *dev, u32 data) dev->features &= ~NETIF_F_UFO; return 0; } +EXPORT_SYMBOL(ethtool_op_set_ufo); /* the following list of flags are the same as their associated * NETIF_F_xxx values in include/linux/netdevice.h @@ -132,6 +141,7 @@ u32 ethtool_op_get_flags(struct net_device *dev) return dev->features & flags_dup_features; } +EXPORT_SYMBOL(ethtool_op_get_flags); int ethtool_op_set_flags(struct net_device *dev, u32 data) { @@ -160,6 +170,7 @@ int ethtool_op_set_flags(struct net_device *dev, u32 data) dev->features = features; return 0; } +EXPORT_SYMBOL(ethtool_op_set_flags); void ethtool_ntuple_flush(struct net_device *dev) { @@ -205,7 +216,8 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr) return dev->ethtool_ops->set_settings(dev, &cmd); } -static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, + void __user *useraddr) { struct ethtool_drvinfo info; const struct ethtool_ops *ops = dev->ethtool_ops; @@ -245,7 +257,7 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, void _ } static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev, - void __user *useraddr) + void __user *useraddr) { struct ethtool_sset_info info; const struct ethtool_ops *ops = dev->ethtool_ops; @@ -304,7 +316,8 @@ out: return ret; } -static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, + void __user *useraddr) { struct ethtool_rxnfc cmd; @@ -317,7 +330,8 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, void __u return dev->ethtool_ops->set_rxnfc(dev, &cmd); } -static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, + void __user *useraddr) { struct ethtool_rxnfc info; const struct ethtool_ops *ops = dev->ethtool_ops; @@ -362,8 +376,8 @@ err_out: } static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, - struct ethtool_rx_ntuple_flow_spec *spec, - struct 
ethtool_rx_ntuple_flow_spec_container *fsc) + struct ethtool_rx_ntuple_flow_spec *spec, + struct ethtool_rx_ntuple_flow_spec_container *fsc) { /* don't add filters forever */ @@ -389,7 +403,8 @@ static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, list->count++; } -static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, + void __user *useraddr) { struct ethtool_rx_ntuple cmd; const struct ethtool_ops *ops = dev->ethtool_ops; @@ -514,125 +529,125 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr) case UDP_V4_FLOW: case SCTP_V4_FLOW: sprintf(p, "\tSrc IP addr: 0x%x\n", - fsc->fs.h_u.tcp_ip4_spec.ip4src); + fsc->fs.h_u.tcp_ip4_spec.ip4src); p += ETH_GSTRING_LEN; num_strings++; sprintf(p, "\tSrc IP mask: 0x%x\n", - fsc->fs.m_u.tcp_ip4_spec.ip4src); + fsc->fs.m_u.tcp_ip4_spec.ip4src); p += ETH_GSTRING_LEN; num_strings++; sprintf(p, "\tDest IP addr: 0x%x\n", - fsc->fs.h_u.tcp_ip4_spec.ip4dst); + fsc->fs.h_u.tcp_ip4_spec.ip4dst); p += ETH_GSTRING_LEN; num_strings++; sprintf(p, "\tDest IP mask: 0x%x\n", - fsc->fs.m_u.tcp_ip4_spec.ip4dst); + fsc->fs.m_u.tcp_ip4_spec.ip4dst); p += ETH_GSTRING_LEN; num_strings++; sprintf(p, "\tSrc Port: %d, mask: 0x%x\n", - fsc->fs.h_u.tcp_ip4_spec.psrc, - fsc->fs.m_u.tcp_ip4_spec.psrc); + fsc->fs.h_u.tcp_ip4_spec.psrc, + fsc->fs.m_u.tcp_ip4_spec.psrc); p += ETH_GSTRING_LEN; num_strings++; sprintf(p, "\tDest Port: %d, mask: 0x%x\n", - fsc->fs.h_u.tcp_ip4_spec.pdst, - fsc->fs.m_u.tcp_ip4_spec.pdst); + fsc->fs.h_u.tcp_ip4_spec.pdst, + fsc->fs.m_u.tcp_ip4_spec.pdst); p += ETH_GSTRING_LEN; num_strings++; sprintf(p, "\tTOS: %d, mask: 0x%x\n", - fsc->fs.h_u.tcp_ip4_spec.tos, - fsc->fs.m_u.tcp_ip4_spec.tos); + fsc->fs.h_u.tcp_ip4_spec.tos, + fsc->fs.m_u.tcp_ip4_spec.tos); p += ETH_GSTRING_LEN; num_strings++; break; case AH_ESP_V4_FLOW: case ESP_V4_FLOW: sprintf(p, "\tSrc IP addr: 0x%x\n", - fsc->fs.h_u.ah_ip4_spec.ip4src); + fsc->fs.h_u.ah_ip4_spec.ip4src); p += ETH_GSTRING_LEN; num_strings++; sprintf(p, "\tSrc IP mask: 0x%x\n", - fsc->fs.m_u.ah_ip4_spec.ip4src); + fsc->fs.m_u.ah_ip4_spec.ip4src); p += ETH_GSTRING_LEN; num_strings++; sprintf(p, "\tDest IP addr: 0x%x\n", - fsc->fs.h_u.ah_ip4_spec.ip4dst); + fsc->fs.h_u.ah_ip4_spec.ip4dst); p += ETH_GSTRING_LEN; num_strings++; sprintf(p, "\tDest IP mask: 0x%x\n", - fsc->fs.m_u.ah_ip4_spec.ip4dst); + fsc->fs.m_u.ah_ip4_spec.ip4dst); p += ETH_GSTRING_LEN; num_strings++; sprintf(p, "\tSPI: %d, mask: 0x%x\n", - fsc->fs.h_u.ah_ip4_spec.spi, - fsc->fs.m_u.ah_ip4_spec.spi); + fsc->fs.h_u.ah_ip4_spec.spi, + fsc->fs.m_u.ah_ip4_spec.spi); p += ETH_GSTRING_LEN; num_strings++; sprintf(p, "\tTOS: %d, mask: 0x%x\n", - fsc->fs.h_u.ah_ip4_spec.tos, - fsc->fs.m_u.ah_ip4_spec.tos); + fsc->fs.h_u.ah_ip4_spec.tos, + fsc->fs.m_u.ah_ip4_spec.tos); p += ETH_GSTRING_LEN; num_strings++; break; case IP_USER_FLOW: sprintf(p, "\tSrc IP addr: 0x%x\n", - fsc->fs.h_u.raw_ip4_spec.ip4src); + fsc->fs.h_u.raw_ip4_spec.ip4src); p += ETH_GSTRING_LEN; num_strings++; sprintf(p, "\tSrc IP mask: 0x%x\n", - fsc->fs.m_u.raw_ip4_spec.ip4src); + fsc->fs.m_u.raw_ip4_spec.ip4src); p += ETH_GSTRING_LEN; num_strings++; sprintf(p, "\tDest IP addr: 0x%x\n", - fsc->fs.h_u.raw_ip4_spec.ip4dst); + fsc->fs.h_u.raw_ip4_spec.ip4dst); p += ETH_GSTRING_LEN; num_strings++; sprintf(p, "\tDest IP mask: 0x%x\n", - fsc->fs.m_u.raw_ip4_spec.ip4dst); + fsc->fs.m_u.raw_ip4_spec.ip4dst); p += ETH_GSTRING_LEN; num_strings++; 
break; case IPV4_FLOW: sprintf(p, "\tSrc IP addr: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.ip4src); + fsc->fs.h_u.usr_ip4_spec.ip4src); p += ETH_GSTRING_LEN; num_strings++; sprintf(p, "\tSrc IP mask: 0x%x\n", - fsc->fs.m_u.usr_ip4_spec.ip4src); + fsc->fs.m_u.usr_ip4_spec.ip4src); p += ETH_GSTRING_LEN; num_strings++; sprintf(p, "\tDest IP addr: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.ip4dst); + fsc->fs.h_u.usr_ip4_spec.ip4dst); p += ETH_GSTRING_LEN; num_strings++; sprintf(p, "\tDest IP mask: 0x%x\n", - fsc->fs.m_u.usr_ip4_spec.ip4dst); + fsc->fs.m_u.usr_ip4_spec.ip4dst); p += ETH_GSTRING_LEN; num_strings++; sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.l4_4_bytes, - fsc->fs.m_u.usr_ip4_spec.l4_4_bytes); + fsc->fs.h_u.usr_ip4_spec.l4_4_bytes, + fsc->fs.m_u.usr_ip4_spec.l4_4_bytes); p += ETH_GSTRING_LEN; num_strings++; sprintf(p, "\tTOS: %d, mask: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.tos, - fsc->fs.m_u.usr_ip4_spec.tos); + fsc->fs.h_u.usr_ip4_spec.tos, + fsc->fs.m_u.usr_ip4_spec.tos); p += ETH_GSTRING_LEN; num_strings++; sprintf(p, "\tIP Version: %d, mask: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.ip_ver, - fsc->fs.m_u.usr_ip4_spec.ip_ver); + fsc->fs.h_u.usr_ip4_spec.ip_ver, + fsc->fs.m_u.usr_ip4_spec.ip_ver); p += ETH_GSTRING_LEN; num_strings++; sprintf(p, "\tProtocol: %d, mask: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.proto, - fsc->fs.m_u.usr_ip4_spec.proto); + fsc->fs.h_u.usr_ip4_spec.proto, + fsc->fs.m_u.usr_ip4_spec.proto); p += ETH_GSTRING_LEN; num_strings++; break; }; sprintf(p, "\tVLAN: %d, mask: 0x%x\n", - fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask); + fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask); p += ETH_GSTRING_LEN; num_strings++; sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data); @@ -645,7 +660,7 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr) sprintf(p, "\tAction: Drop\n"); else sprintf(p, "\tAction: Direct to queue %d\n", - fsc->fs.action); + fsc->fs.action); p += ETH_GSTRING_LEN; num_strings++; unknown_filter: @@ -857,7 +872,8 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr) return ret; } -static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, + void __user *useraddr) { struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE }; @@ -871,7 +887,8 @@ static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, void return 0; } -static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev, + void __user *useraddr) { struct ethtool_coalesce coalesce; @@ -975,6 +992,7 @@ static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr) return dev->ethtool_ops->set_tx_csum(dev, edata.data); } +EXPORT_SYMBOL(ethtool_op_set_tx_csum); static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) { @@ -1046,7 +1064,7 @@ static int ethtool_get_gso(struct net_device *dev, char __user *useraddr) edata.data = dev->features & NETIF_F_GSO; if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; + return -EFAULT; return 0; } @@ -1069,7 +1087,7 @@ static int ethtool_get_gro(struct net_device *dev, char __user *useraddr) edata.data = dev->features & NETIF_F_GRO; if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; + return -EFAULT; return 0; } @@ -1281,7 +1299,8 @@ static int ethtool_set_value(struct net_device *dev, char __user 
*useraddr, return actor(dev, edata.data); } -static noinline_for_stack int ethtool_flash_device(struct net_device *dev, char __user *useraddr) +static noinline_for_stack int ethtool_flash_device(struct net_device *dev, + char __user *useraddr) { struct ethtool_flash efl; @@ -1310,11 +1329,11 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) if (!dev->ethtool_ops) return -EOPNOTSUPP; - if (copy_from_user(ðcmd, useraddr, sizeof (ethcmd))) + if (copy_from_user(ðcmd, useraddr, sizeof(ethcmd))) return -EFAULT; /* Allow some commands to be done by anyone */ - switch(ethcmd) { + switch (ethcmd) { case ETHTOOL_GDRVINFO: case ETHTOOL_GMSGLVL: case ETHTOOL_GCOALESCE: @@ -1342,10 +1361,11 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) return -EPERM; } - if (dev->ethtool_ops->begin) - if ((rc = dev->ethtool_ops->begin(dev)) < 0) + if (dev->ethtool_ops->begin) { + rc = dev->ethtool_ops->begin(dev); + if (rc < 0) return rc; - + } old_features = dev->features; switch (ethcmd) { @@ -1535,16 +1555,3 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) return rc; } - -EXPORT_SYMBOL(ethtool_op_get_link); -EXPORT_SYMBOL(ethtool_op_get_sg); -EXPORT_SYMBOL(ethtool_op_get_tso); -EXPORT_SYMBOL(ethtool_op_set_sg); -EXPORT_SYMBOL(ethtool_op_set_tso); -EXPORT_SYMBOL(ethtool_op_set_tx_csum); -EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum); -EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum); -EXPORT_SYMBOL(ethtool_op_set_ufo); -EXPORT_SYMBOL(ethtool_op_get_ufo); -EXPORT_SYMBOL(ethtool_op_set_flags); -EXPORT_SYMBOL(ethtool_op_get_flags); -- cgit v1.2.2 From 3d91c1a848c812e0e66e7e57f076667822cb460e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 8 Apr 2010 13:35:47 +0200 Subject: IPVS: fix potential stack overflow with overly long protocol names When protocols use very long names, the sprintf calls might overflow the on-stack buffer. No protocol in the kernel does this however. Print the protocol name in the pr_debug statement directly to avoid this. 
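The shape of the fix is easy to show in isolation: keep only bounded, fixed-size fields in the on-stack buffer and hand the arbitrarily long protocol name straight to the final print call. Below is a minimal userspace sketch of that pattern; the names, buffer size, and arguments are made up for illustration and are not the kernel's.

#include <stdio.h>

/* Illustrative only: 'buf' holds only address/port text of bounded length,
 * while the variable-length protocol name bypasses the stack buffer and is
 * passed directly to the final print call. */
static void debug_packet(const char *proto_name, const char *saddr,
			 const char *daddr, unsigned int sport,
			 unsigned int dport, const char *msg)
{
	char buf[64];	/* sized for the bounded fields only */

	snprintf(buf, sizeof(buf), "%s:%u->%s:%u", saddr, sport, daddr, dport);

	/* proto_name is printed here, never copied into buf */
	printf("%s: %s %s\n", msg, proto_name, buf);
}

int main(void)
{
	debug_packet("some-very-long-protocol-name-that-would-not-fit",
		     "192.0.2.1", "192.0.2.2", 1234, 80, "packet dropped");
	return 0;
}
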
Based on patch by Zhitong Wang Acked-by: Simon Horman Signed-off-by: Patrick McHardy --- net/netfilter/ipvs/ip_vs_proto.c | 28 ++++++++++++---------------- net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 14 ++++++-------- 2 files changed, 18 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 0e584553819d..27add971bb13 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -166,26 +166,24 @@ ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp, ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); if (ih == NULL) - sprintf(buf, "%s TRUNCATED", pp->name); + sprintf(buf, "TRUNCATED"); else if (ih->frag_off & htons(IP_OFFSET)) - sprintf(buf, "%s %pI4->%pI4 frag", - pp->name, &ih->saddr, &ih->daddr); + sprintf(buf, "%pI4->%pI4 frag", &ih->saddr, &ih->daddr); else { __be16 _ports[2], *pptr ; pptr = skb_header_pointer(skb, offset + ih->ihl*4, sizeof(_ports), _ports); if (pptr == NULL) - sprintf(buf, "%s TRUNCATED %pI4->%pI4", - pp->name, &ih->saddr, &ih->daddr); + sprintf(buf, "TRUNCATED %pI4->%pI4", + &ih->saddr, &ih->daddr); else - sprintf(buf, "%s %pI4:%u->%pI4:%u", - pp->name, + sprintf(buf, "%pI4:%u->%pI4:%u", &ih->saddr, ntohs(pptr[0]), &ih->daddr, ntohs(pptr[1])); } - pr_debug("%s: %s\n", msg, buf); + pr_debug("%s: %s %s\n", msg, pp->name, buf); } #ifdef CONFIG_IP_VS_IPV6 @@ -200,26 +198,24 @@ ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp, ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); if (ih == NULL) - sprintf(buf, "%s TRUNCATED", pp->name); + sprintf(buf, "TRUNCATED"); else if (ih->nexthdr == IPPROTO_FRAGMENT) - sprintf(buf, "%s %pI6->%pI6 frag", - pp->name, &ih->saddr, &ih->daddr); + sprintf(buf, "%pI6->%pI6 frag", &ih->saddr, &ih->daddr); else { __be16 _ports[2], *pptr; pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr), sizeof(_ports), _ports); if (pptr == NULL) - sprintf(buf, "%s TRUNCATED %pI6->%pI6", - pp->name, &ih->saddr, &ih->daddr); + sprintf(buf, "TRUNCATED %pI6->%pI6", + &ih->saddr, &ih->daddr); else - sprintf(buf, "%s %pI6:%u->%pI6:%u", - pp->name, + sprintf(buf, "%pI6:%u->%pI6:%u", &ih->saddr, ntohs(pptr[0]), &ih->daddr, ntohs(pptr[1])); } - pr_debug("%s: %s\n", msg, buf); + pr_debug("%s: %s %s\n", msg, pp->name, buf); } #endif diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c index c30b43c36cd7..1892dfc12fdd 100644 --- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c +++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c @@ -136,12 +136,11 @@ ah_esp_debug_packet_v4(struct ip_vs_protocol *pp, const struct sk_buff *skb, ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); if (ih == NULL) - sprintf(buf, "%s TRUNCATED", pp->name); + sprintf(buf, "TRUNCATED"); else - sprintf(buf, "%s %pI4->%pI4", - pp->name, &ih->saddr, &ih->daddr); + sprintf(buf, "%pI4->%pI4", &ih->saddr, &ih->daddr); - pr_debug("%s: %s\n", msg, buf); + pr_debug("%s: %s %s\n", msg, pp->name, buf); } #ifdef CONFIG_IP_VS_IPV6 @@ -154,12 +153,11 @@ ah_esp_debug_packet_v6(struct ip_vs_protocol *pp, const struct sk_buff *skb, ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); if (ih == NULL) - sprintf(buf, "%s TRUNCATED", pp->name); + sprintf(buf, "TRUNCATED"); else - sprintf(buf, "%s %pI6->%pI6", - pp->name, &ih->saddr, &ih->daddr); + sprintf(buf, "%pI6->%pI6", &ih->saddr, &ih->daddr); - pr_debug("%s: %s\n", msg, buf); + pr_debug("%s: %s %s\n", msg, pp->name, buf); } #endif -- cgit v1.2.2 From 
9e56c21486f2a64473f36fa49475fd253422fbf6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 8 Apr 2010 14:52:28 +0200 Subject: netfilter: only do skb_checksum_help on CHECKSUM_PARTIAL in ip_queue While doing yet another audit on ip_summed I noticed ip_queue calling skb_checksum_help unnecessarily. As we will set ip_summed to CHECKSUM_NONE when necessary in ipq_mangle_ipv4, there is no need to zap CHECKSUM_COMPLETE in ipq_build_packet_message. Signed-off-by: Herbert Xu Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ip_queue.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 2855f1f38cbc..d781513282d4 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -160,8 +160,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) break; case IPQ_COPY_PACKET: - if ((entry->skb->ip_summed == CHECKSUM_PARTIAL || - entry->skb->ip_summed == CHECKSUM_COMPLETE) && + if (entry->skb->ip_summed == CHECKSUM_PARTIAL && (*errp = skb_checksum_help(entry->skb))) { read_unlock_bh(&queue_lock); return NULL; -- cgit v1.2.2 From 5dd59cc99131fb4a2775c00928b0f24b9e192a76 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 8 Apr 2010 14:53:40 +0200 Subject: netfilter: only do skb_checksum_help on CHECKSUM_PARTIAL in ip6_queue As we will set ip_summed to CHECKSUM_NONE when necessary in ipq_mangle_ipv6, there is no need to zap CHECKSUM_COMPLETE in ipq_build_packet_message. Signed-off-by: Herbert Xu Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/ip6_queue.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 7854052be60b..39856a25189c 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -161,8 +161,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) break; case IPQ_COPY_PACKET: - if ((entry->skb->ip_summed == CHECKSUM_PARTIAL || - entry->skb->ip_summed == CHECKSUM_COMPLETE) && + if (entry->skb->ip_summed == CHECKSUM_PARTIAL && (*errp = skb_checksum_help(entry->skb))) { read_unlock_bh(&queue_lock); return NULL; -- cgit v1.2.2 From e9f13cab49f7f28b65a6f63201fca56480b2e059 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 8 Apr 2010 14:54:35 +0200 Subject: netfilter: only do skb_checksum_help on CHECKSUM_PARTIAL in nfnetlink_queue As we will set ip_summed to CHECKSUM_NONE when necessary in nfqnl_mangle, there is no need to zap CHECKSUM_COMPLETE in nfqnl_build_packet_message. 
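The logic change in these queueing paths is the same in each case: software checksum help is only needed when the checksum has not been computed yet (CHECKSUM_PARTIAL), and a CHECKSUM_COMPLETE value can be left alone because the mangle path sets it to CHECKSUM_NONE if the payload is modified. The predicate below is a standalone restatement of that condition; the enum values are illustrative stand-ins, not the kernel's.

#include <stdio.h>
#include <stdbool.h>

/* Illustrative stand-ins for the kernel's skb->ip_summed states. */
enum csum_state { CSUM_NONE, CSUM_UNNECESSARY, CSUM_COMPLETE, CSUM_PARTIAL };

/* Old condition: helped for both PARTIAL and COMPLETE.
 * New condition: help only when the checksum still has to be computed. */
static bool needs_checksum_help(enum csum_state s)
{
	return s == CSUM_PARTIAL;
}

int main(void)
{
	printf("PARTIAL  -> %d\n", needs_checksum_help(CSUM_PARTIAL));   /* 1 */
	printf("COMPLETE -> %d\n", needs_checksum_help(CSUM_COMPLETE));  /* 0 */
	printf("NONE     -> %d\n", needs_checksum_help(CSUM_NONE));      /* 0 */
	return 0;
}
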
Signed-off-by: Herbert Xu Signed-off-by: Patrick McHardy --- net/netfilter/nfnetlink_queue.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 7ba4abc405c9..08c1a33077a0 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -245,8 +245,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, break; case NFQNL_COPY_PACKET: - if ((entskb->ip_summed == CHECKSUM_PARTIAL || - entskb->ip_summed == CHECKSUM_COMPLETE) && + if (entskb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(entskb)) { spin_unlock_bh(&queue->lock); return NULL; -- cgit v1.2.2 From 1223c67c0938d2df309fde618bd82c87c8c1af04 Mon Sep 17 00:00:00 2001 From: "Jorge Boncompte [DTI2]" Date: Thu, 8 Apr 2010 04:56:48 +0000 Subject: udp: fix for unicast RX path optimization Commits 5051ebd275de672b807c28d93002c2fb0514a3c9 and 5051ebd275de672b807c28d93002c2fb0514a3c9 ("ipv[46]: udp: optimize unicast RX path") broke some programs. After upgrading a L2TP server to 2.6.33 it started to fail, tunnels going up an down, after the 10th tunnel came up. My modified rp-l2tp uses a global unconnected socket bound to (INADDR_ANY, 1701) and one connected socket per tunnel after parameter negotiation. After ten sockets were open and due to mixed parameters to udp[46]_lib_lookup2() kernel started to drop packets. Signed-off-by: Jorge Boncompte [DTI2] Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 4 ++-- net/ipv6/udp.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7af756d0f931..24272c4cfbca 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -471,8 +471,8 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, if (hslot->count < hslot2->count) goto begin; - result = udp4_lib_lookup2(net, INADDR_ANY, sport, - daddr, hnum, dif, + result = udp4_lib_lookup2(net, saddr, sport, + INADDR_ANY, hnum, dif, hslot2, slot2); } rcu_read_unlock(); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3c0c9c755c92..787e480cc096 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -258,8 +258,8 @@ static struct sock *__udp6_lib_lookup(struct net *net, if (hslot->count < hslot2->count) goto begin; - result = udp6_lib_lookup2(net, &in6addr_any, sport, - daddr, hnum, dif, + result = udp6_lib_lookup2(net, saddr, sport, + &in6addr_any, hnum, dif, hslot2, slot2); } rcu_read_unlock(); -- cgit v1.2.2 From 2626419ad5be1a054d350786b684b41d23de1538 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 8 Apr 2010 11:32:30 -0700 Subject: tcp: Set CHECKSUM_UNNECESSARY in tcp_init_nondata_skb Back in commit 04a0551c87363f100b04d28d7a15a632b70e18e7 ("loopback: Drop obsolete ip_summed setting") we stopped setting CHECKSUM_UNNECESSARY in the loopback xmit. This is because such a setting was a lie since it implies that the checksum field of the packet is properly filled in. Instead what happens normally is that CHECKSUM_PARTIAL is set and skb->csum is calculated as needed. But this was only happening for TCP data packets (via the skb->ip_summed assignment done in tcp_sendmsg()). It doesn't happen for non-data packets like ACKs etc. Fix this by setting skb->ip_summed in the common non-data packet constructor. It already is setting skb->csum to zero. 
But this reminds us that we still have things like ip_output.c's ip_dev_loopback_xmit() which sets skb->ip_summed to the value CHECKSUM_UNNECESSARY, which Herbert's patch teaches us is not valid. So we'll have to address that at some point too. Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f181b78f2385..00afbb0c7e5f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -349,6 +349,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb, */ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) { + skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; TCP_SKB_CB(skb)->flags = flags; -- cgit v1.2.2 From 97ad9139fd68b5c71f44d28d3f9788d89cfd4916 Mon Sep 17 00:00:00 2001 From: Javier Cardona Date: Mon, 29 Mar 2010 11:00:21 -0700 Subject: mac80211: Moved mesh action codes to a more visible location Grouped mesh action codes together with the other action codes in ieee80211.h. Signed-off-by: Javier Cardona Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/mesh.c | 4 ++-- net/mac80211/mesh.h | 2 -- net/mac80211/mesh_hwmp.c | 4 ++-- net/mac80211/mesh_plink.c | 2 +- net/mac80211/rx.c | 6 +++--- 5 files changed, 8 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 7a6bebce7f2f..2669fbf8c812 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -600,10 +600,10 @@ static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata, struct ieee80211_rx_status *rx_status) { switch (mgmt->u.action.category) { - case MESH_PLINK_CATEGORY: + case WLAN_CATEGORY_MESH_PLINK: mesh_rx_plink_frame(sdata, mgmt, len, rx_status); break; - case MESH_PATH_SEL_CATEGORY: + case WLAN_CATEGORY_MESH_PATH_SEL: mesh_rx_path_sel_frame(sdata, mgmt, len); break; } diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 85562c59d7d6..c88087f1cd0f 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -209,8 +209,6 @@ struct mesh_rmc { #define MESH_MAX_MPATHS 1024 /* Pending ANA approval */ -#define MESH_PLINK_CATEGORY 30 -#define MESH_PATH_SEL_CATEGORY 32 #define MESH_PATH_SEL_ACTION 0 /* PERR reason codes */ diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index ccff6133e19a..36141d6e701b 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -131,7 +131,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); /* BSSID == SA */ memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); - mgmt->u.action.category = MESH_PATH_SEL_CATEGORY; + mgmt->u.action.category = WLAN_CATEGORY_MESH_PATH_SEL; mgmt->u.action.u.mesh_action.action_code = MESH_PATH_SEL_ACTION; switch (action) { @@ -224,7 +224,7 @@ int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, memcpy(mgmt->da, ra, ETH_ALEN); memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); /* BSSID is left zeroed, wildcard value */ - mgmt->u.action.category = MESH_PATH_SEL_CATEGORY; + mgmt->u.action.category = WLAN_CATEGORY_MESH_PATH_SEL; mgmt->u.action.u.mesh_action.action_code = MESH_PATH_SEL_ACTION; ie_len = 15; pos = skb_put(skb, 2 + ie_len); diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index bc4e20e57ff5..c384154ac895 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -171,7 +171,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, 
memcpy(mgmt->da, da, ETH_ALEN); memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); /* BSSID is left zeroed, wildcard value */ - mgmt->u.action.category = MESH_PLINK_CATEGORY; + mgmt->u.action.category = WLAN_CATEGORY_MESH_PLINK; mgmt->u.action.u.plink_action.action_code = action; if (action == PLINK_CLOSE) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index c0ad7e879a6e..d08ede44ac7e 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -490,7 +490,7 @@ ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) if (ieee80211_is_action(hdr->frame_control)) { mgmt = (struct ieee80211_mgmt *)hdr; - if (mgmt->u.action.category != MESH_PLINK_CATEGORY) + if (mgmt->u.action.category != WLAN_CATEGORY_MESH_PLINK) return RX_DROP_MONITOR; return RX_CONTINUE; } @@ -1994,8 +1994,8 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) goto handled; } break; - case MESH_PLINK_CATEGORY: - case MESH_PATH_SEL_CATEGORY: + case WLAN_CATEGORY_MESH_PLINK: + case WLAN_CATEGORY_MESH_PATH_SEL: if (ieee80211_vif_is_mesh(&sdata->vif)) return ieee80211_mesh_rx_mgmt(sdata, rx->skb); break; -- cgit v1.2.2 From b5878a2dc5e7e7f031a52c3e15b571224cb6b540 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 7 Apr 2010 16:48:40 +0200 Subject: mac80211: enhance tracing Enhance tracing by adding tracing for a variety of callbacks that the drivers call, and also for internal calls (currently limited to queue status). This can aid debugging what is going on in mac80211 in interaction with drivers, since we can now see what drivers call and not just what mac80211 calls in the driver. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/Kconfig | 8 +- net/mac80211/agg-tx.c | 8 ++ net/mac80211/driver-trace.h | 275 ++++++++++++++++++++++++++++++++++++++++++++ net/mac80211/main.c | 2 + net/mac80211/mlme.c | 6 + net/mac80211/scan.c | 2 + net/mac80211/sta_info.c | 2 + net/mac80211/util.c | 4 + 8 files changed, 303 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 334c359da5e8..8a91f6c0bb18 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -221,8 +221,8 @@ config MAC80211_DRIVER_API_TRACER depends on EVENT_TRACING help Say Y here to make mac80211 register with the ftrace - framework for the driver API -- you can see which - driver methods it is calling then by looking at the - trace. + framework for the driver API -- you can then see which + driver methods it is calling and which API functions + drivers are calling by looking at the trace. - If unsure, say N. + If unsure, say Y. 
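The diffs that follow all apply the same pattern: each mac80211 function that drivers call gains a trace_api_*() hook near its top, so the call and its arguments are recorded before any other work or argument checking is done. Outside the kernel's TRACE_EVENT machinery the idea reduces to the sketch below, where a plain fprintf stands in for ftrace and the names and limits are illustrative only.

#include <stdio.h>

/* Illustrative stand-in for a trace event: in mac80211 this would be a
 * TRACE_EVENT()-generated trace_api_*() call, not an fprintf. */
#define trace_api_call(fmt, ...) \
	fprintf(stderr, "api: " fmt "\n", ##__VA_ARGS__)

/* A driver-facing API function instruments itself first, mirroring how
 * trace_api_start_tx_ba_session() is placed in the patch below. */
static int api_start_tx_ba_session(const char *sta_addr, unsigned int tid)
{
	trace_api_call("start_tx_ba_session sta=%s tid=%u", sta_addr, tid);

	if (tid >= 16)		/* argument checking happens after tracing */
		return -1;

	/* ... real work would follow ... */
	return 0;
}

int main(void)
{
	api_start_tx_ba_session("00:11:22:33:44:55", 3);
	return 0;
}
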
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 32d2148b5b98..6bb4d0a1e5c5 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -214,6 +214,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) int ret = 0; u16 start_seq_num; + trace_api_start_tx_ba_session(pubsta, tid); + if (WARN_ON(!local->ops->ampdu_action)) return -EINVAL; @@ -440,6 +442,8 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) struct sta_info *sta; u8 *state; + trace_api_start_tx_ba_cb(sdata, ra, tid); + if (tid >= STA_TID_NUM) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n", @@ -541,6 +545,8 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; + trace_api_stop_tx_ba_session(pubsta, tid, initiator); + if (!local->ops->ampdu_action) return -EINVAL; @@ -558,6 +564,8 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) struct sta_info *sta; u8 *state; + trace_api_stop_tx_ba_cb(sdata, ra, tid); + if (tid >= STA_TID_NUM) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n", diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 41baf730a5c7..e209cb82ff29 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -32,6 +32,10 @@ static inline void trace_ ## name(proto) {} #define VIF_PR_FMT " vif:%s(%d)" #define VIF_PR_ARG __get_str(vif_name), __entry->vif_type +/* + * Tracing for driver callbacks. + */ + TRACE_EVENT(drv_start, TP_PROTO(struct ieee80211_local *local, int ret), @@ -766,6 +770,277 @@ TRACE_EVENT(drv_flush, LOCAL_PR_ARG, __entry->drop ) ); + +/* + * Tracing for API calls that drivers call. 
+ */ + +TRACE_EVENT(api_start_tx_ba_session, + TP_PROTO(struct ieee80211_sta *sta, u16 tid), + + TP_ARGS(sta, tid), + + TP_STRUCT__entry( + STA_ENTRY + __field(u16, tid) + ), + + TP_fast_assign( + STA_ASSIGN; + __entry->tid = tid; + ), + + TP_printk( + STA_PR_FMT " tid:%d", + STA_PR_ARG, __entry->tid + ) +); + +TRACE_EVENT(api_start_tx_ba_cb, + TP_PROTO(struct ieee80211_sub_if_data *sdata, const u8 *ra, u16 tid), + + TP_ARGS(sdata, ra, tid), + + TP_STRUCT__entry( + VIF_ENTRY + __array(u8, ra, ETH_ALEN) + __field(u16, tid) + ), + + TP_fast_assign( + VIF_ASSIGN; + memcpy(__entry->ra, ra, ETH_ALEN); + __entry->tid = tid; + ), + + TP_printk( + VIF_PR_FMT " ra:%pM tid:%d", + VIF_PR_ARG, __entry->ra, __entry->tid + ) +); + +TRACE_EVENT(api_stop_tx_ba_session, + TP_PROTO(struct ieee80211_sta *sta, u16 tid, u16 initiator), + + TP_ARGS(sta, tid, initiator), + + TP_STRUCT__entry( + STA_ENTRY + __field(u16, tid) + __field(u16, initiator) + ), + + TP_fast_assign( + STA_ASSIGN; + __entry->tid = tid; + __entry->initiator = initiator; + ), + + TP_printk( + STA_PR_FMT " tid:%d initiator:%d", + STA_PR_ARG, __entry->tid, __entry->initiator + ) +); + +TRACE_EVENT(api_stop_tx_ba_cb, + TP_PROTO(struct ieee80211_sub_if_data *sdata, const u8 *ra, u16 tid), + + TP_ARGS(sdata, ra, tid), + + TP_STRUCT__entry( + VIF_ENTRY + __array(u8, ra, ETH_ALEN) + __field(u16, tid) + ), + + TP_fast_assign( + VIF_ASSIGN; + memcpy(__entry->ra, ra, ETH_ALEN); + __entry->tid = tid; + ), + + TP_printk( + VIF_PR_FMT " ra:%pM tid:%d", + VIF_PR_ARG, __entry->ra, __entry->tid + ) +); + +TRACE_EVENT(api_restart_hw, + TP_PROTO(struct ieee80211_local *local), + + TP_ARGS(local), + + TP_STRUCT__entry( + LOCAL_ENTRY + ), + + TP_fast_assign( + LOCAL_ASSIGN; + ), + + TP_printk( + LOCAL_PR_FMT, + LOCAL_PR_ARG + ) +); + +TRACE_EVENT(api_beacon_loss, + TP_PROTO(struct ieee80211_sub_if_data *sdata), + + TP_ARGS(sdata), + + TP_STRUCT__entry( + VIF_ENTRY + ), + + TP_fast_assign( + VIF_ASSIGN; + ), + + TP_printk( + VIF_PR_FMT, + VIF_PR_ARG + ) +); + +TRACE_EVENT(api_connection_loss, + TP_PROTO(struct ieee80211_sub_if_data *sdata), + + TP_ARGS(sdata), + + TP_STRUCT__entry( + VIF_ENTRY + ), + + TP_fast_assign( + VIF_ASSIGN; + ), + + TP_printk( + VIF_PR_FMT, + VIF_PR_ARG + ) +); + +TRACE_EVENT(api_cqm_rssi_notify, + TP_PROTO(struct ieee80211_sub_if_data *sdata, + enum nl80211_cqm_rssi_threshold_event rssi_event), + + TP_ARGS(sdata, rssi_event), + + TP_STRUCT__entry( + VIF_ENTRY + __field(u32, rssi_event) + ), + + TP_fast_assign( + VIF_ASSIGN; + __entry->rssi_event = rssi_event; + ), + + TP_printk( + VIF_PR_FMT " event:%d", + VIF_PR_ARG, __entry->rssi_event + ) +); + +TRACE_EVENT(api_scan_completed, + TP_PROTO(struct ieee80211_local *local, bool aborted), + + TP_ARGS(local, aborted), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(bool, aborted) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->aborted = aborted; + ), + + TP_printk( + LOCAL_PR_FMT " aborted:%d", + LOCAL_PR_ARG, __entry->aborted + ) +); + +TRACE_EVENT(api_sta_block_awake, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sta *sta, bool block), + + TP_ARGS(local, sta, block), + + TP_STRUCT__entry( + LOCAL_ENTRY + STA_ENTRY + __field(bool, block) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + STA_ASSIGN; + __entry->block = block; + ), + + TP_printk( + LOCAL_PR_FMT STA_PR_FMT " block:%d", + LOCAL_PR_ARG, STA_PR_FMT, __entry->block + ) +); + +/* + * Tracing for internal functions + * (which may also be called in response to driver calls) + */ + +TRACE_EVENT(wake_queue, + TP_PROTO(struct 
ieee80211_local *local, u16 queue, + enum queue_stop_reason reason), + + TP_ARGS(local, queue, reason), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(u16, queue) + __field(u32, reason) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->queue = queue; + __entry->reason = reason; + ), + + TP_printk( + LOCAL_PR_FMT " queue:%d, reason:%d", + LOCAL_PR_ARG, __entry->queue, __entry->reason + ) +); + +TRACE_EVENT(stop_queue, + TP_PROTO(struct ieee80211_local *local, u16 queue, + enum queue_stop_reason reason), + + TP_ARGS(local, queue, reason), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(u16, queue) + __field(u32, reason) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->queue = queue; + __entry->reason = reason; + ), + + TP_printk( + LOCAL_PR_FMT " queue:%d, reason:%d", + LOCAL_PR_ARG, __entry->queue, __entry->reason + ) +); #endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/mac80211/main.c b/net/mac80211/main.c index b887e484ae04..4afe851cf8dc 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -309,6 +309,8 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); + trace_api_restart_hw(local); + /* use this reason, __ieee80211_resume will unblock it */ ieee80211_stop_queues_by_reason(hw, IEEE80211_QUEUE_STOP_REASON_SUSPEND); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 461167dfa42c..d11a54c289a2 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1007,6 +1007,8 @@ void ieee80211_beacon_loss(struct ieee80211_vif *vif) struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_hw *hw = &sdata->local->hw; + trace_api_beacon_loss(sdata); + WARN_ON(hw->flags & IEEE80211_HW_CONNECTION_MONITOR); ieee80211_queue_work(hw, &sdata->u.mgd.beacon_connection_loss_work); } @@ -1017,6 +1019,8 @@ void ieee80211_connection_loss(struct ieee80211_vif *vif) struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_hw *hw = &sdata->local->hw; + trace_api_connection_loss(sdata); + WARN_ON(!(hw->flags & IEEE80211_HW_CONNECTION_MONITOR)); ieee80211_queue_work(hw, &sdata->u.mgd.beacon_connection_loss_work); } @@ -2261,6 +2265,8 @@ void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + trace_api_cqm_rssi_notify(sdata, rssi_event); + cfg80211_cqm_rssi_notify(sdata->dev, rssi_event, gfp); } EXPORT_SYMBOL(ieee80211_cqm_rssi_notify); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 75a85978c3b3..eb86a5f6e645 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -247,6 +247,8 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) struct ieee80211_local *local = hw_to_local(hw); bool was_hw_scan; + trace_api_scan_completed(local, aborted); + mutex_lock(&local->scan_mtx); /* diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 4de987cbda1c..ff0eb948917b 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -957,6 +957,8 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw, { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); + trace_api_sta_block_awake(sta->local, pubsta, block); + if (block) set_sta_flags(sta, WLAN_STA_PS_DRIVER); else diff --git a/net/mac80211/util.c b/net/mac80211/util.c index ad9009f717ed..2b75b4fb68f4 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -270,6 +270,8 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue, struct ieee80211_local *local = 
hw_to_local(hw); struct ieee80211_sub_if_data *sdata; + trace_wake_queue(local, queue, reason); + if (WARN_ON(queue >= hw->queues)) return; @@ -312,6 +314,8 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue, struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_sub_if_data *sdata; + trace_stop_queue(local, queue, reason); + if (WARN_ON(queue >= hw->queues)) return; -- cgit v1.2.2 From ed86308f6179d8fa6151c2d0f652aad0091548e2 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 9 Apr 2010 16:42:15 +0200 Subject: netfilter: remove invalid rcu_dereference() calls The CONFIG_PROVE_RCU option discovered a few invalid uses of rcu_dereference() in netfilter. In all these cases, the code code intends to check whether a pointer is already assigned when performing registration or whether the assigned pointer matches when performing unregistration. The entire registration/ unregistration is protected by a mutex, so we don't need the rcu_dereference() calls. Reported-by: Valdis Kletnieks Tested-by: Valdis Kletnieks Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_ecache.c | 18 ++++-------------- net/netfilter/nf_log.c | 8 ++------ 2 files changed, 6 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index d5a9bcd7d61b..849614af2322 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -81,11 +81,9 @@ EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new) { int ret = 0; - struct nf_ct_event_notifier *notify; mutex_lock(&nf_ct_ecache_mutex); - notify = rcu_dereference(nf_conntrack_event_cb); - if (notify != NULL) { + if (nf_conntrack_event_cb != NULL) { ret = -EBUSY; goto out_unlock; } @@ -101,11 +99,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier); void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *new) { - struct nf_ct_event_notifier *notify; - mutex_lock(&nf_ct_ecache_mutex); - notify = rcu_dereference(nf_conntrack_event_cb); - BUG_ON(notify != new); + BUG_ON(nf_conntrack_event_cb != new); rcu_assign_pointer(nf_conntrack_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); } @@ -114,11 +109,9 @@ EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *new) { int ret = 0; - struct nf_exp_event_notifier *notify; mutex_lock(&nf_ct_ecache_mutex); - notify = rcu_dereference(nf_expect_event_cb); - if (notify != NULL) { + if (nf_expect_event_cb != NULL) { ret = -EBUSY; goto out_unlock; } @@ -134,11 +127,8 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier); void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new) { - struct nf_exp_event_notifier *notify; - mutex_lock(&nf_ct_ecache_mutex); - notify = rcu_dereference(nf_expect_event_cb); - BUG_ON(notify != new); + BUG_ON(nf_expect_event_cb != new); rcu_assign_pointer(nf_expect_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); } diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 015725a5cd50..908f59935fbb 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -35,7 +35,6 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger) /* return EEXIST if the same logger is registred, 0 on success. 
*/ int nf_log_register(u_int8_t pf, struct nf_logger *logger) { - const struct nf_logger *llog; int i; if (pf >= ARRAY_SIZE(nf_loggers)) @@ -52,8 +51,7 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger) } else { /* register at end of list to honor first register win */ list_add_tail(&logger->list[pf], &nf_loggers_l[pf]); - llog = rcu_dereference(nf_loggers[pf]); - if (llog == NULL) + if (nf_loggers[pf] == NULL) rcu_assign_pointer(nf_loggers[pf], logger); } @@ -65,13 +63,11 @@ EXPORT_SYMBOL(nf_log_register); void nf_log_unregister(struct nf_logger *logger) { - const struct nf_logger *c_logger; int i; mutex_lock(&nf_log_mutex); for (i = 0; i < ARRAY_SIZE(nf_loggers); i++) { - c_logger = rcu_dereference(nf_loggers[i]); - if (c_logger == logger) + if (nf_loggers[i] == logger) rcu_assign_pointer(nf_loggers[i], NULL); list_del(&logger->list[i]); } -- cgit v1.2.2 From b660d0485efeff743c72f1be2185832de8477a24 Mon Sep 17 00:00:00 2001 From: Adam Nielsen Date: Fri, 9 Apr 2010 16:51:40 +0200 Subject: netfilter: xt_LED: add refcounts to LED target Add reference counting to the netfilter LED target, to fix errors when multiple rules point to the same target ("LED trigger already exists"). Signed-off-by: Adam Nielsen Signed-off-by: Patrick McHardy --- net/netfilter/xt_LED.c | 69 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 63 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c index efcf56db23e8..bd102c77d1f0 100644 --- a/net/netfilter/xt_LED.c +++ b/net/netfilter/xt_LED.c @@ -31,12 +31,18 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Adam Nielsen "); MODULE_DESCRIPTION("Xtables: trigger LED devices on packet match"); +static LIST_HEAD(xt_led_triggers); +static DEFINE_MUTEX(xt_led_mutex); + /* * This is declared in here (the kernel module) only, to avoid having these * dependencies in userspace code. This is what xt_led_info.internal_data * points to. 
*/ struct xt_led_info_internal { + struct list_head list; + int refcnt; + char *trigger_id; struct led_trigger netfilter_led_trigger; struct timer_list timer; }; @@ -53,7 +59,7 @@ led_tg(struct sk_buff *skb, const struct xt_target_param *par) */ if ((ledinfo->delay > 0) && ledinfo->always_blink && timer_pending(&ledinternal->timer)) - led_trigger_event(&ledinternal->netfilter_led_trigger,LED_OFF); + led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF); led_trigger_event(&ledinternal->netfilter_led_trigger, LED_FULL); @@ -74,12 +80,23 @@ led_tg(struct sk_buff *skb, const struct xt_target_param *par) static void led_timeout_callback(unsigned long data) { - struct xt_led_info *ledinfo = (struct xt_led_info *)data; - struct xt_led_info_internal *ledinternal = ledinfo->internal_data; + struct xt_led_info_internal *ledinternal = (struct xt_led_info_internal *)data; led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF); } +static struct xt_led_info_internal *led_trigger_lookup(const char *name) +{ + struct xt_led_info_internal *ledinternal; + + list_for_each_entry(ledinternal, &xt_led_triggers, list) { + if (!strcmp(name, ledinternal->netfilter_led_trigger.name)) { + return ledinternal; + } + } + return NULL; +} + static int led_tg_check(const struct xt_tgchk_param *par) { struct xt_led_info *ledinfo = par->targinfo; @@ -91,11 +108,25 @@ static int led_tg_check(const struct xt_tgchk_param *par) return -EINVAL; } + mutex_lock(&xt_led_mutex); + + ledinternal = led_trigger_lookup(ledinfo->id); + if (ledinternal) { + ledinternal->refcnt++; + goto out; + } + + err = -ENOMEM; ledinternal = kzalloc(sizeof(struct xt_led_info_internal), GFP_KERNEL); if (!ledinternal) - return -ENOMEM; + goto exit_mutex_only; + + ledinternal->trigger_id = kstrdup(ledinfo->id, GFP_KERNEL); + if (!ledinternal->trigger_id) + goto exit_internal_alloc; - ledinternal->netfilter_led_trigger.name = ledinfo->id; + ledinternal->refcnt = 1; + ledinternal->netfilter_led_trigger.name = ledinternal->trigger_id; err = led_trigger_register(&ledinternal->netfilter_led_trigger); if (err) { @@ -108,13 +139,26 @@ static int led_tg_check(const struct xt_tgchk_param *par) /* See if we need to set up a timer */ if (ledinfo->delay > 0) setup_timer(&ledinternal->timer, led_timeout_callback, - (unsigned long)ledinfo); + (unsigned long)ledinternal); + + list_add_tail(&ledinternal->list, &xt_led_triggers); + +out: + mutex_unlock(&xt_led_mutex); ledinfo->internal_data = ledinternal; + return 0; exit_alloc: + kfree(ledinternal->trigger_id); + +exit_internal_alloc: kfree(ledinternal); + +exit_mutex_only: + mutex_unlock(&xt_led_mutex); + return err; } @@ -123,10 +167,23 @@ static void led_tg_destroy(const struct xt_tgdtor_param *par) const struct xt_led_info *ledinfo = par->targinfo; struct xt_led_info_internal *ledinternal = ledinfo->internal_data; + mutex_lock(&xt_led_mutex); + + if (--ledinternal->refcnt) { + mutex_unlock(&xt_led_mutex); + return; + } + + list_del(&ledinternal->list); + if (ledinfo->delay > 0) del_timer_sync(&ledinternal->timer); led_trigger_unregister(&ledinternal->netfilter_led_trigger); + + mutex_unlock(&xt_led_mutex); + + kfree(ledinternal->trigger_id); kfree(ledinternal); } -- cgit v1.2.2 From 39184b151cbe5ce9f1487190ac4244f69bf6a04b Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Thu, 8 Apr 2010 15:35:10 +0800 Subject: mac80211: delay skb linearising in rx decryption We delay the skb linearising in ieee80211_rx_h_decrypt so that frames do not require software decryption are not linearized. 
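The pattern is that anything the decrypt handler needs to read before the frame is known to require software crypto goes through skb_copy_bits() rather than dereferencing skb->data. As a rough sketch of that access pattern (for illustration only; the exact hunk appears in the diff below):

	/* Sketch: read the WEP key index from a possibly non-linear skb
	 * without forcing skb_linearize() first.
	 */
	u8 keyid;

	skb_copy_bits(rx->skb, hdrlen + 3, &keyid, 1);
	keyidx = keyid >> 6;
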
We are safe to do this because ieee80211_get_mmie_keyidx() only requires to touch nonlinear data for management frames, which are already linearized before getting here. Cc: Johannes Berg Signed-off-by: Zhu Yi Signed-off-by: John W. Linville --- net/mac80211/rx.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index d08ede44ac7e..8ee7db193269 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -820,7 +820,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) { struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - struct ieee80211_hdr *hdr; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; int keyidx; int hdrlen; ieee80211_rx_result result = RX_DROP_UNUSABLE; @@ -861,11 +861,6 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) if (!(rx->flags & IEEE80211_RX_RA_MATCH)) return RX_CONTINUE; - if (skb_linearize(rx->skb)) - return RX_DROP_UNUSABLE; - - hdr = (struct ieee80211_hdr *)skb->data; - /* start without a key */ rx->key = NULL; @@ -906,6 +901,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) rx->key = key; return RX_CONTINUE; } else { + u8 keyid; /* * The device doesn't give us the IV so we won't be * able to look up the key. That's ok though, we @@ -928,7 +924,8 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) * no need to call ieee80211_wep_get_keyidx, * it verifies a bunch of things we've done already */ - keyidx = rx->skb->data[hdrlen + 3] >> 6; + skb_copy_bits(rx->skb, hdrlen + 3, &keyid, 1); + keyidx = keyid >> 6; rx->key = rcu_dereference(rx->sdata->keys[keyidx]); @@ -949,6 +946,11 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) return RX_DROP_MONITOR; } + if (skb_linearize(rx->skb)) + return RX_DROP_UNUSABLE; + + hdr = (struct ieee80211_hdr *)rx->skb->data; + /* Check for weak IVs if possible */ if (rx->sta && rx->key->conf.alg == ALG_WEP && ieee80211_is_data(hdr->frame_control) && -- cgit v1.2.2 From c15cf5fcf9ea0a7749536c201965370d99c86c7f Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 8 Apr 2010 16:08:46 -0400 Subject: mac80211: fix typo for LDPC capability Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/mac80211/debugfs_sta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 740ff6c5b92c..6bc9b07c3eda 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -176,7 +176,7 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf, if (htc->ht_supported) { p += scnprintf(p, sizeof(buf)+buf-p, "cap: %#.4x\n", htc->cap); - PRINT_HT_CAP((htc->cap & BIT(0)), "RX LDCP"); + PRINT_HT_CAP((htc->cap & BIT(0)), "RX LDPC"); PRINT_HT_CAP((htc->cap & BIT(1)), "HT20/HT40"); PRINT_HT_CAP(!(htc->cap & BIT(1)), "HT20"); -- cgit v1.2.2 From 68dd5b7a45d1935fcd32b786e8d3d3f7bb4bbfe7 Mon Sep 17 00:00:00 2001 From: Teemu Paasikivi Date: Fri, 9 Apr 2010 13:07:55 +0300 Subject: mac80211: check whether scan is in progress before queueing scan_work As scan_work is queued from work_work it needs to be checked if scan has been started during execution of work_work. Otherwise, when hw scan is used, the stack gets error about hw being busy with ongoing scan. This causes the stack to abort scan without notifying the driver about it. This leads to a situation where the hw is scanning and the stack thinks it's not. 
Then when the scan finishes, the stack will complain by warnings. Signed-off-by: Teemu Paasikivi Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/work.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/work.c b/net/mac80211/work.c index 1e1ea3007b06..7bd8670379de 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -919,11 +919,16 @@ static void ieee80211_work_work(struct work_struct *work) run_again(local, jiffies + HZ/2); } - if (list_empty(&local->work_list) && local->scan_req) + mutex_lock(&local->scan_mtx); + + if (list_empty(&local->work_list) && local->scan_req && + !local->scanning) ieee80211_queue_delayed_work(&local->hw, &local->scan_work, round_jiffies_relative(0)); + mutex_unlock(&local->scan_mtx); + mutex_unlock(&local->work_mtx); ieee80211_recalc_idle(local); -- cgit v1.2.2 From ae4e8d63b5619d4d95f1d2bfa2b836caa6e62d06 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 11 Apr 2010 02:40:49 -0700 Subject: Revert "tcp: Set CHECKSUM_UNNECESSARY in tcp_init_nondata_skb" This reverts commit 2626419ad5be1a054d350786b684b41d23de1538. It causes regressions for people with IGB cards. Connection requests don't complete etc. The true cause of the issue is still not known, but we should sort this out in net-next-2.6 not net-2.6 Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 00afbb0c7e5f..f181b78f2385 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -349,7 +349,6 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb, */ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) { - skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; TCP_SKB_CB(skb)->flags = flags; -- cgit v1.2.2 From 419f9f896074ce8b21e88066e6f3515f18e5641c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 11 Apr 2010 02:15:53 +0000 Subject: tcp: Handle CHECKSUM_PARTIAL for SYNACK packets for IPv4 tcp: Handle CHECKSUM_PARTIAL for SYNACK packets for IPv4 This patch moves the common code between tcp_v4_send_check and tcp_v4_gso_send_check into a new function __tcp_v4_send_check. It then uses the new function in tcp_v4_send_synack so that it handles CHECKSUM_PARTIAL properly. Signed-off-by: Herbert Xu Tested-by: Yinghai Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3c23e70885f4..aebfd28c5089 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -519,26 +519,31 @@ out: sock_put(sk); } -/* This routine computes an IPv4 TCP checksum. */ -void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) +static void __tcp_v4_send_check(struct sk_buff *skb, + __be32 saddr, __be32 daddr) { - struct inet_sock *inet = inet_sk(sk); struct tcphdr *th = tcp_hdr(skb); if (skb->ip_summed == CHECKSUM_PARTIAL) { - th->check = ~tcp_v4_check(len, inet->inet_saddr, - inet->inet_daddr, 0); + th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0); skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct tcphdr, check); } else { - th->check = tcp_v4_check(len, inet->inet_saddr, - inet->inet_daddr, + th->check = tcp_v4_check(skb->len, saddr, daddr, csum_partial(th, th->doff << 2, skb->csum)); } } +/* This routine computes an IPv4 TCP checksum. 
*/ +void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) +{ + struct inet_sock *inet = inet_sk(sk); + + __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr); +} + int tcp_v4_gso_send_check(struct sk_buff *skb) { const struct iphdr *iph; @@ -551,10 +556,8 @@ int tcp_v4_gso_send_check(struct sk_buff *skb) th = tcp_hdr(skb); th->check = 0; - th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0); - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct tcphdr, check); skb->ip_summed = CHECKSUM_PARTIAL; + __tcp_v4_send_check(skb, iph->saddr, iph->daddr); return 0; } @@ -763,13 +766,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, skb = tcp_make_synack(sk, dst, req, rvp); if (skb) { - struct tcphdr *th = tcp_hdr(skb); - - th->check = tcp_v4_check(skb->len, - ireq->loc_addr, - ireq->rmt_addr, - csum_partial(th, skb->len, - skb->csum)); + __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr); err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, ireq->rmt_addr, -- cgit v1.2.2 From 8ad50d96db58c58ba67ec1c6f9d3dae0db52338a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 11 Apr 2010 02:15:54 +0000 Subject: tcp: Handle CHECKSUM_PARTIAL for SYNACK packets for IPv6 tcp: Handle CHECKSUM_PARTIAL for SYNACK packets for IPv6 This patch moves the common code between tcp_v6_send_check and tcp_v6_gso_send_check into a new function __tcp_v6_send_check. It then uses the new function in tcp_v6_send_synack as well as tcp_v6_send_response so that they handle CHECKSUM_PARTIAL properly. Signed-off-by: Herbert Xu Tested-by: Yinghai Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c92ebe8f80d5..f84c506c588a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -75,6 +75,9 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, struct request_sock *req); static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); +static void __tcp_v6_send_check(struct sk_buff *skb, + struct in6_addr *saddr, + struct in6_addr *daddr); static const struct inet_connection_sock_af_ops ipv6_mapped; static const struct inet_connection_sock_af_ops ipv6_specific; @@ -503,11 +506,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, skb = tcp_make_synack(sk, dst, req, rvp); if (skb) { - struct tcphdr *th = tcp_hdr(skb); - - th->check = tcp_v6_check(skb->len, - &treq->loc_addr, &treq->rmt_addr, - csum_partial(th, skb->len, skb->csum)); + __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); err = ip6_xmit(sk, skb, &fl, opt, 0); @@ -918,22 +917,29 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_destructor= tcp_twsk_destructor, }; -static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) +static void __tcp_v6_send_check(struct sk_buff *skb, + struct in6_addr *saddr, struct in6_addr *daddr) { - struct ipv6_pinfo *np = inet6_sk(sk); struct tcphdr *th = tcp_hdr(skb); if (skb->ip_summed == CHECKSUM_PARTIAL) { - th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0); + th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0); skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct tcphdr, check); } else { - th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, - 
csum_partial(th, th->doff<<2, - skb->csum)); + th->check = tcp_v6_check(skb->len, saddr, daddr, + csum_partial(th, th->doff << 2, + skb->csum)); } } +static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + + __tcp_v6_send_check(skb, &np->saddr, &np->daddr); +} + static int tcp_v6_gso_send_check(struct sk_buff *skb) { struct ipv6hdr *ipv6h; @@ -946,11 +952,8 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb) th = tcp_hdr(skb); th->check = 0; - th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len, - IPPROTO_TCP, 0); - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct tcphdr, check); skb->ip_summed = CHECKSUM_PARTIAL; + __tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr); return 0; } @@ -1053,9 +1056,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr); - t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst, - tot_len, IPPROTO_TCP, - buff->csum); + __tcp_v6_send_check(buff, &fl.fl6_src, &fl.fl6_dst); fl.proto = IPPROTO_TCP; fl.oif = inet6_iif(skb); -- cgit v1.2.2 From bb29624614c2afe2873ee8ee97cf09df42701694 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 11 Apr 2010 02:15:55 +0000 Subject: inet: Remove unused send_check length argument inet: Remove unused send_check length argument This patch removes the unused length argument from the send_check function in struct inet_connection_sock_af_ops. Signed-off-by: Herbert Xu Tested-by: Yinghai Signed-off-by: David S. Miller --- net/dccp/dccp.h | 2 +- net/dccp/ipv4.c | 2 +- net/dccp/ipv6.c | 3 +-- net/dccp/output.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_output.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 7 files changed, 7 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 53f8e12d0c10..a10a61a1ded2 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -223,7 +223,7 @@ static inline void dccp_csum_outgoing(struct sk_buff *skb) skb->csum = skb_checksum(skb, 0, (cov > skb->len)? 
skb->len : cov, 0); } -extern void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); +extern void dccp_v4_send_check(struct sock *sk, struct sk_buff *skb); extern int dccp_retransmit_skb(struct sock *sk); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 52ffa1cde15a..d9b11ef8694c 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -349,7 +349,7 @@ static inline __sum16 dccp_v4_csum_finish(struct sk_buff *skb, return csum_tcpudp_magic(src, dst, skb->len, IPPROTO_DCCP, skb->csum); } -void dccp_v4_send_check(struct sock *sk, int unused, struct sk_buff *skb) +void dccp_v4_send_check(struct sock *sk, struct sk_buff *skb) { const struct inet_sock *inet = inet_sk(sk); struct dccp_hdr *dh = dccp_hdr(skb); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 3b11e41a2929..ab1ab95946df 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -60,8 +60,7 @@ static inline __sum16 dccp_v6_csum_finish(struct sk_buff *skb, return csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_DCCP, skb->csum); } -static inline void dccp_v6_send_check(struct sock *sk, int unused_value, - struct sk_buff *skb) +static inline void dccp_v6_send_check(struct sock *sk, struct sk_buff *skb) { struct ipv6_pinfo *np = inet6_sk(sk); struct dccp_hdr *dh = dccp_hdr(skb); diff --git a/net/dccp/output.c b/net/dccp/output.c index fc3f436440b4..b8d98e3c052a 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -129,7 +129,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) break; } - icsk->icsk_af_ops->send_check(sk, 0, skb); + icsk->icsk_af_ops->send_check(sk, skb); if (set_ack) dccp_event_ack_sent(sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index aebfd28c5089..a24995cdc4b6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -537,7 +537,7 @@ static void __tcp_v4_send_check(struct sk_buff *skb, } /* This routine computes an IPv4 TCP checksum. */ -void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) +void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) { struct inet_sock *inet = inet_sk(sk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0dda86e72ad8..0ae7ce7a71a6 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -878,7 +878,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, } #endif - icsk->icsk_af_ops->send_check(sk, skb->len, skb); + icsk->icsk_af_ops->send_check(sk, skb); if (likely(tcb->flags & TCPCB_FLAG_ACK)) tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f84c506c588a..b429dfdd69dc 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -933,7 +933,7 @@ static void __tcp_v6_send_check(struct sk_buff *skb, } } -static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) +static void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) { struct ipv6_pinfo *np = inet6_sk(sk); -- cgit v1.2.2 From 2e8e18ef52e7dd1af0a3bd1f7d990a1d0b249586 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 8 Apr 2010 11:32:30 -0700 Subject: tcp: Set CHECKSUM_UNNECESSARY in tcp_init_nondata_skb Back in commit 04a0551c87363f100b04d28d7a15a632b70e18e7 ("loopback: Drop obsolete ip_summed setting") we stopped setting CHECKSUM_UNNECESSARY in the loopback xmit. This is because such a setting was a lie since it implies that the checksum field of the packet is properly filled in. Instead what happens normally is that CHECKSUM_PARTIAL is set and skb->csum is calculated as needed. 
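For reference, the CHECKSUM_PARTIAL convention is that the transport checksum field holds only the (complemented) pseudo-header sum, while csum_start/csum_offset tell the device, or the software fallback, where to fold in the rest. A minimal sketch of that setup, mirroring __tcp_v4_send_check() earlier in this series:

	/* Sketch: set up a TCP skb for CHECKSUM_PARTIAL offload */
	th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
	skb->csum_start = skb_transport_header(skb) - skb->head;
	skb->csum_offset = offsetof(struct tcphdr, check);
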
But this was only happening for TCP data packets (via the skb->ip_summed assignment done in tcp_sendmsg()). It doesn't happen for non-data packets like ACKs etc. Fix this by setting skb->ip_summed in the common non-data packet constructor. It already is setting skb->csum to zero. But this reminds us that we still have things like ip_output.c's ip_dev_loopback_xmit() which sets skb->ip_summed to the value CHECKSUM_UNNECESSARY, which Herbert's patch teaches us is not valid. So we'll have to address that at some point too. Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0ae7ce7a71a6..e46849989a53 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -350,6 +350,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb, */ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) { + skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; TCP_SKB_CB(skb)->flags = flags; -- cgit v1.2.2 From ed85b565b825566da34e55eee9ad150ed93fdda0 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Wed, 7 Apr 2010 22:41:28 +0000 Subject: packet: support for TX time stamps on RAW sockets Enable the SO_TIMESTAMPING socket infrastructure for raw packet sockets. We introduce PACKET_TX_TIMESTAMP for the control message cmsg_type. Similar support for UDP and CAN sockets was added in commit 51f31cabe3ce5345b51e4a4f82138b38c4d5dc91 Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- net/packet/af_packet.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d7d0310dca9d..f162d59d8161 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -82,6 +82,7 @@ #include #include #include +#include #ifdef CONFIG_INET #include @@ -315,6 +316,8 @@ static inline struct packet_sock *pkt_sk(struct sock *sk) static void packet_sock_destruct(struct sock *sk) { + skb_queue_purge(&sk->sk_error_queue); + WARN_ON(atomic_read(&sk->sk_rmem_alloc)); WARN_ON(atomic_read(&sk->sk_wmem_alloc)); @@ -483,6 +486,9 @@ retry: skb->dev = dev; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; + err = sock_tx_timestamp(msg, sk, skb_tx(skb)); + if (err < 0) + goto out_unlock; dev_queue_xmit(skb); rcu_read_unlock(); @@ -1188,6 +1194,9 @@ static int packet_snd(struct socket *sock, err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len); if (err) goto out_free; + err = sock_tx_timestamp(msg, sk, skb_tx(skb)); + if (err < 0) + goto out_free; skb->protocol = proto; skb->dev = dev; @@ -1487,6 +1496,51 @@ out: return err; } +static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len) +{ + struct sock_exterr_skb *serr; + struct sk_buff *skb, *skb2; + int copied, err; + + err = -EAGAIN; + skb = skb_dequeue(&sk->sk_error_queue); + if (skb == NULL) + goto out; + + copied = skb->len; + if (copied > len) { + msg->msg_flags |= MSG_TRUNC; + copied = len; + } + err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + if (err) + goto out_free_skb; + + sock_recv_timestamp(msg, sk, skb); + + serr = SKB_EXT_ERR(skb); + put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP, + sizeof(serr->ee), &serr->ee); + + msg->msg_flags |= MSG_ERRQUEUE; + err = copied; + + /* Reset and regenerate socket error */ + spin_lock_bh(&sk->sk_error_queue.lock); + sk->sk_err = 0; + if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) { + 
sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno; + spin_unlock_bh(&sk->sk_error_queue.lock); + sk->sk_error_report(sk); + } else + spin_unlock_bh(&sk->sk_error_queue.lock); + +out_free_skb: + kfree_skb(skb); +out: + return err; +} + /* * Pull a packet from our receive queue and hand it to the user. * If necessary we block. @@ -1502,7 +1556,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, int vnet_hdr_len = 0; err = -EINVAL; - if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) + if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE)) goto out; #if 0 @@ -1511,6 +1565,11 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, return -ENODEV; #endif + if (flags & MSG_ERRQUEUE) { + err = packet_recv_error(sk, msg, len); + goto out; + } + /* * Call the generic datagram receiver. This handles all sorts * of horrible races and re-entrancy so we can forget about it -- cgit v1.2.2 From 7a161ea92471087a1579239d7a58dd06eaa5601c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Apr 2010 21:26:13 +0000 Subject: net: Dont use netdev_warn() Dont use netdev_warn() in dev_cap_txqueue() and get_rps_cpu() so that we can catch following warnings without crash. bond0.2240 received packet on queue 6, but number of RX queues is 1 bond0.2240 received packet on queue 11, but number of RX queues is 1 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index a10a21619ae3..0eb79e35671f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1987,9 +1987,9 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) { if (unlikely(queue_index >= dev->real_num_tx_queues)) { if (net_ratelimit()) { - netdev_warn(dev, "selects TX queue %d, but " - "real number of TX queues is %d\n", - queue_index, dev->real_num_tx_queues); + pr_warning("%s selects TX queue %d, but " + "real number of TX queues is %d\n", + dev->name, queue_index, dev->real_num_tx_queues); } return 0; } @@ -2223,9 +2223,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb) u16 index = skb_get_rx_queue(skb); if (unlikely(index >= dev->num_rx_queues)) { if (net_ratelimit()) { - netdev_warn(dev, "received packet on queue " - "%u, but number of RX queues is %u\n", - index, dev->num_rx_queues); + pr_warning("%s received packet on queue " + "%u, but number of RX queues is %u\n", + dev->name, index, dev->num_rx_queues); } goto done; } -- cgit v1.2.2 From b6c6712a42ca3f9fa7f4a3d7c40e3a9dd1fd9e03 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Apr 2010 23:03:29 +0000 Subject: net: sk_dst_cache RCUification With latest CONFIG_PROVE_RCU stuff, I felt more comfortable to make this work. sk->sk_dst_cache is currently protected by a rwlock (sk_dst_lock) This rwlock is readlocked for a very small amount of time, and dst entries are already freed after RCU grace period. This calls for RCU again :) This patch converts sk_dst_lock to a spinlock, and use RCU for readers. __sk_dst_get() is supposed to be called with rcu_read_lock() or if socket locked by user, so use appropriate rcu_dereference_check() condition (rcu_read_lock_held() || sock_owned_by_user(sk)) This patch avoids two atomic ops per tx packet on UDP connected sockets, for example, and permits sk_dst_lock to be much less dirtied. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/dev.c | 2 +- net/core/sock.c | 8 ++++---- net/dccp/timer.c | 4 ++-- net/decnet/af_decnet.c | 6 +++--- net/ipv4/af_inet.c | 2 +- net/ipv4/tcp_input.c | 4 ++-- net/ipv4/tcp_timer.c | 4 ++-- net/ipv6/ipv6_sockglue.c | 25 +++++++++++++------------ 8 files changed, 28 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 0eb79e35671f..ca4cdef74a1b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2015,7 +2015,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev, if (dev->real_num_tx_queues > 1) queue_index = skb_tx_hash(dev, skb); - if (sk && sk->sk_dst_cache) + if (sk && rcu_dereference_check(sk->sk_dst_cache, 1)) sk_tx_queue_set(sk, queue_index); } } diff --git a/net/core/sock.c b/net/core/sock.c index c5812bbc2cc9..7effa1e689df 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -364,11 +364,11 @@ EXPORT_SYMBOL(sk_reset_txq); struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) { - struct dst_entry *dst = sk->sk_dst_cache; + struct dst_entry *dst = __sk_dst_get(sk); if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { sk_tx_queue_clear(sk); - sk->sk_dst_cache = NULL; + rcu_assign_pointer(sk->sk_dst_cache, NULL); dst_release(dst); return NULL; } @@ -1157,7 +1157,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) skb_queue_head_init(&newsk->sk_async_wait_queue); #endif - rwlock_init(&newsk->sk_dst_lock); + spin_lock_init(&newsk->sk_dst_lock); rwlock_init(&newsk->sk_callback_lock); lockdep_set_class_and_name(&newsk->sk_callback_lock, af_callback_keys + newsk->sk_family, @@ -1898,7 +1898,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) } else sk->sk_sleep = NULL; - rwlock_init(&sk->sk_dst_lock); + spin_lock_init(&sk->sk_dst_lock); rwlock_init(&sk->sk_callback_lock); lockdep_set_class_and_name(&sk->sk_callback_lock, af_callback_keys + sk->sk_family, diff --git a/net/dccp/timer.c b/net/dccp/timer.c index bbfeb5eae46a..1a9aa05d4dc4 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -38,7 +38,7 @@ static int dccp_write_timeout(struct sock *sk) if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) { if (icsk->icsk_retransmits != 0) - dst_negative_advice(&sk->sk_dst_cache, sk); + dst_negative_advice(sk); retry_until = icsk->icsk_syn_retries ? : sysctl_dccp_request_retries; } else { @@ -63,7 +63,7 @@ static int dccp_write_timeout(struct sock *sk) Golden words :-). 
*/ - dst_negative_advice(&sk->sk_dst_cache, sk); + dst_negative_advice(sk); } retry_until = sysctl_dccp_retries2; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 2b494fac9468..55e3b6b0061a 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -446,7 +446,7 @@ static void dn_destruct(struct sock *sk) skb_queue_purge(&scp->other_xmit_queue); skb_queue_purge(&scp->other_receive_queue); - dst_release(xchg(&sk->sk_dst_cache, NULL)); + dst_release(rcu_dereference_check(sk->sk_dst_cache, 1)); } static int dn_memory_pressure; @@ -1105,7 +1105,7 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags) release_sock(sk); dst = skb_dst(skb); - dst_release(xchg(&newsk->sk_dst_cache, dst)); + sk_dst_set(newsk, dst); skb_dst_set(skb, NULL); DN_SK(newsk)->state = DN_CR; @@ -1956,7 +1956,7 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock, } if ((flags & MSG_TRYHARD) && sk->sk_dst_cache) - dst_negative_advice(&sk->sk_dst_cache, sk); + dst_negative_advice(sk); mss = scp->segsize_rem; fctype = scp->services_rem & NSP_FC_MASK; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index a0beb32beaa3..193dcd6ed64f 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -154,7 +154,7 @@ void inet_sock_destruct(struct sock *sk) WARN_ON(sk->sk_forward_alloc); kfree(inet->opt); - dst_release(sk->sk_dst_cache); + dst_release(rcu_dereference_check(sk->sk_dst_cache, 1)); sk_refcnt_debug_dec(sk); } EXPORT_SYMBOL(inet_sock_destruct); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4000b10610b7..ae3ec15fb630 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3710,7 +3710,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) } if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) - dst_confirm(sk->sk_dst_cache); + dst_confirm(__sk_dst_get(sk)); return 1; @@ -5833,7 +5833,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (tp->snd_una == tp->write_seq) { tcp_set_state(sk, TCP_FIN_WAIT2); sk->sk_shutdown |= SEND_SHUTDOWN; - dst_confirm(sk->sk_dst_cache); + dst_confirm(__sk_dst_get(sk)); if (!sock_flag(sk, SOCK_DEAD)) /* Wake up lingering close() */ diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 8a0ab2977f1f..c732be00606b 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -172,14 +172,14 @@ static int tcp_write_timeout(struct sock *sk) if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { if (icsk->icsk_retransmits) - dst_negative_advice(&sk->sk_dst_cache, sk); + dst_negative_advice(sk); retry_until = icsk->icsk_syn_retries ? 
: sysctl_tcp_syn_retries; } else { if (retransmits_timed_out(sk, sysctl_tcp_retries1)) { /* Black hole detection */ tcp_mtu_probing(icsk, sk); - dst_negative_advice(&sk->sk_dst_cache, sk); + dst_negative_advice(sk); } retry_until = sysctl_tcp_retries2; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 33f60fca7aa7..1160400e9dbd 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -114,9 +114,9 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, } opt = xchg(&inet6_sk(sk)->opt, opt); } else { - write_lock(&sk->sk_dst_lock); + spin_lock(&sk->sk_dst_lock); opt = xchg(&inet6_sk(sk)->opt, opt); - write_unlock(&sk->sk_dst_lock); + spin_unlock(&sk->sk_dst_lock); } sk_dst_reset(sk); @@ -971,14 +971,13 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, case IPV6_MTU: { struct dst_entry *dst; + val = 0; - lock_sock(sk); - dst = sk_dst_get(sk); - if (dst) { + rcu_read_lock(); + dst = __sk_dst_get(sk); + if (dst) val = dst_mtu(dst); - dst_release(dst); - } - release_sock(sk); + rcu_read_unlock(); if (!val) return -ENOTCONN; break; @@ -1066,12 +1065,14 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, else val = np->mcast_hops; - dst = sk_dst_get(sk); - if (dst) { - if (val < 0) + if (val < 0) { + rcu_read_lock(); + dst = __sk_dst_get(sk); + if (dst) val = ip6_dst_hoplimit(dst); - dst_release(dst); + rcu_read_unlock(); } + if (val < 0) val = sock_net(sk)->ipv6.devconf_all->hop_limit; break; -- cgit v1.2.2 From 22068311b62858ea7eb71653a07564fd73d7a9b0 Mon Sep 17 00:00:00 2001 From: Zhitong Wang Date: Tue, 13 Apr 2010 11:25:41 +0200 Subject: netfilter: fix some coding styles and remove moduleparam.h Fix some coding styles and remove moduleparam.h Signed-off-by: Zhitong Wang Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_nat_standalone.c | 3 +-- net/ipv4/netfilter/nf_nat_tftp.c | 1 - net/netfilter/nf_conntrack_proto.c | 2 -- 3 files changed, 1 insertion(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 5678e9562c15..0b49248e34fa 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -137,9 +137,8 @@ nf_nat_fn(unsigned int hooknum, ret = nf_nat_rule_find(skb, hooknum, in, out, ct); - if (ret != NF_ACCEPT) { + if (ret != NF_ACCEPT) return ret; - } } else pr_debug("Already setup manip %s for ct %p\n", maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST", diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c index b096e81500ae..7274a43c7a12 100644 --- a/net/ipv4/netfilter/nf_nat_tftp.c +++ b/net/ipv4/netfilter/nf_nat_tftp.c @@ -6,7 +6,6 @@ */ #include -#include #include #include diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 1a4568bf7ea5..f71cd5da751c 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -13,12 +13,10 @@ #include #include #include -#include #include #include #include #include -#include #include #include #include -- cgit v1.2.2 From 93fa159abe50d3c55c7f83622d3f5c09b6e06f4b Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 12 Apr 2010 05:41:31 +0000 Subject: IPv6: keep route for tentative address Recent changes preserve IPv6 address when link goes down (good). But would cause address to point to dead dst entry (bad). The simplest fix is to just not delete route if address is being held for later use. 
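Roughly, the notifier path should only tear the route down once the address is really dead, not when it is merely parked as tentative across a link-down. A sketch of the intended check (the actual hunk is in the diff below):

	/* Sketch: delete the cached route only for a truly dead address */
	if (ifp->dead && ip6_del_rt(ifp->rt))
		dst_free(&ifp->rt->u.dst);
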
Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1b00bfef268e..a9913d23f7c7 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4047,7 +4047,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) addrconf_leave_anycast(ifp); addrconf_leave_solict(ifp->idev, &ifp->addr); dst_hold(&ifp->rt->u.dst); - if (ip6_del_rt(ifp->rt)) + + if (ifp->dead && ip6_del_rt(ifp->rt)) dst_free(&ifp->rt->u.dst); break; } -- cgit v1.2.2 From 27bdb2abcc5edb3526e25407b74bf17d1872c329 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 12 Apr 2010 05:41:32 +0000 Subject: IPv6: keep tentative addresses in hash table When link goes down, want address to be preserved but in a tentative state, therefore it has to stay in hash list. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a9913d23f7c7..9d78c1229497 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2704,17 +2704,18 @@ static int addrconf_ifdown(struct net_device *dev, int how) /* Flag it for later restoration when link comes up */ ifa->flags |= IFA_F_TENTATIVE; in6_ifa_hold(ifa); + write_unlock_bh(&idev->lock); } else { list_del(&ifa->if_list); ifa->dead = 1; - } - write_unlock_bh(&idev->lock); + write_unlock_bh(&idev->lock); - /* clear hash table */ - spin_lock_bh(&addrconf_hash_lock); - hlist_del_init_rcu(&ifa->addr_lst); - __in6_ifa_put(ifa); - spin_unlock_bh(&addrconf_hash_lock); + /* clear hash table */ + spin_lock_bh(&addrconf_hash_lock); + hlist_del_init_rcu(&ifa->addr_lst); + __in6_ifa_put(ifa); + spin_unlock_bh(&addrconf_hash_lock); + } __ipv6_ifa_notify(RTM_DELADDR, ifa); atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); -- cgit v1.2.2 From d1f84c63a465d6ba16955930519b7f68c550cae1 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 12 Apr 2010 05:41:33 +0000 Subject: ipv6: additional ref count for hash list unnecessary Since an address in hash list has to already have a ref count, no additional ref count is needed. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9d78c1229497..a0175edb6589 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -676,7 +676,6 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, hash = ipv6_addr_hash(addr); hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]); - in6_ifa_hold(ifa); spin_unlock(&addrconf_hash_lock); write_lock(&idev->lock); @@ -724,7 +723,6 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) spin_lock_bh(&addrconf_hash_lock); hlist_del_init_rcu(&ifp->addr_lst); - __in6_ifa_put(ifp); spin_unlock_bh(&addrconf_hash_lock); write_lock_bh(&idev->lock); @@ -2713,7 +2711,6 @@ static int addrconf_ifdown(struct net_device *dev, int how) /* clear hash table */ spin_lock_bh(&addrconf_hash_lock); hlist_del_init_rcu(&ifa->addr_lst); - __in6_ifa_put(ifa); spin_unlock_bh(&addrconf_hash_lock); } -- cgit v1.2.2 From 8595805aafc8b077e01804c9a3668e9aa3510e89 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 12 Apr 2010 05:41:34 +0000 Subject: IPv6: only notify protocols if address is compeletely gone The notifier for address down should only be called if address is completely gone, not just being marked as tentative on link transistion. The code in net-next would case bonding/sctp/s390 to see address disappear on link down, but they would never see it reappear on link up. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a0175edb6589..7cba8845242f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2715,7 +2715,9 @@ static int addrconf_ifdown(struct net_device *dev, int how) } __ipv6_ifa_notify(RTM_DELADDR, ifa); - atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); + if (ifa->dead) + atomic_notifier_call_chain(&inet6addr_chain, + NETDEV_DOWN, ifa); in6_ifa_put(ifa); write_lock_bh(&idev->lock); -- cgit v1.2.2 From 8237908e145b84d4b020790df0f9315d2f71e980 Mon Sep 17 00:00:00 2001 From: Bart De Schuymer Date: Tue, 13 Apr 2010 11:40:41 +0200 Subject: netfilter: bridge-netfilter: cleanup br_netfilter.c bridge-netfilter: cleanup br_netfilter.c - remove some of the graffiti at the head of br_netfilter.c - remove __br_dnat_complain() - remove KERN_INFO messages when CONFIG_NETFILTER_DEBUG is defined Signed-off-by: Bart De Schuymer Signed-off-by: Patrick McHardy --- net/bridge/br_netfilter.c | 58 ++--------------------------------------------- 1 file changed, 2 insertions(+), 56 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index bc6b57248494..dd6f538ba0b0 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -3,15 +3,8 @@ * Linux ethernet bridge * * Authors: - * Lennert Buytenhek - * Bart De Schuymer (maintainer) - * - * Changes: - * Apr 29 2003: physdev module support (bdschuym) - * Jun 19 2003: let arptables see bridged ARP traffic (bdschuym) - * Oct 06 2003: filter encapsulated IP/ARP VLAN traffic on untagged bridge - * (bdschuym) - * Sep 01 2004: add IPv6 filtering (bdschuym) + * Lennert Buytenhek + * Bart De Schuymer * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -252,17 +245,6 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) return 0; } -static void 
__br_dnat_complain(void) -{ - static unsigned long last_complaint; - - if (jiffies - last_complaint >= 5 * HZ) { - printk(KERN_WARNING "Performing cross-bridge DNAT requires IP " - "forwarding to be enabled\n"); - last_complaint = jiffies; - } -} - /* This requires some explaining. If DNAT has taken place, * we will need to fix up the destination Ethernet address, * and this is a tricky process. @@ -378,11 +360,6 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) skb_dst_set(skb, (struct dst_entry *)rt); goto bridged_dnat; } - /* we are sure that forwarding is disabled, so printing - * this message is no problem. Note that the packet could - * still have a martian destination address, in which case - * the packet could be dropped even if forwarding were enabled */ - __br_dnat_complain(); dst_release((struct dst_entry *)rt); } free_skb: @@ -820,17 +797,6 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, struct net_device *realoutdev = bridge_parent(skb->dev); u_int8_t pf; -#ifdef CONFIG_NETFILTER_DEBUG - /* Be very paranoid. This probably won't happen anymore, but let's - * keep the check just to be sure... */ - if (skb_mac_header(skb) < skb->head || - skb_mac_header(skb) + ETH_HLEN > skb->data) { - printk(KERN_CRIT "br_netfilter: Argh!! br_nf_post_routing: " - "bad mac.raw pointer.\n"); - goto print_error; - } -#endif - if (!nf_bridge) return NF_ACCEPT; @@ -849,13 +815,6 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, else return NF_ACCEPT; -#ifdef CONFIG_NETFILTER_DEBUG - if (skb_dst(skb) == NULL) { - printk(KERN_INFO "br_netfilter post_routing: skb->dst == NULL\n"); - goto print_error; - } -#endif - /* We assume any code from br_dev_queue_push_xmit onwards doesn't care * about the value of skb->pkt_type. */ if (skb->pkt_type == PACKET_OTHERHOST) { @@ -870,19 +829,6 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, br_nf_dev_queue_xmit); return NF_STOLEN; - -#ifdef CONFIG_NETFILTER_DEBUG -print_error: - if (skb->dev != NULL) { - printk("[%s]", skb->dev->name); - if (realoutdev) - printk("[%s]", realoutdev->name); - } - printk(" head:%p, raw:%p, data:%p\n", skb->head, skb_mac_header(skb), - skb->data); - dump_stack(); - return NF_ACCEPT; -#endif } /* IP/SABOTAGE *****************************************************/ -- cgit v1.2.2 From e26c28e8bffe12b27df5b828404afed9e1949191 Mon Sep 17 00:00:00 2001 From: Bart De Schuymer Date: Tue, 13 Apr 2010 11:41:39 +0200 Subject: netfilter: bridge-netfilter: update a comment in br_forward.c about ip_fragment() ip_refrag isn't used anymore in the bridge-netfilter code Signed-off-by: Bart De Schuymer Signed-off-by: Patrick McHardy --- net/bridge/br_forward.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 7ab52d07b477..15abef7349f3 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -43,7 +43,7 @@ int br_dev_queue_push_xmit(struct sk_buff *skb) if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) kfree_skb(skb); else { - /* ip_refrag calls ip_fragment, doesn't copy the MAC header. 
*/ + /* ip_fragment doesn't copy the MAC header */ if (nf_bridge_maybe_copy_header(skb)) kfree_skb(skb); else { -- cgit v1.2.2 From 4ffa87012efd7b664762b579213d4663560ef4a3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 9 Apr 2010 23:47:31 +0000 Subject: can: avoids a false warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At this point optlen == sizeof(sfilter) but some compilers are dumb. Reported-by: Németh Márton Acked-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/can/raw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/can/raw.c b/net/can/raw.c index 3a7dffb6519c..da99cf153b33 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -445,7 +445,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, return -EFAULT; } } else if (count == 1) { - if (copy_from_user(&sfilter, optval, optlen)) + if (copy_from_user(&sfilter, optval, sizeof(sfilter))) return -EFAULT; } -- cgit v1.2.2 From acbbc07145b919248c410e1852b953d385be5c97 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 11 Apr 2010 06:56:11 +0000 Subject: net: uninline skb_bond_should_drop() skb_bond_should_drop() is too big to be inlined. This patch reduces kernel text size, and its compilation time as well (shrinking include/linux/netdevice.h) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index ca4cdef74a1b..876b1112d5ba 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2646,6 +2646,55 @@ void netif_nit_deliver(struct sk_buff *skb) rcu_read_unlock(); } +static inline void skb_bond_set_mac_by_master(struct sk_buff *skb, + struct net_device *master) +{ + if (skb->pkt_type == PACKET_HOST) { + u16 *dest = (u16 *) eth_hdr(skb)->h_dest; + + memcpy(dest, master->dev_addr, ETH_ALEN); + } +} + +/* On bonding slaves other than the currently active slave, suppress + * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and + * ARP on active-backup slaves with arp_validate enabled. + */ +int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master) +{ + struct net_device *dev = skb->dev; + + if (master->priv_flags & IFF_MASTER_ARPMON) + dev->last_rx = jiffies; + + if ((master->priv_flags & IFF_MASTER_ALB) && master->br_port) { + /* Do address unmangle. The local destination address + * will be always the one master has. Provides the right + * functionality in a bridge. + */ + skb_bond_set_mac_by_master(skb, master); + } + + if (dev->priv_flags & IFF_SLAVE_INACTIVE) { + if ((dev->priv_flags & IFF_SLAVE_NEEDARP) && + skb->protocol == __cpu_to_be16(ETH_P_ARP)) + return 0; + + if (master->priv_flags & IFF_MASTER_ALB) { + if (skb->pkt_type != PACKET_BROADCAST && + skb->pkt_type != PACKET_MULTICAST) + return 0; + } + if (master->priv_flags & IFF_MASTER_8023AD && + skb->protocol == __cpu_to_be16(ETH_P_SLOW)) + return 0; + + return 1; + } + return 0; +} +EXPORT_SYMBOL(__skb_bond_should_drop); + static int __netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; -- cgit v1.2.2 From 561155110307ad304226a23272244398fa46cbae Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 12 Apr 2010 07:38:05 +0000 Subject: dst: don't inline dst_ifdown The function dst_ifdown is called only two places but in a non- performance critical code path, there is no reason to inline it. 
Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/dst.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dst.c b/net/core/dst.c index b8c22f0f9373..9920722cc82b 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -286,8 +286,8 @@ EXPORT_SYMBOL(dst_release); * * Commented and originally written by Alexey. */ -static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev, - int unregister) +static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, + int unregister) { if (dst->ops->ifdown) dst->ops->ifdown(dst, dev, unregister); -- cgit v1.2.2 From 9e50849054a4824f06c66d2b449de21b98e03770 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 13 Apr 2010 15:28:11 +0200 Subject: netfilter: ipv6: move POSTROUTING invocation before fragmentation Patrick McHardy notes: "We used to invoke IPv4 POST_ROUTING after fragmentation as well just to defragment the packets in conntrack immediately afterwards, but that got changed during the netfilter-ipsec integration. Ideally IPv6 would behave like IPv4." This patch makes it so. Sending an oversized frame (e.g. `ping6 -s64000 -c1 ::1`) will now show up in POSTROUTING as a single skb rather than multiple ones. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/ipv6/ip6_output.c | 49 +++++++++++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 4535b7a0169b..236ac7813744 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -82,22 +82,6 @@ int ip6_local_out(struct sk_buff *skb) } EXPORT_SYMBOL_GPL(ip6_local_out); -static int ip6_output_finish(struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - - if (dst->hh) - return neigh_hh_output(dst->hh, skb); - else if (dst->neighbour) - return dst->neighbour->output(skb); - - IP6_INC_STATS_BH(dev_net(dst->dev), - ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); - kfree_skb(skb); - return -EINVAL; - -} - /* dev_loopback_xmit for use with netfilter. 
*/ static int ip6_dev_loopback_xmit(struct sk_buff *newskb) { @@ -111,8 +95,7 @@ static int ip6_dev_loopback_xmit(struct sk_buff *newskb) return 0; } - -static int ip6_output2(struct sk_buff *skb) +static int ip6_finish_output2(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; @@ -150,8 +133,15 @@ static int ip6_output2(struct sk_buff *skb) skb->len); } - return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, skb->dev, - ip6_output_finish); + if (dst->hh) + return neigh_hh_output(dst->hh, skb); + else if (dst->neighbour) + return dst->neighbour->output(skb); + + IP6_INC_STATS_BH(dev_net(dst->dev), + ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + kfree_skb(skb); + return -EINVAL; } static inline int ip6_skb_dst_mtu(struct sk_buff *skb) @@ -162,21 +152,28 @@ static inline int ip6_skb_dst_mtu(struct sk_buff *skb) skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); } +static int ip6_finish_output(struct sk_buff *skb) +{ + if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || + dst_allfrag(skb_dst(skb))) + return ip6_fragment(skb, ip6_finish_output2); + else + return ip6_finish_output2(skb); +} + int ip6_output(struct sk_buff *skb) { + struct net_device *dev = skb_dst(skb)->dev; struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); if (unlikely(idev->cnf.disable_ipv6)) { - IP6_INC_STATS(dev_net(skb_dst(skb)->dev), idev, + IP6_INC_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return 0; } - if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || - dst_allfrag(skb_dst(skb))) - return ip6_fragment(skb, ip6_output2); - else - return ip6_output2(skb); + return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev, + ip6_finish_output); } /* -- cgit v1.2.2 From 9c6eb28aca52d562f3ffbaebaa56385df9972a43 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 13 Apr 2010 15:32:16 +0200 Subject: netfilter: ipv6: add IPSKB_REROUTED exclusion to NF_HOOK/POSTROUTING invocation Similar to how IPv4's ip_output.c works, have ip6_output also check the IPSKB_REROUTED flag. It will be set from xt_TEE for cloned packets since Xtables can currently only deal with a single packet in flight at a time. Signed-off-by: Jan Engelhardt Acked-by: David S. Miller [Patrick: changed to use an IP6SKB value instead of IPSKB] Signed-off-by: Patrick McHardy --- net/ipv6/ip6_output.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 236ac7813744..c10a38a71a5e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -172,8 +172,9 @@ int ip6_output(struct sk_buff *skb) return 0; } - return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev, - ip6_finish_output); + return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev, + ip6_finish_output, + !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } /* -- cgit v1.2.2 From d8a566beaa75c6ad5e38cdccf0ea5294323e7866 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 13 Apr 2010 05:03:15 +0000 Subject: net: fib_rules: consolidate IPv4 and DECnet ->default_pref() functions. Both functions are equivalent, consolidate them since a following patch needs a third implementation for multicast routing. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/core/fib_rules.c | 18 ++++++++++++++++++ net/decnet/dn_rules.c | 19 +------------------ net/ipv4/fib_rules.c | 19 +------------------ 3 files changed, 20 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 05cce4ec84dd..1eb32276be77 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -39,6 +39,24 @@ int fib_default_rule_add(struct fib_rules_ops *ops, } EXPORT_SYMBOL(fib_default_rule_add); +u32 fib_default_rule_pref(struct fib_rules_ops *ops) +{ + struct list_head *pos; + struct fib_rule *rule; + + if (!list_empty(&ops->rules_list)) { + pos = ops->rules_list.next; + if (pos->next != &ops->rules_list) { + rule = list_entry(pos->next, struct fib_rule, list); + if (rule->pref) + return rule->pref - 1; + } + } + + return 0; +} +EXPORT_SYMBOL(fib_default_rule_pref); + static void notify_rule_change(int event, struct fib_rule *rule, struct fib_rules_ops *ops, struct nlmsghdr *nlh, u32 pid); diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 7466c546f286..2d14093a2c3a 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -212,23 +212,6 @@ nla_put_failure: return -ENOBUFS; } -static u32 dn_fib_rule_default_pref(struct fib_rules_ops *ops) -{ - struct list_head *pos; - struct fib_rule *rule; - - if (!list_empty(&dn_fib_rules_ops->rules_list)) { - pos = dn_fib_rules_ops->rules_list.next; - if (pos->next != &dn_fib_rules_ops->rules_list) { - rule = list_entry(pos->next, struct fib_rule, list); - if (rule->pref) - return rule->pref - 1; - } - } - - return 0; -} - static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops) { dn_rt_cache_flush(-1); @@ -243,7 +226,7 @@ static struct fib_rules_ops dn_fib_rules_ops_template = { .configure = dn_fib_rule_configure, .compare = dn_fib_rule_compare, .fill = dn_fib_rule_fill, - .default_pref = dn_fib_rule_default_pref, + .default_pref = fib_default_rule_pref, .flush_cache = dn_fib_rule_flush_cache, .nlgroup = RTNLGRP_DECnet_RULE, .policy = dn_fib_rule_policy, diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index ca2d07b1c706..73b67849c5b9 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -234,23 +234,6 @@ nla_put_failure: return -ENOBUFS; } -static u32 fib4_rule_default_pref(struct fib_rules_ops *ops) -{ - struct list_head *pos; - struct fib_rule *rule; - - if (!list_empty(&ops->rules_list)) { - pos = ops->rules_list.next; - if (pos->next != &ops->rules_list) { - rule = list_entry(pos->next, struct fib_rule, list); - if (rule->pref) - return rule->pref - 1; - } - } - - return 0; -} - static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule) { return nla_total_size(4) /* dst */ @@ -272,7 +255,7 @@ static struct fib_rules_ops fib4_rules_ops_template = { .configure = fib4_rule_configure, .compare = fib4_rule_compare, .fill = fib4_rule_fill, - .default_pref = fib4_rule_default_pref, + .default_pref = fib_default_rule_pref, .nlmsg_payload = fib4_rule_nlmsg_payload, .flush_cache = fib4_rule_flush_cache, .nlgroup = RTNLGRP_IPV4_RULE, -- cgit v1.2.2 From 28bb17268b92b0c568f2496e5e631008f9108409 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 13 Apr 2010 05:03:16 +0000 Subject: net: fib_rules: set family in fib_rule_hdr centrally All fib_rules implementations need to set the family in their ->fill() functions. Since the value is available to the generic fib_nl_fill_rule() function, set it there. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/core/fib_rules.c | 1 + net/decnet/dn_rules.c | 1 - net/ipv4/fib_rules.c | 1 - net/ipv6/fib6_rules.c | 1 - 4 files changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 1eb32276be77..1bc66592453c 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -535,6 +535,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, return -EMSGSIZE; frh = nlmsg_data(nlh); + frh->family = ops->family; frh->table = rule->table; NLA_PUT_U32(skb, FRA_TABLE, rule->table); frh->res1 = 0; diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 2d14093a2c3a..1c8cc6d5b645 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -196,7 +196,6 @@ static int dn_fib_rule_fill(struct fib_rule *rule, struct sk_buff *skb, { struct dn_fib_rule *r = (struct dn_fib_rule *)rule; - frh->family = AF_DECnet; frh->dst_len = r->dst_len; frh->src_len = r->src_len; frh->tos = 0; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 73b67849c5b9..a18355e15111 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -213,7 +213,6 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, { struct fib4_rule *rule4 = (struct fib4_rule *) rule; - frh->family = AF_INET; frh->dst_len = rule4->dst_len; frh->src_len = rule4->src_len; frh->tos = rule4->tos; diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 5e463c43fcc2..92b2b7fb6c3d 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -208,7 +208,6 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb, { struct fib6_rule *rule6 = (struct fib6_rule *) rule; - frh->family = AF_INET6; frh->dst_len = rule6->dst.plen; frh->src_len = rule6->src.plen; frh->tos = rule6->tclass; -- cgit v1.2.2 From 0f87b1dd01b51dc3c789f7a212656a4a87eee1bd Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 13 Apr 2010 05:03:17 +0000 Subject: net: fib_rules: decouple address families from real address families Decouple the address family values used for fib_rules from the real address families in socket.h. This allows to use fib_rules for code that is not a real address family without increasing AF_MAX/NPROTO. Values up to 127 are reserved for real address families and map directly to the corresponding AF value, values starting from 128 are for other uses. rtnetlink is changed to invoke the AF_UNSPEC dumpit/doit handlers for these families. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 15 ++++++++++----- net/decnet/dn_rules.c | 2 +- net/ipv4/fib_rules.c | 2 +- net/ipv6/fib6_rules.c | 2 +- 4 files changed, 13 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index bf919b6acea2..78c85985cb30 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -118,7 +118,11 @@ static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex) { struct rtnl_link *tab; - tab = rtnl_msg_handlers[protocol]; + if (protocol < NPROTO) + tab = rtnl_msg_handlers[protocol]; + else + tab = NULL; + if (tab == NULL || tab[msgindex].doit == NULL) tab = rtnl_msg_handlers[PF_UNSPEC]; @@ -129,7 +133,11 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex) { struct rtnl_link *tab; - tab = rtnl_msg_handlers[protocol]; + if (protocol < NPROTO) + tab = rtnl_msg_handlers[protocol]; + else + tab = NULL; + if (tab == NULL || tab[msgindex].dumpit == NULL) tab = rtnl_msg_handlers[PF_UNSPEC]; @@ -1444,9 +1452,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return 0; family = ((struct rtgenmsg *)NLMSG_DATA(nlh))->rtgen_family; - if (family >= NPROTO) - return -EAFNOSUPPORT; - sz_idx = type>>2; kind = type&3; diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 1c8cc6d5b645..af28dcc21844 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -217,7 +217,7 @@ static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops) } static struct fib_rules_ops dn_fib_rules_ops_template = { - .family = AF_DECnet, + .family = FIB_RULES_DECNET, .rule_size = sizeof(struct dn_fib_rule), .addr_size = sizeof(u16), .action = dn_fib_rule_action, diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index a18355e15111..3ec84fea5b71 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -246,7 +246,7 @@ static void fib4_rule_flush_cache(struct fib_rules_ops *ops) } static struct fib_rules_ops fib4_rules_ops_template = { - .family = AF_INET, + .family = FIB_RULES_IPV4, .rule_size = sizeof(struct fib4_rule), .addr_size = sizeof(u32), .action = fib4_rule_action, diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 92b2b7fb6c3d..8124f16f2ac2 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -238,7 +238,7 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule) } static struct fib_rules_ops fib6_rules_ops_template = { - .family = AF_INET6, + .family = FIB_RULES_IPV6, .rule_size = sizeof(struct fib6_rule), .addr_size = sizeof(struct in6_addr), .action = fib6_rule_action, -- cgit v1.2.2 From e258beb22f4d3ea3dc88586ffc9c990d0eb03380 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 13 Apr 2010 05:03:19 +0000 Subject: ipv4: ipmr: move unres_queue and timer to per-namespace data The unres_queue is currently shared between all namespaces. Following patches will additionally allow to create multiple multicast routing tables in each namespace. Having a single shared queue for all these users seems to excessive, move the queue and the cleanup timer to the per-namespace data to unshare it. As a side-effect, this fixes a bug in the seq file iteration functions: the first entry returned is always from the current namespace, entries returned after that may belong to any namespace. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv4/ipmr.c | 70 ++++++++++++++++++++++++--------------------------------- 1 file changed, 29 insertions(+), 41 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9d4f6d1340a4..d6aa65e2b08f 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -80,8 +80,6 @@ static DEFINE_RWLOCK(mrt_lock); #define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL) -static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */ - /* Special spinlock for queue of unresolved entries */ static DEFINE_SPINLOCK(mfc_unres_lock); @@ -100,8 +98,6 @@ static int ipmr_cache_report(struct net *net, struct sk_buff *pkt, vifi_t vifi, int assert); static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); -static struct timer_list ipmr_expire_timer; - /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) @@ -364,25 +360,26 @@ static void ipmr_destroy_unres(struct mfc_cache *c) } -/* Single timer process for all the unresolved queue. */ +/* Timer process for the unresolved queue. */ -static void ipmr_expire_process(unsigned long dummy) +static void ipmr_expire_process(unsigned long arg) { + struct net *net = (struct net *)arg; unsigned long now; unsigned long expires; struct mfc_cache *c, **cp; if (!spin_trylock(&mfc_unres_lock)) { - mod_timer(&ipmr_expire_timer, jiffies+HZ/10); + mod_timer(&net->ipv4.ipmr_expire_timer, jiffies+HZ/10); return; } - if (mfc_unres_queue == NULL) + if (net->ipv4.mfc_unres_queue == NULL) goto out; now = jiffies; expires = 10*HZ; - cp = &mfc_unres_queue; + cp = &net->ipv4.mfc_unres_queue; while ((c=*cp) != NULL) { if (time_after(c->mfc_un.unres.expires, now)) { @@ -398,8 +395,8 @@ static void ipmr_expire_process(unsigned long dummy) ipmr_destroy_unres(c); } - if (mfc_unres_queue != NULL) - mod_timer(&ipmr_expire_timer, jiffies + expires); + if (net->ipv4.mfc_unres_queue != NULL) + mod_timer(&net->ipv4.ipmr_expire_timer, jiffies + expires); out: spin_unlock(&mfc_unres_lock); @@ -708,9 +705,8 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb) const struct iphdr *iph = ip_hdr(skb); spin_lock_bh(&mfc_unres_lock); - for (c=mfc_unres_queue; c; c=c->next) { - if (net_eq(mfc_net(c), net) && - c->mfc_mcastgrp == iph->daddr && + for (c=net->ipv4.mfc_unres_queue; c; c=c->next) { + if (c->mfc_mcastgrp == iph->daddr && c->mfc_origin == iph->saddr) break; } @@ -751,10 +747,10 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb) } atomic_inc(&net->ipv4.cache_resolve_queue_len); - c->next = mfc_unres_queue; - mfc_unres_queue = c; + c->next = net->ipv4.mfc_unres_queue; + net->ipv4.mfc_unres_queue = c; - mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires); + mod_timer(&net->ipv4.ipmr_expire_timer, c->mfc_un.unres.expires); } /* @@ -849,18 +845,17 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) * need to send on the frames and tidy up. 
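(Illustrative sketch — the reworked ipmr_expire_process() above walks the per-namespace unresolved queue, destroys entries whose deadline has passed, and re-arms the timer for the nearest remaining deadline. The plain userspace model below mirrors only that scan; the entry type, the free() in place of ipmr_destroy_unres(), and the return convention are invented for the example and are not kernel code.)

    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>

    struct unres_entry {                /* stand-in for a queued mfc_cache */
        time_t expires;
        struct unres_entry *next;
    };

    /* Drop expired entries from *head; return seconds until the next expiry,
     * or -1 when the queue is empty (the timer would not be re-armed). */
    static long expire_scan(struct unres_entry **head, time_t now)
    {
        long next_in = -1;
        struct unres_entry *e;

        while ((e = *head) != NULL) {
            if (e->expires > now) {
                long in = (long)(e->expires - now);
                if (next_in < 0 || in < next_in)
                    next_in = in;
                head = &e->next;
                continue;
            }
            *head = e->next;            /* unlink, cf. ipmr_destroy_unres() */
            free(e);
        }
        return next_in;
    }

    int main(void)
    {
        time_t now = time(NULL);
        struct unres_entry *q = NULL;
        long delays[] = { -5, 3, 10 };  /* one already expired, two pending */

        for (int i = 0; i < 3; i++) {
            struct unres_entry *e = malloc(sizeof(*e));
            e->expires = now + delays[i];
            e->next = q;
            q = e;
        }
        printf("re-arm in %ld s\n", expire_scan(&q, now));  /* prints 3 */
        return 0;
    }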
*/ spin_lock_bh(&mfc_unres_lock); - for (cp = &mfc_unres_queue; (uc=*cp) != NULL; + for (cp = &net->ipv4.mfc_unres_queue; (uc=*cp) != NULL; cp = &uc->next) { - if (net_eq(mfc_net(uc), net) && - uc->mfc_origin == c->mfc_origin && + if (uc->mfc_origin == c->mfc_origin && uc->mfc_mcastgrp == c->mfc_mcastgrp) { *cp = uc->next; atomic_dec(&net->ipv4.cache_resolve_queue_len); break; } } - if (mfc_unres_queue == NULL) - del_timer(&ipmr_expire_timer); + if (net->ipv4.mfc_unres_queue == NULL) + del_timer(&net->ipv4.ipmr_expire_timer); spin_unlock_bh(&mfc_unres_lock); if (uc) { @@ -912,14 +907,9 @@ static void mroute_clean_tables(struct net *net) struct mfc_cache *c, **cp; spin_lock_bh(&mfc_unres_lock); - cp = &mfc_unres_queue; + cp = &net->ipv4.mfc_unres_queue; while ((c = *cp) != NULL) { - if (!net_eq(mfc_net(c), net)) { - cp = &c->next; - continue; - } *cp = c->next; - ipmr_destroy_unres(c); } spin_unlock_bh(&mfc_unres_lock); @@ -1819,11 +1809,10 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net, return mfc; read_unlock(&mrt_lock); - it->cache = &mfc_unres_queue; + it->cache = &net->ipv4.mfc_unres_queue; spin_lock_bh(&mfc_unres_lock); - for (mfc = mfc_unres_queue; mfc; mfc = mfc->next) - if (net_eq(mfc_net(mfc), net) && - pos-- == 0) + for (mfc = net->ipv4.mfc_unres_queue; mfc; mfc = mfc->next) + if (pos-- == 0) return mfc; spin_unlock_bh(&mfc_unres_lock); @@ -1857,7 +1846,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (mfc->next) return mfc->next; - if (it->cache == &mfc_unres_queue) + if (it->cache == &net->ipv4.mfc_unres_queue) goto end_of_list; BUG_ON(it->cache != net->ipv4.mfc_cache_array); @@ -1870,13 +1859,11 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) /* exhausted cache_array, show unresolved */ read_unlock(&mrt_lock); - it->cache = &mfc_unres_queue; + it->cache = &net->ipv4.mfc_unres_queue; it->ct = 0; spin_lock_bh(&mfc_unres_lock); - mfc = mfc_unres_queue; - while (mfc && !net_eq(mfc_net(mfc), net)) - mfc = mfc->next; + mfc = net->ipv4.mfc_unres_queue; if (mfc) return mfc; @@ -1892,7 +1879,7 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) struct ipmr_mfc_iter *it = seq->private; struct net *net = seq_file_net(seq); - if (it->cache == &mfc_unres_queue) + if (it->cache == &net->ipv4.mfc_unres_queue) spin_unlock_bh(&mfc_unres_lock); else if (it->cache == net->ipv4.mfc_cache_array) read_unlock(&mrt_lock); @@ -1915,7 +1902,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) (unsigned long) mfc->mfc_origin, mfc->mfc_parent); - if (it->cache != &mfc_unres_queue) { + if (it->cache != &net->ipv4.mfc_unres_queue) { seq_printf(seq, " %8lu %8lu %8lu", mfc->mfc_un.res.pkt, mfc->mfc_un.res.bytes, @@ -1992,6 +1979,9 @@ static int __net_init ipmr_net_init(struct net *net) goto fail_mfc_cache; } + setup_timer(&net->ipv4.ipmr_expire_timer, ipmr_expire_process, + (unsigned long)net); + #ifdef CONFIG_IP_PIMSM net->ipv4.mroute_reg_vif_num = -1; #endif @@ -2047,7 +2037,6 @@ int __init ip_mr_init(void) if (err) goto reg_pernet_fail; - setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0); err = register_netdevice_notifier(&ip_mr_notifier); if (err) goto reg_notif_fail; @@ -2065,7 +2054,6 @@ add_proto_fail: unregister_netdevice_notifier(&ip_mr_notifier); #endif reg_notif_fail: - del_timer(&ipmr_expire_timer); unregister_pernet_subsys(&ipmr_net_ops); reg_pernet_fail: kmem_cache_destroy(mrt_cachep); -- cgit v1.2.2 From d658f8a0e63b6476148162aa7a3ffffc58dcad52 Mon Sep 17 00:00:00 2001 From: Patrick 
McHardy Date: Tue, 13 Apr 2010 05:03:20 +0000 Subject: ipv4: ipmr: remove net pointer from struct mfc_cache Now that cache entries in unres_queue don't need to be distinguished by their network namespace pointer anymore, we can remove it from struct mfc_cache add pass the namespace as function argument to the functions that need it. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 65 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 32 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index d6aa65e2b08f..f8e25c8ba070 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -93,10 +93,12 @@ static DEFINE_SPINLOCK(mfc_unres_lock); static struct kmem_cache *mrt_cachep __read_mostly; -static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local); +static int ip_mr_forward(struct net *net, struct sk_buff *skb, + struct mfc_cache *cache, int local); static int ipmr_cache_report(struct net *net, struct sk_buff *pkt, vifi_t vifi, int assert); -static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); +static int ipmr_fill_mroute(struct net *net, struct sk_buff *skb, + struct mfc_cache *c, struct rtmsg *rtm); /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ @@ -325,7 +327,6 @@ static int vif_delete(struct net *net, int vifi, int notify, static inline void ipmr_cache_free(struct mfc_cache *c) { - release_net(mfc_net(c)); kmem_cache_free(mrt_cachep, c); } @@ -333,11 +334,10 @@ static inline void ipmr_cache_free(struct mfc_cache *c) and reporting error to netlink readers. */ -static void ipmr_destroy_unres(struct mfc_cache *c) +static void ipmr_destroy_unres(struct net *net, struct mfc_cache *c) { struct sk_buff *skb; struct nlmsgerr *e; - struct net *net = mfc_net(c); atomic_dec(&net->ipv4.cache_resolve_queue_len); @@ -392,7 +392,7 @@ static void ipmr_expire_process(unsigned long arg) *cp = c->next; - ipmr_destroy_unres(c); + ipmr_destroy_unres(net, c); } if (net->ipv4.mfc_unres_queue != NULL) @@ -404,10 +404,10 @@ out: /* Fill oifs list. It is called under write locked mrt_lock. 
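(Illustrative sketch for the ipmr_update_thresholds() hunk that follows: the function records a TTL threshold per VIF and keeps minvif/maxvif as bounds for the later forwarding scan. The userspace model below keeps only that arithmetic — the struct name, the missing VIF_EXISTS() check and the absence of locking are simplifications for the example, not kernel behaviour.)

    #include <stdio.h>
    #include <string.h>

    #define MAXVIFS 32

    struct res {            /* mirrors the mfc_un.res fields being filled */
        unsigned char minvif, maxvif, ttls[MAXVIFS];
    };

    /* Only VIFs with a TTL threshold in 1..254 take part in forwarding;
     * minvif/maxvif bound the range the forwarding loop has to visit. */
    static void update_thresholds(struct res *r, const unsigned char *ttls,
                                  int maxvif)
    {
        r->minvif = MAXVIFS;
        r->maxvif = 0;
        memset(r->ttls, 255, MAXVIFS);

        for (int vifi = 0; vifi < maxvif; vifi++) {
            if (ttls[vifi] && ttls[vifi] < 255) {
                r->ttls[vifi] = ttls[vifi];
                if (r->minvif > vifi)
                    r->minvif = vifi;
                if (r->maxvif <= vifi)
                    r->maxvif = vifi + 1;
            }
        }
    }

    int main(void)
    {
        unsigned char ttls[MAXVIFS] = { 0, 1, 0, 64, 255 };
        struct res r;

        update_thresholds(&r, ttls, 5);
        printf("minvif=%u maxvif=%u\n", r.minvif, r.maxvif);  /* 1 and 4 */
        return 0;
    }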
*/ -static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls) +static void ipmr_update_thresholds(struct net *net, struct mfc_cache *cache, + unsigned char *ttls) { int vifi; - struct net *net = mfc_net(cache); cache->mfc_un.res.minvif = MAXVIFS; cache->mfc_un.res.maxvif = 0; @@ -547,24 +547,22 @@ static struct mfc_cache *ipmr_cache_find(struct net *net, /* * Allocate a multicast cache entry */ -static struct mfc_cache *ipmr_cache_alloc(struct net *net) +static struct mfc_cache *ipmr_cache_alloc(void) { struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); if (c == NULL) return NULL; c->mfc_un.res.minvif = MAXVIFS; - mfc_net_set(c, net); return c; } -static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net) +static struct mfc_cache *ipmr_cache_alloc_unres(void) { struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); if (c == NULL) return NULL; skb_queue_head_init(&c->mfc_un.unres.unresolved); c->mfc_un.unres.expires = jiffies + 10*HZ; - mfc_net_set(c, net); return c; } @@ -572,7 +570,8 @@ static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net) * A cache entry has gone into a resolved state from queued */ -static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) +static void ipmr_cache_resolve(struct net *net, struct mfc_cache *uc, + struct mfc_cache *c) { struct sk_buff *skb; struct nlmsgerr *e; @@ -585,7 +584,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); - if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) { + if (ipmr_fill_mroute(net, skb, c, NLMSG_DATA(nlh)) > 0) { nlh->nlmsg_len = (skb_tail_pointer(skb) - (u8 *)nlh); } else { @@ -597,9 +596,9 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) memset(&e->msg, 0, sizeof(e->msg)); } - rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid); + rtnl_unicast(skb, net, NETLINK_CB(skb).pid); } else - ip_mr_forward(skb, c, 0); + ip_mr_forward(net, skb, c, 0); } } @@ -717,7 +716,7 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb) */ if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 || - (c = ipmr_cache_alloc_unres(net)) == NULL) { + (c = ipmr_cache_alloc_unres()) == NULL) { spin_unlock_bh(&mfc_unres_lock); kfree_skb(skb); @@ -814,7 +813,7 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) if (c != NULL) { write_lock_bh(&mrt_lock); c->mfc_parent = mfc->mfcc_parent; - ipmr_update_thresholds(c, mfc->mfcc_ttls); + ipmr_update_thresholds(net, c, mfc->mfcc_ttls); if (!mrtsock) c->mfc_flags |= MFC_STATIC; write_unlock_bh(&mrt_lock); @@ -824,14 +823,14 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) return -EINVAL; - c = ipmr_cache_alloc(net); + c = ipmr_cache_alloc(); if (c == NULL) return -ENOMEM; c->mfc_origin = mfc->mfcc_origin.s_addr; c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr; c->mfc_parent = mfc->mfcc_parent; - ipmr_update_thresholds(c, mfc->mfcc_ttls); + ipmr_update_thresholds(net, c, mfc->mfcc_ttls); if (!mrtsock) c->mfc_flags |= MFC_STATIC; @@ -859,7 +858,7 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) spin_unlock_bh(&mfc_unres_lock); if (uc) { - ipmr_cache_resolve(uc, c); + ipmr_cache_resolve(net, uc, c); ipmr_cache_free(uc); } return 0; @@ -910,7 +909,7 @@ static void mroute_clean_tables(struct net *net) cp = 
&net->ipv4.mfc_unres_queue; while ((c = *cp) != NULL) { *cp = c->next; - ipmr_destroy_unres(c); + ipmr_destroy_unres(net, c); } spin_unlock_bh(&mfc_unres_lock); } @@ -1221,9 +1220,9 @@ static inline int ipmr_forward_finish(struct sk_buff *skb) * Processing handlers for ipmr_forward */ -static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) +static void ipmr_queue_xmit(struct net *net, struct sk_buff *skb, + struct mfc_cache *c, int vifi) { - struct net *net = mfc_net(c); const struct iphdr *iph = ip_hdr(skb); struct vif_device *vif = &net->ipv4.vif_table[vifi]; struct net_device *dev; @@ -1335,11 +1334,11 @@ static int ipmr_find_vif(struct net_device *dev) /* "local" means that we should preserve one skb (for local delivery) */ -static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local) +static int ip_mr_forward(struct net *net, struct sk_buff *skb, + struct mfc_cache *cache, int local) { int psend = -1; int vif, ct; - struct net *net = mfc_net(cache); vif = cache->mfc_parent; cache->mfc_un.res.pkt++; @@ -1396,7 +1395,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local if (psend != -1) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) - ipmr_queue_xmit(skb2, cache, psend); + ipmr_queue_xmit(net, skb2, cache, psend); } psend = ct; } @@ -1405,9 +1404,9 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local if (local) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) - ipmr_queue_xmit(skb2, cache, psend); + ipmr_queue_xmit(net, skb2, cache, psend); } else { - ipmr_queue_xmit(skb, cache, psend); + ipmr_queue_xmit(net, skb, cache, psend); return 0; } } @@ -1488,7 +1487,7 @@ int ip_mr_input(struct sk_buff *skb) return -ENODEV; } - ip_mr_forward(skb, cache, local); + ip_mr_forward(net, skb, cache, local); read_unlock(&mrt_lock); @@ -1602,11 +1601,11 @@ drop: #endif static int -ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm) +ipmr_fill_mroute(struct net *net, struct sk_buff *skb, struct mfc_cache *c, + struct rtmsg *rtm) { int ct; struct rtnexthop *nhp; - struct net *net = mfc_net(c); u8 *b = skb_tail_pointer(skb); struct rtattr *mp_head; @@ -1686,7 +1685,7 @@ int ipmr_get_route(struct net *net, if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) cache->mfc_flags |= MFC_NOTIFY; - err = ipmr_fill_mroute(skb, cache, rtm); + err = ipmr_fill_mroute(net, skb, cache, rtm); read_unlock(&mrt_lock); return err; } -- cgit v1.2.2 From 862465f2e7e90975e7bf0ecfbb171dd3adedd950 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 13 Apr 2010 05:03:21 +0000 Subject: ipv4: ipmr: convert struct mfc_cache to struct list_head Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv4/ipmr.c | 125 +++++++++++++++++++++++++++----------------------------- 1 file changed, 61 insertions(+), 64 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index f8e25c8ba070..21b5edc2f343 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -367,35 +367,32 @@ static void ipmr_expire_process(unsigned long arg) struct net *net = (struct net *)arg; unsigned long now; unsigned long expires; - struct mfc_cache *c, **cp; + struct mfc_cache *c, *next; if (!spin_trylock(&mfc_unres_lock)) { mod_timer(&net->ipv4.ipmr_expire_timer, jiffies+HZ/10); return; } - if (net->ipv4.mfc_unres_queue == NULL) + if (list_empty(&net->ipv4.mfc_unres_queue)) goto out; now = jiffies; expires = 10*HZ; - cp = &net->ipv4.mfc_unres_queue; - while ((c=*cp) != NULL) { + list_for_each_entry_safe(c, next, &net->ipv4.mfc_unres_queue, list) { if (time_after(c->mfc_un.unres.expires, now)) { unsigned long interval = c->mfc_un.unres.expires - now; if (interval < expires) expires = interval; - cp = &c->next; continue; } - *cp = c->next; - + list_del(&c->list); ipmr_destroy_unres(net, c); } - if (net->ipv4.mfc_unres_queue != NULL) + if (!list_empty(&net->ipv4.mfc_unres_queue)) mod_timer(&net->ipv4.ipmr_expire_timer, jiffies + expires); out: @@ -537,11 +534,11 @@ static struct mfc_cache *ipmr_cache_find(struct net *net, int line = MFC_HASH(mcastgrp, origin); struct mfc_cache *c; - for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) { - if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp) - break; + list_for_each_entry(c, &net->ipv4.mfc_cache_array[line], list) { + if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp) + return c; } - return c; + return NULL; } /* @@ -699,18 +696,21 @@ static int ipmr_cache_report(struct net *net, static int ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb) { + bool found = false; int err; struct mfc_cache *c; const struct iphdr *iph = ip_hdr(skb); spin_lock_bh(&mfc_unres_lock); - for (c=net->ipv4.mfc_unres_queue; c; c=c->next) { + list_for_each_entry(c, &net->ipv4.mfc_unres_queue, list) { if (c->mfc_mcastgrp == iph->daddr && - c->mfc_origin == iph->saddr) + c->mfc_origin == iph->saddr) { + found = true; break; + } } - if (c == NULL) { + if (!found) { /* * Create a new entry if allowable */ @@ -746,8 +746,7 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb) } atomic_inc(&net->ipv4.cache_resolve_queue_len); - c->next = net->ipv4.mfc_unres_queue; - net->ipv4.mfc_unres_queue = c; + list_add(&c->list, &net->ipv4.mfc_unres_queue); mod_timer(&net->ipv4.ipmr_expire_timer, c->mfc_un.unres.expires); } @@ -774,16 +773,15 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb) static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc) { int line; - struct mfc_cache *c, **cp; + struct mfc_cache *c, *next; line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); - for (cp = &net->ipv4.mfc_cache_array[line]; - (c = *cp) != NULL; cp = &c->next) { + list_for_each_entry_safe(c, next, &net->ipv4.mfc_cache_array[line], list) { if (c->mfc_origin == mfc->mfcc_origin.s_addr && c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { write_lock_bh(&mrt_lock); - *cp = c->next; + list_del(&c->list); write_unlock_bh(&mrt_lock); ipmr_cache_free(c); @@ -795,22 +793,24 @@ static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc) static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) { + bool found = false; int line; - struct mfc_cache *uc, *c, **cp; + struct 
mfc_cache *uc, *c; if (mfc->mfcc_parent >= MAXVIFS) return -ENFILE; line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); - for (cp = &net->ipv4.mfc_cache_array[line]; - (c = *cp) != NULL; cp = &c->next) { + list_for_each_entry(c, &net->ipv4.mfc_cache_array[line], list) { if (c->mfc_origin == mfc->mfcc_origin.s_addr && - c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) + c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { + found = true; break; + } } - if (c != NULL) { + if (found) { write_lock_bh(&mrt_lock); c->mfc_parent = mfc->mfcc_parent; ipmr_update_thresholds(net, c, mfc->mfcc_ttls); @@ -835,8 +835,7 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) c->mfc_flags |= MFC_STATIC; write_lock_bh(&mrt_lock); - c->next = net->ipv4.mfc_cache_array[line]; - net->ipv4.mfc_cache_array[line] = c; + list_add(&c->list, &net->ipv4.mfc_cache_array[line]); write_unlock_bh(&mrt_lock); /* @@ -844,16 +843,15 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) * need to send on the frames and tidy up. */ spin_lock_bh(&mfc_unres_lock); - for (cp = &net->ipv4.mfc_unres_queue; (uc=*cp) != NULL; - cp = &uc->next) { + list_for_each_entry(uc, &net->ipv4.mfc_unres_queue, list) { if (uc->mfc_origin == c->mfc_origin && uc->mfc_mcastgrp == c->mfc_mcastgrp) { - *cp = uc->next; + list_del(&uc->list); atomic_dec(&net->ipv4.cache_resolve_queue_len); break; } } - if (net->ipv4.mfc_unres_queue == NULL) + if (list_empty(&net->ipv4.mfc_unres_queue)) del_timer(&net->ipv4.ipmr_expire_timer); spin_unlock_bh(&mfc_unres_lock); @@ -872,6 +870,7 @@ static void mroute_clean_tables(struct net *net) { int i; LIST_HEAD(list); + struct mfc_cache *c, *next; /* * Shut down all active vif entries @@ -885,17 +884,12 @@ static void mroute_clean_tables(struct net *net) /* * Wipe the cache */ - for (i=0; iipv4.mfc_cache_array[i]; - while ((c = *cp) != NULL) { - if (c->mfc_flags&MFC_STATIC) { - cp = &c->next; + for (i = 0; i < MFC_LINES; i++) { + list_for_each_entry_safe(c, next, &net->ipv4.mfc_cache_array[i], list) { + if (c->mfc_flags&MFC_STATIC) continue; - } write_lock_bh(&mrt_lock); - *cp = c->next; + list_del(&c->list); write_unlock_bh(&mrt_lock); ipmr_cache_free(c); @@ -903,12 +897,9 @@ static void mroute_clean_tables(struct net *net) } if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) { - struct mfc_cache *c, **cp; - spin_lock_bh(&mfc_unres_lock); - cp = &net->ipv4.mfc_unres_queue; - while ((c = *cp) != NULL) { - *cp = c->next; + list_for_each_entry_safe(c, next, &net->ipv4.mfc_unres_queue, list) { + list_del(&c->list); ipmr_destroy_unres(net, c); } spin_unlock_bh(&mfc_unres_lock); @@ -1789,7 +1780,7 @@ static const struct file_operations ipmr_vif_fops = { struct ipmr_mfc_iter { struct seq_net_private p; - struct mfc_cache **cache; + struct list_head *cache; int ct; }; @@ -1799,18 +1790,18 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net, { struct mfc_cache *mfc; - it->cache = net->ipv4.mfc_cache_array; read_lock(&mrt_lock); - for (it->ct = 0; it->ct < MFC_LINES; it->ct++) - for (mfc = net->ipv4.mfc_cache_array[it->ct]; - mfc; mfc = mfc->next) + for (it->ct = 0; it->ct < MFC_LINES; it->ct++) { + it->cache = &net->ipv4.mfc_cache_array[it->ct]; + list_for_each_entry(mfc, it->cache, list) if (pos-- == 0) return mfc; + } read_unlock(&mrt_lock); - it->cache = &net->ipv4.mfc_unres_queue; spin_lock_bh(&mfc_unres_lock); - for (mfc = net->ipv4.mfc_unres_queue; mfc; mfc = mfc->next) + it->cache = &net->ipv4.mfc_unres_queue; + list_for_each_entry(mfc, it->cache, 
list) if (pos-- == 0) return mfc; spin_unlock_bh(&mfc_unres_lock); @@ -1842,18 +1833,19 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (v == SEQ_START_TOKEN) return ipmr_mfc_seq_idx(net, seq->private, 0); - if (mfc->next) - return mfc->next; + if (mfc->list.next != it->cache) + return list_entry(mfc->list.next, struct mfc_cache, list); if (it->cache == &net->ipv4.mfc_unres_queue) goto end_of_list; - BUG_ON(it->cache != net->ipv4.mfc_cache_array); + BUG_ON(it->cache != &net->ipv4.mfc_cache_array[it->ct]); while (++it->ct < MFC_LINES) { - mfc = net->ipv4.mfc_cache_array[it->ct]; - if (mfc) - return mfc; + it->cache = &net->ipv4.mfc_cache_array[it->ct]; + if (list_empty(it->cache)) + continue; + return list_first_entry(it->cache, struct mfc_cache, list); } /* exhausted cache_array, show unresolved */ @@ -1862,9 +1854,8 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) it->ct = 0; spin_lock_bh(&mfc_unres_lock); - mfc = net->ipv4.mfc_unres_queue; - if (mfc) - return mfc; + if (!list_empty(it->cache)) + return list_first_entry(it->cache, struct mfc_cache, list); end_of_list: spin_unlock_bh(&mfc_unres_lock); @@ -1880,7 +1871,7 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) if (it->cache == &net->ipv4.mfc_unres_queue) spin_unlock_bh(&mfc_unres_lock); - else if (it->cache == net->ipv4.mfc_cache_array) + else if (it->cache == &net->ipv4.mfc_cache_array[it->ct]) read_unlock(&mrt_lock); } @@ -1960,6 +1951,7 @@ static const struct net_protocol pim_protocol = { */ static int __net_init ipmr_net_init(struct net *net) { + unsigned int i; int err = 0; net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device), @@ -1971,13 +1963,18 @@ static int __net_init ipmr_net_init(struct net *net) /* Forwarding cache */ net->ipv4.mfc_cache_array = kcalloc(MFC_LINES, - sizeof(struct mfc_cache *), + sizeof(struct list_head), GFP_KERNEL); if (!net->ipv4.mfc_cache_array) { err = -ENOMEM; goto fail_mfc_cache; } + for (i = 0; i < MFC_LINES; i++) + INIT_LIST_HEAD(&net->ipv4.mfc_cache_array[i]); + + INIT_LIST_HEAD(&net->ipv4.mfc_unres_queue); + setup_timer(&net->ipv4.ipmr_expire_timer, ipmr_expire_process, (unsigned long)net); -- cgit v1.2.2 From 0c12295a741d3186987f96f518cfbdaf01abb087 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 13 Apr 2010 05:03:22 +0000 Subject: ipv4: ipmr: move mroute data into seperate structure Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 369 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 199 insertions(+), 170 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 21b5edc2f343..498f4e907d52 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -68,6 +68,21 @@ #define CONFIG_IP_PIMSM 1 #endif +struct mr_table { + struct sock *mroute_sk; + struct timer_list ipmr_expire_timer; + struct list_head mfc_unres_queue; + struct list_head mfc_cache_array[MFC_LINES]; + struct vif_device vif_table[MAXVIFS]; + int maxvif; + atomic_t cache_resolve_queue_len; + int mroute_do_assert; + int mroute_do_pim; +#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) + int mroute_reg_vif_num; +#endif +}; + /* Big lock, protecting vif table, mrt cache and mroute socket state. Note that the changes are semaphored via rtnl_lock. 
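(Illustrative sketch — mrt_lock in the comment above is a reader/writer lock: lookup and forwarding paths take it for reading, table updates take it for writing with bottom halves disabled. The userspace analogue below uses POSIX rwlocks to show the same discipline; there is no _bh variant outside the kernel, and the vif_count variable is invented for the example.)

    #include <pthread.h>
    #include <stdio.h>

    static pthread_rwlock_t table_lock = PTHREAD_RWLOCK_INITIALIZER;
    static int vif_count;               /* stands in for the protected tables */

    /* Lookup path: many readers may run concurrently,
     * cf. read_lock(&mrt_lock) around ipmr_cache_find(). */
    static int lookup(void)
    {
        pthread_rwlock_rdlock(&table_lock);
        int n = vif_count;
        pthread_rwlock_unlock(&table_lock);
        return n;
    }

    /* Update path: exclusive, cf. write_lock_bh(&mrt_lock) around vif_add(). */
    static void update(void)
    {
        pthread_rwlock_wrlock(&table_lock);
        vif_count++;
        pthread_rwlock_unlock(&table_lock);
    }

    int main(void)                      /* build with -lpthread */
    {
        update();
        update();
        printf("vifs: %d\n", lookup());
        return 0;
    }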
*/ @@ -78,7 +93,7 @@ static DEFINE_RWLOCK(mrt_lock); * Multicast router control variables */ -#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL) +#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL) /* Special spinlock for queue of unresolved entries */ static DEFINE_SPINLOCK(mfc_unres_lock); @@ -93,11 +108,12 @@ static DEFINE_SPINLOCK(mfc_unres_lock); static struct kmem_cache *mrt_cachep __read_mostly; -static int ip_mr_forward(struct net *net, struct sk_buff *skb, - struct mfc_cache *cache, int local); -static int ipmr_cache_report(struct net *net, +static int ip_mr_forward(struct net *net, struct mr_table *mrt, + struct sk_buff *skb, struct mfc_cache *cache, + int local); +static int ipmr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert); -static int ipmr_fill_mroute(struct net *net, struct sk_buff *skb, +static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ @@ -199,12 +215,12 @@ failure: static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { struct net *net = dev_net(dev); + struct mr_table *mrt = net->ipv4.mrt; read_lock(&mrt_lock); dev->stats.tx_bytes += skb->len; dev->stats.tx_packets++; - ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num, - IGMPMSG_WHOLEPKT); + ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT); read_unlock(&mrt_lock); kfree_skb(skb); return NETDEV_TX_OK; @@ -274,17 +290,17 @@ failure: * @notify: Set to 1, if the caller is a notifier_call */ -static int vif_delete(struct net *net, int vifi, int notify, +static int vif_delete(struct mr_table *mrt, int vifi, int notify, struct list_head *head) { struct vif_device *v; struct net_device *dev; struct in_device *in_dev; - if (vifi < 0 || vifi >= net->ipv4.maxvif) + if (vifi < 0 || vifi >= mrt->maxvif) return -EADDRNOTAVAIL; - v = &net->ipv4.vif_table[vifi]; + v = &mrt->vif_table[vifi]; write_lock_bh(&mrt_lock); dev = v->dev; @@ -296,17 +312,17 @@ static int vif_delete(struct net *net, int vifi, int notify, } #ifdef CONFIG_IP_PIMSM - if (vifi == net->ipv4.mroute_reg_vif_num) - net->ipv4.mroute_reg_vif_num = -1; + if (vifi == mrt->mroute_reg_vif_num) + mrt->mroute_reg_vif_num = -1; #endif - if (vifi+1 == net->ipv4.maxvif) { + if (vifi+1 == mrt->maxvif) { int tmp; for (tmp=vifi-1; tmp>=0; tmp--) { - if (VIF_EXISTS(net, tmp)) + if (VIF_EXISTS(mrt, tmp)) break; } - net->ipv4.maxvif = tmp+1; + mrt->maxvif = tmp+1; } write_unlock_bh(&mrt_lock); @@ -334,12 +350,13 @@ static inline void ipmr_cache_free(struct mfc_cache *c) and reporting error to netlink readers. 
*/ -static void ipmr_destroy_unres(struct net *net, struct mfc_cache *c) +static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) { + struct net *net = NULL; //mrt->net; struct sk_buff *skb; struct nlmsgerr *e; - atomic_dec(&net->ipv4.cache_resolve_queue_len); + atomic_dec(&mrt->cache_resolve_queue_len); while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) { if (ip_hdr(skb)->version == 0) { @@ -364,23 +381,23 @@ static void ipmr_destroy_unres(struct net *net, struct mfc_cache *c) static void ipmr_expire_process(unsigned long arg) { - struct net *net = (struct net *)arg; + struct mr_table *mrt = (struct mr_table *)arg; unsigned long now; unsigned long expires; struct mfc_cache *c, *next; if (!spin_trylock(&mfc_unres_lock)) { - mod_timer(&net->ipv4.ipmr_expire_timer, jiffies+HZ/10); + mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10); return; } - if (list_empty(&net->ipv4.mfc_unres_queue)) + if (list_empty(&mrt->mfc_unres_queue)) goto out; now = jiffies; expires = 10*HZ; - list_for_each_entry_safe(c, next, &net->ipv4.mfc_unres_queue, list) { + list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { if (time_after(c->mfc_un.unres.expires, now)) { unsigned long interval = c->mfc_un.unres.expires - now; if (interval < expires) @@ -389,11 +406,11 @@ static void ipmr_expire_process(unsigned long arg) } list_del(&c->list); - ipmr_destroy_unres(net, c); + ipmr_destroy_unres(mrt, c); } - if (!list_empty(&net->ipv4.mfc_unres_queue)) - mod_timer(&net->ipv4.ipmr_expire_timer, jiffies + expires); + if (!list_empty(&mrt->mfc_unres_queue)) + mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); out: spin_unlock(&mfc_unres_lock); @@ -401,7 +418,7 @@ out: /* Fill oifs list. It is called under write locked mrt_lock. */ -static void ipmr_update_thresholds(struct net *net, struct mfc_cache *cache, +static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache, unsigned char *ttls) { int vifi; @@ -410,8 +427,8 @@ static void ipmr_update_thresholds(struct net *net, struct mfc_cache *cache, cache->mfc_un.res.maxvif = 0; memset(cache->mfc_un.res.ttls, 255, MAXVIFS); - for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) { - if (VIF_EXISTS(net, vifi) && + for (vifi = 0; vifi < mrt->maxvif; vifi++) { + if (VIF_EXISTS(mrt, vifi) && ttls[vifi] && ttls[vifi] < 255) { cache->mfc_un.res.ttls[vifi] = ttls[vifi]; if (cache->mfc_un.res.minvif > vifi) @@ -422,16 +439,17 @@ static void ipmr_update_thresholds(struct net *net, struct mfc_cache *cache, } } -static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock) +static int vif_add(struct net *net, struct mr_table *mrt, + struct vifctl *vifc, int mrtsock) { int vifi = vifc->vifc_vifi; - struct vif_device *v = &net->ipv4.vif_table[vifi]; + struct vif_device *v = &mrt->vif_table[vifi]; struct net_device *dev; struct in_device *in_dev; int err; /* Is vif busy ? 
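(Illustrative sketch — vif_add()/vif_delete() above treat the VIF table as a fixed array of slots, with maxvif kept one past the highest slot in use and rescanned downwards on removal. The userspace model below reduces the dev pointer to an in-use flag; the error values and the missing locking are simplifications for the example.)

    #include <stdio.h>

    #define MAXVIFS 32

    struct vif_table {
        int in_use[MAXVIFS];            /* stands in for vif_table[i].dev != NULL */
        int maxvif;                     /* one past the highest slot in use */
    };

    static int vif_add(struct vif_table *t, int vifi)
    {
        if (vifi < 0 || vifi >= MAXVIFS || t->in_use[vifi])
            return -1;                  /* cf. -EADDRINUSE */
        t->in_use[vifi] = 1;
        if (vifi + 1 > t->maxvif)
            t->maxvif = vifi + 1;
        return 0;
    }

    static int vif_delete(struct vif_table *t, int vifi)
    {
        if (vifi < 0 || vifi >= t->maxvif || !t->in_use[vifi])
            return -1;                  /* cf. -EADDRNOTAVAIL */
        t->in_use[vifi] = 0;
        if (vifi + 1 == t->maxvif) {    /* highest slot went away: rescan */
            int tmp;
            for (tmp = vifi - 1; tmp >= 0; tmp--)
                if (t->in_use[tmp])
                    break;
            t->maxvif = tmp + 1;
        }
        return 0;
    }

    int main(void)
    {
        struct vif_table t = { .maxvif = 0 };

        vif_add(&t, 0);
        vif_add(&t, 3);
        vif_delete(&t, 3);
        printf("maxvif=%d\n", t.maxvif);  /* back to 1 */
        return 0;
    }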
*/ - if (VIF_EXISTS(net, vifi)) + if (VIF_EXISTS(mrt, vifi)) return -EADDRINUSE; switch (vifc->vifc_flags) { @@ -441,7 +459,7 @@ static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock) * Special Purpose VIF in PIM * All the packets will be sent to the daemon */ - if (net->ipv4.mroute_reg_vif_num >= 0) + if (mrt->mroute_reg_vif_num >= 0) return -EADDRINUSE; dev = ipmr_reg_vif(net); if (!dev) @@ -519,22 +537,22 @@ static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock) v->dev = dev; #ifdef CONFIG_IP_PIMSM if (v->flags&VIFF_REGISTER) - net->ipv4.mroute_reg_vif_num = vifi; + mrt->mroute_reg_vif_num = vifi; #endif - if (vifi+1 > net->ipv4.maxvif) - net->ipv4.maxvif = vifi+1; + if (vifi+1 > mrt->maxvif) + mrt->maxvif = vifi+1; write_unlock_bh(&mrt_lock); return 0; } -static struct mfc_cache *ipmr_cache_find(struct net *net, +static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt, __be32 origin, __be32 mcastgrp) { int line = MFC_HASH(mcastgrp, origin); struct mfc_cache *c; - list_for_each_entry(c, &net->ipv4.mfc_cache_array[line], list) { + list_for_each_entry(c, &mrt->mfc_cache_array[line], list) { if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp) return c; } @@ -567,8 +585,8 @@ static struct mfc_cache *ipmr_cache_alloc_unres(void) * A cache entry has gone into a resolved state from queued */ -static void ipmr_cache_resolve(struct net *net, struct mfc_cache *uc, - struct mfc_cache *c) +static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, + struct mfc_cache *uc, struct mfc_cache *c) { struct sk_buff *skb; struct nlmsgerr *e; @@ -581,7 +599,7 @@ static void ipmr_cache_resolve(struct net *net, struct mfc_cache *uc, if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); - if (ipmr_fill_mroute(net, skb, c, NLMSG_DATA(nlh)) > 0) { + if (ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { nlh->nlmsg_len = (skb_tail_pointer(skb) - (u8 *)nlh); } else { @@ -595,7 +613,7 @@ static void ipmr_cache_resolve(struct net *net, struct mfc_cache *uc, rtnl_unicast(skb, net, NETLINK_CB(skb).pid); } else - ip_mr_forward(net, skb, c, 0); + ip_mr_forward(net, mrt, skb, c, 0); } } @@ -606,7 +624,7 @@ static void ipmr_cache_resolve(struct net *net, struct mfc_cache *uc, * Called under mrt_lock. 
*/ -static int ipmr_cache_report(struct net *net, +static int ipmr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert) { struct sk_buff *skb; @@ -639,7 +657,7 @@ static int ipmr_cache_report(struct net *net, memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr)); msg->im_msgtype = IGMPMSG_WHOLEPKT; msg->im_mbz = 0; - msg->im_vif = net->ipv4.mroute_reg_vif_num; + msg->im_vif = mrt->mroute_reg_vif_num; ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + sizeof(struct iphdr)); @@ -671,7 +689,7 @@ static int ipmr_cache_report(struct net *net, skb->transport_header = skb->network_header; } - if (net->ipv4.mroute_sk == NULL) { + if (mrt->mroute_sk == NULL) { kfree_skb(skb); return -EINVAL; } @@ -679,7 +697,7 @@ static int ipmr_cache_report(struct net *net, /* * Deliver to mrouted */ - ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb); + ret = sock_queue_rcv_skb(mrt->mroute_sk, skb); if (ret < 0) { if (net_ratelimit()) printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); @@ -694,7 +712,7 @@ static int ipmr_cache_report(struct net *net, */ static int -ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb) +ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) { bool found = false; int err; @@ -702,7 +720,7 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb) const struct iphdr *iph = ip_hdr(skb); spin_lock_bh(&mfc_unres_lock); - list_for_each_entry(c, &net->ipv4.mfc_unres_queue, list) { + list_for_each_entry(c, &mrt->mfc_unres_queue, list) { if (c->mfc_mcastgrp == iph->daddr && c->mfc_origin == iph->saddr) { found = true; @@ -715,7 +733,7 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb) * Create a new entry if allowable */ - if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 || + if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 || (c = ipmr_cache_alloc_unres()) == NULL) { spin_unlock_bh(&mfc_unres_lock); @@ -733,7 +751,7 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb) /* * Reflect first query at mrouted. 
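(Illustrative sketch — ipmr_cache_unresolved() above admits at most ten pending (S,G) entries, and only the first packet of a new flow produces an upcall; later packets are simply queued behind the pending entry. The userspace model below keeps just that admission logic; the flat array, the key type and the printf standing in for the IGMPMSG_NOCACHE report are inventions for the example.)

    #include <stdio.h>

    #define CAP 10

    struct pending {
        unsigned int origin, group;     /* the (S,G) key */
        int queued_pkts;
    };

    static struct pending table[CAP];
    static int nr_pending;

    /* Returns 0 if the packet was queued, -1 if it had to be dropped. */
    static int cache_unresolved(unsigned int origin, unsigned int group)
    {
        for (int i = 0; i < nr_pending; i++) {
            if (table[i].origin == origin && table[i].group == group) {
                table[i].queued_pkts++; /* existing entry: queue only */
                return 0;
            }
        }
        if (nr_pending >= CAP)          /* cf. cache_resolve_queue_len >= 10 */
            return -1;
        table[nr_pending].origin = origin;
        table[nr_pending].group = group;
        table[nr_pending].queued_pkts = 1;
        nr_pending++;
        printf("upcall for %#x -> %#x\n", origin, group);  /* first packet only */
        return 0;
    }

    int main(void)
    {
        cache_unresolved(0x0a000001, 0xe0000001);  /* first packet: upcall */
        cache_unresolved(0x0a000001, 0xe0000001);  /* second: queued only */
        return 0;
    }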
*/ - err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE); + err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE); if (err < 0) { /* If the report failed throw the cache entry out - Brad Parker @@ -745,10 +763,10 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb) return err; } - atomic_inc(&net->ipv4.cache_resolve_queue_len); - list_add(&c->list, &net->ipv4.mfc_unres_queue); + atomic_inc(&mrt->cache_resolve_queue_len); + list_add(&c->list, &mrt->mfc_unres_queue); - mod_timer(&net->ipv4.ipmr_expire_timer, c->mfc_un.unres.expires); + mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires); } /* @@ -770,14 +788,14 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb) * MFC cache manipulation by user space mroute daemon */ -static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc) +static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc) { int line; struct mfc_cache *c, *next; line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); - list_for_each_entry_safe(c, next, &net->ipv4.mfc_cache_array[line], list) { + list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) { if (c->mfc_origin == mfc->mfcc_origin.s_addr && c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { write_lock_bh(&mrt_lock); @@ -791,7 +809,8 @@ static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc) return -ENOENT; } -static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) +static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, + struct mfcctl *mfc, int mrtsock) { bool found = false; int line; @@ -802,7 +821,7 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); - list_for_each_entry(c, &net->ipv4.mfc_cache_array[line], list) { + list_for_each_entry(c, &mrt->mfc_cache_array[line], list) { if (c->mfc_origin == mfc->mfcc_origin.s_addr && c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { found = true; @@ -813,7 +832,7 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) if (found) { write_lock_bh(&mrt_lock); c->mfc_parent = mfc->mfcc_parent; - ipmr_update_thresholds(net, c, mfc->mfcc_ttls); + ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls); if (!mrtsock) c->mfc_flags |= MFC_STATIC; write_unlock_bh(&mrt_lock); @@ -830,12 +849,12 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) c->mfc_origin = mfc->mfcc_origin.s_addr; c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr; c->mfc_parent = mfc->mfcc_parent; - ipmr_update_thresholds(net, c, mfc->mfcc_ttls); + ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls); if (!mrtsock) c->mfc_flags |= MFC_STATIC; write_lock_bh(&mrt_lock); - list_add(&c->list, &net->ipv4.mfc_cache_array[line]); + list_add(&c->list, &mrt->mfc_cache_array[line]); write_unlock_bh(&mrt_lock); /* @@ -843,20 +862,20 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) * need to send on the frames and tidy up. 
*/ spin_lock_bh(&mfc_unres_lock); - list_for_each_entry(uc, &net->ipv4.mfc_unres_queue, list) { + list_for_each_entry(uc, &mrt->mfc_unres_queue, list) { if (uc->mfc_origin == c->mfc_origin && uc->mfc_mcastgrp == c->mfc_mcastgrp) { list_del(&uc->list); - atomic_dec(&net->ipv4.cache_resolve_queue_len); + atomic_dec(&mrt->cache_resolve_queue_len); break; } } - if (list_empty(&net->ipv4.mfc_unres_queue)) - del_timer(&net->ipv4.ipmr_expire_timer); + if (list_empty(&mrt->mfc_unres_queue)) + del_timer(&mrt->ipmr_expire_timer); spin_unlock_bh(&mfc_unres_lock); if (uc) { - ipmr_cache_resolve(net, uc, c); + ipmr_cache_resolve(net, mrt, uc, c); ipmr_cache_free(uc); } return 0; @@ -866,7 +885,7 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) * Close the multicast socket, and clear the vif tables etc */ -static void mroute_clean_tables(struct net *net) +static void mroute_clean_tables(struct mr_table *mrt) { int i; LIST_HEAD(list); @@ -875,9 +894,9 @@ static void mroute_clean_tables(struct net *net) /* * Shut down all active vif entries */ - for (i = 0; i < net->ipv4.maxvif; i++) { - if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC)) - vif_delete(net, i, 0, &list); + for (i = 0; i < mrt->maxvif; i++) { + if (!(mrt->vif_table[i].flags&VIFF_STATIC)) + vif_delete(mrt, i, 0, &list); } unregister_netdevice_many(&list); @@ -885,7 +904,7 @@ static void mroute_clean_tables(struct net *net) * Wipe the cache */ for (i = 0; i < MFC_LINES; i++) { - list_for_each_entry_safe(c, next, &net->ipv4.mfc_cache_array[i], list) { + list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) { if (c->mfc_flags&MFC_STATIC) continue; write_lock_bh(&mrt_lock); @@ -896,11 +915,11 @@ static void mroute_clean_tables(struct net *net) } } - if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) { + if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { spin_lock_bh(&mfc_unres_lock); - list_for_each_entry_safe(c, next, &net->ipv4.mfc_unres_queue, list) { + list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { list_del(&c->list); - ipmr_destroy_unres(net, c); + ipmr_destroy_unres(mrt, c); } spin_unlock_bh(&mfc_unres_lock); } @@ -909,16 +928,17 @@ static void mroute_clean_tables(struct net *net) static void mrtsock_destruct(struct sock *sk) { struct net *net = sock_net(sk); + struct mr_table *mrt = net->ipv4.mrt; rtnl_lock(); - if (sk == net->ipv4.mroute_sk) { + if (sk == mrt->mroute_sk) { IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; write_lock_bh(&mrt_lock); - net->ipv4.mroute_sk = NULL; + mrt->mroute_sk = NULL; write_unlock_bh(&mrt_lock); - mroute_clean_tables(net); + mroute_clean_tables(mrt); } rtnl_unlock(); } @@ -936,9 +956,10 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi struct vifctl vif; struct mfcctl mfc; struct net *net = sock_net(sk); + struct mr_table *mrt = net->ipv4.mrt; if (optname != MRT_INIT) { - if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN)) + if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN)) return -EACCES; } @@ -951,7 +972,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi return -ENOPROTOOPT; rtnl_lock(); - if (net->ipv4.mroute_sk) { + if (mrt->mroute_sk) { rtnl_unlock(); return -EADDRINUSE; } @@ -959,7 +980,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi ret = ip_ra_control(sk, 1, mrtsock_destruct); if (ret == 0) { write_lock_bh(&mrt_lock); - net->ipv4.mroute_sk = sk; + mrt->mroute_sk = sk; write_unlock_bh(&mrt_lock); IPV4_DEVCONF_ALL(net, 
MC_FORWARDING)++; @@ -967,7 +988,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi rtnl_unlock(); return ret; case MRT_DONE: - if (sk != net->ipv4.mroute_sk) + if (sk != mrt->mroute_sk) return -EACCES; return ip_ra_control(sk, 0, NULL); case MRT_ADD_VIF: @@ -980,9 +1001,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi return -ENFILE; rtnl_lock(); if (optname == MRT_ADD_VIF) { - ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk); + ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk); } else { - ret = vif_delete(net, vif.vifc_vifi, 0, NULL); + ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL); } rtnl_unlock(); return ret; @@ -999,9 +1020,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi return -EFAULT; rtnl_lock(); if (optname == MRT_DEL_MFC) - ret = ipmr_mfc_delete(net, &mfc); + ret = ipmr_mfc_delete(mrt, &mfc); else - ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk); + ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk); rtnl_unlock(); return ret; /* @@ -1012,7 +1033,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi int v; if (get_user(v,(int __user *)optval)) return -EFAULT; - net->ipv4.mroute_do_assert = (v) ? 1 : 0; + mrt->mroute_do_assert = (v) ? 1 : 0; return 0; } #ifdef CONFIG_IP_PIMSM @@ -1026,9 +1047,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi rtnl_lock(); ret = 0; - if (v != net->ipv4.mroute_do_pim) { - net->ipv4.mroute_do_pim = v; - net->ipv4.mroute_do_assert = v; + if (v != mrt->mroute_do_pim) { + mrt->mroute_do_pim = v; + mrt->mroute_do_assert = v; } rtnl_unlock(); return ret; @@ -1052,6 +1073,7 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int int olr; int val; struct net *net = sock_net(sk); + struct mr_table *mrt = net->ipv4.mrt; if (optname != MRT_VERSION && #ifdef CONFIG_IP_PIMSM @@ -1073,10 +1095,10 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int val = 0x0305; #ifdef CONFIG_IP_PIMSM else if (optname == MRT_PIM) - val = net->ipv4.mroute_do_pim; + val = mrt->mroute_do_pim; #endif else - val = net->ipv4.mroute_do_assert; + val = mrt->mroute_do_assert; if (copy_to_user(optval, &val, olr)) return -EFAULT; return 0; @@ -1093,16 +1115,17 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) struct vif_device *vif; struct mfc_cache *c; struct net *net = sock_net(sk); + struct mr_table *mrt = net->ipv4.mrt; switch (cmd) { case SIOCGETVIFCNT: if (copy_from_user(&vr, arg, sizeof(vr))) return -EFAULT; - if (vr.vifi >= net->ipv4.maxvif) + if (vr.vifi >= mrt->maxvif) return -EINVAL; read_lock(&mrt_lock); - vif = &net->ipv4.vif_table[vr.vifi]; - if (VIF_EXISTS(net, vr.vifi)) { + vif = &mrt->vif_table[vr.vifi]; + if (VIF_EXISTS(mrt, vr.vifi)) { vr.icount = vif->pkt_in; vr.ocount = vif->pkt_out; vr.ibytes = vif->bytes_in; @@ -1120,7 +1143,7 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) return -EFAULT; read_lock(&mrt_lock); - c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr); + c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); if (c) { sr.pktcnt = c->mfc_un.res.pkt; sr.bytecnt = c->mfc_un.res.bytes; @@ -1143,16 +1166,17 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v { struct net_device *dev = ptr; struct net *net = dev_net(dev); + struct mr_table *mrt = net->ipv4.mrt; struct vif_device *v; int ct; LIST_HEAD(list); if (event != NETDEV_UNREGISTER) return 
NOTIFY_DONE; - v = &net->ipv4.vif_table[0]; - for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) { + v = &mrt->vif_table[0]; + for (ct = 0; ct < mrt->maxvif; ct++, v++) { if (v->dev == dev) - vif_delete(net, ct, 1, &list); + vif_delete(mrt, ct, 1, &list); } unregister_netdevice_many(&list); return NOTIFY_DONE; @@ -1211,11 +1235,11 @@ static inline int ipmr_forward_finish(struct sk_buff *skb) * Processing handlers for ipmr_forward */ -static void ipmr_queue_xmit(struct net *net, struct sk_buff *skb, - struct mfc_cache *c, int vifi) +static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, + struct sk_buff *skb, struct mfc_cache *c, int vifi) { const struct iphdr *iph = ip_hdr(skb); - struct vif_device *vif = &net->ipv4.vif_table[vifi]; + struct vif_device *vif = &mrt->vif_table[vifi]; struct net_device *dev; struct rtable *rt; int encap = 0; @@ -1229,7 +1253,7 @@ static void ipmr_queue_xmit(struct net *net, struct sk_buff *skb, vif->bytes_out += skb->len; vif->dev->stats.tx_bytes += skb->len; vif->dev->stats.tx_packets++; - ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT); + ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); goto out_free; } #endif @@ -1312,12 +1336,12 @@ out_free: return; } -static int ipmr_find_vif(struct net_device *dev) +static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev) { - struct net *net = dev_net(dev); int ct; - for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) { - if (net->ipv4.vif_table[ct].dev == dev) + + for (ct = mrt->maxvif-1; ct >= 0; ct--) { + if (mrt->vif_table[ct].dev == dev) break; } return ct; @@ -1325,8 +1349,9 @@ static int ipmr_find_vif(struct net_device *dev) /* "local" means that we should preserve one skb (for local delivery) */ -static int ip_mr_forward(struct net *net, struct sk_buff *skb, - struct mfc_cache *cache, int local) +static int ip_mr_forward(struct net *net, struct mr_table *mrt, + struct sk_buff *skb, struct mfc_cache *cache, + int local) { int psend = -1; int vif, ct; @@ -1338,7 +1363,7 @@ static int ip_mr_forward(struct net *net, struct sk_buff *skb, /* * Wrong interface: drop packet and (maybe) send PIM assert. */ - if (net->ipv4.vif_table[vif].dev != skb->dev) { + if (mrt->vif_table[vif].dev != skb->dev) { int true_vifi; if (skb_rtable(skb)->fl.iif == 0) { @@ -1357,26 +1382,26 @@ static int ip_mr_forward(struct net *net, struct sk_buff *skb, } cache->mfc_un.res.wrong_if++; - true_vifi = ipmr_find_vif(skb->dev); + true_vifi = ipmr_find_vif(mrt, skb->dev); - if (true_vifi >= 0 && net->ipv4.mroute_do_assert && + if (true_vifi >= 0 && mrt->mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, so that we cannot check that packet arrived on an oif. It is bad, but otherwise we would need to move pretty large chunk of pimd to kernel. Ough... 
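(Illustrative sketch — the assert throttling a few lines below relies on time_after(jiffies, last_assert + MFC_ASSERT_THRESH), which stays correct when jiffies wraps because it compares through a signed difference rather than comparing the raw counters. The userspace demo below assumes a 32-bit unsigned int to make the wrap visible; the variable names are made up.)

    #include <stdio.h>

    /* Wraparound-safe "a is after b": the same signed-difference trick
     * the kernel's time_after() uses on jiffies. */
    static int time_after32(unsigned int a, unsigned int b)
    {
        return (int)(b - a) < 0;
    }

    int main(void)
    {
        unsigned int last_assert = 0xfffffff0u;      /* counter about to wrap */
        unsigned int deadline = last_assert + 0x20;  /* wraps around to 0x10 */
        unsigned int now = 0xfffffff8u;              /* only 8 ticks later */

        printf("naive : %d\n", now > deadline);               /* 1 - wrong  */
        printf("proper: %d\n", time_after32(now, deadline));  /* 0 - right  */
        return 0;
    }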
--ANK */ - (net->ipv4.mroute_do_pim || + (mrt->mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) && time_after(jiffies, cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { cache->mfc_un.res.last_assert = jiffies; - ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF); + ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF); } goto dont_forward; } - net->ipv4.vif_table[vif].pkt_in++; - net->ipv4.vif_table[vif].bytes_in += skb->len; + mrt->vif_table[vif].pkt_in++; + mrt->vif_table[vif].bytes_in += skb->len; /* * Forward the frame @@ -1386,7 +1411,8 @@ static int ip_mr_forward(struct net *net, struct sk_buff *skb, if (psend != -1) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) - ipmr_queue_xmit(net, skb2, cache, psend); + ipmr_queue_xmit(net, mrt, skb2, cache, + psend); } psend = ct; } @@ -1395,9 +1421,9 @@ static int ip_mr_forward(struct net *net, struct sk_buff *skb, if (local) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) - ipmr_queue_xmit(net, skb2, cache, psend); + ipmr_queue_xmit(net, mrt, skb2, cache, psend); } else { - ipmr_queue_xmit(net, skb, cache, psend); + ipmr_queue_xmit(net, mrt, skb, cache, psend); return 0; } } @@ -1417,6 +1443,7 @@ int ip_mr_input(struct sk_buff *skb) { struct mfc_cache *cache; struct net *net = dev_net(skb->dev); + struct mr_table *mrt = net->ipv4.mrt; int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; /* Packet is looped back after forward, it should not be @@ -1437,9 +1464,9 @@ int ip_mr_input(struct sk_buff *skb) that we can forward NO IGMP messages. */ read_lock(&mrt_lock); - if (net->ipv4.mroute_sk) { + if (mrt->mroute_sk) { nf_reset(skb); - raw_rcv(net->ipv4.mroute_sk, skb); + raw_rcv(mrt->mroute_sk, skb); read_unlock(&mrt_lock); return 0; } @@ -1448,7 +1475,7 @@ int ip_mr_input(struct sk_buff *skb) } read_lock(&mrt_lock); - cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); + cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); /* * No usable cache entry @@ -1466,9 +1493,9 @@ int ip_mr_input(struct sk_buff *skb) skb = skb2; } - vif = ipmr_find_vif(skb->dev); + vif = ipmr_find_vif(mrt, skb->dev); if (vif >= 0) { - int err = ipmr_cache_unresolved(net, vif, skb); + int err = ipmr_cache_unresolved(mrt, vif, skb); read_unlock(&mrt_lock); return err; @@ -1478,7 +1505,7 @@ int ip_mr_input(struct sk_buff *skb) return -ENODEV; } - ip_mr_forward(net, skb, cache, local); + ip_mr_forward(net, mrt, skb, cache, local); read_unlock(&mrt_lock); @@ -1500,6 +1527,7 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen) struct net_device *reg_dev = NULL; struct iphdr *encap; struct net *net = dev_net(skb->dev); + struct mr_table *mrt = net->ipv4.mrt; encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); /* @@ -1514,8 +1542,8 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen) return 1; read_lock(&mrt_lock); - if (net->ipv4.mroute_reg_vif_num >= 0) - reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev; + if (mrt->mroute_reg_vif_num >= 0) + reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev; if (reg_dev) dev_hold(reg_dev); read_unlock(&mrt_lock); @@ -1550,13 +1578,14 @@ int pim_rcv_v1(struct sk_buff * skb) { struct igmphdr *pim; struct net *net = dev_net(skb->dev); + struct mr_table *mrt = net->ipv4.mrt; if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) goto drop; pim = igmp_hdr(skb); - if (!net->ipv4.mroute_do_pim || + if (!mrt->mroute_do_pim || pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) goto drop; @@ 
-1592,7 +1621,7 @@ drop: #endif static int -ipmr_fill_mroute(struct net *net, struct sk_buff *skb, struct mfc_cache *c, +ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm) { int ct; @@ -1604,19 +1633,19 @@ ipmr_fill_mroute(struct net *net, struct sk_buff *skb, struct mfc_cache *c, if (c->mfc_parent > MAXVIFS) return -ENOENT; - if (VIF_EXISTS(net, c->mfc_parent)) - RTA_PUT(skb, RTA_IIF, 4, &net->ipv4.vif_table[c->mfc_parent].dev->ifindex); + if (VIF_EXISTS(mrt, c->mfc_parent)) + RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex); mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0)); for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { - if (VIF_EXISTS(net, ct) && c->mfc_un.res.ttls[ct] < 255) { + if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) goto rtattr_failure; nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); nhp->rtnh_flags = 0; nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; - nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex; + nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex; nhp->rtnh_len = sizeof(*nhp); } } @@ -1634,11 +1663,12 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, int nowait) { int err; + struct mr_table *mrt = net->ipv4.mrt; struct mfc_cache *cache; struct rtable *rt = skb_rtable(skb); read_lock(&mrt_lock); - cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst); + cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst); if (cache == NULL) { struct sk_buff *skb2; @@ -1652,7 +1682,7 @@ int ipmr_get_route(struct net *net, } dev = skb->dev; - if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) { + if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) { read_unlock(&mrt_lock); return -ENODEV; } @@ -1669,14 +1699,14 @@ int ipmr_get_route(struct net *net, iph->saddr = rt->rt_src; iph->daddr = rt->rt_dst; iph->version = 0; - err = ipmr_cache_unresolved(net, vif, skb2); + err = ipmr_cache_unresolved(mrt, vif, skb2); read_unlock(&mrt_lock); return err; } if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) cache->mfc_flags |= MFC_NOTIFY; - err = ipmr_fill_mroute(net, skb, cache, rtm); + err = ipmr_fill_mroute(mrt, skb, cache, rtm); read_unlock(&mrt_lock); return err; } @@ -1694,11 +1724,13 @@ static struct vif_device *ipmr_vif_seq_idx(struct net *net, struct ipmr_vif_iter *iter, loff_t pos) { - for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) { - if (!VIF_EXISTS(net, iter->ct)) + struct mr_table *mrt = net->ipv4.mrt; + + for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) { + if (!VIF_EXISTS(mrt, iter->ct)) continue; if (pos-- == 0) - return &net->ipv4.vif_table[iter->ct]; + return &mrt->vif_table[iter->ct]; } return NULL; } @@ -1717,15 +1749,16 @@ static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ipmr_vif_iter *iter = seq->private; struct net *net = seq_file_net(seq); + struct mr_table *mrt = net->ipv4.mrt; ++*pos; if (v == SEQ_START_TOKEN) return ipmr_vif_seq_idx(net, iter, 0); - while (++iter->ct < net->ipv4.maxvif) { - if (!VIF_EXISTS(net, iter->ct)) + while (++iter->ct < mrt->maxvif) { + if (!VIF_EXISTS(mrt, iter->ct)) continue; - return &net->ipv4.vif_table[iter->ct]; + return &mrt->vif_table[iter->ct]; } return NULL; } @@ -1739,6 +1772,7 @@ static void ipmr_vif_seq_stop(struct seq_file *seq, void *v) static int ipmr_vif_seq_show(struct seq_file *seq, void *v) { struct net *net = seq_file_net(seq); + struct mr_table *mrt = 
net->ipv4.mrt; if (v == SEQ_START_TOKEN) { seq_puts(seq, @@ -1749,7 +1783,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", - vif - net->ipv4.vif_table, + vif - mrt->vif_table, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out, vif->flags, vif->local, vif->remote); @@ -1788,11 +1822,12 @@ struct ipmr_mfc_iter { static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net, struct ipmr_mfc_iter *it, loff_t pos) { + struct mr_table *mrt = net->ipv4.mrt; struct mfc_cache *mfc; read_lock(&mrt_lock); for (it->ct = 0; it->ct < MFC_LINES; it->ct++) { - it->cache = &net->ipv4.mfc_cache_array[it->ct]; + it->cache = &mrt->mfc_cache_array[it->ct]; list_for_each_entry(mfc, it->cache, list) if (pos-- == 0) return mfc; @@ -1800,7 +1835,7 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net, read_unlock(&mrt_lock); spin_lock_bh(&mfc_unres_lock); - it->cache = &net->ipv4.mfc_unres_queue; + it->cache = &mrt->mfc_unres_queue; list_for_each_entry(mfc, it->cache, list) if (pos-- == 0) return mfc; @@ -1827,6 +1862,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) struct mfc_cache *mfc = v; struct ipmr_mfc_iter *it = seq->private; struct net *net = seq_file_net(seq); + struct mr_table *mrt = net->ipv4.mrt; ++*pos; @@ -1836,13 +1872,13 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (mfc->list.next != it->cache) return list_entry(mfc->list.next, struct mfc_cache, list); - if (it->cache == &net->ipv4.mfc_unres_queue) + if (it->cache == &mrt->mfc_unres_queue) goto end_of_list; - BUG_ON(it->cache != &net->ipv4.mfc_cache_array[it->ct]); + BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]); while (++it->ct < MFC_LINES) { - it->cache = &net->ipv4.mfc_cache_array[it->ct]; + it->cache = &mrt->mfc_cache_array[it->ct]; if (list_empty(it->cache)) continue; return list_first_entry(it->cache, struct mfc_cache, list); @@ -1850,7 +1886,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) /* exhausted cache_array, show unresolved */ read_unlock(&mrt_lock); - it->cache = &net->ipv4.mfc_unres_queue; + it->cache = &mrt->mfc_unres_queue; it->ct = 0; spin_lock_bh(&mfc_unres_lock); @@ -1868,10 +1904,11 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) { struct ipmr_mfc_iter *it = seq->private; struct net *net = seq_file_net(seq); + struct mr_table *mrt = net->ipv4.mrt; - if (it->cache == &net->ipv4.mfc_unres_queue) + if (it->cache == &mrt->mfc_unres_queue) spin_unlock_bh(&mfc_unres_lock); - else if (it->cache == &net->ipv4.mfc_cache_array[it->ct]) + else if (it->cache == &mrt->mfc_cache_array[it->ct]) read_unlock(&mrt_lock); } @@ -1879,6 +1916,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) { int n; struct net *net = seq_file_net(seq); + struct mr_table *mrt = net->ipv4.mrt; if (v == SEQ_START_TOKEN) { seq_puts(seq, @@ -1892,14 +1930,14 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) (unsigned long) mfc->mfc_origin, mfc->mfc_parent); - if (it->cache != &net->ipv4.mfc_unres_queue) { + if (it->cache != &mrt->mfc_unres_queue) { seq_printf(seq, " %8lu %8lu %8lu", mfc->mfc_un.res.pkt, mfc->mfc_un.res.bytes, mfc->mfc_un.res.wrong_if); for (n = mfc->mfc_un.res.minvif; n < mfc->mfc_un.res.maxvif; n++ ) { - if (VIF_EXISTS(net, n) && + if (VIF_EXISTS(mrt, n) && mfc->mfc_un.res.ttls[n] < 255) seq_printf(seq, " %2d:%-3d", @@ -1951,35 +1989,27 @@ static const struct net_protocol pim_protocol = { */ static int 
__net_init ipmr_net_init(struct net *net) { + struct mr_table *mrt; unsigned int i; int err = 0; - net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device), - GFP_KERNEL); - if (!net->ipv4.vif_table) { + mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); + if (mrt == NULL) { err = -ENOMEM; goto fail; } /* Forwarding cache */ - net->ipv4.mfc_cache_array = kcalloc(MFC_LINES, - sizeof(struct list_head), - GFP_KERNEL); - if (!net->ipv4.mfc_cache_array) { - err = -ENOMEM; - goto fail_mfc_cache; - } - for (i = 0; i < MFC_LINES; i++) - INIT_LIST_HEAD(&net->ipv4.mfc_cache_array[i]); + INIT_LIST_HEAD(&mrt->mfc_cache_array[i]); - INIT_LIST_HEAD(&net->ipv4.mfc_unres_queue); + INIT_LIST_HEAD(&mrt->mfc_unres_queue); - setup_timer(&net->ipv4.ipmr_expire_timer, ipmr_expire_process, + setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, (unsigned long)net); #ifdef CONFIG_IP_PIMSM - net->ipv4.mroute_reg_vif_num = -1; + mrt->mroute_reg_vif_num = -1; #endif #ifdef CONFIG_PROC_FS @@ -1989,16 +2019,16 @@ static int __net_init ipmr_net_init(struct net *net) if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops)) goto proc_cache_fail; #endif + + net->ipv4.mrt = mrt; return 0; #ifdef CONFIG_PROC_FS proc_cache_fail: proc_net_remove(net, "ip_mr_vif"); proc_vif_fail: - kfree(net->ipv4.mfc_cache_array); + kfree(mrt); #endif -fail_mfc_cache: - kfree(net->ipv4.vif_table); fail: return err; } @@ -2009,8 +2039,7 @@ static void __net_exit ipmr_net_exit(struct net *net) proc_net_remove(net, "ip_mr_cache"); proc_net_remove(net, "ip_mr_vif"); #endif - kfree(net->ipv4.mfc_cache_array); - kfree(net->ipv4.vif_table); + kfree(net->ipv4.mrt); } static struct pernet_operations ipmr_net_ops = { -- cgit v1.2.2 From f0ad0860d01e47a3ffd220564c5c653b3afbe962 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 13 Apr 2010 05:03:23 +0000 Subject: ipv4: ipmr: support multiple tables This patch adds support for multiple independant multicast routing instances, named "tables". Userspace multicast routing daemons can bind to a specific table instance by issuing a setsockopt call using a new option MRT_TABLE. The table number is stored in the raw socket data and affects all following ipmr setsockopt(), getsockopt() and ioctl() calls. By default, a single table (RT_TABLE_DEFAULT) is created with a default routing rule pointing to it. Newly created pimreg devices have the table number appended ("pimregX"), with the exception of devices created in the default table, which are named just "pimreg" for compatibility reasons. Packets are directed to a specific table instance using routing rules, similar to how regular routing rules work. Currently iif, oif and mark are supported as keys, source and destination addresses could be supported additionally. Example usage: - bind pimd/xorp/... to a specific table: uint32_t table = 123; setsockopt(fd, IPPROTO_IP, MRT_TABLE, &table, sizeof(table)); - create routing rules directing packets to the new table: # ip mrule add iif eth0 lookup 123 # ip mrule add oif eth0 lookup 123 Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/Kconfig | 14 ++ net/ipv4/ipmr.c | 399 ++++++++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 352 insertions(+), 61 deletions(-) (limited to 'net') diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index c9a1c68767ff..be597749c385 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -250,6 +250,20 @@ config IP_MROUTE . If you haven't heard about it, you don't need it. 
+config IP_MROUTE_MULTIPLE_TABLES + bool "IP: multicast policy routing" + depends on IP_ADVANCED_ROUTER + select FIB_RULES + help + Normally, a multicast router runs a userspace daemon and decides + what to do with a multicast packet based on the source and + destination addresses. If you say Y here, the multicast router + will also be able to take interfaces and packet marks into + account and run multiple instances of userspace daemons + simultaneously, each one handling a single table. + + If unsure, say N. + config IP_PIMSM_V1 bool "IP: PIM-SM version 1 support" depends on IP_MROUTE diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 498f4e907d52..5df5fd74c6d1 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -63,12 +63,15 @@ #include #include #include +#include #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) #define CONFIG_IP_PIMSM 1 #endif struct mr_table { + struct list_head list; + u32 id; struct sock *mroute_sk; struct timer_list ipmr_expire_timer; struct list_head mfc_unres_queue; @@ -83,6 +86,14 @@ struct mr_table { #endif }; +struct ipmr_rule { + struct fib_rule common; +}; + +struct ipmr_result { + struct mr_table *mrt; +}; + /* Big lock, protecting vif table, mrt cache and mroute socket state. Note that the changes are semaphored via rtnl_lock. */ @@ -108,6 +119,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock); static struct kmem_cache *mrt_cachep __read_mostly; +static struct mr_table *ipmr_new_table(struct net *net, u32 id); static int ip_mr_forward(struct net *net, struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *cache, int local); @@ -115,6 +127,206 @@ static int ipmr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert); static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); +static void ipmr_expire_process(unsigned long arg); + +#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES +#define ipmr_for_each_table(mrt, net) \ + list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list) + +static struct mr_table *ipmr_get_table(struct net *net, u32 id) +{ + struct mr_table *mrt; + + ipmr_for_each_table(mrt, net) { + if (mrt->id == id) + return mrt; + } + return NULL; +} + +static int ipmr_fib_lookup(struct net *net, struct flowi *flp, + struct mr_table **mrt) +{ + struct ipmr_result res; + struct fib_lookup_arg arg = { .result = &res, }; + int err; + + err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg); + if (err < 0) + return err; + *mrt = res.mrt; + return 0; +} + +static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp, + int flags, struct fib_lookup_arg *arg) +{ + struct ipmr_result *res = arg->result; + struct mr_table *mrt; + + switch (rule->action) { + case FR_ACT_TO_TBL: + break; + case FR_ACT_UNREACHABLE: + return -ENETUNREACH; + case FR_ACT_PROHIBIT: + return -EACCES; + case FR_ACT_BLACKHOLE: + default: + return -EINVAL; + } + + mrt = ipmr_get_table(rule->fr_net, rule->table); + if (mrt == NULL) + return -EAGAIN; + res->mrt = mrt; + return 0; +} + +static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) +{ + return 1; +} + +static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = { + FRA_GENERIC_POLICY, +}; + +static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb, + struct fib_rule_hdr *frh, struct nlattr **tb) +{ + return 0; +} + +static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, + struct nlattr **tb) +{ + return 1; +} + +static int ipmr_rule_fill(struct fib_rule *rule, 
struct sk_buff *skb, + struct fib_rule_hdr *frh) +{ + frh->dst_len = 0; + frh->src_len = 0; + frh->tos = 0; + return 0; +} + +static struct fib_rules_ops ipmr_rules_ops_template = { + .family = FIB_RULES_IPMR, + .rule_size = sizeof(struct ipmr_rule), + .addr_size = sizeof(u32), + .action = ipmr_rule_action, + .match = ipmr_rule_match, + .configure = ipmr_rule_configure, + .compare = ipmr_rule_compare, + .default_pref = fib_default_rule_pref, + .fill = ipmr_rule_fill, + .nlgroup = RTNLGRP_IPV4_RULE, + .policy = ipmr_rule_policy, + .owner = THIS_MODULE, +}; + +static int __net_init ipmr_rules_init(struct net *net) +{ + struct fib_rules_ops *ops; + struct mr_table *mrt; + int err; + + ops = fib_rules_register(&ipmr_rules_ops_template, net); + if (IS_ERR(ops)) + return PTR_ERR(ops); + + INIT_LIST_HEAD(&net->ipv4.mr_tables); + + mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); + if (mrt == NULL) { + err = -ENOMEM; + goto err1; + } + + err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0); + if (err < 0) + goto err2; + + net->ipv4.mr_rules_ops = ops; + return 0; + +err2: + kfree(mrt); +err1: + fib_rules_unregister(ops); + return err; +} + +static void __net_exit ipmr_rules_exit(struct net *net) +{ + struct mr_table *mrt, *next; + + list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) + kfree(mrt); + fib_rules_unregister(net->ipv4.mr_rules_ops); +} +#else +#define ipmr_for_each_table(mrt, net) \ + for (mrt = net->ipv4.mrt; mrt; mrt = NULL) + +static struct mr_table *ipmr_get_table(struct net *net, u32 id) +{ + return net->ipv4.mrt; +} + +static int ipmr_fib_lookup(struct net *net, struct flowi *flp, + struct mr_table **mrt) +{ + *mrt = net->ipv4.mrt; + return 0; +} + +static int __net_init ipmr_rules_init(struct net *net) +{ + net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); + return net->ipv4.mrt ? 
0 : -ENOMEM; +} + +static void __net_exit ipmr_rules_exit(struct net *net) +{ + kfree(net->ipv4.mrt); +} +#endif + +static struct mr_table *ipmr_new_table(struct net *net, u32 id) +{ + struct mr_table *mrt; + unsigned int i; + + mrt = ipmr_get_table(net, id); + if (mrt != NULL) + return mrt; + + mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); + if (mrt == NULL) + return NULL; + mrt->id = id; + + /* Forwarding cache */ + for (i = 0; i < MFC_LINES; i++) + INIT_LIST_HEAD(&mrt->mfc_cache_array[i]); + + INIT_LIST_HEAD(&mrt->mfc_unres_queue); + + setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, + (unsigned long)mrt); + +#ifdef CONFIG_IP_PIMSM + mrt->mroute_reg_vif_num = -1; +#endif +#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES + list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables); +#endif + return mrt; +} /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ @@ -215,7 +427,17 @@ failure: static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { struct net *net = dev_net(dev); - struct mr_table *mrt = net->ipv4.mrt; + struct mr_table *mrt; + struct flowi fl = { + .oif = dev->ifindex, + .iif = skb->skb_iif, + .mark = skb->mark, + }; + int err; + + err = ipmr_fib_lookup(net, &fl, &mrt); + if (err < 0) + return err; read_lock(&mrt_lock); dev->stats.tx_bytes += skb->len; @@ -240,12 +462,18 @@ static void reg_vif_setup(struct net_device *dev) dev->features |= NETIF_F_NETNS_LOCAL; } -static struct net_device *ipmr_reg_vif(struct net *net) +static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) { struct net_device *dev; struct in_device *in_dev; + char name[IFNAMSIZ]; + + if (mrt->id == RT_TABLE_DEFAULT) + sprintf(name, "pimreg"); + else + sprintf(name, "pimreg%u", mrt->id); - dev = alloc_netdev(0, "pimreg", reg_vif_setup); + dev = alloc_netdev(0, name, reg_vif_setup); if (dev == NULL) return NULL; @@ -461,7 +689,7 @@ static int vif_add(struct net *net, struct mr_table *mrt, */ if (mrt->mroute_reg_vif_num >= 0) return -EADDRINUSE; - dev = ipmr_reg_vif(net); + dev = ipmr_reg_vif(net, mrt); if (!dev) return -ENOBUFS; err = dev_set_allmulti(dev, 1); @@ -928,17 +1156,19 @@ static void mroute_clean_tables(struct mr_table *mrt) static void mrtsock_destruct(struct sock *sk) { struct net *net = sock_net(sk); - struct mr_table *mrt = net->ipv4.mrt; + struct mr_table *mrt; rtnl_lock(); - if (sk == mrt->mroute_sk) { - IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; + ipmr_for_each_table(mrt, net) { + if (sk == mrt->mroute_sk) { + IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; - write_lock_bh(&mrt_lock); - mrt->mroute_sk = NULL; - write_unlock_bh(&mrt_lock); + write_lock_bh(&mrt_lock); + mrt->mroute_sk = NULL; + write_unlock_bh(&mrt_lock); - mroute_clean_tables(mrt); + mroute_clean_tables(mrt); + } } rtnl_unlock(); } @@ -956,7 +1186,11 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi struct vifctl vif; struct mfcctl mfc; struct net *net = sock_net(sk); - struct mr_table *mrt = net->ipv4.mrt; + struct mr_table *mrt; + + mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? 
: RT_TABLE_DEFAULT); + if (mrt == NULL) + return -ENOENT; if (optname != MRT_INIT) { if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN)) @@ -1054,6 +1288,27 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi rtnl_unlock(); return ret; } +#endif +#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES + case MRT_TABLE: + { + u32 v; + + if (optlen != sizeof(u32)) + return -EINVAL; + if (get_user(v, (u32 __user *)optval)) + return -EFAULT; + if (sk == mrt->mroute_sk) + return -EBUSY; + + rtnl_lock(); + ret = 0; + if (!ipmr_new_table(net, v)) + ret = -ENOMEM; + raw_sk(sk)->ipmr_table = v; + rtnl_unlock(); + return ret; + } #endif /* * Spurious command, or MRT_VERSION which you cannot @@ -1073,7 +1328,11 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int int olr; int val; struct net *net = sock_net(sk); - struct mr_table *mrt = net->ipv4.mrt; + struct mr_table *mrt; + + mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); + if (mrt == NULL) + return -ENOENT; if (optname != MRT_VERSION && #ifdef CONFIG_IP_PIMSM @@ -1115,7 +1374,11 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) struct vif_device *vif; struct mfc_cache *c; struct net *net = sock_net(sk); - struct mr_table *mrt = net->ipv4.mrt; + struct mr_table *mrt; + + mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); + if (mrt == NULL) + return -ENOENT; switch (cmd) { case SIOCGETVIFCNT: @@ -1166,17 +1429,20 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v { struct net_device *dev = ptr; struct net *net = dev_net(dev); - struct mr_table *mrt = net->ipv4.mrt; + struct mr_table *mrt; struct vif_device *v; int ct; LIST_HEAD(list); if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; - v = &mrt->vif_table[0]; - for (ct = 0; ct < mrt->maxvif; ct++, v++) { - if (v->dev == dev) - vif_delete(mrt, ct, 1, &list); + + ipmr_for_each_table(mrt, net) { + v = &mrt->vif_table[0]; + for (ct = 0; ct < mrt->maxvif; ct++, v++) { + if (v->dev == dev) + vif_delete(mrt, ct, 1, &list); + } } unregister_netdevice_many(&list); return NOTIFY_DONE; @@ -1443,8 +1709,9 @@ int ip_mr_input(struct sk_buff *skb) { struct mfc_cache *cache; struct net *net = dev_net(skb->dev); - struct mr_table *mrt = net->ipv4.mrt; int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; + struct mr_table *mrt; + int err; /* Packet is looped back after forward, it should not be forwarded second time, but still can be delivered locally. 
@@ -1452,6 +1719,10 @@ int ip_mr_input(struct sk_buff *skb) if (IPCB(skb)->flags&IPSKB_FORWARDED) goto dont_forward; + err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt); + if (err < 0) + return err; + if (!local) { if (IPCB(skb)->opt.router_alert) { if (ip_call_ra_chain(skb)) @@ -1522,12 +1793,11 @@ dont_forward: } #ifdef CONFIG_IP_PIMSM -static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen) +static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, + unsigned int pimlen) { struct net_device *reg_dev = NULL; struct iphdr *encap; - struct net *net = dev_net(skb->dev); - struct mr_table *mrt = net->ipv4.mrt; encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); /* @@ -1578,18 +1848,21 @@ int pim_rcv_v1(struct sk_buff * skb) { struct igmphdr *pim; struct net *net = dev_net(skb->dev); - struct mr_table *mrt = net->ipv4.mrt; + struct mr_table *mrt; if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) goto drop; pim = igmp_hdr(skb); + if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0) + goto drop; + if (!mrt->mroute_do_pim || pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) goto drop; - if (__pim_rcv(skb, sizeof(*pim))) { + if (__pim_rcv(mrt, skb, sizeof(*pim))) { drop: kfree_skb(skb); } @@ -1601,6 +1874,8 @@ drop: static int pim_rcv(struct sk_buff * skb) { struct pimreghdr *pim; + struct net *net = dev_net(skb->dev); + struct mr_table *mrt; if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) goto drop; @@ -1612,7 +1887,10 @@ static int pim_rcv(struct sk_buff * skb) csum_fold(skb_checksum(skb, 0, skb->len, 0)))) goto drop; - if (__pim_rcv(skb, sizeof(*pim))) { + if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0) + goto drop; + + if (__pim_rcv(mrt, skb, sizeof(*pim))) { drop: kfree_skb(skb); } @@ -1663,10 +1941,14 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, int nowait) { int err; - struct mr_table *mrt = net->ipv4.mrt; + struct mr_table *mrt; struct mfc_cache *cache; struct rtable *rt = skb_rtable(skb); + mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); + if (mrt == NULL) + return -ENOENT; + read_lock(&mrt_lock); cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst); @@ -1717,6 +1999,7 @@ int ipmr_get_route(struct net *net, */ struct ipmr_vif_iter { struct seq_net_private p; + struct mr_table *mrt; int ct; }; @@ -1724,7 +2007,7 @@ static struct vif_device *ipmr_vif_seq_idx(struct net *net, struct ipmr_vif_iter *iter, loff_t pos) { - struct mr_table *mrt = net->ipv4.mrt; + struct mr_table *mrt = iter->mrt; for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) { if (!VIF_EXISTS(mrt, iter->ct)) @@ -1738,7 +2021,15 @@ static struct vif_device *ipmr_vif_seq_idx(struct net *net, static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) __acquires(mrt_lock) { + struct ipmr_vif_iter *iter = seq->private; struct net *net = seq_file_net(seq); + struct mr_table *mrt; + + mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); + if (mrt == NULL) + return ERR_PTR(-ENOENT); + + iter->mrt = mrt; read_lock(&mrt_lock); return *pos ? 
ipmr_vif_seq_idx(net, seq->private, *pos - 1) @@ -1749,7 +2040,7 @@ static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ipmr_vif_iter *iter = seq->private; struct net *net = seq_file_net(seq); - struct mr_table *mrt = net->ipv4.mrt; + struct mr_table *mrt = iter->mrt; ++*pos; if (v == SEQ_START_TOKEN) @@ -1771,8 +2062,8 @@ static void ipmr_vif_seq_stop(struct seq_file *seq, void *v) static int ipmr_vif_seq_show(struct seq_file *seq, void *v) { - struct net *net = seq_file_net(seq); - struct mr_table *mrt = net->ipv4.mrt; + struct ipmr_vif_iter *iter = seq->private; + struct mr_table *mrt = iter->mrt; if (v == SEQ_START_TOKEN) { seq_puts(seq, @@ -1814,6 +2105,7 @@ static const struct file_operations ipmr_vif_fops = { struct ipmr_mfc_iter { struct seq_net_private p; + struct mr_table *mrt; struct list_head *cache; int ct; }; @@ -1822,7 +2114,7 @@ struct ipmr_mfc_iter { static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net, struct ipmr_mfc_iter *it, loff_t pos) { - struct mr_table *mrt = net->ipv4.mrt; + struct mr_table *mrt = it->mrt; struct mfc_cache *mfc; read_lock(&mrt_lock); @@ -1850,7 +2142,13 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) { struct ipmr_mfc_iter *it = seq->private; struct net *net = seq_file_net(seq); + struct mr_table *mrt; + + mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); + if (mrt == NULL) + return ERR_PTR(-ENOENT); + it->mrt = mrt; it->cache = NULL; it->ct = 0; return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) @@ -1862,7 +2160,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) struct mfc_cache *mfc = v; struct ipmr_mfc_iter *it = seq->private; struct net *net = seq_file_net(seq); - struct mr_table *mrt = net->ipv4.mrt; + struct mr_table *mrt = it->mrt; ++*pos; @@ -1903,8 +2201,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) { struct ipmr_mfc_iter *it = seq->private; - struct net *net = seq_file_net(seq); - struct mr_table *mrt = net->ipv4.mrt; + struct mr_table *mrt = it->mrt; if (it->cache == &mrt->mfc_unres_queue) spin_unlock_bh(&mfc_unres_lock); @@ -1915,8 +2212,6 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) { int n; - struct net *net = seq_file_net(seq); - struct mr_table *mrt = net->ipv4.mrt; if (v == SEQ_START_TOKEN) { seq_puts(seq, @@ -1924,6 +2219,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) } else { const struct mfc_cache *mfc = v; const struct ipmr_mfc_iter *it = seq->private; + const struct mr_table *mrt = it->mrt; seq_printf(seq, "%08lX %08lX %-3hd", (unsigned long) mfc->mfc_mcastgrp, @@ -1989,28 +2285,11 @@ static const struct net_protocol pim_protocol = { */ static int __net_init ipmr_net_init(struct net *net) { - struct mr_table *mrt; - unsigned int i; - int err = 0; + int err; - mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); - if (mrt == NULL) { - err = -ENOMEM; + err = ipmr_rules_init(net); + if (err < 0) goto fail; - } - - /* Forwarding cache */ - for (i = 0; i < MFC_LINES; i++) - INIT_LIST_HEAD(&mrt->mfc_cache_array[i]); - - INIT_LIST_HEAD(&mrt->mfc_unres_queue); - - setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, - (unsigned long)net); - -#ifdef CONFIG_IP_PIMSM - mrt->mroute_reg_vif_num = -1; -#endif #ifdef CONFIG_PROC_FS err = -ENOMEM; @@ -2019,15 +2298,13 @@ static int __net_init ipmr_net_init(struct net *net) if (!proc_net_fops_create(net, "ip_mr_cache", 
0, &ipmr_mfc_fops)) goto proc_cache_fail; #endif - - net->ipv4.mrt = mrt; return 0; #ifdef CONFIG_PROC_FS proc_cache_fail: proc_net_remove(net, "ip_mr_vif"); proc_vif_fail: - kfree(mrt); + ipmr_rules_exit(net); #endif fail: return err; @@ -2039,7 +2316,7 @@ static void __net_exit ipmr_net_exit(struct net *net) proc_net_remove(net, "ip_mr_cache"); proc_net_remove(net, "ip_mr_vif"); #endif - kfree(net->ipv4.mrt); + ipmr_rules_exit(net); } static struct pernet_operations ipmr_net_ops = { -- cgit v1.2.2 From 4eaa0e3c869acd5dbc7c2e3818a9ae9cbf221d27 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 14 Apr 2010 16:13:29 -0700 Subject: fib: suppress lockdep-RCU false positive in FIB trie. Followup of commit 634a4b20 Allow tnode_get_child_rcu() to be called either under rcu_read_lock() protection or with RTNL held. Signed-off-by: Eric Dumazet Signed-off-by: Paul E. McKenney Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 59a838795e3e..c98f115fb0fd 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -209,7 +209,9 @@ static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) { struct node *ret = tnode_get_child(tn, i); - return rcu_dereference(ret); + return rcu_dereference_check(ret, + rcu_read_lock_held() || + lockdep_rtnl_is_held()); } static inline int tnode_child_length(const struct tnode *tn) -- cgit v1.2.2 From b0e28f1effd1d840b36e961edc1def81e01b1ca1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Apr 2010 00:14:07 -0700 Subject: net: netif_rx() must disable preemption Eric Paris reported netif_rx() is calling smp_processor_id() from preemptible context, in particular when caller is ip_dev_loopback_xmit(). RPS commit added this smp_processor_id() call, this patch makes sure preemption is disabled. rps_get_cpus() wants rcu_read_lock() anyway, we can dot it a bit earlier. Reported-by: Eric Paris Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 876b1112d5ba..e8041eb76ac1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2206,6 +2206,7 @@ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; /* * get_rps_cpu is called from netif_receive_skb and returns the target * CPU from the RPS map of the receiving queue for a given skb. + * rcu_read_lock must be held on entry. 
*/ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb) { @@ -2217,8 +2218,6 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb) u8 ip_proto; u32 addr1, addr2, ports, ihl; - rcu_read_lock(); - if (skb_rx_queue_recorded(skb)) { u16 index = skb_get_rx_queue(skb); if (unlikely(index >= dev->num_rx_queues)) { @@ -2296,7 +2295,6 @@ got_hash: } done: - rcu_read_unlock(); return cpu; } @@ -2392,7 +2390,7 @@ enqueue: int netif_rx(struct sk_buff *skb) { - int cpu; + int ret; /* if netpoll wants it, pretend we never saw it */ if (netpoll_rx(skb)) @@ -2402,14 +2400,21 @@ int netif_rx(struct sk_buff *skb) net_timestamp(skb); #ifdef CONFIG_RPS - cpu = get_rps_cpu(skb->dev, skb); - if (cpu < 0) - cpu = smp_processor_id(); + { + int cpu; + + rcu_read_lock(); + cpu = get_rps_cpu(skb->dev, skb); + if (cpu < 0) + cpu = smp_processor_id(); + ret = enqueue_to_backlog(skb, cpu); + rcu_read_unlock(); + } #else - cpu = smp_processor_id(); + ret = enqueue_to_backlog(skb, get_cpu()); + put_cpu(); #endif - - return enqueue_to_backlog(skb, cpu); + return ret; } EXPORT_SYMBOL(netif_rx); -- cgit v1.2.2 From 8728c544a9cbdcb0034aa5c45706c5f953f030ee Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 11 Apr 2010 21:18:17 +0000 Subject: net: dev_pick_tx() fix When dev_pick_tx() caches tx queue_index on a socket, we must check socket dst_entry matches skb one, or risk a crash later, as reported by Denys Fedorysychenko, if old packets are in flight during a route change, involving devices with different number of queues. Bug introduced by commit a4ee3ce3 (net: Use sk_tx_queue_mapping for connected sockets) Reported-by: Denys Fedorysychenko Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 1c8a0ce473a8..92584bfef09b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1989,8 +1989,12 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev, if (dev->real_num_tx_queues > 1) queue_index = skb_tx_hash(dev, skb); - if (sk && sk->sk_dst_cache) - sk_tx_queue_set(sk, queue_index); + if (sk) { + struct dst_entry *dst = rcu_dereference(sk->sk_dst_cache); + + if (dst && skb_dst(skb) == dst) + sk_tx_queue_set(sk, queue_index); + } } } -- cgit v1.2.2 From ea2d9b41bd418894d1ee25de1642c3325d71c397 Mon Sep 17 00:00:00 2001 From: Bart De Schuymer Date: Thu, 15 Apr 2010 12:14:51 +0200 Subject: netfilter: bridge-netfilter: simplify IP DNAT Remove br_netfilter.c::br_nf_local_out(). The function br_nf_local_out() was needed because the PF_BRIDGE::LOCAL_OUT hook could be called when IP DNAT happens on to-be-bridged traffic. The new scheme eliminates this mess. 
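For illustration, the new scheme amounts to checking a per-skb DNAT flag at the bridge device's transmit entry point and diverting such frames back into the bridge forwarding path; a minimal sketch of that pattern, mirroring the br_device.c hunk below (BRNF_BRIDGED_DNAT and the slow-path helper are the bridge-netfilter symbols used in that hunk, not new API):

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Sketch only: frames that were DNAT'ed while being bridged carry
 * BRNF_BRIDGED_DNAT in skb->nf_bridge->mask and are finished via the
 * bridge-netfilter slow path instead of being transmitted directly. */
static netdev_tx_t bridge_xmit_sketch(struct sk_buff *skb, struct net_device *dev)
{
	if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) {
		br_nf_pre_routing_finish_bridge_slow(skb);
		return NETDEV_TX_OK;	/* skb handed off to the slow path */
	}
	/* normal bridge transmit continues here */
	return NETDEV_TX_OK;
}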
Signed-off-by: Bart De Schuymer Signed-off-by: Patrick McHardy --- net/bridge/br_device.c | 9 +++- net/bridge/br_netfilter.c | 114 ++++++++-------------------------------------- 2 files changed, 26 insertions(+), 97 deletions(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 5b8a6e73b02f..007bde87415d 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -15,7 +15,7 @@ #include #include #include - +#include #include #include "br_private.h" @@ -28,6 +28,13 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) struct net_bridge_mdb_entry *mdst; struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats); +#ifdef CONFIG_BRIDGE_NETFILTER + if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) { + br_nf_pre_routing_finish_bridge_slow(skb); + return NETDEV_TX_OK; + } +#endif + brstats->tx_packets++; brstats->tx_bytes += skb->len; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index dd6f538ba0b0..05dc6304992c 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -246,8 +246,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) } /* This requires some explaining. If DNAT has taken place, - * we will need to fix up the destination Ethernet address, - * and this is a tricky process. + * we will need to fix up the destination Ethernet address. * * There are two cases to consider: * 1. The packet was DNAT'ed to a device in the same bridge @@ -261,52 +260,38 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) * call ip_route_input() and to look at skb->dst->dev, which is * changed to the destination device if ip_route_input() succeeds. * - * Let us first consider the case that ip_route_input() succeeds: - * - * If skb->dst->dev equals the logical bridge device the packet - * came in on, we can consider this bridging. The packet is passed - * through the neighbour output function to build a new destination - * MAC address, which will make the packet enter br_nf_local_out() - * not much later. In that function it is assured that the iptables - * FORWARD chain is traversed for the packet. + * Let's first consider the case that ip_route_input() succeeds: * + * If the output device equals the logical bridge device the packet + * came in on, we can consider this bridging. The corresponding MAC + * address will be obtained in br_nf_pre_routing_finish_bridge. * Otherwise, the packet is considered to be routed and we just * change the destination MAC address so that the packet will * later be passed up to the IP stack to be routed. For a redirected * packet, ip_route_input() will give back the localhost as output device, * which differs from the bridge device. * - * Let us now consider the case that ip_route_input() fails: + * Let's now consider the case that ip_route_input() fails: * * This can be because the destination address is martian, in which case * the packet will be dropped. - * After a "echo '0' > /proc/sys/net/ipv4/ip_forward" ip_route_input() - * will fail, while __ip_route_output_key() will return success. The source - * address for __ip_route_output_key() is set to zero, so __ip_route_output_key + * If IP forwarding is disabled, ip_route_input() will fail, while + * ip_route_output_key() can return success. The source + * address for ip_route_output_key() is set to zero, so ip_route_output_key() * thinks we're handling a locally generated packet and won't care - * if IP forwarding is allowed. 
We send a warning message to the users's - * log telling her to put IP forwarding on. - * - * ip_route_input() will also fail if there is no route available. - * In that case we just drop the packet. - * - * --Lennert, 20020411 - * --Bart, 20020416 (updated) - * --Bart, 20021007 (updated) - * --Bart, 20062711 (updated) */ + * if IP forwarding is enabled. If the output device equals the logical bridge + * device, we proceed as if ip_route_input() succeeded. If it differs from the + * logical bridge port or if ip_route_output_key() fails we drop the packet. + */ + static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) { - if (skb->pkt_type == PACKET_OTHERHOST) { - skb->pkt_type = PACKET_HOST; - skb->nf_bridge->mask |= BRNF_PKT_TYPE; - } - skb->nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; - skb->dev = bridge_parent(skb->dev); if (skb->dev) { struct dst_entry *dst = skb_dst(skb); nf_bridge_pull_encap_header(skb); + skb->nf_bridge->mask |= BRNF_BRIDGED_DNAT; if (dst->hh) return neigh_hh_output(dst->hh, skb); @@ -368,9 +353,6 @@ free_skb: } else { if (skb_dst(skb)->dev == dev) { bridged_dnat: - /* Tell br_nf_local_out this is a - * bridged frame */ - nf_bridge->mask |= BRNF_BRIDGED_DNAT; skb->dev = nf_bridge->physindev; nf_bridge_push_encap_header(skb); NF_HOOK_THRESH(NFPROTO_BRIDGE, @@ -721,54 +703,6 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb, return NF_STOLEN; } -/* PF_BRIDGE/LOCAL_OUT *********************************************** - * - * This function sees both locally originated IP packets and forwarded - * IP packets (in both cases the destination device is a bridge - * device). It also sees bridged-and-DNAT'ed packets. - * - * If (nf_bridge->mask & BRNF_BRIDGED_DNAT) then the packet is bridged - * and we fake the PF_BRIDGE/FORWARD hook. The function br_nf_forward() - * will then fake the PF_INET/FORWARD hook. br_nf_local_out() has priority - * NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor - * will be executed. - */ -static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct net_device *realindev; - struct nf_bridge_info *nf_bridge; - - if (!skb->nf_bridge) - return NF_ACCEPT; - - /* Need exclusive nf_bridge_info since we might have multiple - * different physoutdevs. */ - if (!nf_bridge_unshare(skb)) - return NF_DROP; - - nf_bridge = skb->nf_bridge; - if (!(nf_bridge->mask & BRNF_BRIDGED_DNAT)) - return NF_ACCEPT; - - /* Bridged, take PF_BRIDGE/FORWARD. 
- * (see big note in front of br_nf_pre_routing_finish) */ - nf_bridge->physoutdev = skb->dev; - realindev = nf_bridge->physindev; - - if (nf_bridge->mask & BRNF_PKT_TYPE) { - skb->pkt_type = PACKET_OTHERHOST; - nf_bridge->mask ^= BRNF_PKT_TYPE; - } - nf_bridge_push_encap_header(skb); - - NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev, - br_forward_finish); - return NF_STOLEN; -} - #if defined(CONFIG_NF_CONNTRACK_IPV4) || defined(CONFIG_NF_CONNTRACK_IPV4_MODULE) static int br_nf_dev_queue_xmit(struct sk_buff *skb) { @@ -797,10 +731,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, struct net_device *realoutdev = bridge_parent(skb->dev); u_int8_t pf; - if (!nf_bridge) - return NF_ACCEPT; - - if (!(nf_bridge->mask & (BRNF_BRIDGED | BRNF_BRIDGED_DNAT))) + if (!nf_bridge || !(nf_bridge->mask & BRNF_BRIDGED)) return NF_ACCEPT; if (!realoutdev) @@ -847,10 +778,8 @@ static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb, return NF_ACCEPT; } -/* For br_nf_local_out we need (prio = NF_BR_PRI_FIRST), to insure that innocent - * PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input. - * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because - * ip_refrag() can return NF_STOLEN. */ +/* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because + * br_dev_queue_push_xmit is called afterwards */ static struct nf_hook_ops br_nf_ops[] __read_mostly = { { .hook = br_nf_pre_routing, @@ -880,13 +809,6 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = { .hooknum = NF_BR_FORWARD, .priority = NF_BR_PRI_BRNF, }, - { - .hook = br_nf_local_out, - .owner = THIS_MODULE, - .pf = PF_BRIDGE, - .hooknum = NF_BR_LOCAL_OUT, - .priority = NF_BR_PRI_FIRST, - }, { .hook = br_nf_post_routing, .owner = THIS_MODULE, -- cgit v1.2.2 From e179e6322ac334e21a3c6d669d95bc967e5d0a80 Mon Sep 17 00:00:00 2001 From: Bart De Schuymer Date: Thu, 15 Apr 2010 12:26:39 +0200 Subject: netfilter: bridge-netfilter: Fix MAC header handling with IP DNAT - fix IP DNAT on vlan- or pppoe-encapsulated traffic: The functions neigh_hh_output() or dst->neighbour->output() overwrite the complete Ethernet header, although we only need the destination MAC address. For encapsulated packets, they ended up overwriting the encapsulating header. The new code copies the Ethernet source MAC address and protocol number before calling dst->neighbour->output(). The Ethernet source MAC and protocol number are copied back in place in br_nf_pre_routing_finish_bridge_slow(). This also makes the IP DNAT more transparent because in the old scheme the source MAC of the bridge was copied into the source address in the Ethernet header. We also let skb->protocol equal ETH_P_IP resp. ETH_P_IPV6 during the execution of the PF_INET resp. PF_INET6 hooks. - Speed up IP DNAT by calling neigh_hh_bridge() instead of neigh_hh_output(): if dst->hh is available, we already know the MAC address so we can just copy it. 
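A minimal sketch of the save/restore idea described above, assuming a plain struct ethhdr for clarity (the actual code operates on the skb's linear data and stashes the saved bytes in skb->nf_bridge->data):

#include <linux/if_ether.h>
#include <linux/string.h>

/* Keep the original source MAC and protocol number across a neighbour
 * output call that rewrites the whole Ethernet header, then put them
 * back before the frame re-enters the bridge forwarding path. */
struct eth_fields {
	unsigned char h_source[ETH_ALEN];
	__be16        h_proto;
};

static void eth_fields_save(const struct ethhdr *eth, struct eth_fields *f)
{
	memcpy(f->h_source, eth->h_source, ETH_ALEN);
	f->h_proto = eth->h_proto;
}

static void eth_fields_restore(struct ethhdr *eth, const struct eth_fields *f)
{
	/* the neighbour output path filled in a complete header; only the
	 * destination MAC it wrote is wanted, so undo the rest */
	memcpy(eth->h_source, f->h_source, ETH_ALEN);
	eth->h_proto = f->h_proto;
}

When dst->hh is valid the destination MAC is already cached, so the fast path only needs to copy it into place, which is what the neigh_hh_bridge() call in the hunk below does.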
Signed-off-by: Bart De Schuymer Signed-off-by: Patrick McHardy --- net/bridge/br_netfilter.c | 90 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 65 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 05dc6304992c..b7e405dc9d1c 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -196,15 +196,24 @@ static inline void nf_bridge_save_header(struct sk_buff *skb) skb->nf_bridge->data, header_size); } -/* - * When forwarding bridge frames, we save a copy of the original - * header before processing. +static inline void nf_bridge_update_protocol(struct sk_buff *skb) +{ + if (skb->nf_bridge->mask & BRNF_8021Q) + skb->protocol = htons(ETH_P_8021Q); + else if (skb->nf_bridge->mask & BRNF_PPPoE) + skb->protocol = htons(ETH_P_PPP_SES); +} + +/* Fill in the header for fragmented IP packets handled by + * the IPv4 connection tracking code. */ int nf_bridge_copy_header(struct sk_buff *skb) { int err; - int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); + unsigned int header_size; + nf_bridge_update_protocol(skb); + header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); err = skb_cow_head(skb, header_size); if (err) return err; @@ -238,6 +247,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) skb_dst_set(skb, &rt->u.dst); skb->dev = nf_bridge->physindev; + nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, br_handle_frame_finish, 1); @@ -245,6 +255,38 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) return 0; } +/* Obtain the correct destination MAC address, while preserving the original + * source MAC address. If we already know this address, we just copy it. If we + * don't, we use the neighbour framework to find out. In both cases, we make + * sure that br_handle_frame_finish() is called afterwards. + */ +static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) +{ + struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct dst_entry *dst; + + skb->dev = bridge_parent(skb->dev); + if (!skb->dev) + goto free_skb; + dst = skb_dst(skb); + if (dst->hh) { + neigh_hh_bridge(dst->hh, skb); + skb->dev = nf_bridge->physindev; + return br_handle_frame_finish(skb); + } else if (dst->neighbour) { + /* the neighbour function below overwrites the complete + * MAC header, so we save the Ethernet source address and + * protocol number. */ + skb_copy_from_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), skb->nf_bridge->data, ETH_HLEN-ETH_ALEN); + /* tell br_dev_xmit to continue with forwarding */ + nf_bridge->mask |= BRNF_BRIDGED_DNAT; + return dst->neighbour->output(skb); + } +free_skb: + kfree_skb(skb); + return 0; +} + /* This requires some explaining. If DNAT has taken place, * we will need to fix up the destination Ethernet address. * @@ -283,25 +325,6 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) * device, we proceed as if ip_route_input() succeeded. If it differs from the * logical bridge port or if ip_route_output_key() fails we drop the packet. 
*/ - -static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) -{ - skb->dev = bridge_parent(skb->dev); - if (skb->dev) { - struct dst_entry *dst = skb_dst(skb); - - nf_bridge_pull_encap_header(skb); - skb->nf_bridge->mask |= BRNF_BRIDGED_DNAT; - - if (dst->hh) - return neigh_hh_output(dst->hh, skb); - else if (dst->neighbour) - return dst->neighbour->output(skb); - } - kfree_skb(skb); - return 0; -} - static int br_nf_pre_routing_finish(struct sk_buff *skb) { struct net_device *dev = skb->dev; @@ -354,6 +377,7 @@ free_skb: if (skb_dst(skb)->dev == dev) { bridged_dnat: skb->dev = nf_bridge->physindev; + nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, @@ -376,6 +400,7 @@ bridged_dnat: } skb->dev = nf_bridge->physindev; + nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, br_handle_frame_finish, 1); @@ -396,6 +421,10 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb) nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING; nf_bridge->physindev = skb->dev; skb->dev = bridge_parent(skb->dev); + if (skb->protocol == htons(ETH_P_8021Q)) + nf_bridge->mask |= BRNF_8021Q; + else if (skb->protocol == htons(ETH_P_PPP_SES)) + nf_bridge->mask |= BRNF_PPPoE; return skb->dev; } @@ -494,6 +523,7 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook, if (!setup_pre_routing(skb)) return NF_DROP; + skb->protocol = htons(ETH_P_IPV6); NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, br_nf_pre_routing_finish_ipv6); @@ -566,6 +596,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, if (!setup_pre_routing(skb)) return NF_DROP; store_orig_dstaddr(skb); + skb->protocol = htons(ETH_P_IP); NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, br_nf_pre_routing_finish); @@ -614,7 +645,9 @@ static int br_nf_forward_finish(struct sk_buff *skb) } else { in = *((struct net_device **)(skb->cb)); } + nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, in, skb->dev, br_forward_finish, 1); return 0; @@ -666,6 +699,10 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, /* The physdev module checks on this */ nf_bridge->mask |= BRNF_BRIDGED; nf_bridge->physoutdev = skb->dev; + if (pf == PF_INET) + skb->protocol = htons(ETH_P_IP); + else + skb->protocol = htons(ETH_P_IPV6); NF_HOOK(pf, NF_INET_FORWARD, skb, bridge_parent(in), parent, br_nf_forward_finish); @@ -706,8 +743,7 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb, #if defined(CONFIG_NF_CONNTRACK_IPV4) || defined(CONFIG_NF_CONNTRACK_IPV4_MODULE) static int br_nf_dev_queue_xmit(struct sk_buff *skb) { - if (skb->nfct != NULL && - (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb)) && + if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) && skb->len > skb->dev->mtu && !skb_is_gso(skb)) return ip_fragment(skb, br_dev_queue_push_xmit); @@ -755,6 +791,10 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, nf_bridge_pull_encap_header(skb); nf_bridge_save_header(skb); + if (pf == PF_INET) + skb->protocol = htons(ETH_P_IP); + else + skb->protocol = htons(ETH_P_IPV6); NF_HOOK(pf, NF_INET_POST_ROUTING, skb, NULL, realoutdev, br_nf_dev_queue_xmit); -- cgit v1.2.2 From 90348e0ede4e74f9404c4d08cce1dbb1baa05b06 Mon Sep 17 00:00:00 2001 From: Ulrich Weber Date: Thu, 15 Apr 2010 12:37:18 +0200 
Subject: netfilter: ipv6: move xfrm_lookup at end of ip6_route_me_harder xfrm_lookup should be called after ip6_route_output skb_dst_set, otherwise skb_dst_set of xfrm_lookup is pointless Signed-off-by: Ulrich Weber Signed-off-by: Patrick McHardy --- net/ipv6/netfilter.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index d5ed92b14346..a74951c039b6 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -25,20 +25,6 @@ int ip6_route_me_harder(struct sk_buff *skb) }; dst = ip6_route_output(net, skb->sk, &fl); - -#ifdef CONFIG_XFRM - if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && - xfrm_decode_session(skb, &fl, AF_INET6) == 0) { - struct dst_entry *dst2 = skb_dst(skb); - - if (xfrm_lookup(net, &dst2, &fl, skb->sk, 0)) { - skb_dst_set(skb, NULL); - return -1; - } - skb_dst_set(skb, dst2); - } -#endif - if (dst->error) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); @@ -50,6 +36,17 @@ int ip6_route_me_harder(struct sk_buff *skb) skb_dst_drop(skb); skb_dst_set(skb, dst); + +#ifdef CONFIG_XFRM + if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && + xfrm_decode_session(skb, &fl, AF_INET6) == 0) { + skb_dst_set(skb, NULL); + if (xfrm_lookup(net, &dst, &fl, skb->sk, 0)) + return -1; + skb_dst_set(skb, dst); + } +#endif + return 0; } EXPORT_SYMBOL(ip6_route_me_harder); -- cgit v1.2.2 From 66496d4973dcb848d163805fa6b485850b7555e3 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 15 Apr 2010 13:29:27 +0200 Subject: ipv4: ipmr: fix IP_MROUTE_MULTIPLE_TABLES Kconfig dependencies IP_MROUTE_MULTIPLE_TABLES should depend on IP_MROUTE. Signed-off-by: Patrick McHardy --- net/ipv4/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index be597749c385..8e3a1fd938ab 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -252,7 +252,7 @@ config IP_MROUTE config IP_MROUTE_MULTIPLE_TABLES bool "IP: multicast policy routing" - depends on IP_ADVANCED_ROUTER + depends on IP_MROUTE && IP_ADVANCED_ROUTER select FIB_RULES help Normally, a multicast router runs a userspace daemon and decides -- cgit v1.2.2 From b0ebb739a8f68039f03e80b3476b204fe5adf0d7 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 15 Apr 2010 13:29:28 +0200 Subject: ipv4: ipmr: fix invalid cache resolving when adding a non-matching entry The patch to convert struct mfc_cache to list_heads (ipv4: ipmr: convert struct mfc_cache to struct list_head) introduced a bug when adding new cache entries that don't match any unresolved entries. The unres queue is searched for a matching entry, which is then resolved. When no matching entry is present, the iterator points to the head of the list, but is treated as a matching entry. Use a seperate variable to indicate that a matching entry was found. Signed-off-by: Patrick McHardy --- net/ipv4/ipmr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 5df5fd74c6d1..0643fb6d47c4 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1089,12 +1089,14 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, * Check to see if we resolved a queued list. If so we * need to send on the frames and tidy up. 
*/ + found = false; spin_lock_bh(&mfc_unres_lock); list_for_each_entry(uc, &mrt->mfc_unres_queue, list) { if (uc->mfc_origin == c->mfc_origin && uc->mfc_mcastgrp == c->mfc_mcastgrp) { list_del(&uc->list); atomic_dec(&mrt->cache_resolve_queue_len); + found = true; break; } } @@ -1102,7 +1104,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, del_timer(&mrt->ipmr_expire_timer); spin_unlock_bh(&mfc_unres_lock); - if (uc) { + if (found) { ipmr_cache_resolve(net, mrt, uc, c); ipmr_cache_free(uc); } -- cgit v1.2.2 From 8de53dfbf9a0a0f7538c005137059c5c021476e1 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 15 Apr 2010 13:29:28 +0200 Subject: ipv4: ipmr: fix NULL pointer deref during unres queue destruction Fix an oversight in ipmr_destroy_unres() - the net pointer is unconditionally initialized to NULL, resulting in a NULL pointer dereference later on. Fix by adding a net pointer to struct mr_table and using it in ipmr_destroy_unres(). Signed-off-by: Patrick McHardy --- net/ipv4/ipmr.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 0643fb6d47c4..7d8a2bcecb76 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -71,6 +71,9 @@ struct mr_table { struct list_head list; +#ifdef CONFIG_NET_NS + struct net *net; +#endif u32 id; struct sock *mroute_sk; struct timer_list ipmr_expire_timer; @@ -308,6 +311,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); if (mrt == NULL) return NULL; + write_pnet(&mrt->net, net); mrt->id = id; /* Forwarding cache */ @@ -580,7 +584,7 @@ static inline void ipmr_cache_free(struct mfc_cache *c) static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) { - struct net *net = NULL; //mrt->net; + struct net *net = read_pnet(&mrt->net); struct sk_buff *skb; struct nlmsgerr *e; -- cgit v1.2.2 From f0d57a54aa9fdf3a4d9435d44c69b20388ad0b3b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 15 Apr 2010 19:09:01 +0200 Subject: netfilter: ipt_LOG/ip6t_LOG: use more appropriate log level as default Use KERN_NOTICE instead of KERN_EMERG by default. This only affects kernel internal logging (like conntrack), user-specified logging rules contain a seperate log level. 
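For reference, the value stored in nf_loginfo.u.log.level follows the usual printk/syslog severity numbering, so the hunks below move the default from emergency to notice; a quick reference sketch (the enum and its names are only labels for illustration, not constants used by the patch):

/* syslog/printk severities as carried in nf_loginfo.u.log.level */
enum log_level_sketch {
	LOG_SKETCH_EMERG   = 0,	/* old default, KERN_EMERG */
	LOG_SKETCH_ALERT   = 1,
	LOG_SKETCH_CRIT    = 2,
	LOG_SKETCH_ERR     = 3,
	LOG_SKETCH_WARNING = 4,
	LOG_SKETCH_NOTICE  = 5,	/* new default, KERN_NOTICE */
	LOG_SKETCH_INFO    = 6,
	LOG_SKETCH_DEBUG   = 7,
};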
Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ipt_LOG.c | 2 +- net/ipv6/netfilter/ip6t_LOG.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index a6a454b25502..3bd35f370817 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -367,7 +367,7 @@ static struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { - .level = 0, + .level = 5, .logflags = NF_LOG_MASK, }, }, diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 439ededd5300..1f47a525f484 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -377,7 +377,7 @@ static struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { - .level = 0, + .level = 5, .logflags = NF_LOG_MASK, }, }, -- cgit v1.2.2 From e30b38c298b55e09456d3ccbc1df2f3e2e8dc6e9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Apr 2010 09:13:03 +0000 Subject: ip: Fix ip_dev_loopback_xmit() Eric Paris got following trace with a linux-next kernel [ 14.203970] BUG: using smp_processor_id() in preemptible [00000000] code: avahi-daemon/2093 [ 14.204025] caller is netif_rx+0xfa/0x110 [ 14.204035] Call Trace: [ 14.204064] [] debug_smp_processor_id+0x105/0x110 [ 14.204070] [] netif_rx+0xfa/0x110 [ 14.204090] [] ip_dev_loopback_xmit+0x71/0xa0 [ 14.204095] [] ip_mc_output+0x192/0x2c0 [ 14.204099] [] ip_local_out+0x20/0x30 [ 14.204105] [] ip_push_pending_frames+0x28d/0x3d0 [ 14.204119] [] udp_push_pending_frames+0x14c/0x400 [ 14.204125] [] udp_sendmsg+0x39c/0x790 [ 14.204137] [] inet_sendmsg+0x45/0x80 [ 14.204149] [] sock_sendmsg+0xf1/0x110 [ 14.204189] [] sys_sendmsg+0x20c/0x380 [ 14.204233] [] system_call_fastpath+0x16/0x1b While current linux-2.6 kernel doesnt emit this warning, bug is latent and might cause unexpected failures. ip_dev_loopback_xmit() runs in process context, preemption enabled, so must call netif_rx_ni() instead of netif_rx(), to make sure that we process pending software interrupt. Same change for ip6_dev_loopback_xmit() Reported-by: Eric Paris Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 2 +- net/ipv6/ip6_output.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c65f18e0936e..d1bcc9f21d4f 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -120,7 +120,7 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb) newskb->pkt_type = PACKET_LOOPBACK; newskb->ip_summed = CHECKSUM_UNNECESSARY; WARN_ON(!skb_dst(newskb)); - netif_rx(newskb); + netif_rx_ni(newskb); return 0; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 16c4391f952b..65f9c379df38 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -108,7 +108,7 @@ static int ip6_dev_loopback_xmit(struct sk_buff *newskb) newskb->ip_summed = CHECKSUM_UNNECESSARY; WARN_ON(!skb_dst(newskb)); - netif_rx(newskb); + netif_rx_ni(newskb); return 0; } -- cgit v1.2.2 From a4fbf8415c462208e77251779d80dbc81914cebd Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 15 Apr 2010 15:37:13 -0700 Subject: net/l2tp/l2tp_debugfs.c: Convert NIPQUAD to %pI4 Signed-off-by: Joe Perches Acked-by: James Chapman Signed-off-by: David S. 
Miller --- net/l2tp/l2tp_debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index 908f10f9720e..104ec3b283d4 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -122,8 +122,8 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v) seq_printf(m, "\nTUNNEL %u peer %u", tunnel->tunnel_id, tunnel->peer_tunnel_id); if (tunnel->sock) { struct inet_sock *inet = inet_sk(tunnel->sock); - seq_printf(m, " from " NIPQUAD_FMT " to " NIPQUAD_FMT "\n", - NIPQUAD(inet->inet_saddr), NIPQUAD(inet->inet_daddr)); + seq_printf(m, " from %pI4 to %pI4\n", + &inet->inet_saddr, &inet->inet_daddr); if (tunnel->encap == L2TP_ENCAPTYPE_UDP) seq_printf(m, " source port %hu, dest port %hu\n", ntohs(inet->inet_sport), ntohs(inet->inet_dport)); -- cgit v1.2.2 From 0eecb784942792863b77dfe11e0c7e286e92db85 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Thu, 15 Apr 2010 16:39:14 +0000 Subject: ipv6: cancel to setting local_df in ip6_xmit() commit f88037 (sctp: Drop ipfargok in sctp_xmit function) dropped ipfragok and set the local_df value properly. The change from commit 77e2f1 (ipv6: Fix ip6_xmit to send fragments if ipfragok is true) is therefore no longer needed. This patch removes it. Signed-off-by: Shan Wei Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 16c4391f952b..f3a847e3ec88 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -231,10 +231,6 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, skb_reset_network_header(skb); hdr = ipv6_hdr(skb); - /* Allow local fragmentation. */ - if (ipfragok) - skb->local_df = 1; - /* * Fill in the IPv6 header */ -- cgit v1.2.2 From 4e15ed4d930297c127d280ca1d0c785be870def4 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Thu, 15 Apr 2010 16:43:08 +0000 Subject: net: replace ipfragok with skb->local_df As Herbert Xu said: we should be able to simply replace ipfragok with skb->local_df. commit f88037 (sctp: Drop ipfargok in sctp_xmit function) dropped ipfragok and set the local_df value properly. This patch kills the ipfragok parameter of .queue_xmit(). Signed-off-by: Shan Wei Signed-off-by: David S.
Miller --- net/dccp/ipv6.c | 4 ++-- net/dccp/output.c | 2 +- net/ipv4/ip_output.c | 4 ++-- net/ipv4/tcp_output.c | 2 +- net/ipv6/inet6_connection_sock.c | 4 ++-- net/ipv6/ip6_output.c | 2 +- net/ipv6/tcp_ipv6.c | 4 ++-- net/l2tp/l2tp_core.c | 3 ++- net/l2tp/l2tp_ip.c | 2 +- net/sctp/ipv6.c | 2 +- net/sctp/protocol.c | 2 +- 11 files changed, 16 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index ab1ab95946df..091698899594 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -292,7 +292,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, &ireq6->loc_addr, &ireq6->rmt_addr); ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); - err = ip6_xmit(sk, skb, &fl, opt, 0); + err = ip6_xmit(sk, skb, &fl, opt); err = net_xmit_eval(err); } @@ -347,7 +347,7 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) { if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) { skb_dst_set(skb, dst); - ip6_xmit(ctl_sk, skb, &fl, NULL, 0); + ip6_xmit(ctl_sk, skb, &fl, NULL); DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); return; diff --git a/net/dccp/output.c b/net/dccp/output.c index b8d98e3c052a..e98b65e9569f 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -136,7 +136,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) DCCP_INC_STATS(DCCP_MIB_OUTSEGS); - err = icsk->icsk_af_ops->queue_xmit(skb, 0); + err = icsk->icsk_af_ops->queue_xmit(skb); return net_xmit_eval(err); } return -ENOBUFS; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c65f18e0936e..512af81b750f 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -311,7 +311,7 @@ int ip_output(struct sk_buff *skb) !(IPCB(skb)->flags & IPSKB_REROUTED)); } -int ip_queue_xmit(struct sk_buff *skb, int ipfragok) +int ip_queue_xmit(struct sk_buff *skb) { struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); @@ -370,7 +370,7 @@ packet_routed: skb_reset_network_header(skb); iph = ip_hdr(skb); *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); - if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok) + if (ip_dont_fragment(sk, &rt->u.dst) && !skb->local_df) iph->frag_off = htons(IP_DF); else iph->frag_off = 0; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e46849989a53..2b7d71fb8439 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -890,7 +890,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); - err = icsk->icsk_af_ops->queue_xmit(skb, 0); + err = icsk->icsk_af_ops->queue_xmit(skb); if (likely(err <= 0)) return err; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 628db24bcf22..0c5e3c3b7fd5 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -178,7 +178,7 @@ struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) return dst; } -int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) +int inet6_csk_xmit(struct sk_buff *skb) { struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); @@ -234,7 +234,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) /* Restore final destination back after routing done */ ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - return ip6_xmit(sk, skb, &fl, np->opt, 0); + return ip6_xmit(sk, skb, &fl, np->opt); } EXPORT_SYMBOL_GPL(inet6_csk_xmit); 
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f3a847e3ec88..141819f0c6f1 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -185,7 +185,7 @@ int ip6_output(struct sk_buff *skb) */ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, - struct ipv6_txoptions *opt, int ipfragok) + struct ipv6_txoptions *opt) { struct net *net = sock_net(sk); struct ipv6_pinfo *np = inet6_sk(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b429dfdd69dc..bd5ef7b6e48e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -509,7 +509,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); - err = ip6_xmit(sk, skb, &fl, opt, 0); + err = ip6_xmit(sk, skb, &fl, opt); err = net_xmit_eval(err); } @@ -1071,7 +1071,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) { if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) { skb_dst_set(buff, dst); - ip6_xmit(ctl_sk, buff, &fl, NULL, 0); + ip6_xmit(ctl_sk, buff, &fl, NULL); TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); if (rst) TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 98dfcce1a5fc..ecc7aea9efe4 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -954,7 +954,8 @@ int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t dat } /* Queue the packet to IP for output */ - error = ip_queue_xmit(skb, 1); + skb->local_df = 1; + error = ip_queue_xmit(skb); /* Update stats */ if (error >= 0) { diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 75bf784ba18d..0852512d392c 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -501,7 +501,7 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m skb_dst_set(skb, dst_clone(&rt->u.dst)); /* Queue the packet to IP for output */ - rc = ip_queue_xmit(skb, 0); + rc = ip_queue_xmit(skb); error: /* Update stats */ diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 14db5689fb89..732689140fb8 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -232,7 +232,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) if (!(transport->param_flags & SPP_PMTUD_ENABLE)) skb->local_df = 1; - return ip6_xmit(sk, skb, &fl, np->opt, 0); + return ip6_xmit(sk, skb, &fl, np->opt); } /* Returns the dst cache entry for the given source and destination ip diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index a56f98e82f92..704298f4b284 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -854,7 +854,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, IP_PMTUDISC_DO : IP_PMTUDISC_DONT; SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS); - return ip_queue_xmit(skb, 0); + return ip_queue_xmit(skb); } static struct sctp_af sctp_af_inet; -- cgit v1.2.2 From b5d43998234331b9c01bd2165fdbb25115f4387f Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Thu, 15 Apr 2010 16:48:48 +0000 Subject: ipv6: fix the comment of ip6_xmit() ip6_xmit() is used by upper transport protocol. Signed-off-by: Shan Wei Signed-off-by: David S. 
Miller --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 141819f0c6f1..5129a16f482b 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -181,7 +181,7 @@ int ip6_output(struct sk_buff *skb) } /* - * xmit an sk_buff (used by TCP) + * xmit an sk_buff (used by TCP, SCTP and DCCP) */ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, -- cgit v1.2.2 From 7834704be4777fc0ed67c4486ef8c5691078d135 Mon Sep 17 00:00:00 2001 From: Nishant Sarmukadam Date: Wed, 14 Apr 2010 22:03:02 -0700 Subject: cfg80211: Avoid sending IWEVASSOCREQIE and IWEVASSOCRESPIE events with NULL event body In a scenario where a cfg80211 driver (station mode) does not send assoc request and assoc response IEs in cfg80211_connect_result after a successful association to an AP, cfg80211 sends IWEVASSOCREQIE and IWEVASSOCRESPIE to the user space application with NULL data. This can cause an issue at the event recipient. An example of this is when cfg80211 sends IWEVASSOCREQIE and IWEVASSOCRESPIE events with a NULL event body to wpa_supplicant. The wpa_supplicant overwrites the assoc request and assoc response IEs for this station with NULL data. If the association is WPA/WPA2, the wpa_supplicant is not able to generate EAPOL handshake messages, since the IEs are NULL. With this patch, req_ie and resp_ie remain NULL when the driver has not sent the IEs to cfg80211, since the assignment is skipped. The event sending code sends the events only if resp_ie and req_ie are not NULL. This ensures that the events are not sent with a NULL event body. Signed-off-by: Nishant Sarmukadam Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/sme.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 17465777eb47..dcd7685242f7 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -517,12 +517,16 @@ void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, ev->type = EVENT_CONNECT_RESULT; if (bssid) memcpy(ev->cr.bssid, bssid, ETH_ALEN); - ev->cr.req_ie = ((u8 *)ev) + sizeof(*ev); - ev->cr.req_ie_len = req_ie_len; - memcpy((void *)ev->cr.req_ie, req_ie, req_ie_len); - ev->cr.resp_ie = ((u8 *)ev) + sizeof(*ev) + req_ie_len; - ev->cr.resp_ie_len = resp_ie_len; - memcpy((void *)ev->cr.resp_ie, resp_ie, resp_ie_len); + if (req_ie_len) { + ev->cr.req_ie = ((u8 *)ev) + sizeof(*ev); + ev->cr.req_ie_len = req_ie_len; + memcpy((void *)ev->cr.req_ie, req_ie, req_ie_len); + } + if (resp_ie_len) { + ev->cr.resp_ie = ((u8 *)ev) + sizeof(*ev) + req_ie_len; + ev->cr.resp_ie_len = resp_ie_len; + memcpy((void *)ev->cr.resp_ie, resp_ie, resp_ie_len); + } ev->cr.status = status; spin_lock_irqsave(&wdev->event_lock, flags); -- cgit v1.2.2 From 1c4f0197323254e463b642abf2c8361e2a924859 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 14 Apr 2010 23:11:14 +0000 Subject: packet : remove init_net restriction The af_packet protocol is used by Perl to do ioctls as reported by Stephane Riviere: "Net::RawIP relies on SIOCGIFADDR et SIOCGIFHWADDR to get the IP and MAC addresses of the network interface." But in a new network namespace these ioctls fail because they are disabled for a namespace different from the init_net_ns. These two lines should not be there, as af_inet and af_packet have been namespace-aware for a long time now.
I suppose we forgot to remove these lines because we sent the af_packet first, before af_inet was supported. Signed-off-by: Daniel Lezcano Reported-by: Stephane Riviere Signed-off-by: David S. Miller --- net/packet/af_packet.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index cc90363d7e7a..243946d4809d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2169,8 +2169,6 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, case SIOCGIFDSTADDR: case SIOCSIFDSTADDR: case SIOCSIFFLAGS: - if (!net_eq(sock_net(sk), &init_net)) - return -ENOIOCTLCMD; return inet_dgram_ops.ioctl(sock, cmd, arg); #endif -- cgit v1.2.2 From fec5e652e58fa6017b2c9e06466cb2a6538de5b4 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 16 Apr 2010 16:01:27 -0700 Subject: rfs: Receive Flow Steering This patch implements receive flow steering (RFS). RFS steers received packets for layer 3 and 4 processing to the CPU where the application for the corresponding flow is running. RFS is an extension of Receive Packet Steering (RPS). The basic idea of RFS is that when an application calls recvmsg (or sendmsg) the application's running CPU is stored in a hash table that is indexed by the connection's rxhash which is stored in the socket structure. The rxhash is passed in skbs received on the connection from netif_receive_skb. For each received packet, the associated rxhash is used to look up the CPU in the hash table; if a valid CPU is set, then the packet is steered to that CPU using the RPS mechanisms. The convolution of the simple approach is that it would potentially allow OOO packets. If threads are thrashing around CPUs or multiple threads are trying to read from the same sockets, a quickly changing CPU value in the hash table could cause rampant OOO packets-- we consider this a non-starter. To avoid OOO packets, this solution implements two types of hash tables: rps_sock_flow_table and rps_dev_flow_table. rps_sock_flow_table is a global hash table. Each entry is just a CPU number and it is populated in recvmsg and sendmsg as described above. This table contains the "desired" CPUs for flows. rps_dev_flow_table is specific to each device queue. Each entry contains a CPU and a tail queue counter. The CPU is the "current" CPU for a matching flow. The tail queue counter holds the value of a tail queue counter for the associated CPU's backlog queue at the time of last enqueue for a flow matching the entry. Each backlog queue has a queue head counter which is incremented on dequeue, and so a queue tail counter is computed as queue head count + queue length. When a packet is enqueued on a backlog queue, the current value of the queue tail counter is saved in the hash entry of the rps_dev_flow_table. And now the trick: when selecting the CPU for RPS (get_rps_cpu) the rps_sock_flow table and the rps_dev_flow table for the RX queue are consulted. When the desired CPU for the flow (found in the rps_sock_flow table) does not match the current CPU (found in the rps_dev_flow table), the current CPU is changed to the desired CPU if one of the following is true: - The current CPU is unset (equal to RPS_NO_CPU) - Current CPU is offline - The current CPU's queue head counter >= queue tail counter in the rps_dev_flow table. This checks if the queue tail has advanced beyond the last packet that was enqueued using this table entry. This guarantees that all packets queued using this entry have been dequeued, thus preserving in-order delivery.
Making each queue have its own rps_dev_flow table has two advantages: 1) the tail queue counters will be written on each receive, so keeping the table local to the interrupting CPU is good for locality. 2) this allows lockless access to the table-- the CPU number and queue tail counter need to be accessed together under mutual exclusion from netif_receive_skb; we assume that this is only called from device napi_poll which is non-reentrant. This patch implements RFS for TCP and connected UDP sockets. It should be usable for other flow-oriented protocols. There are two configuration parameters for RFS. The "rps_flow_entries" kernel init parameter sets the number of entries in the rps_sock_flow_table; the per-rxqueue sysfs entry "rps_flow_cnt" contains the number of entries in the rps_dev_flow table for the rxqueue. Both are rounded to a power of two. The obvious benefit of RFS (over just RPS) is that it achieves CPU locality between the receive processing for a flow and the application's processing; this can result in increased performance (higher pps, lower latency). The benefits of RFS are dependent on cache hierarchy, application load, and other factors. On simple benchmarks, we don't necessarily see improvement and sometimes see degradation. However, for more complex benchmarks and for applications where cache pressure is much higher this technique seems to perform very well. Below are some benchmark results which show the potential benefit of this patch. The netperf test has 500 instances of netperf TCP_RR test with 1 byte req. and resp. The RPC test is a request/response test similar in structure to the netperf RR test with 100 threads on each host, but does more work in userspace than netperf. e1000e on 8 core Intel No RFS or RPS 104K tps at 30% CPU No RFS (best RPS config): 290K tps at 63% CPU RFS 303K tps at 61% CPU RPC test tps CPU% 50/90/99% usec latency Latency StdDev No RFS/RPS 103K 48% 757/900/3185 4472.35 RPS only: 174K 73% 415/993/2468 491.66 RFS 223K 73% 379/651/1382 315.61 Signed-off-by: Tom Herbert Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 111 +++++++++++++++++++++++++++++++++++++-------- net/core/net-sysfs.c | 94 ++++++++++++++++++++++++++++++++++++-- net/core/sysctl_net_core.c | 68 +++++++++++++++++++++++++++ net/ipv4/af_inet.c | 29 ++++++++++-- net/ipv4/tcp_ipv4.c | 2 + net/ipv4/udp.c | 7 ++- 6 files changed, 283 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index e8041eb76ac1..d7107ac835fa 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2203,19 +2203,28 @@ int weight_p __read_mostly = 64; /* old backlog weight */ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; #ifdef CONFIG_RPS + +/* One global table that all flow-based protocols share. */ +struct rps_sock_flow_table *rps_sock_flow_table; +EXPORT_SYMBOL(rps_sock_flow_table); + /* * get_rps_cpu is called from netif_receive_skb and returns the target * CPU from the RPS map of the receiving queue for a given skb. * rcu_read_lock must be held on entry.
*/ -static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb) +static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, + struct rps_dev_flow **rflowp) { struct ipv6hdr *ip6; struct iphdr *ip; struct netdev_rx_queue *rxqueue; struct rps_map *map; + struct rps_dev_flow_table *flow_table; + struct rps_sock_flow_table *sock_flow_table; int cpu = -1; u8 ip_proto; + u16 tcpu; u32 addr1, addr2, ports, ihl; if (skb_rx_queue_recorded(skb)) { @@ -2232,7 +2241,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb) } else rxqueue = dev->_rx; - if (!rxqueue->rps_map) + if (!rxqueue->rps_map && !rxqueue->rps_flow_table) goto done; if (skb->rxhash) @@ -2284,9 +2293,48 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb) skb->rxhash = 1; got_hash: + flow_table = rcu_dereference(rxqueue->rps_flow_table); + sock_flow_table = rcu_dereference(rps_sock_flow_table); + if (flow_table && sock_flow_table) { + u16 next_cpu; + struct rps_dev_flow *rflow; + + rflow = &flow_table->flows[skb->rxhash & flow_table->mask]; + tcpu = rflow->cpu; + + next_cpu = sock_flow_table->ents[skb->rxhash & + sock_flow_table->mask]; + + /* + * If the desired CPU (where last recvmsg was done) is + * different from current CPU (one in the rx-queue flow + * table entry), switch if one of the following holds: + * - Current CPU is unset (equal to RPS_NO_CPU). + * - Current CPU is offline. + * - The current CPU's queue tail has advanced beyond the + * last packet that was enqueued using this table entry. + * This guarantees that all previous packets for the flow + * have been dequeued, thus preserving in order delivery. + */ + if (unlikely(tcpu != next_cpu) && + (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || + ((int)(per_cpu(softnet_data, tcpu).input_queue_head - + rflow->last_qtail)) >= 0)) { + tcpu = rflow->cpu = next_cpu; + if (tcpu != RPS_NO_CPU) + rflow->last_qtail = per_cpu(softnet_data, + tcpu).input_queue_head; + } + if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { + *rflowp = rflow; + cpu = tcpu; + goto done; + } + } + map = rcu_dereference(rxqueue->rps_map); if (map) { - u16 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; + tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; if (cpu_online(tcpu)) { cpu = tcpu; @@ -2320,13 +2368,14 @@ static void trigger_softirq(void *data) __napi_schedule(&queue->backlog); __get_cpu_var(netdev_rx_stat).received_rps++; } -#endif /* CONFIG_SMP */ +#endif /* CONFIG_RPS */ /* * enqueue_to_backlog is called to queue an skb to a per CPU backlog * queue (may be a remote CPU queue). 
*/ -static int enqueue_to_backlog(struct sk_buff *skb, int cpu) +static int enqueue_to_backlog(struct sk_buff *skb, int cpu, + unsigned int *qtail) { struct softnet_data *queue; unsigned long flags; @@ -2341,6 +2390,10 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu) if (queue->input_pkt_queue.qlen) { enqueue: __skb_queue_tail(&queue->input_pkt_queue, skb); +#ifdef CONFIG_RPS + *qtail = queue->input_queue_head + + queue->input_pkt_queue.qlen; +#endif rps_unlock(queue); local_irq_restore(flags); return NET_RX_SUCCESS; @@ -2355,11 +2408,10 @@ enqueue: cpu_set(cpu, rcpus->mask[rcpus->select]); __raise_softirq_irqoff(NET_RX_SOFTIRQ); - } else - __napi_schedule(&queue->backlog); -#else - __napi_schedule(&queue->backlog); + goto enqueue; + } #endif + __napi_schedule(&queue->backlog); } goto enqueue; } @@ -2401,18 +2453,25 @@ int netif_rx(struct sk_buff *skb) #ifdef CONFIG_RPS { + struct rps_dev_flow voidflow, *rflow = &voidflow; int cpu; rcu_read_lock(); - cpu = get_rps_cpu(skb->dev, skb); + + cpu = get_rps_cpu(skb->dev, skb, &rflow); if (cpu < 0) cpu = smp_processor_id(); - ret = enqueue_to_backlog(skb, cpu); + + ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); + rcu_read_unlock(); } #else - ret = enqueue_to_backlog(skb, get_cpu()); - put_cpu(); + { + unsigned int qtail; + ret = enqueue_to_backlog(skb, get_cpu(), &qtail); + put_cpu(); + } #endif return ret; } @@ -2830,14 +2889,22 @@ out: int netif_receive_skb(struct sk_buff *skb) { #ifdef CONFIG_RPS - int cpu; + struct rps_dev_flow voidflow, *rflow = &voidflow; + int cpu, ret; + + rcu_read_lock(); - cpu = get_rps_cpu(skb->dev, skb); + cpu = get_rps_cpu(skb->dev, skb, &rflow); - if (cpu < 0) - return __netif_receive_skb(skb); - else - return enqueue_to_backlog(skb, cpu); + if (cpu >= 0) { + ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); + rcu_read_unlock(); + } else { + rcu_read_unlock(); + ret = __netif_receive_skb(skb); + } + + return ret; #else return __netif_receive_skb(skb); #endif @@ -2856,6 +2923,7 @@ static void flush_backlog(void *arg) if (skb->dev == dev) { __skb_unlink(skb, &queue->input_pkt_queue); kfree_skb(skb); + incr_input_queue_head(queue); } rps_unlock(queue); } @@ -3179,6 +3247,7 @@ static int process_backlog(struct napi_struct *napi, int quota) local_irq_enable(); break; } + incr_input_queue_head(queue); rps_unlock(queue); local_irq_enable(); @@ -5542,8 +5611,10 @@ static int dev_cpu_callback(struct notifier_block *nfb, local_irq_enable(); /* Process offline CPU's input_pkt_queue */ - while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) + while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { netif_rx(skb); + incr_input_queue_head(oldsd); + } return NOTIFY_OK; } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 96ed6905b823..143052a22b9b 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "net-sysfs.h" @@ -601,22 +602,109 @@ ssize_t store_rps_map(struct netdev_rx_queue *queue, return len; } +static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, + struct rx_queue_attribute *attr, + char *buf) +{ + struct rps_dev_flow_table *flow_table; + unsigned int val = 0; + + rcu_read_lock(); + flow_table = rcu_dereference(queue->rps_flow_table); + if (flow_table) + val = flow_table->mask + 1; + rcu_read_unlock(); + + return sprintf(buf, "%u\n", val); +} + +static void rps_dev_flow_table_release_work(struct work_struct *work) +{ + struct rps_dev_flow_table *table = container_of(work, + struct 
rps_dev_flow_table, free_work); + + vfree(table); +} + +static void rps_dev_flow_table_release(struct rcu_head *rcu) +{ + struct rps_dev_flow_table *table = container_of(rcu, + struct rps_dev_flow_table, rcu); + + INIT_WORK(&table->free_work, rps_dev_flow_table_release_work); + schedule_work(&table->free_work); +} + +ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, + struct rx_queue_attribute *attr, + const char *buf, size_t len) +{ + unsigned int count; + char *endp; + struct rps_dev_flow_table *table, *old_table; + static DEFINE_SPINLOCK(rps_dev_flow_lock); + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + count = simple_strtoul(buf, &endp, 0); + if (endp == buf) + return -EINVAL; + + if (count) { + int i; + + if (count > 1<<30) { + /* Enforce a limit to prevent overflow */ + return -EINVAL; + } + count = roundup_pow_of_two(count); + table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(count)); + if (!table) + return -ENOMEM; + + table->mask = count - 1; + for (i = 0; i < count; i++) + table->flows[i].cpu = RPS_NO_CPU; + } else + table = NULL; + + spin_lock(&rps_dev_flow_lock); + old_table = queue->rps_flow_table; + rcu_assign_pointer(queue->rps_flow_table, table); + spin_unlock(&rps_dev_flow_lock); + + if (old_table) + call_rcu(&old_table->rcu, rps_dev_flow_table_release); + + return len; +} + static struct rx_queue_attribute rps_cpus_attribute = __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map); + +static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute = + __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR, + show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt); + static struct attribute *rx_queue_default_attrs[] = { &rps_cpus_attribute.attr, + &rps_dev_flow_table_cnt_attribute.attr, NULL }; static void rx_queue_release(struct kobject *kobj) { struct netdev_rx_queue *queue = to_rx_queue(kobj); - struct rps_map *map = queue->rps_map; struct netdev_rx_queue *first = queue->first; - if (map) - call_rcu(&map->rcu, rps_map_release); + if (queue->rps_map) + call_rcu(&queue->rps_map->rcu, rps_map_release); + + if (queue->rps_flow_table) + call_rcu(&queue->rps_flow_table->rcu, + rps_dev_flow_table_release); if (atomic_dec_and_test(&first->count)) kfree(first); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index b7b6b8208f75..dcc7d25996ab 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -11,12 +11,72 @@ #include #include #include +#include #include #include #include #include +#ifdef CONFIG_RPS +static int rps_sock_flow_sysctl(ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + unsigned int orig_size, size; + int ret, i; + ctl_table tmp = { + .data = &size, + .maxlen = sizeof(size), + .mode = table->mode + }; + struct rps_sock_flow_table *orig_sock_table, *sock_table; + static DEFINE_MUTEX(sock_flow_mutex); + + mutex_lock(&sock_flow_mutex); + + orig_sock_table = rps_sock_flow_table; + size = orig_size = orig_sock_table ? 
orig_sock_table->mask + 1 : 0; + + ret = proc_dointvec(&tmp, write, buffer, lenp, ppos); + + if (write) { + if (size) { + if (size > 1<<30) { + /* Enforce limit to prevent overflow */ + mutex_unlock(&sock_flow_mutex); + return -EINVAL; + } + size = roundup_pow_of_two(size); + if (size != orig_size) { + sock_table = + vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size)); + if (!sock_table) { + mutex_unlock(&sock_flow_mutex); + return -ENOMEM; + } + + sock_table->mask = size - 1; + } else + sock_table = orig_sock_table; + + for (i = 0; i < size; i++) + sock_table->ents[i] = RPS_NO_CPU; + } else + sock_table = NULL; + + if (sock_table != orig_sock_table) { + rcu_assign_pointer(rps_sock_flow_table, sock_table); + synchronize_rcu(); + vfree(orig_sock_table); + } + } + + mutex_unlock(&sock_flow_mutex); + + return ret; +} +#endif /* CONFIG_RPS */ + static struct ctl_table net_core_table[] = { #ifdef CONFIG_NET { @@ -82,6 +142,14 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, +#ifdef CONFIG_RPS + { + .procname = "rps_sock_flow_entries", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = rps_sock_flow_sysctl + }, +#endif #endif /* CONFIG_NET */ { .procname = "netdev_budget", diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 193dcd6ed64f..c5376c725503 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -419,6 +419,8 @@ int inet_release(struct socket *sock) if (sk) { long timeout; + inet_rps_reset_flow(sk); + /* Applications forget to leave groups before exiting */ ip_mc_drop_socket(sk); @@ -720,6 +722,8 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, { struct sock *sk = sock->sk; + inet_rps_record_flow(sk); + /* We may need to bind the socket. */ if (!inet_sk(sk)->inet_num && inet_autobind(sk)) return -EAGAIN; @@ -728,12 +732,13 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, } EXPORT_SYMBOL(inet_sendmsg); - static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) { struct sock *sk = sock->sk; + inet_rps_record_flow(sk); + /* We may need to bind the socket. 
*/ if (!inet_sk(sk)->inet_num && inet_autobind(sk)) return -EAGAIN; @@ -743,6 +748,22 @@ static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, return sock_no_sendpage(sock, page, offset, size, flags); } +int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, + size_t size, int flags) +{ + struct sock *sk = sock->sk; + int addr_len = 0; + int err; + + inet_rps_record_flow(sk); + + err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT, + flags & ~MSG_DONTWAIT, &addr_len); + if (err >= 0) + msg->msg_namelen = addr_len; + return err; +} +EXPORT_SYMBOL(inet_recvmsg); int inet_shutdown(struct socket *sock, int how) { @@ -872,7 +893,7 @@ const struct proto_ops inet_stream_ops = { .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = tcp_sendmsg, - .recvmsg = sock_common_recvmsg, + .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, .sendpage = tcp_sendpage, .splice_read = tcp_splice_read, @@ -899,7 +920,7 @@ const struct proto_ops inet_dgram_ops = { .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, - .recvmsg = sock_common_recvmsg, + .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, .sendpage = inet_sendpage, #ifdef CONFIG_COMPAT @@ -929,7 +950,7 @@ static const struct proto_ops inet_sockraw_ops = { .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, - .recvmsg = sock_common_recvmsg, + .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, .sendpage = inet_sendpage, #ifdef CONFIG_COMPAT diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a24995cdc4b6..ad08392a738c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1672,6 +1672,8 @@ process: skb->dev = NULL; + inet_rps_save_rxhash(sk, skb->rxhash); + bh_lock_sock_nested(sk); ret = 0; if (!sock_owned_by_user(sk)) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8fef859db35d..666b963496ff 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1217,6 +1217,7 @@ int udp_disconnect(struct sock *sk, int flags) sk->sk_state = TCP_CLOSE; inet->inet_daddr = 0; inet->inet_dport = 0; + inet_rps_save_rxhash(sk, 0); sk->sk_bound_dev_if = 0; if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) inet_reset_saddr(sk); @@ -1258,8 +1259,12 @@ EXPORT_SYMBOL(udp_lib_unhash); static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { - int rc = sock_queue_rcv_skb(sk, skb); + int rc; + + if (inet_sk(sk)->inet_daddr) + inet_rps_save_rxhash(sk, skb->rxhash); + rc = sock_queue_rcv_skb(sk, skb); if (rc < 0) { int is_udplite = IS_UDPLITE(sk); -- cgit v1.2.2 From 8770acf0494ae06de6abd34f951a436f8f15d1de Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 17 Apr 2010 00:54:36 -0700 Subject: rps: rps_sock_flow_table is mostly read Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index d7107ac835fa..7abf9590e3c5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2205,7 +2205,7 @@ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; #ifdef CONFIG_RPS /* One global table that all flow-based protocols share. 
*/ -struct rps_sock_flow_table *rps_sock_flow_table; +struct rps_sock_flow_table *rps_sock_flow_table __read_mostly; EXPORT_SYMBOL(rps_sock_flow_table); /* -- cgit v1.2.2 From 9958da0501fced47c1ac5c5a3a7731c87e45472c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 17 Apr 2010 04:17:02 +0000 Subject: net: remove time limit in process_backlog() - There is no point in enforcing a time limit in process_backlog(), since other napi instances don't follow the same rule. We can exit after only one packet is processed... The normal quota of 64 packets per napi instance should be the norm, and net_rx_action() already has its own time limit. Note : /proc/net/core/dev_weight can be used to tune this 64 default value. - Use DEFINE_PER_CPU_ALIGNED for softnet_data definition. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 7abf9590e3c5..8092f01713fb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -264,7 +264,7 @@ static RAW_NOTIFIER_HEAD(netdev_chain); * queue in the local softnet handler. */ -DEFINE_PER_CPU(struct softnet_data, softnet_data); +DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); EXPORT_PER_CPU_SYMBOL(softnet_data); #ifdef CONFIG_LOCKDEP @@ -3232,7 +3232,6 @@ static int process_backlog(struct napi_struct *napi, int quota) { int work = 0; struct softnet_data *queue = &__get_cpu_var(softnet_data); - unsigned long start_time = jiffies; napi->weight = weight_p; do { @@ -3252,7 +3251,7 @@ static int process_backlog(struct napi_struct *napi, int quota) local_irq_enable(); __netif_receive_skb(skb); - } while (++work < quota && jiffies == start_time); + } while (++work < quota); return work; } -- cgit v1.2.2 From fc6055a5ba31e2c14e36e8939f9bf2b6d586a7f5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 16 Apr 2010 12:18:22 +0000 Subject: net: Introduce skb_orphan_try() A transmitted skb might be attached to a socket and a destructor, for memory accounting purposes. Traditionally, this destructor is called at tx completion time, when the skb is freed. When tx completion is performed by another cpu than the sender, this forces some cache lines to change ownership. XPS was an attempt to give tx completion to the initial cpu. David's idea is to call the destructor right before giving the skb to the device (the call to ndo_start_xmit()). Because device queues are usually small, orphaning the skb before tx completion is not a big deal. Some drivers already do this; we could do it at the upper level. There is one known exception to this early orphaning, called tx timestamping. It needs to keep a reference to the socket until the device can give a hardware or software timestamp. This patch adds a skb_orphan_try() helper, to centralize all exceptions to early orphaning in one spot, and uses it in dev_hard_start_xmit(). "tbench 16" results on a Nehalem machine (2 X5570 @ 2.93GHz) before: Throughput 4428.9 MB/sec 16 procs after: Throughput 4448.14 MB/sec 16 procs UDP should get even better results, its destructor being more complex, since SOCK_USE_WRITE_QUEUE is not set (four atomic ops instead of one) Signed-off-by: Eric Dumazet Signed-off-by: David S.
Miller --- net/core/dev.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 8092f01713fb..8eb50e2292fb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1880,6 +1880,17 @@ static int dev_gso_segment(struct sk_buff *skb) return 0; } +/* + * Try to orphan skb early, right before transmission by the device. + * We cannot orphan skb if tx timestamp is requested, since + * drivers need to call skb_tstamp_tx() to send the timestamp. + */ +static inline void skb_orphan_try(struct sk_buff *skb) +{ + if (!skb_tx(skb)->flags) + skb_orphan(skb); +} + int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) { @@ -1904,23 +1915,10 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, if (dev->priv_flags & IFF_XMIT_DST_RELEASE) skb_dst_drop(skb); + skb_orphan_try(skb); rc = ops->ndo_start_xmit(skb, dev); if (rc == NETDEV_TX_OK) txq_trans_update(txq); - /* - * TODO: if skb_orphan() was called by - * dev->hard_start_xmit() (for example, the unmodified - * igb driver does that; bnx2 doesn't), then - * skb_tx_software_timestamp() will be unable to send - * back the time stamp. - * - * How can this be prevented? Always create another - * reference to the socket before calling - * dev->hard_start_xmit()? Prevent that skb_orphan() - * does anything in dev->hard_start_xmit() by clearing - * the skb destructor before the call and restoring it - * afterwards, then doing the skb_orphan() ourselves? - */ return rc; } @@ -1938,6 +1936,7 @@ gso: if (dev->priv_flags & IFF_XMIT_DST_RELEASE) skb_dst_drop(nskb); + skb_orphan_try(nskb); rc = ops->ndo_start_xmit(nskb, dev); if (unlikely(rc != NETDEV_TX_OK)) { if (rc & ~NETDEV_TX_MASK) -- cgit v1.2.2 From e281b19897dc21c1071802808d461627d747a877 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 19 Apr 2010 14:17:47 +0200 Subject: netfilter: xtables: inclusion of xt_TEE xt_TEE can be used to clone and reroute a packet. This can for example be used to copy traffic at a router for logging purposes to another dedicated machine. References: http://www.gossamer-threads.com/lists/iptables/devel/68781 Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/ipv4/ip_output.c | 1 + net/ipv6/ip6_output.c | 1 + net/netfilter/Kconfig | 7 ++ net/netfilter/Makefile | 1 + net/netfilter/xt_TEE.c | 256 +++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 266 insertions(+) create mode 100644 net/netfilter/xt_TEE.c (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index f09135e1e14f..0abfddec1e26 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -309,6 +309,7 @@ int ip_output(struct sk_buff *skb) ip_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } +EXPORT_SYMBOL_GPL(ip_output); int ip_queue_xmit(struct sk_buff *skb, int ipfragok) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index c10a38a71a5e..d09be7ff8735 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -176,6 +176,7 @@ int ip6_output(struct sk_buff *skb) ip6_finish_output, !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } +EXPORT_SYMBOL_GPL(ip6_output); /* * xmit an sk_buff (used by TCP) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 8055786b7702..673a6c8f0e95 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -502,6 +502,13 @@ config NETFILTER_XT_TARGET_RATEEST To compile it as a module, choose M here. If unsure, say N. 
+config NETFILTER_XT_TARGET_TEE + tristate '"TEE" - packet cloning to alternate destiantion' + depends on NETFILTER_ADVANCED + ---help--- + This option adds a "TEE" target with which a packet can be cloned and + this clone be rerouted to another nexthop. + config NETFILTER_XT_TARGET_TPROXY tristate '"TPROXY" target support (EXPERIMENTAL)' depends on EXPERIMENTAL diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index cd31afe0692a..14e3a8fd8180 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -59,6 +59,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_TPROXY) += xt_TPROXY.o obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o +obj-$(CONFIG_NETFILTER_XT_TARGET_TEE) += xt_TEE.o obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o # matches diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c new file mode 100644 index 000000000000..b3d730163f12 --- /dev/null +++ b/net/netfilter/xt_TEE.c @@ -0,0 +1,256 @@ +/* + * "TEE" target extension for Xtables + * Copyright © Sebastian Claßen, 2007 + * Jan Engelhardt, 2007-2010 + * + * based on ipt_ROUTE.c from Cédric de Launois + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 or later, as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +# define WITH_CONNTRACK 1 +# include +#endif +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +# define WITH_IPV6 1 +#endif + +static const union nf_inet_addr tee_zero_address; + +static struct net *pick_net(struct sk_buff *skb) +{ +#ifdef CONFIG_NET_NS + const struct dst_entry *dst; + + if (skb->dev != NULL) + return dev_net(skb->dev); + dst = skb_dst(skb); + if (dst != NULL && dst->dev != NULL) + return dev_net(dst->dev); +#endif + return &init_net; +} + +static bool tee_tg_route_oif(struct flowi *f, struct net *net, + const struct xt_tee_tginfo *info) +{ + const struct net_device *dev; + + if (*info->oif != '\0') + return true; + dev = dev_get_by_name(net, info->oif); + if (dev == NULL) + return false; + f->oif = dev->ifindex; + return true; +} + +static bool +tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info) +{ + const struct iphdr *iph = ip_hdr(skb); + struct net *net = pick_net(skb); + struct rtable *rt; + struct flowi fl; + + memset(&fl, 0, sizeof(fl)); + if (!tee_tg_route_oif(&fl, net, info)) + return false; + fl.nl_u.ip4_u.daddr = info->gw.ip; + fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); + fl.nl_u.ip4_u.scope = RT_SCOPE_UNIVERSE; + if (ip_route_output_key(net, &rt, &fl) != 0) + return false; + + dst_release(skb_dst(skb)); + skb_dst_set(skb, &rt->u.dst); + skb->dev = rt->u.dst.dev; + skb->protocol = htons(ETH_P_IP); + return true; +} + +static unsigned int +tee_tg4(struct sk_buff *skb, const struct xt_target_param *par) +{ + const struct xt_tee_tginfo *info = par->targinfo; + struct iphdr *iph; + + /* + * Copy the skb, and route the copy. Will later return %XT_CONTINUE for + * the original skb, which should continue on its way as if nothing has + * happened. The copy should be independently delivered to the TEE + * --gateway. 
+ */ + skb = pskb_copy(skb, GFP_ATOMIC); + if (skb == NULL) + return XT_CONTINUE; + +#ifdef WITH_CONNTRACK + /* Avoid counting cloned packets towards the original connection. */ + nf_conntrack_put(skb->nfct); + skb->nfct = &nf_conntrack_untracked.ct_general; + skb->nfctinfo = IP_CT_NEW; + nf_conntrack_get(skb->nfct); +#endif + /* + * If we are in PREROUTING/INPUT, the checksum must be recalculated + * since the length could have changed as a result of defragmentation. + * + * We also decrease the TTL to mitigate potential TEE loops + * between two hosts. + * + * Set %IP_DF so that the original source is notified of a potentially + * decreased MTU on the clone route. IPv6 does this too. + */ + iph = ip_hdr(skb); + iph->frag_off |= htons(IP_DF); + if (par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_LOCAL_IN) + --iph->ttl; + ip_send_check(iph); + + /* + * Xtables is not reentrant currently, so a choice has to be made: + * 1. return absolute verdict for the original and let the cloned + * packet travel through the chains + * 2. let the original continue travelling and not pass the clone + * to Xtables. + * #2 is chosen. Normally, we would use ip_local_out for the clone. + * Because iph->check is already correct and we don't pass it to + * Xtables anyway, a shortcut to dst_output [forwards to ip_output] can + * be taken. %IPSKB_REROUTED needs to be set so that ip_output does not + * invoke POSTROUTING on the cloned packet. + */ + IPCB(skb)->flags |= IPSKB_REROUTED; + if (tee_tg_route4(skb, info)) + ip_output(skb); + else + kfree_skb(skb); + + return XT_CONTINUE; +} + +#ifdef WITH_IPV6 +static bool +tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info) +{ + const struct ipv6hdr *iph = ipv6_hdr(skb); + struct net *net = pick_net(skb); + struct dst_entry *dst; + struct flowi fl; + + memset(&fl, 0, sizeof(fl)); + if (!tee_tg_route_oif(&fl, net, info)) + return false; + fl.nl_u.ip6_u.daddr = info->gw.in6; + fl.nl_u.ip6_u.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) | + (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]; + dst = ip6_route_output(net, NULL, &fl); + if (dst == NULL) + return false; + + dst_release(skb_dst(skb)); + skb_dst_set(skb, dst); + skb->dev = dst->dev; + skb->protocol = htons(ETH_P_IPV6); + return true; +} + +static unsigned int +tee_tg6(struct sk_buff *skb, const struct xt_target_param *par) +{ + const struct xt_tee_tginfo *info = par->targinfo; + + skb = pskb_copy(skb, GFP_ATOMIC); + if (skb == NULL) + return XT_CONTINUE; + +#ifdef WITH_CONNTRACK + nf_conntrack_put(skb->nfct); + skb->nfct = &nf_conntrack_untracked.ct_general; + skb->nfctinfo = IP_CT_NEW; + nf_conntrack_get(skb->nfct); +#endif + if (par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_LOCAL_IN) { + struct ipv6hdr *iph = ipv6_hdr(skb); + --iph->hop_limit; + } + IP6CB(skb)->flags |= IP6SKB_REROUTED; + if (tee_tg_route6(skb, info)) + ip6_output(skb); + else + kfree_skb(skb); + + return XT_CONTINUE; +} +#endif /* WITH_IPV6 */ + +static int tee_tg_check(const struct xt_tgchk_param *par) +{ + const struct xt_tee_tginfo *info = par->targinfo; + + if (info->oif[sizeof(info->oif)-1] != '\0') + return -EINVAL; + /* 0.0.0.0 and :: not allowed */ + return (memcmp(&info->gw, &tee_zero_address, + sizeof(tee_zero_address)) == 0) ? 
-EINVAL : 0; +} + +static struct xt_target tee_tg_reg[] __read_mostly = { + { + .name = "TEE", + .revision = 1, + .family = NFPROTO_IPV4, + .target = tee_tg4, + .targetsize = sizeof(struct xt_tee_tginfo), + .checkentry = tee_tg_check, + .me = THIS_MODULE, + }, +#ifdef WITH_IPV6 + { + .name = "TEE", + .revision = 1, + .family = NFPROTO_IPV6, + .target = tee_tg6, + .targetsize = sizeof(struct xt_tee_tginfo), + .checkentry = tee_tg_check, + .me = THIS_MODULE, + }, +#endif +}; + +static int __init tee_tg_init(void) +{ + return xt_register_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); +} + +static void __exit tee_tg_exit(void) +{ + xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); +} + +module_init(tee_tg_init); +module_exit(tee_tg_exit); +MODULE_AUTHOR("Sebastian Claßen "); +MODULE_AUTHOR("Jan Engelhardt "); +MODULE_DESCRIPTION("Xtables: Reroute packet copy"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_TEE"); +MODULE_ALIAS("ip6t_TEE"); -- cgit v1.2.2 From f3c5c1bfd430858d3a05436f82c51e53104feb6b Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 19 Apr 2010 16:05:10 +0200 Subject: netfilter: xtables: make ip_tables reentrant Currently, the table traverser stores return addresses in the ruleset itself (struct ip6t_entry->comefrom). This has a well-known drawback: the jumpstack is overwritten on reentry, making it necessary for targets to return absolute verdicts. Also, the ruleset (which might be heavy memory-wise) needs to be replicated for each CPU that can possibly invoke ip6t_do_table. This patch decouples the jumpstack from struct ip6t_entry and instead puts it into xt_table_info. Not being restricted by 'comefrom' anymore, we can set up a stack as needed. By default, there is room allocated for two entries into the traverser. arp_tables is not touched though, because there is just one/two modules and further patches seek to collapse the table traverser anyhow. 
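As an editorial illustration (not part of this patch), the sketch below models the jumpstack behaviour the commit message describes: instead of storing a return address inside each rule ("comefrom"), the traverser keeps an external per-CPU stack of resume positions, so nested invocations no longer clobber each other. The names here (struct rule, struct jumpstack, js_push, js_pop, STACKSIZE) are invented for the sketch; the real layout lives in struct xt_table_info, as the hunks below show.

#define STACKSIZE 4			/* illustrative; the kernel sizes this from the ruleset */

struct rule;				/* stand-in for struct ipt_entry / ip6t_entry */

struct jumpstack {
	const struct rule *slot[STACKSIZE];
	unsigned int sp;		/* stack pointer, 0 == empty */
};

/* On a jump to a user-defined chain: remember where to resume. */
static int js_push(struct jumpstack *js, const struct rule *resume_pos)
{
	if (js->sp >= STACKSIZE)
		return -1;		/* overflow: the real traverser drops the packet */
	js->slot[js->sp++] = resume_pos;
	return 0;
}

/* On RETURN: resume at the saved rule, or fall back to the hook's
 * underflow rule when the stack is empty. */
static const struct rule *js_pop(struct jumpstack *js,
				 const struct rule *underflow)
{
	return js->sp ? js->slot[--js->sp] : underflow;
}

On entry the real code additionally saves the current stack pointer (origptr) and restores it on exit, so an outer traversal resumes with its own stack intact.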
Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/arp_tables.c | 6 ++-- net/ipv4/netfilter/ip_tables.c | 65 ++++++++++++++++++---------------- net/ipv6/netfilter/ip6_tables.c | 56 ++++++++++++------------------ net/netfilter/x_tables.c | 77 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 138 insertions(+), 66 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index e8e363d90365..07a699059390 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -649,6 +649,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, if (ret != 0) break; ++i; + if (strcmp(arpt_get_target(iter)->u.user.name, + XT_ERROR_TARGET) == 0) + ++newinfo->stacksize; } duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); if (ret != 0) @@ -1774,8 +1777,7 @@ struct xt_table *arpt_register_table(struct net *net, { int ret; struct xt_table_info *newinfo; - struct xt_table_info bootstrap - = { 0, 0, 0, { 0 }, { 0 }, { } }; + struct xt_table_info bootstrap = {0}; void *loc_cpu_entry; struct xt_table *new_table; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 18c5b1573f3e..70900ecf88e2 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -321,8 +321,6 @@ ipt_do_table(struct sk_buff *skb, const struct net_device *out, struct xt_table *table) { -#define tb_comefrom ((struct ipt_entry *)table_base)->comefrom - static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); const struct iphdr *ip; bool hotdrop = false; @@ -330,7 +328,8 @@ ipt_do_table(struct sk_buff *skb, unsigned int verdict = NF_DROP; const char *indev, *outdev; const void *table_base; - struct ipt_entry *e, *back; + struct ipt_entry *e, **jumpstack; + unsigned int *stackptr, origptr, cpu; const struct xt_table_info *private; struct xt_match_param mtpar; struct xt_target_param tgpar; @@ -356,19 +355,23 @@ ipt_do_table(struct sk_buff *skb, IP_NF_ASSERT(table->valid_hooks & (1 << hook)); xt_info_rdlock_bh(); private = table->private; - table_base = private->entries[smp_processor_id()]; + cpu = smp_processor_id(); + table_base = private->entries[cpu]; + jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; + stackptr = &private->stackptr[cpu]; + origptr = *stackptr; e = get_entry(table_base, private->hook_entry[hook]); - /* For return from builtin chain */ - back = get_entry(table_base, private->underflow[hook]); + pr_devel("Entering %s(hook %u); sp at %u (UF %p)\n", + table->name, hook, origptr, + get_entry(table_base, private->underflow[hook])); do { const struct ipt_entry_target *t; const struct xt_entry_match *ematch; IP_NF_ASSERT(e); - IP_NF_ASSERT(back); if (!ip_packet_match(ip, indev, outdev, &e->ip, mtpar.fragoff)) { no_match: @@ -403,17 +406,28 @@ ipt_do_table(struct sk_buff *skb, verdict = (unsigned)(-v) - 1; break; } - e = back; - back = get_entry(table_base, back->comefrom); + if (*stackptr == 0) { + e = get_entry(table_base, + private->underflow[hook]); + pr_devel("Underflow (this is normal) " + "to %p\n", e); + } else { + e = jumpstack[--*stackptr]; + pr_devel("Pulled %p out from pos %u\n", + e, *stackptr); + e = ipt_next_entry(e); + } continue; } if (table_base + v != ipt_next_entry(e) && !(e->ip.flags & IPT_F_GOTO)) { - /* Save old back ptr in next entry */ - struct ipt_entry *next = ipt_next_entry(e); - next->comefrom = (void *)back - table_base; - /* set back pointer to next entry */ - back = next; + 
if (*stackptr >= private->stacksize) { + verdict = NF_DROP; + break; + } + jumpstack[(*stackptr)++] = e; + pr_devel("Pushed %p into pos %u\n", + e, *stackptr - 1); } e = get_entry(table_base, v); @@ -426,18 +440,7 @@ ipt_do_table(struct sk_buff *skb, tgpar.targinfo = t->data; -#ifdef CONFIG_NETFILTER_DEBUG - tb_comefrom = 0xeeeeeeec; -#endif verdict = t->u.kernel.target->target(skb, &tgpar); -#ifdef CONFIG_NETFILTER_DEBUG - if (tb_comefrom != 0xeeeeeeec && verdict == IPT_CONTINUE) { - printk("Target %s reentered!\n", - t->u.kernel.target->name); - verdict = NF_DROP; - } - tb_comefrom = 0x57acc001; -#endif /* Target might have changed stuff. */ ip = ip_hdr(skb); if (verdict == IPT_CONTINUE) @@ -447,7 +450,9 @@ ipt_do_table(struct sk_buff *skb, break; } while (!hotdrop); xt_info_rdunlock_bh(); - + pr_devel("Exiting %s; resetting sp from %u to %u\n", + __func__, *stackptr, origptr); + *stackptr = origptr; #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; #else @@ -455,8 +460,6 @@ ipt_do_table(struct sk_buff *skb, return NF_DROP; else return verdict; #endif - -#undef tb_comefrom } /* Figures out from what hook each rule can be called: returns 0 if @@ -838,6 +841,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, if (ret != 0) return ret; ++i; + if (strcmp(ipt_get_target(iter)->u.user.name, + XT_ERROR_TARGET) == 0) + ++newinfo->stacksize; } if (i != repl->num_entries) { @@ -2086,8 +2092,7 @@ struct xt_table *ipt_register_table(struct net *net, { int ret; struct xt_table_info *newinfo; - struct xt_table_info bootstrap - = { 0, 0, 0, { 0 }, { 0 }, { } }; + struct xt_table_info bootstrap = {0}; void *loc_cpu_entry; struct xt_table *new_table; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index f2b815e72329..2a2770bcd640 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -351,15 +351,14 @@ ip6t_do_table(struct sk_buff *skb, const struct net_device *out, struct xt_table *table) { -#define tb_comefrom ((struct ip6t_entry *)table_base)->comefrom - static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); bool hotdrop = false; /* Initializing verdict to NF_DROP keeps gcc happy. 
*/ unsigned int verdict = NF_DROP; const char *indev, *outdev; const void *table_base; - struct ip6t_entry *e, *back; + struct ip6t_entry *e, **jumpstack; + unsigned int *stackptr, origptr, cpu; const struct xt_table_info *private; struct xt_match_param mtpar; struct xt_target_param tgpar; @@ -383,19 +382,19 @@ ip6t_do_table(struct sk_buff *skb, xt_info_rdlock_bh(); private = table->private; - table_base = private->entries[smp_processor_id()]; + cpu = smp_processor_id(); + table_base = private->entries[cpu]; + jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; + stackptr = &private->stackptr[cpu]; + origptr = *stackptr; e = get_entry(table_base, private->hook_entry[hook]); - /* For return from builtin chain */ - back = get_entry(table_base, private->underflow[hook]); - do { const struct ip6t_entry_target *t; const struct xt_entry_match *ematch; IP_NF_ASSERT(e); - IP_NF_ASSERT(back); if (!ip6_packet_match(skb, indev, outdev, &e->ipv6, &mtpar.thoff, &mtpar.fragoff, &hotdrop)) { no_match: @@ -432,17 +431,20 @@ ip6t_do_table(struct sk_buff *skb, verdict = (unsigned)(-v) - 1; break; } - e = back; - back = get_entry(table_base, back->comefrom); + if (*stackptr == 0) + e = get_entry(table_base, + private->underflow[hook]); + else + e = ip6t_next_entry(jumpstack[--*stackptr]); continue; } if (table_base + v != ip6t_next_entry(e) && !(e->ipv6.flags & IP6T_F_GOTO)) { - /* Save old back ptr in next entry */ - struct ip6t_entry *next = ip6t_next_entry(e); - next->comefrom = (void *)back - table_base; - /* set back pointer to next entry */ - back = next; + if (*stackptr >= private->stacksize) { + verdict = NF_DROP; + break; + } + jumpstack[(*stackptr)++] = e; } e = get_entry(table_base, v); @@ -454,19 +456,7 @@ ip6t_do_table(struct sk_buff *skb, tgpar.target = t->u.kernel.target; tgpar.targinfo = t->data; -#ifdef CONFIG_NETFILTER_DEBUG - tb_comefrom = 0xeeeeeeec; -#endif verdict = t->u.kernel.target->target(skb, &tgpar); - -#ifdef CONFIG_NETFILTER_DEBUG - if (tb_comefrom != 0xeeeeeeec && verdict == IP6T_CONTINUE) { - printk("Target %s reentered!\n", - t->u.kernel.target->name); - verdict = NF_DROP; - } - tb_comefrom = 0x57acc001; -#endif if (verdict == IP6T_CONTINUE) e = ip6t_next_entry(e); else @@ -474,10 +464,8 @@ ip6t_do_table(struct sk_buff *skb, break; } while (!hotdrop); -#ifdef CONFIG_NETFILTER_DEBUG - tb_comefrom = NETFILTER_LINK_POISON; -#endif xt_info_rdunlock_bh(); + *stackptr = origptr; #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; @@ -486,8 +474,6 @@ ip6t_do_table(struct sk_buff *skb, return NF_DROP; else return verdict; #endif - -#undef tb_comefrom } /* Figures out from what hook each rule can be called: returns 0 if @@ -869,6 +855,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, if (ret != 0) return ret; ++i; + if (strcmp(ip6t_get_target(iter)->u.user.name, + XT_ERROR_TARGET) == 0) + ++newinfo->stacksize; } if (i != repl->num_entries) { @@ -2120,8 +2109,7 @@ struct xt_table *ip6t_register_table(struct net *net, { int ret; struct xt_table_info *newinfo; - struct xt_table_info bootstrap - = { 0, 0, 0, { 0 }, { 0 }, { } }; + struct xt_table_info bootstrap = {0}; void *loc_cpu_entry; struct xt_table *new_table; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 8e23d8f68459..edde5c602890 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -62,6 +62,9 @@ static const char *const xt_prefix[NFPROTO_NUMPROTO] = { [NFPROTO_IPV6] = "ip6", }; +/* Allow this many total (re)entries. 
*/ +static const unsigned int xt_jumpstack_multiplier = 2; + /* Registration hooks for targets. */ int xt_register_target(struct xt_target *target) @@ -680,6 +683,26 @@ void xt_free_table_info(struct xt_table_info *info) else vfree(info->entries[cpu]); } + + if (info->jumpstack != NULL) { + if (sizeof(void *) * info->stacksize > PAGE_SIZE) { + for_each_possible_cpu(cpu) + vfree(info->jumpstack[cpu]); + } else { + for_each_possible_cpu(cpu) + kfree(info->jumpstack[cpu]); + } + } + + if (sizeof(void **) * nr_cpu_ids > PAGE_SIZE) + vfree(info->jumpstack); + else + kfree(info->jumpstack); + if (sizeof(unsigned int) * nr_cpu_ids > PAGE_SIZE) + vfree(info->stackptr); + else + kfree(info->stackptr); + kfree(info); } EXPORT_SYMBOL(xt_free_table_info); @@ -724,6 +747,49 @@ EXPORT_SYMBOL_GPL(xt_compat_unlock); DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks); EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks); +static int xt_jumpstack_alloc(struct xt_table_info *i) +{ + unsigned int size; + int cpu; + + size = sizeof(unsigned int) * nr_cpu_ids; + if (size > PAGE_SIZE) + i->stackptr = vmalloc(size); + else + i->stackptr = kmalloc(size, GFP_KERNEL); + if (i->stackptr == NULL) + return -ENOMEM; + memset(i->stackptr, 0, size); + + size = sizeof(void **) * nr_cpu_ids; + if (size > PAGE_SIZE) + i->jumpstack = vmalloc(size); + else + i->jumpstack = kmalloc(size, GFP_KERNEL); + if (i->jumpstack == NULL) + return -ENOMEM; + memset(i->jumpstack, 0, size); + + i->stacksize *= xt_jumpstack_multiplier; + size = sizeof(void *) * i->stacksize; + for_each_possible_cpu(cpu) { + if (size > PAGE_SIZE) + i->jumpstack[cpu] = vmalloc_node(size, + cpu_to_node(cpu)); + else + i->jumpstack[cpu] = kmalloc_node(size, + GFP_KERNEL, cpu_to_node(cpu)); + if (i->jumpstack[cpu] == NULL) + /* + * Freeing will be done later on by the callers. The + * chain is: xt_replace_table -> __do_replace -> + * do_replace -> xt_free_table_info. + */ + return -ENOMEM; + } + + return 0; +} struct xt_table_info * xt_replace_table(struct xt_table *table, @@ -732,6 +798,7 @@ xt_replace_table(struct xt_table *table, int *error) { struct xt_table_info *private; + int ret; /* Do the substitution. */ local_bh_disable(); @@ -746,6 +813,12 @@ xt_replace_table(struct xt_table *table, return NULL; } + ret = xt_jumpstack_alloc(newinfo); + if (ret < 0) { + *error = ret; + return NULL; + } + table->private = newinfo; newinfo->initial_entries = private->initial_entries; @@ -770,6 +843,10 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table_info *private; struct xt_table *t, *table; + ret = xt_jumpstack_alloc(newinfo); + if (ret < 0) + return ERR_PTR(ret); + /* Don't add one object to multiple lists. */ table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL); if (!table) { -- cgit v1.2.2 From cd58bcd9787ef4c16ab6e442c4f1bf3539b3ab39 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 19 Apr 2010 16:06:52 +0200 Subject: netfilter: xt_TEE: have cloned packet travel through Xtables too Since Xtables is now reentrant/nestable, the cloned packet can also go through Xtables and be subject to rules itself. 
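The loop is broken by a per-CPU flag (tee_active, visible in the diff below): when the clone re-enters the traversal and hits the TEE target again, the flag is already set and the target simply returns. For illustration only, not part of this patch, here is a minimal user-space analogue of that reentrancy guard; a thread-local flag stands in for DEFINE_PER_CPU, and all names and output are invented for the sketch.

/* Sketch only: the TEE reentrancy guard, outside the kernel. */
#include <stdbool.h>
#include <stdio.h>

static _Thread_local bool tee_active;   /* stands in for DEFINE_PER_CPU(bool, tee_active) */

static void traverse(const char *pkt);  /* stands in for the Xtables chain walk */

static void tee_target(const char *pkt)
{
        if (tee_active)                 /* nested pass: do not clone the clone */
                return;

        tee_active = true;
        traverse(pkt);                  /* the clone re-enters the traversal */
        tee_active = false;
}

static void traverse(const char *pkt)
{
        printf("traversing %s (nested=%d)\n", pkt, tee_active);
        tee_target(pkt);                /* a TEE rule somewhere in the chain */
}

int main(void)
{
        traverse("cloned-packet");
        return 0;
}

Running the sketch shows the clone being traversed exactly once more; the nested pass returns from the target immediately, so the recursion stops after one level.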
Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/ipv4/ip_output.c | 1 - net/ipv6/ip6_output.c | 1 - net/netfilter/xt_TEE.c | 40 ++++++++++++++++++---------------------- 3 files changed, 18 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 0abfddec1e26..f09135e1e14f 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -309,7 +309,6 @@ int ip_output(struct sk_buff *skb) ip_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } -EXPORT_SYMBOL_GPL(ip_output); int ip_queue_xmit(struct sk_buff *skb, int ipfragok) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d09be7ff8735..c10a38a71a5e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -176,7 +176,6 @@ int ip6_output(struct sk_buff *skb) ip6_finish_output, !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } -EXPORT_SYMBOL_GPL(ip6_output); /* * xmit an sk_buff (used by TCP) diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index b3d730163f12..842e7012eca7 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -12,6 +12,7 @@ */ #include #include +#include #include #include #include @@ -32,6 +33,7 @@ #endif static const union nf_inet_addr tee_zero_address; +static DEFINE_PER_CPU(bool, tee_active); static struct net *pick_net(struct sk_buff *skb) { @@ -91,6 +93,8 @@ tee_tg4(struct sk_buff *skb, const struct xt_target_param *par) const struct xt_tee_tginfo *info = par->targinfo; struct iphdr *iph; + if (percpu_read(tee_active)) + return XT_CONTINUE; /* * Copy the skb, and route the copy. Will later return %XT_CONTINUE for * the original skb, which should continue on its way as if nothing has @@ -125,24 +129,13 @@ tee_tg4(struct sk_buff *skb, const struct xt_target_param *par) --iph->ttl; ip_send_check(iph); - /* - * Xtables is not reentrant currently, so a choice has to be made: - * 1. return absolute verdict for the original and let the cloned - * packet travel through the chains - * 2. let the original continue travelling and not pass the clone - * to Xtables. - * #2 is chosen. Normally, we would use ip_local_out for the clone. - * Because iph->check is already correct and we don't pass it to - * Xtables anyway, a shortcut to dst_output [forwards to ip_output] can - * be taken. %IPSKB_REROUTED needs to be set so that ip_output does not - * invoke POSTROUTING on the cloned packet. 
- */ - IPCB(skb)->flags |= IPSKB_REROUTED; - if (tee_tg_route4(skb, info)) - ip_output(skb); - else + if (tee_tg_route4(skb, info)) { + percpu_write(tee_active, true); + ip_local_out(skb); + percpu_write(tee_active, false); + } else { kfree_skb(skb); - + } return XT_CONTINUE; } @@ -177,6 +170,8 @@ tee_tg6(struct sk_buff *skb, const struct xt_target_param *par) { const struct xt_tee_tginfo *info = par->targinfo; + if (percpu_read(tee_active)) + return XT_CONTINUE; skb = pskb_copy(skb, GFP_ATOMIC); if (skb == NULL) return XT_CONTINUE; @@ -192,12 +187,13 @@ tee_tg6(struct sk_buff *skb, const struct xt_target_param *par) struct ipv6hdr *iph = ipv6_hdr(skb); --iph->hop_limit; } - IP6CB(skb)->flags |= IP6SKB_REROUTED; - if (tee_tg_route6(skb, info)) - ip6_output(skb); - else + if (tee_tg_route6(skb, info)) { + percpu_write(tee_active, true); + ip6_local_out(skb); + percpu_write(tee_active, false); + } else { kfree_skb(skb); - + } return XT_CONTINUE; } #endif /* WITH_IPV6 */ -- cgit v1.2.2 From 5b775eb1c04c2ef33f5e17035e368214214ef9c2 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 19 Apr 2010 16:07:47 +0200 Subject: netfilter: xtables: remove old comments about reentrancy Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ip_tables.c | 2 -- net/ipv4/netfilter/ipt_REJECT.c | 3 --- net/ipv6/netfilter/ip6_tables.c | 2 -- net/ipv6/netfilter/ip6t_REJECT.c | 3 --- 4 files changed, 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 70900ecf88e2..bb5e0d9b8137 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -434,8 +434,6 @@ ipt_do_table(struct sk_buff *skb, continue; } - /* Targets which reenter must return - abs. verdicts */ tgpar.target = t->u.kernel.target; tgpar.targinfo = t->data; diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index b026014e7a5b..038fa0bb8f6b 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -139,9 +139,6 @@ reject_tg(struct sk_buff *skb, const struct xt_target_param *par) { const struct ipt_reject_info *reject = par->targinfo; - /* WARNING: This code causes reentry within iptables. - This means that the iptables jump stack is now crap. We - must return an absolute verdict. --RR */ switch (reject->with) { case IPT_ICMP_NET_UNREACHABLE: send_unreach(skb, ICMP_NET_UNREACH); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 2a2770bcd640..7afa11773164 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -451,8 +451,6 @@ ip6t_do_table(struct sk_buff *skb, continue; } - /* Targets which reenter must return - abs. verdicts */ tgpar.target = t->u.kernel.target; tgpar.targinfo = t->data; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 55b9b2da1340..dad97622ed72 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -179,9 +179,6 @@ reject_tg6(struct sk_buff *skb, const struct xt_target_param *par) struct net *net = dev_net((par->in != NULL) ? par->in : par->out); pr_debug("%s: medium point\n", __func__); - /* WARNING: This code causes reentry within ip6tables. - This means that the ip6tables jump stack is now crap. We - must return an absolute verdict. 
--RR */ switch (reject->with) { case IP6T_ICMP6_NO_ROUTE: send_unreach(net, skb, ICMPV6_NOROUTE, par->hooknum); -- cgit v1.2.2 From 88751275b8e867d756e4f86ae92afe0232de129f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 19 Apr 2010 05:07:33 +0000 Subject: rps: shortcut net_rps_action() net_rps_action() is a bit expensive on NR_CPUS=64..4096 kernels, even if RPS is not active. Tom Herbert used two bitmasks to hold information needed to send IPI, but a single LIFO list seems more appropriate. Move all RPS logic into net_rps_action() to cleanup net_rx_action() code (remove two ifdefs) Move rps_remote_softirq_cpus into softnet_data to share its first cache line, filling an existing hole. In a future patch, we could call net_rps_action() from process_backlog() to make sure we send IPI before handling this cpu backlog. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 79 ++++++++++++++++++++++++---------------------------------- 1 file changed, 32 insertions(+), 47 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 8eb50e2292fb..05a2b294906b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2345,21 +2345,6 @@ done: return cpu; } -/* - * This structure holds the per-CPU mask of CPUs for which IPIs are scheduled - * to be sent to kick remote softirq processing. There are two masks since - * the sending of IPIs must be done with interrupts enabled. The select field - * indicates the current mask that enqueue_backlog uses to schedule IPIs. - * select is flipped before net_rps_action is called while still under lock, - * net_rps_action then uses the non-selected mask to send the IPIs and clears - * it without conflicting with enqueue_backlog operation. - */ -struct rps_remote_softirq_cpus { - cpumask_t mask[2]; - int select; -}; -static DEFINE_PER_CPU(struct rps_remote_softirq_cpus, rps_remote_softirq_cpus); - /* Called from hardirq (IPI) context */ static void trigger_softirq(void *data) { @@ -2402,10 +2387,12 @@ enqueue: if (napi_schedule_prep(&queue->backlog)) { #ifdef CONFIG_RPS if (cpu != smp_processor_id()) { - struct rps_remote_softirq_cpus *rcpus = - &__get_cpu_var(rps_remote_softirq_cpus); + struct softnet_data *myqueue; + + myqueue = &__get_cpu_var(softnet_data); + queue->rps_ipi_next = myqueue->rps_ipi_list; + myqueue->rps_ipi_list = queue; - cpu_set(cpu, rcpus->mask[rcpus->select]); __raise_softirq_irqoff(NET_RX_SOFTIRQ); goto enqueue; } @@ -2910,7 +2897,9 @@ int netif_receive_skb(struct sk_buff *skb) } EXPORT_SYMBOL(netif_receive_skb); -/* Network device is going away, flush any packets still pending */ +/* Network device is going away, flush any packets still pending + * Called with irqs disabled. + */ static void flush_backlog(void *arg) { struct net_device *dev = arg; @@ -3338,24 +3327,33 @@ void netif_napi_del(struct napi_struct *napi) } EXPORT_SYMBOL(netif_napi_del); -#ifdef CONFIG_RPS /* - * net_rps_action sends any pending IPI's for rps. This is only called from - * softirq and interrupts must be enabled. + * net_rps_action sends any pending IPI's for rps. + * Note: called with local irq disabled, but exits with local irq enabled. */ -static void net_rps_action(cpumask_t *mask) +static void net_rps_action(void) { - int cpu; +#ifdef CONFIG_RPS + struct softnet_data *locqueue = &__get_cpu_var(softnet_data); + struct softnet_data *remqueue = locqueue->rps_ipi_list; - /* Send pending IPI's to kick RPS processing on remote cpus. 
*/ - for_each_cpu_mask_nr(cpu, *mask) { - struct softnet_data *queue = &per_cpu(softnet_data, cpu); - if (cpu_online(cpu)) - __smp_call_function_single(cpu, &queue->csd, 0); - } - cpus_clear(*mask); -} + if (remqueue) { + locqueue->rps_ipi_list = NULL; + + local_irq_enable(); + + /* Send pending IPI's to kick RPS processing on remote cpus. */ + while (remqueue) { + struct softnet_data *next = remqueue->rps_ipi_next; + if (cpu_online(remqueue->cpu)) + __smp_call_function_single(remqueue->cpu, + &remqueue->csd, 0); + remqueue = next; + } + } else #endif + local_irq_enable(); +} static void net_rx_action(struct softirq_action *h) { @@ -3363,10 +3361,6 @@ static void net_rx_action(struct softirq_action *h) unsigned long time_limit = jiffies + 2; int budget = netdev_budget; void *have; -#ifdef CONFIG_RPS - int select; - struct rps_remote_softirq_cpus *rcpus; -#endif local_irq_disable(); @@ -3429,17 +3423,7 @@ static void net_rx_action(struct softirq_action *h) netpoll_poll_unlock(have); } out: -#ifdef CONFIG_RPS - rcpus = &__get_cpu_var(rps_remote_softirq_cpus); - select = rcpus->select; - rcpus->select ^= 1; - - local_irq_enable(); - - net_rps_action(&rcpus->mask[select]); -#else - local_irq_enable(); -#endif + net_rps_action(); #ifdef CONFIG_NET_DMA /* @@ -5839,6 +5823,7 @@ static int __init net_dev_init(void) queue->csd.func = trigger_softirq; queue->csd.info = queue; queue->csd.flags = 0; + queue->cpu = i; #endif queue->backlog.poll = process_backlog; -- cgit v1.2.2 From b4bb5c3fd9333024044362df67e23e96158489ed Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 19 Apr 2010 10:48:38 +0200 Subject: mac80211: remove bogus TX agg state assignment When the addba timer expires but has no work to do, it should not affect the state machine. If it does, TX will not see the successfully established and we can also crash trying to re-establish the session. Cc: stable@kernel.org [2.6.32, 2.6.33] Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-tx.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 5538e1b4a697..944a8a92207b 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -183,7 +183,6 @@ static void sta_addba_resp_timer_expired(unsigned long data) HT_AGG_STATE_REQ_STOP_BA_MSK)) != HT_ADDBA_REQUESTED_MSK) { spin_unlock_bh(&sta->lock); - *state = HT_AGG_STATE_IDLE; #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "timer expired on tid %d but we are not " "(or no longer) expecting addBA response there", -- cgit v1.2.2 From fe6f212ce12341df18ef9b890bea739b4547157b Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Mon, 19 Apr 2010 10:46:31 -0700 Subject: mac80211: pass HT changes to driver when off channel Since "mac80211: make off-channel work generic" drivers have not been notified of configuration changes after association or authentication. This caused more dependence on current state to ensure driver will be notified when configuration changes occur. One such problem arises if off-channel is in progress when HT information changes. Since HT is only enabled on the "oper_channel" the driver will never be notified of this change. Usually the driver is notified soon after of a BSS information change (BSS_CHANGED_HT) ... but since the driver did not get a notification that this is a HT channel the new BSS information does not make sense. Fix this by also changing the off-channel information when HT is enabled and thus cause driver to be notified correctly. 
This fixes a problem in 4965 when associated with 5GHz 40MHz channel. Without this patch the system can associate but is unable to transfer any data, not even ping. See http://bugzilla.intellinuxwireless.org/show_bug.cgi?id=2158 Signed-off-by: Reinette Chatre Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index be5f723d643a..8a9650343f26 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -167,6 +167,8 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, ht_changed = conf_is_ht(&local->hw.conf) != enable_ht || channel_type != local->hw.conf.channel_type; + if (local->tmp_channel) + local->tmp_channel_type = channel_type; local->oper_channel_type = channel_type; if (ht_changed) { -- cgit v1.2.2 From 2aab4c273ad837fbcf2955aee32b9ec4706c2521 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 19 Apr 2010 11:00:24 +0200 Subject: mac80211: fix stopping RX BA session from timer Kalle reported that his system deadlocks since my recent work in this area. The reason quickly became apparent: we try to cancel_timer_sync() a timer from within itself. Fix that by making the function aware of the context it is called from. Reported-by: Kalle Valo Signed-off-by: Johannes Berg Tested-by: Kalle Valo Signed-off-by: John W. Linville --- net/mac80211/agg-rx.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 53233ab50f65..1771dd9bd137 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -18,8 +18,9 @@ #include "ieee80211_i.h" #include "driver-ops.h" -void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, - u16 initiator, u16 reason) +static void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, + u16 initiator, u16 reason, + bool from_timer) { struct ieee80211_local *local = sta->local; struct tid_ampdu_rx *tid_rx; @@ -69,10 +70,17 @@ void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, spin_unlock_bh(&sta->lock); - del_timer_sync(&tid_rx->session_timer); + if (!from_timer) + del_timer_sync(&tid_rx->session_timer); kfree(tid_rx); } +void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, + u16 initiator, u16 reason) +{ + ___ieee80211_stop_rx_ba_session(sta, tid, initiator, reason, false); +} + /* * After accepting the AddBA Request we activated a timer, * resetting it after each frame that arrives from the originator. @@ -91,8 +99,8 @@ static void sta_rx_agg_session_timer_expired(unsigned long data) #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid); #endif - __ieee80211_stop_rx_ba_session(sta, *ptid, WLAN_BACK_RECIPIENT, - WLAN_REASON_QSTA_TIMEOUT); + ___ieee80211_stop_rx_ba_session(sta, *ptid, WLAN_BACK_RECIPIENT, + WLAN_REASON_QSTA_TIMEOUT, true); } static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid, -- cgit v1.2.2 From 3393a608c4979a94d1887efc05b792849d361a65 Mon Sep 17 00:00:00 2001 From: Juuso Oikarinen Date: Mon, 19 Apr 2010 10:12:52 +0300 Subject: mac80211: Prevent running sta_cleanup timer unnecessarily MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sta_cleanup timer is used to periodically expire buffered frames from the tx buf. The timer is executing periodically, regardless of the need for it. This is wasting resources. 
Fix this simply by not restarting the sta_cleanup timer if the tx buffer was empty. Restart the timer when there is some more tx-traffic. Cc: Janne Ylälehto Signed-off-by: Juuso Oikarinen Signed-off-by: John W. Linville --- net/mac80211/sta_info.c | 13 ++++++++++--- net/mac80211/tx.c | 7 +++++++ 2 files changed, 17 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index ff0eb948917b..3de7a2260d65 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -575,7 +575,7 @@ static int sta_info_buffer_expired(struct sta_info *sta, } -static void sta_info_cleanup_expire_buffered(struct ieee80211_local *local, +static bool sta_info_cleanup_expire_buffered(struct ieee80211_local *local, struct sta_info *sta) { unsigned long flags; @@ -583,7 +583,7 @@ static void sta_info_cleanup_expire_buffered(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata; if (skb_queue_empty(&sta->ps_tx_buf)) - return; + return false; for (;;) { spin_lock_irqsave(&sta->ps_tx_buf.lock, flags); @@ -608,6 +608,8 @@ static void sta_info_cleanup_expire_buffered(struct ieee80211_local *local, if (skb_queue_empty(&sta->ps_tx_buf)) sta_info_clear_tim_bit(sta); } + + return true; } static int __must_check __sta_info_destroy(struct sta_info *sta) @@ -755,15 +757,20 @@ static void sta_info_cleanup(unsigned long data) { struct ieee80211_local *local = (struct ieee80211_local *) data; struct sta_info *sta; + bool timer_needed = false; rcu_read_lock(); list_for_each_entry_rcu(sta, &local->sta_list, list) - sta_info_cleanup_expire_buffered(local, sta); + if (sta_info_cleanup_expire_buffered(local, sta)) + timer_needed = true; rcu_read_unlock(); if (local->quiescing) return; + if (!timer_needed) + return; + local->sta_cleanup.expires = round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL); add_timer(&local->sta_cleanup); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 2cb77267f733..e2aa972d584f 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -429,6 +429,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) struct sta_info *sta = tx->sta; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; + struct ieee80211_local *local = tx->local; u32 staflags; if (unlikely(!sta || @@ -476,6 +477,12 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) info->control.vif = &tx->sdata->vif; info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; skb_queue_tail(&sta->ps_tx_buf, tx->skb); + + if (!timer_pending(&local->sta_cleanup)) + mod_timer(&local->sta_cleanup, + round_jiffies(jiffies + + STA_INFO_CLEANUP_INTERVAL)); + return TX_QUEUED; } #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG -- cgit v1.2.2 From 67e0f392779e35a96c43bc240ef5d30a701d153e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 19 Apr 2010 11:03:13 +0200 Subject: mac80211: add missing newline One HT debugging printk is missing a newline, add it. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/agg-tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 6bb4d0a1e5c5..7dfe833d9716 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -186,7 +186,7 @@ static void sta_addba_resp_timer_expired(unsigned long data) *state = HT_AGG_STATE_IDLE; #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "timer expired on tid %d but we are not " - "(or no longer) expecting addBA response there", + "(or no longer) expecting addBA response there\n", tid); #endif return; -- cgit v1.2.2 From f5acb907dc24c3822f408211bad1cd6e5d0433cf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 19 Apr 2010 14:40:57 -0700 Subject: rps: static functions store_rps_map() & store_rps_dev_flow_table_cnt() are static. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 143052a22b9b..c57c4b228bb5 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -550,7 +550,7 @@ static void rps_map_release(struct rcu_head *rcu) kfree(map); } -ssize_t store_rps_map(struct netdev_rx_queue *queue, +static ssize_t store_rps_map(struct netdev_rx_queue *queue, struct rx_queue_attribute *attribute, const char *buf, size_t len) { @@ -635,7 +635,7 @@ static void rps_dev_flow_table_release(struct rcu_head *rcu) schedule_work(&table->free_work); } -ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, +static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, struct rx_queue_attribute *attr, const char *buf, size_t len) { -- cgit v1.2.2 From e36fa2f7e92f25aab2e3d787dcfe3590817f19d3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 19 Apr 2010 21:17:14 +0000 Subject: rps: cleanups struct softnet_data holds many queues, so consistent use "sd" name instead of "queue" is better. Adds a rps_ipi_queued() helper to cleanup enqueue_to_backlog() Adds a _and_irq_disable suffix to net_rps_action() name, as David suggested. incr_input_queue_head() becomes input_queue_head_incr() Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/dev.c | 149 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 80 insertions(+), 69 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 05a2b294906b..7f5755b0a57c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -208,17 +208,17 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)]; } -static inline void rps_lock(struct softnet_data *queue) +static inline void rps_lock(struct softnet_data *sd) { #ifdef CONFIG_RPS - spin_lock(&queue->input_pkt_queue.lock); + spin_lock(&sd->input_pkt_queue.lock); #endif } -static inline void rps_unlock(struct softnet_data *queue) +static inline void rps_unlock(struct softnet_data *sd) { #ifdef CONFIG_RPS - spin_unlock(&queue->input_pkt_queue.lock); + spin_unlock(&sd->input_pkt_queue.lock); #endif } @@ -2346,14 +2346,37 @@ done: } /* Called from hardirq (IPI) context */ -static void trigger_softirq(void *data) +static void rps_trigger_softirq(void *data) { - struct softnet_data *queue = data; - __napi_schedule(&queue->backlog); + struct softnet_data *sd = data; + + __napi_schedule(&sd->backlog); __get_cpu_var(netdev_rx_stat).received_rps++; } + #endif /* CONFIG_RPS */ +/* + * Check if this softnet_data structure is another cpu one + * If yes, queue it to our IPI list and return 1 + * If no, return 0 + */ +static int rps_ipi_queued(struct softnet_data *sd) +{ +#ifdef CONFIG_RPS + struct softnet_data *mysd = &__get_cpu_var(softnet_data); + + if (sd != mysd) { + sd->rps_ipi_next = mysd->rps_ipi_list; + mysd->rps_ipi_list = sd; + + __raise_softirq_irqoff(NET_RX_SOFTIRQ); + return 1; + } +#endif /* CONFIG_RPS */ + return 0; +} + /* * enqueue_to_backlog is called to queue an skb to a per CPU backlog * queue (may be a remote CPU queue). 
@@ -2361,48 +2384,36 @@ static void trigger_softirq(void *data) static int enqueue_to_backlog(struct sk_buff *skb, int cpu, unsigned int *qtail) { - struct softnet_data *queue; + struct softnet_data *sd; unsigned long flags; - queue = &per_cpu(softnet_data, cpu); + sd = &per_cpu(softnet_data, cpu); local_irq_save(flags); __get_cpu_var(netdev_rx_stat).total++; - rps_lock(queue); - if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { - if (queue->input_pkt_queue.qlen) { + rps_lock(sd); + if (sd->input_pkt_queue.qlen <= netdev_max_backlog) { + if (sd->input_pkt_queue.qlen) { enqueue: - __skb_queue_tail(&queue->input_pkt_queue, skb); + __skb_queue_tail(&sd->input_pkt_queue, skb); #ifdef CONFIG_RPS - *qtail = queue->input_queue_head + - queue->input_pkt_queue.qlen; + *qtail = sd->input_queue_head + sd->input_pkt_queue.qlen; #endif - rps_unlock(queue); + rps_unlock(sd); local_irq_restore(flags); return NET_RX_SUCCESS; } /* Schedule NAPI for backlog device */ - if (napi_schedule_prep(&queue->backlog)) { -#ifdef CONFIG_RPS - if (cpu != smp_processor_id()) { - struct softnet_data *myqueue; - - myqueue = &__get_cpu_var(softnet_data); - queue->rps_ipi_next = myqueue->rps_ipi_list; - myqueue->rps_ipi_list = queue; - - __raise_softirq_irqoff(NET_RX_SOFTIRQ); - goto enqueue; - } -#endif - __napi_schedule(&queue->backlog); + if (napi_schedule_prep(&sd->backlog)) { + if (!rps_ipi_queued(sd)) + __napi_schedule(&sd->backlog); } goto enqueue; } - rps_unlock(queue); + rps_unlock(sd); __get_cpu_var(netdev_rx_stat).dropped++; local_irq_restore(flags); @@ -2903,17 +2914,17 @@ EXPORT_SYMBOL(netif_receive_skb); static void flush_backlog(void *arg) { struct net_device *dev = arg; - struct softnet_data *queue = &__get_cpu_var(softnet_data); + struct softnet_data *sd = &__get_cpu_var(softnet_data); struct sk_buff *skb, *tmp; - rps_lock(queue); - skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp) + rps_lock(sd); + skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) if (skb->dev == dev) { - __skb_unlink(skb, &queue->input_pkt_queue); + __skb_unlink(skb, &sd->input_pkt_queue); kfree_skb(skb); - incr_input_queue_head(queue); + input_queue_head_incr(sd); } - rps_unlock(queue); + rps_unlock(sd); } static int napi_gro_complete(struct sk_buff *skb) @@ -3219,23 +3230,23 @@ EXPORT_SYMBOL(napi_gro_frags); static int process_backlog(struct napi_struct *napi, int quota) { int work = 0; - struct softnet_data *queue = &__get_cpu_var(softnet_data); + struct softnet_data *sd = &__get_cpu_var(softnet_data); napi->weight = weight_p; do { struct sk_buff *skb; local_irq_disable(); - rps_lock(queue); - skb = __skb_dequeue(&queue->input_pkt_queue); + rps_lock(sd); + skb = __skb_dequeue(&sd->input_pkt_queue); if (!skb) { __napi_complete(napi); - rps_unlock(queue); + rps_unlock(sd); local_irq_enable(); break; } - incr_input_queue_head(queue); - rps_unlock(queue); + input_queue_head_incr(sd); + rps_unlock(sd); local_irq_enable(); __netif_receive_skb(skb); @@ -3331,24 +3342,25 @@ EXPORT_SYMBOL(netif_napi_del); * net_rps_action sends any pending IPI's for rps. * Note: called with local irq disabled, but exits with local irq enabled. 
*/ -static void net_rps_action(void) +static void net_rps_action_and_irq_disable(void) { #ifdef CONFIG_RPS - struct softnet_data *locqueue = &__get_cpu_var(softnet_data); - struct softnet_data *remqueue = locqueue->rps_ipi_list; + struct softnet_data *sd = &__get_cpu_var(softnet_data); + struct softnet_data *remsd = sd->rps_ipi_list; - if (remqueue) { - locqueue->rps_ipi_list = NULL; + if (remsd) { + sd->rps_ipi_list = NULL; local_irq_enable(); /* Send pending IPI's to kick RPS processing on remote cpus. */ - while (remqueue) { - struct softnet_data *next = remqueue->rps_ipi_next; - if (cpu_online(remqueue->cpu)) - __smp_call_function_single(remqueue->cpu, - &remqueue->csd, 0); - remqueue = next; + while (remsd) { + struct softnet_data *next = remsd->rps_ipi_next; + + if (cpu_online(remsd->cpu)) + __smp_call_function_single(remsd->cpu, + &remsd->csd, 0); + remsd = next; } } else #endif @@ -3423,7 +3435,7 @@ static void net_rx_action(struct softirq_action *h) netpoll_poll_unlock(have); } out: - net_rps_action(); + net_rps_action_and_irq_disable(); #ifdef CONFIG_NET_DMA /* @@ -5595,7 +5607,7 @@ static int dev_cpu_callback(struct notifier_block *nfb, /* Process offline CPU's input_pkt_queue */ while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { netif_rx(skb); - incr_input_queue_head(oldsd); + input_queue_head_incr(oldsd); } return NOTIFY_OK; @@ -5812,24 +5824,23 @@ static int __init net_dev_init(void) */ for_each_possible_cpu(i) { - struct softnet_data *queue; + struct softnet_data *sd = &per_cpu(softnet_data, i); - queue = &per_cpu(softnet_data, i); - skb_queue_head_init(&queue->input_pkt_queue); - queue->completion_queue = NULL; - INIT_LIST_HEAD(&queue->poll_list); + skb_queue_head_init(&sd->input_pkt_queue); + sd->completion_queue = NULL; + INIT_LIST_HEAD(&sd->poll_list); #ifdef CONFIG_RPS - queue->csd.func = trigger_softirq; - queue->csd.info = queue; - queue->csd.flags = 0; - queue->cpu = i; + sd->csd.func = rps_trigger_softirq; + sd->csd.info = sd; + sd->csd.flags = 0; + sd->cpu = i; #endif - queue->backlog.poll = process_backlog; - queue->backlog.weight = weight_p; - queue->backlog.gro_list = NULL; - queue->backlog.gro_count = 0; + sd->backlog.poll = process_backlog; + sd->backlog.weight = weight_p; + sd->backlog.gro_list = NULL; + sd->backlog.gro_count = 0; } dev_boot_phase = 0; -- cgit v1.2.2 From b249dcb82d327e419d3cb45773b146ebb5faf419 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 19 Apr 2010 21:56:38 +0000 Subject: rps: consistent rxhash In case we compute a software skb->rxhash, we can generate a consistent hash : Its value will be the same in both flow directions. This helps some workloads, like conntracking, since the same state needs to be accessed in both directions. tbench + RFS + this patch gives better results than tbench with default kernel configuration (no RPS, no RFS) Also fixed some sparse warnings. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/dev.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 7f5755b0a57c..0d78e0454a6d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1974,7 +1974,7 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) if (skb->sk && skb->sk->sk_hash) hash = skb->sk->sk_hash; else - hash = skb->protocol; + hash = (__force u16) skb->protocol; hash = jhash_1word(hash, hashrnd); @@ -2253,8 +2253,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, ip = (struct iphdr *) skb->data; ip_proto = ip->protocol; - addr1 = ip->saddr; - addr2 = ip->daddr; + addr1 = (__force u32) ip->saddr; + addr2 = (__force u32) ip->daddr; ihl = ip->ihl; break; case __constant_htons(ETH_P_IPV6): @@ -2263,8 +2263,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, ip6 = (struct ipv6hdr *) skb->data; ip_proto = ip6->nexthdr; - addr1 = ip6->saddr.s6_addr32[3]; - addr2 = ip6->daddr.s6_addr32[3]; + addr1 = (__force u32) ip6->saddr.s6_addr32[3]; + addr2 = (__force u32) ip6->daddr.s6_addr32[3]; ihl = (40 >> 2); break; default: @@ -2279,14 +2279,25 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, case IPPROTO_AH: case IPPROTO_SCTP: case IPPROTO_UDPLITE: - if (pskb_may_pull(skb, (ihl * 4) + 4)) - ports = *((u32 *) (skb->data + (ihl * 4))); + if (pskb_may_pull(skb, (ihl * 4) + 4)) { + __be16 *hports = (__be16 *) (skb->data + (ihl * 4)); + u32 sport, dport; + + sport = (__force u16) hports[0]; + dport = (__force u16) hports[1]; + if (dport < sport) + swap(sport, dport); + ports = (sport << 16) + dport; + } break; default: break; } + /* get a consistent hash (same value on both flow directions) */ + if (addr2 < addr1) + swap(addr1, addr2); skb->rxhash = jhash_3words(addr1, addr2, ports, hashrnd); if (!skb->rxhash) skb->rxhash = 1; -- cgit v1.2.2 From ab9304717f7624c41927f442e6b6d418b2d8b3e4 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 20 Apr 2010 01:45:37 -0700 Subject: net: emphasize rtnl lock required in call_netdevice_notifiers Since netdev_chain is guarded by rtnl_lock, ASSERT_RTNL should be present here to make sure that all callers of call_netdevice_notifiers does the locking properly. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 0d78e0454a6d..b31d5d69a467 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1435,6 +1435,7 @@ EXPORT_SYMBOL(unregister_netdevice_notifier); int call_netdevice_notifiers(unsigned long val, struct net_device *dev) { + ASSERT_RTNL(); return raw_notifier_call_chain(&netdev_chain, val, dev); } -- cgit v1.2.2 From 22265a5c3c103cf8c50be62e6c90d045eb649e6d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 20 Apr 2010 15:07:32 +0200 Subject: netfilter: xt_TEE: resolve oif using netdevice notifiers Replace the runtime oif name resolving by netdevice notifier based resolving. When an oif is given, a netdevice notifier is registered to resolve the name on NETDEV_REGISTER or NETDEV_CHANGE and unresolve it again on NETDEV_UNREGISTER or NETDEV_CHANGE to a different name. 
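For illustration only, not part of this patch: the notifier pattern reduced to a stand-alone module. The module name, the hard-coded "eth0" and the exact bookkeeping are invented for the sketch, but the registration calls and the callback convention (the payload pointer being the struct net_device itself) are the ones this series is written against.

#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/notifier.h>
#include <linux/string.h>

static int watched_ifindex = -1;        /* -1: "eth0" not currently resolvable */

static int ifwatch_event(struct notifier_block *this, unsigned long event,
                         void *ptr)
{
        struct net_device *dev = ptr;

        switch (event) {
        case NETDEV_REGISTER:
        case NETDEV_CHANGENAME:
                if (strcmp(dev->name, "eth0") == 0)
                        watched_ifindex = dev->ifindex; /* (re)resolve */
                else if (dev->ifindex == watched_ifindex)
                        watched_ifindex = -1;           /* renamed away */
                break;
        case NETDEV_UNREGISTER:
                if (dev->ifindex == watched_ifindex)
                        watched_ifindex = -1;           /* unresolve */
                break;
        }
        return NOTIFY_DONE;
}

static struct notifier_block ifwatch_notifier = {
        .notifier_call  = ifwatch_event,
};

static int __init ifwatch_init(void)
{
        return register_netdevice_notifier(&ifwatch_notifier);
}

static void __exit ifwatch_exit(void)
{
        unregister_netdevice_notifier(&ifwatch_notifier);
}

module_init(ifwatch_init);
module_exit(ifwatch_exit);
MODULE_LICENSE("GPL");

The patch below applies the same idea per rule: the notifier is registered in checkentry when an oif name is configured and unregistered in the new destroy hook, so the fast path only reads a cached ifindex instead of doing a dev_get_by_name() lookup per packet.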
Signed-off-by: Patrick McHardy --- net/netfilter/xt_TEE.c | 103 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 80 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 842e7012eca7..49da6c05f4e0 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -32,6 +33,12 @@ # define WITH_IPV6 1 #endif +struct xt_tee_priv { + struct notifier_block notifier; + struct xt_tee_tginfo *tginfo; + int oif; +}; + static const union nf_inet_addr tee_zero_address; static DEFINE_PER_CPU(bool, tee_active); @@ -49,20 +56,6 @@ static struct net *pick_net(struct sk_buff *skb) return &init_net; } -static bool tee_tg_route_oif(struct flowi *f, struct net *net, - const struct xt_tee_tginfo *info) -{ - const struct net_device *dev; - - if (*info->oif != '\0') - return true; - dev = dev_get_by_name(net, info->oif); - if (dev == NULL) - return false; - f->oif = dev->ifindex; - return true; -} - static bool tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info) { @@ -72,8 +65,11 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info) struct flowi fl; memset(&fl, 0, sizeof(fl)); - if (!tee_tg_route_oif(&fl, net, info)) - return false; + if (info->priv) { + if (info->priv->oif == -1) + return false; + fl.oif = info->priv->oif; + } fl.nl_u.ip4_u.daddr = info->gw.ip; fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); fl.nl_u.ip4_u.scope = RT_SCOPE_UNIVERSE; @@ -149,8 +145,11 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info) struct flowi fl; memset(&fl, 0, sizeof(fl)); - if (!tee_tg_route_oif(&fl, net, info)) - return false; + if (info->priv) { + if (info->priv->oif == -1) + return false; + fl.oif = info->priv->oif; + } fl.nl_u.ip6_u.daddr = info->gw.in6; fl.nl_u.ip6_u.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) | (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]; @@ -198,15 +197,71 @@ tee_tg6(struct sk_buff *skb, const struct xt_target_param *par) } #endif /* WITH_IPV6 */ +static int tee_netdev_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *dev = ptr; + struct xt_tee_priv *priv; + + priv = container_of(this, struct xt_tee_priv, notifier); + switch (event) { + case NETDEV_REGISTER: + if (!strcmp(dev->name, priv->tginfo->oif)) + priv->oif = dev->ifindex; + break; + case NETDEV_UNREGISTER: + if (dev->ifindex == priv->oif) + priv->oif = -1; + break; + case NETDEV_CHANGENAME: + if (!strcmp(dev->name, priv->tginfo->oif)) + priv->oif = dev->ifindex; + else if (dev->ifindex == priv->oif) + priv->oif = -1; + break; + } + + return NOTIFY_DONE; +} + static int tee_tg_check(const struct xt_tgchk_param *par) { - const struct xt_tee_tginfo *info = par->targinfo; + struct xt_tee_tginfo *info = par->targinfo; + struct xt_tee_priv *priv; - if (info->oif[sizeof(info->oif)-1] != '\0') - return -EINVAL; /* 0.0.0.0 and :: not allowed */ - return (memcmp(&info->gw, &tee_zero_address, - sizeof(tee_zero_address)) == 0) ? 
-EINVAL : 0; + if (memcmp(&info->gw, &tee_zero_address, + sizeof(tee_zero_address)) == 0) + return -EINVAL; + + if (info->oif[0]) { + if (info->oif[sizeof(info->oif)-1] != '\0') + return -EINVAL; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (priv == NULL) + return -ENOMEM; + + priv->tginfo = info; + priv->oif = -1; + priv->notifier.notifier_call = tee_netdev_event; + info->priv = priv; + + register_netdevice_notifier(&priv->notifier); + } else + info->priv = NULL; + + return 0; +} + +static void tee_tg_destroy(const struct xt_tgdtor_param *par) +{ + struct xt_tee_tginfo *info = par->targinfo; + + if (info->priv) { + unregister_netdevice_notifier(&info->priv->notifier); + kfree(info->priv); + } } static struct xt_target tee_tg_reg[] __read_mostly = { @@ -217,6 +272,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = { .target = tee_tg4, .targetsize = sizeof(struct xt_tee_tginfo), .checkentry = tee_tg_check, + .destroy = tee_tg_destroy, .me = THIS_MODULE, }, #ifdef WITH_IPV6 @@ -227,6 +283,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = { .target = tee_tg6, .targetsize = sizeof(struct xt_tee_tginfo), .checkentry = tee_tg_check, + .destroy = tee_tg_destroy, .me = THIS_MODULE, }, #endif -- cgit v1.2.2 From 6c79bf0f2440fd250c8fce8d9b82fcf03d4e8350 Mon Sep 17 00:00:00 2001 From: Bart De Schuymer Date: Tue, 20 Apr 2010 16:22:01 +0200 Subject: netfilter: bridge-netfilter: fix refragmenting IP traffic encapsulated in PPPoE traffic The MTU for IP traffic encapsulated inside PPPoE traffic is smaller than the MTU of the Ethernet device (1500). Connection tracking gathers all IP packets and sometimes will refragment them in ip_fragment(). We then need to subtract the length of the encapsulating header from the mtu used in ip_fragment(). The check in br_nf_dev_queue_xmit() which determines if ip_fragment() has to be called is also updated for the PPPoE-encapsulated packets. nf_bridge_copy_header() is also updated to make sure the PPPoE data length field has the correct value. Signed-off-by: Bart De Schuymer Signed-off-by: Patrick McHardy --- net/bridge/br_netfilter.c | 2 +- net/ipv4/ip_output.c | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 6b80ebc37667..93f80fefa496 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -745,7 +745,7 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb, static int br_nf_dev_queue_xmit(struct sk_buff *skb) { if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) && - skb->len > skb->dev->mtu && + skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu && !skb_is_gso(skb)) return ip_fragment(skb, br_dev_queue_push_xmit); else diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b0b2e3059f11..d979710684b2 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -469,6 +469,10 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) hlen = iph->ihl * 4; mtu = dst_mtu(&rt->u.dst) - hlen; /* Size of data space */ +#ifdef CONFIG_BRIDGE_NETFILTER + if (skb->nf_bridge) + mtu -= nf_bridge_mtu_reduction(skb); +#endif IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE; /* When frag_list is given, use it. 
First, check its validity: -- cgit v1.2.2 From 03ceedea972a82d343fa5c2528b3952fa9e615d5 Mon Sep 17 00:00:00 2001 From: Daniel Yingqiang Ma Date: Tue, 13 Apr 2010 15:12:07 +0800 Subject: ath9k: Group Key fix for VAPs When I set up multiple VAPs with ath9k, I encountered an issue that the traffic may be lost after a while. The detailed phenomenon is 1. After a while the clients connected to one of these VAPs will get into a state that no broadcast/multicast packets can be transfered successfully while the unicast packets can be transfered normally. 2. Minutes latter the unitcast packets transfer will fail as well, because the ARP entry is expired and it can't be freshed due to the broadcast trouble. It's caused by the group key overwritten and someone discussed this issue in ath9k-devel maillist before, but haven't work out a fix yet. I referred the method in madwifi, and made a patch for ath9k. The method is to set the high bit of the sender(AP)'s address, and associated that mac and the group key. It requires the hardware supports multicast frame key search. It seems true for AR9160. Not sure whether it's the correct way to fix this issue. But it seems to work in my test. The patch is attached, feel free to revise it. Signed-off-by: Daniel Yingqiang ma Signed-off-by: John W. Linville --- net/mac80211/key.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 8160d9c5372e..75705bd41956 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -139,6 +139,7 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) struct ieee80211_sub_if_data, u.ap); + key->conf.ap_addr = sdata->dev->dev_addr; ret = drv_set_key(key->local, SET_KEY, sdata, sta, &key->conf); if (!ret) { -- cgit v1.2.2 From 1289723ef238908ca8d95ff48a46ee0de970f882 Mon Sep 17 00:00:00 2001 From: Holger Schurig Date: Mon, 19 Apr 2010 10:23:57 +0200 Subject: mac80211: sample survey implementation for mac80211 & hwsim This adds the survey function to both mac80211 itself and to mac80211_hwsim. For the latter driver, we simply invent some noise level.A real driver which cannot determine the real channel noise MUST NOT report any noise, especially not a magically conjured one :-) Signed-off-by: Holger Schurig Signed-off-by: John W. 
Linville --- net/mac80211/cfg.c | 12 ++++++++++++ net/mac80211/driver-ops.h | 9 +++++++++ 2 files changed, 21 insertions(+) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 4edd73cbf052..f97dda735cbb 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -410,6 +410,17 @@ static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, return ret; } +static int ieee80211_dump_survey(struct wiphy *wiphy, struct net_device *dev, + int idx, struct survey_info *survey) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + + if (!local->ops->get_survey) + return -EOPNOTSUPP; + + return drv_get_survey(local, idx, survey); +} + static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev, u8 *mac, struct station_info *sinfo) { @@ -1507,6 +1518,7 @@ struct cfg80211_ops mac80211_config_ops = { .change_station = ieee80211_change_station, .get_station = ieee80211_get_station, .dump_station = ieee80211_dump_station, + .dump_survey = ieee80211_dump_survey, #ifdef CONFIG_MAC80211_MESH .add_mpath = ieee80211_add_mpath, .del_mpath = ieee80211_del_mpath, diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index c3d844093a2f..d1f8a7c2225a 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -346,6 +346,15 @@ static inline int drv_ampdu_action(struct ieee80211_local *local, return ret; } +static inline int drv_get_survey(struct ieee80211_local *local, int idx, + struct survey_info *survey) +{ + int ret = -EOPNOTSUPP; + if (local->ops->conf_tx) + ret = local->ops->get_survey(&local->hw, idx, survey); + /* trace_drv_get_survey(local, idx, survey, ret); */ + return ret; +} static inline void drv_rfkill_poll(struct ieee80211_local *local) { -- cgit v1.2.2 From 7bdfcaaff5de368a88a4f784f7283b66c17d051d Mon Sep 17 00:00:00 2001 From: Juuso Oikarinen Date: Tue, 20 Apr 2010 13:15:56 +0300 Subject: mac80211: Fix ieee80211_sta_conn_mon_timer with hw connection monitoring When IEEE80211_HW_CONNECTION_MONITOR is configured by the driver, starting of ieee80211_sta_conn_mon_timer should be prevented, as it is then not needed. This is currently partially the case. As it seems, when a probe-response is received from the AP the timer is still restarted, thus restarting the host based connection keep-alive mechanism. These probe-responses happen at least when scanning while associated. Fix this by preventing starting of the ieee80211_sta_conn_mon_timer in the ieee80211_rx_mgmt_probe_resp function. Signed-off-by: Juuso Oikarinen Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index d11a54c289a2..d811e3fa1d75 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1330,12 +1330,17 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, mutex_lock(&sdata->local->iflist_mtx); ieee80211_recalc_ps(sdata->local, -1); mutex_unlock(&sdata->local->iflist_mtx); + + if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) + return; + /* * We've received a probe response, but are not sure whether * we have or will be receiving any beacons or data, so let's * schedule the timers again, just in case. 
*/ mod_beacon_timer(sdata); + mod_timer(&ifmgd->conn_mon_timer, round_jiffies_up(jiffies + IEEE80211_CONNECTION_IDLE_TIME)); -- cgit v1.2.2 From aa395145165cb06a0d0885221bbe0ce4a564391d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Apr 2010 13:03:51 +0000 Subject: net: sk_sleep() helper Define a new function to return the waitqueue of a "struct sock". static inline wait_queue_head_t *sk_sleep(struct sock *sk) { return sk->sk_sleep; } Change all read occurrences of sk_sleep by a call to this function. Needed for a future RCU conversion. sk_sleep wont be a field directly available. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/atm/common.c | 12 ++++---- net/atm/signaling.c | 2 +- net/atm/svc.c | 62 ++++++++++++++++++++--------------------- net/ax25/af_ax25.c | 8 +++--- net/bluetooth/af_bluetooth.c | 6 ++-- net/bluetooth/bnep/core.c | 8 +++--- net/bluetooth/bnep/netdev.c | 6 ++-- net/bluetooth/cmtp/cmtp.h | 2 +- net/bluetooth/cmtp/core.c | 4 +-- net/bluetooth/hidp/core.c | 10 +++---- net/bluetooth/hidp/hidp.h | 4 +-- net/bluetooth/l2cap.c | 4 +-- net/bluetooth/rfcomm/sock.c | 8 +++--- net/bluetooth/sco.c | 4 +-- net/caif/caif_socket.c | 2 +- net/core/datagram.c | 6 ++-- net/core/sock.c | 16 +++++------ net/core/stream.c | 16 +++++------ net/dccp/output.c | 6 ++-- net/dccp/proto.c | 2 +- net/decnet/af_decnet.c | 26 ++++++++--------- net/ipv4/af_inet.c | 6 ++-- net/ipv4/inet_connection_sock.c | 4 +-- net/ipv4/tcp.c | 2 +- net/irda/af_irda.c | 14 +++++----- net/iucv/af_iucv.c | 12 ++++---- net/llc/af_llc.c | 12 ++++---- net/netfilter/ipvs/ip_vs_sync.c | 2 +- net/netrom/af_netrom.c | 8 +++--- net/rds/af_rds.c | 2 +- net/rds/rds.h | 2 +- net/rds/recv.c | 2 +- net/rds/send.c | 2 +- net/rose/af_rose.c | 8 +++--- net/rxrpc/af_rxrpc.c | 4 +-- net/sctp/socket.c | 20 ++++++------- net/sunrpc/svcsock.c | 24 ++++++++-------- net/tipc/socket.c | 26 ++++++++--------- net/unix/af_unix.c | 10 +++---- net/x25/af_x25.c | 8 +++--- 40 files changed, 191 insertions(+), 191 deletions(-) (limited to 'net') diff --git a/net/atm/common.c b/net/atm/common.c index 97ed94aa0cbc..e3e10e6f8628 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -92,7 +92,7 @@ static void vcc_def_wakeup(struct sock *sk) { read_lock(&sk->sk_callback_lock); if (sk_has_sleeper(sk)) - wake_up(sk->sk_sleep); + wake_up(sk_sleep(sk)); read_unlock(&sk->sk_callback_lock); } @@ -110,7 +110,7 @@ static void vcc_write_space(struct sock *sk) if (vcc_writable(sk)) { if (sk_has_sleeper(sk)) - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible(sk_sleep(sk)); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } @@ -549,7 +549,7 @@ int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, } eff = (size+3) & ~3; /* align to word boundary */ - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); error = 0; while (!(skb = alloc_tx(vcc, eff))) { if (m->msg_flags & MSG_DONTWAIT) { @@ -568,9 +568,9 @@ int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, send_sig(SIGPIPE, current, 0); break; } - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (error) goto out; skb->dev = NULL; /* for paths shared with net_device interfaces */ @@ -595,7 +595,7 @@ unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait) struct atm_vcc *vcc; unsigned int mask; - 
sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; vcc = ATM_SD(sock); diff --git a/net/atm/signaling.c b/net/atm/signaling.c index 6ba6e466ee54..509c8ac02b63 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -131,7 +131,7 @@ static int sigd_send(struct atm_vcc *vcc, struct sk_buff *skb) } sk->sk_ack_backlog++; skb_queue_tail(&sk->sk_receive_queue, skb); - pr_debug("waking sk->sk_sleep 0x%p\n", sk->sk_sleep); + pr_debug("waking sk_sleep(sk) 0x%p\n", sk_sleep(sk)); sk->sk_state_change(sk); as_indicate_complete: release_sock(sk); diff --git a/net/atm/svc.c b/net/atm/svc.c index 3ba9a45a51ac..754ee4791d96 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -49,14 +49,14 @@ static void svc_disconnect(struct atm_vcc *vcc) pr_debug("%p\n", vcc); if (test_bit(ATM_VF_REGIS, &vcc->flags)) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); sigd_enq(vcc, as_close, NULL, NULL, NULL); while (!test_bit(ATM_VF_RELEASED, &vcc->flags) && sigd) { schedule(); - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); } /* beware - socket is still in use by atmsigd until the last as_indicate has been answered */ @@ -125,13 +125,13 @@ static int svc_bind(struct socket *sock, struct sockaddr *sockaddr, } vcc->local = *addr; set_bit(ATM_VF_WAITING, &vcc->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); sigd_enq(vcc, as_bind, NULL, NULL, &vcc->local); while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { schedule(); - prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); clear_bit(ATM_VF_REGIS, &vcc->flags); /* doesn't count */ if (!sigd) { error = -EUNATCH; @@ -201,10 +201,10 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr, } vcc->remote = *addr; set_bit(ATM_VF_WAITING, &vcc->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); sigd_enq(vcc, as_connect, NULL, NULL, &vcc->remote); if (flags & O_NONBLOCK) { - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); sock->state = SS_CONNECTING; error = -EINPROGRESS; goto out; @@ -213,7 +213,7 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr, while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { schedule(); if (!signal_pending(current)) { - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); continue; } @@ -232,14 +232,14 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr, */ sigd_enq(vcc, as_close, NULL, NULL, NULL); while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); schedule(); } if (!sk->sk_err) while (!test_bit(ATM_VF_RELEASED, &vcc->flags) && sigd) { - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); schedule(); } @@ -250,7 +250,7 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr, error = -EINTR; break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (error) goto out; if (!sigd) { @@ -302,13 
+302,13 @@ static int svc_listen(struct socket *sock, int backlog) goto out; } set_bit(ATM_VF_WAITING, &vcc->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); sigd_enq(vcc, as_listen, NULL, NULL, &vcc->local); while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { schedule(); - prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (!sigd) { error = -EUNATCH; goto out; @@ -343,7 +343,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags) while (1) { DEFINE_WAIT(wait); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); while (!(skb = skb_dequeue(&sk->sk_receive_queue)) && sigd) { if (test_bit(ATM_VF_RELEASED, &old_vcc->flags)) @@ -363,10 +363,10 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags) error = -ERESTARTSYS; break; } - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (error) goto out; if (!skb) { @@ -392,17 +392,17 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags) } /* wait should be short, so we ignore the non-blocking flag */ set_bit(ATM_VF_WAITING, &new_vcc->flags); - prepare_to_wait(sk_atm(new_vcc)->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk_atm(new_vcc)), &wait, TASK_UNINTERRUPTIBLE); sigd_enq(new_vcc, as_accept, old_vcc, NULL, NULL); while (test_bit(ATM_VF_WAITING, &new_vcc->flags) && sigd) { release_sock(sk); schedule(); lock_sock(sk); - prepare_to_wait(sk_atm(new_vcc)->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk_atm(new_vcc)), &wait, TASK_UNINTERRUPTIBLE); } - finish_wait(sk_atm(new_vcc)->sk_sleep, &wait); + finish_wait(sk_sleep(sk_atm(new_vcc)), &wait); if (!sigd) { error = -EUNATCH; goto out; @@ -438,14 +438,14 @@ int svc_change_qos(struct atm_vcc *vcc, struct atm_qos *qos) DEFINE_WAIT(wait); set_bit(ATM_VF_WAITING, &vcc->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); sigd_enq2(vcc, as_modify, NULL, NULL, &vcc->local, qos, 0); while (test_bit(ATM_VF_WAITING, &vcc->flags) && !test_bit(ATM_VF_RELEASED, &vcc->flags) && sigd) { schedule(); - prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (!sigd) return -EUNATCH; return -sk->sk_err; @@ -534,20 +534,20 @@ static int svc_addparty(struct socket *sock, struct sockaddr *sockaddr, lock_sock(sk); set_bit(ATM_VF_WAITING, &vcc->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); sigd_enq(vcc, as_addparty, NULL, NULL, (struct sockaddr_atmsvc *) sockaddr); if (flags & O_NONBLOCK) { - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); error = -EINPROGRESS; goto out; } pr_debug("added wait queue\n"); while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { schedule(); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); error = xchg(&sk->sk_err_soft, 0); out: 
release_sock(sk); @@ -563,13 +563,13 @@ static int svc_dropparty(struct socket *sock, int ep_ref) lock_sock(sk); set_bit(ATM_VF_WAITING, &vcc->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); sigd_enq2(vcc, as_dropparty, NULL, NULL, NULL, NULL, ep_ref); while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { schedule(); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (!sigd) { error = -EUNATCH; goto out; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 65c5801261f9..cfdfd7e2a172 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1281,7 +1281,7 @@ static int __must_check ax25_connect(struct socket *sock, DEFINE_WAIT(wait); for (;;) { - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (sk->sk_state != TCP_SYN_SENT) break; @@ -1294,7 +1294,7 @@ static int __must_check ax25_connect(struct socket *sock, err = -ERESTARTSYS; break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err) goto out_release; @@ -1346,7 +1346,7 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags) * hooked into the SABM we saved */ for (;;) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); skb = skb_dequeue(&sk->sk_receive_queue); if (skb) break; @@ -1364,7 +1364,7 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags) err = -ERESTARTSYS; break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err) goto out; diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 404a8500fd03..421c45bd1b95 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -288,7 +288,7 @@ unsigned int bt_sock_poll(struct file * file, struct socket *sock, poll_table *w BT_DBG("sock %p, sk %p", sock, sk); - poll_wait(file, sk->sk_sleep, wait); + poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == BT_LISTEN) return bt_accept_poll(sk); @@ -378,7 +378,7 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo) BT_DBG("sk %p", sk); - add_wait_queue(sk->sk_sleep, &wait); + add_wait_queue(sk_sleep(sk), &wait); while (sk->sk_state != state) { set_current_state(TASK_INTERRUPTIBLE); @@ -401,7 +401,7 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo) break; } set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); return err; } EXPORT_SYMBOL(bt_sock_wait_state); diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 8062dad6d10d..f10b41fb05a0 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -474,7 +474,7 @@ static int bnep_session(void *arg) set_user_nice(current, -15); init_waitqueue_entry(&wait, current); - add_wait_queue(sk->sk_sleep, &wait); + add_wait_queue(sk_sleep(sk), &wait); while (!atomic_read(&s->killed)) { set_current_state(TASK_INTERRUPTIBLE); @@ -496,7 +496,7 @@ static int bnep_session(void *arg) schedule(); } set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); /* Cleanup session */ down_write(&bnep_session_sem); @@ -507,7 +507,7 @@ static int bnep_session(void *arg) /* Wakeup user-space polling for socket errors 
*/ s->sock->sk->sk_err = EUNATCH; - wake_up_interruptible(s->sock->sk->sk_sleep); + wake_up_interruptible(sk_sleep(s->sock->sk)); /* Release the socket */ fput(s->sock->file); @@ -638,7 +638,7 @@ int bnep_del_connection(struct bnep_conndel_req *req) /* Kill session thread */ atomic_inc(&s->killed); - wake_up_interruptible(s->sock->sk->sk_sleep); + wake_up_interruptible(sk_sleep(s->sock->sk)); } else err = -ENOENT; diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index d48b33f4d4ba..0faad5ce6dc4 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -109,7 +109,7 @@ static void bnep_net_set_mc_list(struct net_device *dev) } skb_queue_tail(&sk->sk_write_queue, skb); - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible(sk_sleep(sk)); #endif } @@ -193,11 +193,11 @@ static netdev_tx_t bnep_net_xmit(struct sk_buff *skb, /* * We cannot send L2CAP packets from here as we are potentially in a bh. * So we have to queue them and wake up session thread which is sleeping - * on the sk->sk_sleep. + * on the sk_sleep(sk). */ dev->trans_start = jiffies; skb_queue_tail(&sk->sk_write_queue, skb); - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible(sk_sleep(sk)); if (skb_queue_len(&sk->sk_write_queue) >= BNEP_TX_QUEUE_LEN) { BT_DBG("tx queue is full"); diff --git a/net/bluetooth/cmtp/cmtp.h b/net/bluetooth/cmtp/cmtp.h index e4663aa14d26..785e79e953c5 100644 --- a/net/bluetooth/cmtp/cmtp.h +++ b/net/bluetooth/cmtp/cmtp.h @@ -125,7 +125,7 @@ static inline void cmtp_schedule(struct cmtp_session *session) { struct sock *sk = session->sock->sk; - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible(sk_sleep(sk)); } /* CMTP init defines */ diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 0073ec8495da..d4c6af082d48 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -284,7 +284,7 @@ static int cmtp_session(void *arg) set_user_nice(current, -15); init_waitqueue_entry(&wait, current); - add_wait_queue(sk->sk_sleep, &wait); + add_wait_queue(sk_sleep(sk), &wait); while (!atomic_read(&session->terminate)) { set_current_state(TASK_INTERRUPTIBLE); @@ -301,7 +301,7 @@ static int cmtp_session(void *arg) schedule(); } set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); down_write(&cmtp_session_sem); diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 280529ad9274..bfe641b7dfaf 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -561,8 +561,8 @@ static int hidp_session(void *arg) init_waitqueue_entry(&ctrl_wait, current); init_waitqueue_entry(&intr_wait, current); - add_wait_queue(ctrl_sk->sk_sleep, &ctrl_wait); - add_wait_queue(intr_sk->sk_sleep, &intr_wait); + add_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait); + add_wait_queue(sk_sleep(intr_sk), &intr_wait); while (!atomic_read(&session->terminate)) { set_current_state(TASK_INTERRUPTIBLE); @@ -584,8 +584,8 @@ static int hidp_session(void *arg) schedule(); } set_current_state(TASK_RUNNING); - remove_wait_queue(intr_sk->sk_sleep, &intr_wait); - remove_wait_queue(ctrl_sk->sk_sleep, &ctrl_wait); + remove_wait_queue(sk_sleep(intr_sk), &intr_wait); + remove_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait); down_write(&hidp_session_sem); @@ -609,7 +609,7 @@ static int hidp_session(void *arg) fput(session->intr_sock->file); - wait_event_timeout(*(ctrl_sk->sk_sleep), + wait_event_timeout(*(sk_sleep(ctrl_sk)), (ctrl_sk->sk_state == BT_CLOSED), msecs_to_jiffies(500)); 
fput(session->ctrl_sock->file); diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h index a4e215d50c10..8d934a19da0a 100644 --- a/net/bluetooth/hidp/hidp.h +++ b/net/bluetooth/hidp/hidp.h @@ -164,8 +164,8 @@ static inline void hidp_schedule(struct hidp_session *session) struct sock *ctrl_sk = session->ctrl_sock->sk; struct sock *intr_sk = session->intr_sock->sk; - wake_up_interruptible(ctrl_sk->sk_sleep); - wake_up_interruptible(intr_sk->sk_sleep); + wake_up_interruptible(sk_sleep(ctrl_sk)); + wake_up_interruptible(sk_sleep(intr_sk)); } /* HIDP init defines */ diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 99d68c34e4f1..c1e60eed5a97 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1147,7 +1147,7 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int fl BT_DBG("sk %p timeo %ld", sk, timeo); /* Wait for an incoming connection. (wake-one). */ - add_wait_queue_exclusive(sk->sk_sleep, &wait); + add_wait_queue_exclusive(sk_sleep(sk), &wait); while (!(nsk = bt_accept_dequeue(sk, newsock))) { set_current_state(TASK_INTERRUPTIBLE); if (!timeo) { @@ -1170,7 +1170,7 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int fl } } set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); if (err) goto done; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 8ed3c37684fa..43fbf6b4b4bf 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -503,7 +503,7 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f BT_DBG("sk %p timeo %ld", sk, timeo); /* Wait for an incoming connection. (wake-one). */ - add_wait_queue_exclusive(sk->sk_sleep, &wait); + add_wait_queue_exclusive(sk_sleep(sk), &wait); while (!(nsk = bt_accept_dequeue(sk, newsock))) { set_current_state(TASK_INTERRUPTIBLE); if (!timeo) { @@ -526,7 +526,7 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f } } set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); if (err) goto done; @@ -621,7 +621,7 @@ static long rfcomm_sock_data_wait(struct sock *sk, long timeo) { DECLARE_WAITQUEUE(wait, current); - add_wait_queue(sk->sk_sleep, &wait); + add_wait_queue(sk_sleep(sk), &wait); for (;;) { set_current_state(TASK_INTERRUPTIBLE); @@ -640,7 +640,7 @@ static long rfcomm_sock_data_wait(struct sock *sk, long timeo) } __set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); return timeo; } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index ca6b2ad1c3fc..b406d3eff53a 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -567,7 +567,7 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag BT_DBG("sk %p timeo %ld", sk, timeo); /* Wait for an incoming connection. (wake-one). 
*/ - add_wait_queue_exclusive(sk->sk_sleep, &wait); + add_wait_queue_exclusive(sk_sleep(sk), &wait); while (!(ch = bt_accept_dequeue(sk, newsock))) { set_current_state(TASK_INTERRUPTIBLE); if (!timeo) { @@ -590,7 +590,7 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag } } set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); if (err) goto done; diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index cdf62b9fefac..90317e7d10b4 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -689,7 +689,7 @@ static unsigned int caif_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); u32 mask = 0; - poll_wait(file, sk->sk_sleep, wait); + poll_wait(file, sk_sleep(sk), wait); lock_sock(&(cf_sk->sk)); if (!STATE_IS_OPEN(cf_sk)) { if (!STATE_IS_PENDING(cf_sk)) diff --git a/net/core/datagram.c b/net/core/datagram.c index 2dccd4ee591b..5574a5ddf908 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -86,7 +86,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p) int error; DEFINE_WAIT_FUNC(wait, receiver_wake_function); - prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); /* Socket errors? */ error = sock_error(sk); @@ -115,7 +115,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p) error = 0; *timeo_p = schedule_timeout(*timeo_p); out: - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return error; interrupted: error = sock_intr_errno(*timeo_p); @@ -726,7 +726,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk; unsigned int mask; - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; /* exceptional events? 
*/ diff --git a/net/core/sock.c b/net/core/sock.c index 7effa1e689df..58ebd146ce5a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1395,7 +1395,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo) if (signal_pending(current)) break; set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) break; if (sk->sk_shutdown & SEND_SHUTDOWN) @@ -1404,7 +1404,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo) break; timeo = schedule_timeout(timeo); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return timeo; } @@ -1570,11 +1570,11 @@ int sk_wait_data(struct sock *sk, long *timeo) int rc; DEFINE_WAIT(wait); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue)); clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return rc; } EXPORT_SYMBOL(sk_wait_data); @@ -1798,7 +1798,7 @@ static void sock_def_wakeup(struct sock *sk) { read_lock(&sk->sk_callback_lock); if (sk_has_sleeper(sk)) - wake_up_interruptible_all(sk->sk_sleep); + wake_up_interruptible_all(sk_sleep(sk)); read_unlock(&sk->sk_callback_lock); } @@ -1806,7 +1806,7 @@ static void sock_def_error_report(struct sock *sk) { read_lock(&sk->sk_callback_lock); if (sk_has_sleeper(sk)) - wake_up_interruptible_poll(sk->sk_sleep, POLLERR); + wake_up_interruptible_poll(sk_sleep(sk), POLLERR); sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); read_unlock(&sk->sk_callback_lock); } @@ -1815,7 +1815,7 @@ static void sock_def_readable(struct sock *sk, int len) { read_lock(&sk->sk_callback_lock); if (sk_has_sleeper(sk)) - wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN | + wake_up_interruptible_sync_poll(sk_sleep(sk), POLLIN | POLLRDNORM | POLLRDBAND); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); read_unlock(&sk->sk_callback_lock); @@ -1830,7 +1830,7 @@ static void sock_def_write_space(struct sock *sk) */ if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { if (sk_has_sleeper(sk)) - wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT | + wake_up_interruptible_sync_poll(sk_sleep(sk), POLLOUT | POLLWRNORM | POLLWRBAND); /* Should agree with poll, otherwise some programs break */ diff --git a/net/core/stream.c b/net/core/stream.c index a37debfeb1b2..7b3c3f30b107 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -32,8 +32,8 @@ void sk_stream_write_space(struct sock *sk) if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) { clear_bit(SOCK_NOSPACE, &sock->flags); - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible_poll(sk->sk_sleep, POLLOUT | + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible_poll(sk_sleep(sk), POLLOUT | POLLWRNORM | POLLWRBAND); if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); @@ -66,13 +66,13 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) if (signal_pending(tsk)) return sock_intr_errno(*timeo_p); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); sk->sk_write_pending++; done = sk_wait_event(sk, timeo_p, !sk->sk_err && !((1 << sk->sk_state) & 
~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))); - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); sk->sk_write_pending--; } while (!done); return 0; @@ -96,13 +96,13 @@ void sk_stream_wait_close(struct sock *sk, long timeout) DEFINE_WAIT(wait); do { - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk))) break; } while (!signal_pending(current) && timeout); - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); } } @@ -126,7 +126,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) while (1) { set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto do_error; @@ -157,7 +157,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) *timeo_p = current_timeo; } out: - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return err; do_error: diff --git a/net/dccp/output.c b/net/dccp/output.c index e98b65e9569f..2d3dcb39851f 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -198,7 +198,7 @@ void dccp_write_space(struct sock *sk) read_lock(&sk->sk_callback_lock); if (sk_has_sleeper(sk)) - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible(sk_sleep(sk)); /* Should agree with poll, otherwise some programs break */ if (sock_writeable(sk)) sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); @@ -225,7 +225,7 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay) dccp_pr_debug("delayed send by %d msec\n", delay); jiffdelay = msecs_to_jiffies(delay); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); sk->sk_write_pending++; release_sock(sk); @@ -241,7 +241,7 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay) rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); } while ((delay = rc) > 0); out: - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return rc; do_error: diff --git a/net/dccp/proto.c b/net/dccp/proto.c index a0e38d8018f5..b03ecf6b2bb0 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -312,7 +312,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock, unsigned int mask; struct sock *sk = sock->sk; - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == DCCP_LISTEN) return inet_csk_listen_poll(sk); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 55e3b6b0061a..d6b93d19790f 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -832,7 +832,7 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation) scp->segsize_loc = dst_metric(__sk_dst_get(sk), RTAX_ADVMSS); dn_send_conn_conf(sk, allocation); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); for(;;) { release_sock(sk); if (scp->state == DN_CC) @@ -850,9 +850,9 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation) err = -EAGAIN; if (!*timeo) break; - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err == 0) { sk->sk_socket->state = SS_CONNECTED; } else if (scp->state != DN_CC) { @@ -873,7 
+873,7 @@ static int dn_wait_run(struct sock *sk, long *timeo) if (!*timeo) return -EALREADY; - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); for(;;) { release_sock(sk); if (scp->state == DN_CI || scp->state == DN_CC) @@ -891,9 +891,9 @@ static int dn_wait_run(struct sock *sk, long *timeo) err = -ETIMEDOUT; if (!*timeo) break; - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); out: if (err == 0) { sk->sk_socket->state = SS_CONNECTED; @@ -1040,7 +1040,7 @@ static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo) struct sk_buff *skb = NULL; int err = 0; - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); for(;;) { release_sock(sk); skb = skb_dequeue(&sk->sk_receive_queue); @@ -1060,9 +1060,9 @@ static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo) err = -EAGAIN; if (!*timeo) break; - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return skb == NULL ? ERR_PTR(err) : skb; } @@ -1746,11 +1746,11 @@ static int dn_recvmsg(struct kiocb *iocb, struct socket *sock, goto out; } - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target)); clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); } skb_queue_walk_safe(queue, skb, n) { @@ -2003,12 +2003,12 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock, goto out; } - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); sk_wait_event(sk, &timeo, !dn_queue_too_long(scp, queue, flags)); clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); continue; } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c5376c725503..5ca7290c2e61 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -548,7 +548,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo) { DEFINE_WAIT(wait); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); /* Basic assumption: if someone sets sk->sk_err, he _must_ * change state of the socket from TCP_SYN_*. @@ -561,9 +561,9 @@ static long inet_wait_for_connect(struct sock *sk, long timeo) lock_sock(sk); if (signal_pending(current) || !timeo) break; - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return timeo; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 8da6429269dd..e0a3e3537b14 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -234,7 +234,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) * having to remove and re-insert us on the wait queue. 
*/ for (;;) { - prepare_to_wait_exclusive(sk->sk_sleep, &wait, + prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); release_sock(sk); if (reqsk_queue_empty(&icsk->icsk_accept_queue)) @@ -253,7 +253,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) if (!timeo) break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return err; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0f8caf64caa3..77208334a613 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -378,7 +378,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) struct sock *sk = sock->sk; struct tcp_sock *tp = tcp_sk(sk); - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == TCP_LISTEN) return inet_csk_listen_poll(sk); diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 2a4efcea3423..79986a674f6e 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -347,7 +347,7 @@ static void irda_flow_indication(void *instance, void *sap, LOCAL_FLOW flow) self->tx_flow = flow; IRDA_DEBUG(1, "%s(), IrTTP wants us to start again\n", __func__); - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible(sk_sleep(sk)); break; default: IRDA_DEBUG(0, "%s(), Unknown flow command!\n", __func__); @@ -900,7 +900,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) if (flags & O_NONBLOCK) goto out; - err = wait_event_interruptible(*(sk->sk_sleep), + err = wait_event_interruptible(*(sk_sleep(sk)), skb_peek(&sk->sk_receive_queue)); if (err) goto out; @@ -1066,7 +1066,7 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr, goto out; err = -ERESTARTSYS; - if (wait_event_interruptible(*(sk->sk_sleep), + if (wait_event_interruptible(*(sk_sleep(sk)), (sk->sk_state != TCP_SYN_SENT))) goto out; @@ -1318,7 +1318,7 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock, /* Check if IrTTP is wants us to slow down */ - if (wait_event_interruptible(*(sk->sk_sleep), + if (wait_event_interruptible(*(sk_sleep(sk)), (self->tx_flow != FLOW_STOP || sk->sk_state != TCP_ESTABLISHED))) { err = -ERESTARTSYS; goto out; @@ -1477,7 +1477,7 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, if (copied >= target) break; - prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); /* * POSIX 1003.1g mandates this order. @@ -1497,7 +1497,7 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, /* Wait process until data arrives */ schedule(); - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err) goto out; @@ -1787,7 +1787,7 @@ static unsigned int irda_poll(struct file * file, struct socket *sock, IRDA_DEBUG(4, "%s()\n", __func__); lock_kernel(); - poll_wait(file, sk->sk_sleep, wait); + poll_wait(file, sk_sleep(sk), wait); mask = 0; /* Exceptional events? 
*/ diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index c18286a2167b..9636b7d27b48 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -59,7 +59,7 @@ do { \ DEFINE_WAIT(__wait); \ long __timeo = timeo; \ ret = 0; \ - prepare_to_wait(sk->sk_sleep, &__wait, TASK_INTERRUPTIBLE); \ + prepare_to_wait(sk_sleep(sk), &__wait, TASK_INTERRUPTIBLE); \ while (!(condition)) { \ if (!__timeo) { \ ret = -EAGAIN; \ @@ -76,7 +76,7 @@ do { \ if (ret) \ break; \ } \ - finish_wait(sk->sk_sleep, &__wait); \ + finish_wait(sk_sleep(sk), &__wait); \ } while (0) #define iucv_sock_wait(sk, condition, timeo) \ @@ -307,7 +307,7 @@ static void iucv_sock_wake_msglim(struct sock *sk) { read_lock(&sk->sk_callback_lock); if (sk_has_sleeper(sk)) - wake_up_interruptible_all(sk->sk_sleep); + wake_up_interruptible_all(sk_sleep(sk)); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); read_unlock(&sk->sk_callback_lock); } @@ -795,7 +795,7 @@ static int iucv_sock_accept(struct socket *sock, struct socket *newsock, timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); /* Wait for an incoming connection */ - add_wait_queue_exclusive(sk->sk_sleep, &wait); + add_wait_queue_exclusive(sk_sleep(sk), &wait); while (!(nsk = iucv_accept_dequeue(sk, newsock))) { set_current_state(TASK_INTERRUPTIBLE); if (!timeo) { @@ -819,7 +819,7 @@ static int iucv_sock_accept(struct socket *sock, struct socket *newsock, } set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); if (err) goto done; @@ -1269,7 +1269,7 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk; unsigned int mask = 0; - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == IUCV_LISTEN) return iucv_accept_poll(sk); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 2db6a9f75913..023ba820236f 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -536,7 +536,7 @@ static int llc_ui_wait_for_disc(struct sock *sk, long timeout) int rc = 0; while (1) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (sk_wait_event(sk, &timeout, sk->sk_state == TCP_CLOSE)) break; rc = -ERESTARTSYS; @@ -547,7 +547,7 @@ static int llc_ui_wait_for_disc(struct sock *sk, long timeout) break; rc = 0; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return rc; } @@ -556,13 +556,13 @@ static int llc_ui_wait_for_conn(struct sock *sk, long timeout) DEFINE_WAIT(wait); while (1) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (sk_wait_event(sk, &timeout, sk->sk_state != TCP_SYN_SENT)) break; if (signal_pending(current) || !timeout) break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return timeout; } @@ -573,7 +573,7 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout) int rc; while (1) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); rc = 0; if (sk_wait_event(sk, &timeout, (sk->sk_shutdown & RCV_SHUTDOWN) || @@ -588,7 +588,7 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout) if (!timeout) break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return rc; } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 8fb0ae616761..7ba06939829f 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ 
b/net/netfilter/ipvs/ip_vs_sync.c @@ -802,7 +802,7 @@ static int sync_thread_backup(void *data) ip_vs_backup_mcast_ifn, ip_vs_backup_syncid); while (!kthread_should_stop()) { - wait_event_interruptible(*tinfo->sock->sk->sk_sleep, + wait_event_interruptible(*sk_sleep(tinfo->sock->sk), !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue) || kthread_should_stop()); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index fa07f044b599..06cb02796a0e 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -739,7 +739,7 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr, DEFINE_WAIT(wait); for (;;) { - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (sk->sk_state != TCP_SYN_SENT) break; @@ -752,7 +752,7 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr, err = -ERESTARTSYS; break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err) goto out_release; } @@ -798,7 +798,7 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags) * hooked into the SABM we saved */ for (;;) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); skb = skb_dequeue(&sk->sk_receive_queue); if (skb) break; @@ -816,7 +816,7 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags) err = -ERESTARTSYS; break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err) goto out_release; diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 7919a9edb8e9..aebfecbdb841 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -158,7 +158,7 @@ static unsigned int rds_poll(struct file *file, struct socket *sock, unsigned int mask = 0; unsigned long flags; - poll_wait(file, sk->sk_sleep, wait); + poll_wait(file, sk_sleep(sk), wait); if (rs->rs_seen_congestion) poll_wait(file, &rds_poll_waitq, wait); diff --git a/net/rds/rds.h b/net/rds/rds.h index 4bec6e2ed495..c224b5bb3ba9 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -492,7 +492,7 @@ void rds_sock_put(struct rds_sock *rs); void rds_wake_sk_sleep(struct rds_sock *rs); static inline void __rds_wake_sk_sleep(struct sock *sk) { - wait_queue_head_t *waitq = sk->sk_sleep; + wait_queue_head_t *waitq = sk_sleep(sk); if (!sock_flag(sk, SOCK_DEAD) && waitq) wake_up(waitq); diff --git a/net/rds/recv.c b/net/rds/recv.c index e2a2b9344f7b..795a00b7f2cb 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -432,7 +432,7 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, break; } - timeo = wait_event_interruptible_timeout(*sk->sk_sleep, + timeo = wait_event_interruptible_timeout(*sk_sleep(sk), (!list_empty(&rs->rs_notify_queue) || rs->rs_cong_notify || rds_next_incoming(rs, &inc)), timeo); diff --git a/net/rds/send.c b/net/rds/send.c index 53d6795ac9d0..9c1c6bcaa6c9 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -915,7 +915,7 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, goto out; } - timeo = wait_event_interruptible_timeout(*sk->sk_sleep, + timeo = wait_event_interruptible_timeout(*sk_sleep(sk), rds_send_queue_rm(rs, conn, rm, rs->rs_bound_port, dport, diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 4fb711a035f4..8e45e76a95f5 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -845,7 +845,7 @@ rose_try_next_neigh: DEFINE_WAIT(wait); for (;;) { - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, 
TASK_INTERRUPTIBLE); if (sk->sk_state != TCP_SYN_SENT) break; @@ -858,7 +858,7 @@ rose_try_next_neigh: err = -ERESTARTSYS; break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err) goto out_release; @@ -911,7 +911,7 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags) * hooked into the SABM we saved */ for (;;) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); skb = skb_dequeue(&sk->sk_receive_queue); if (skb) @@ -930,7 +930,7 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags) err = -ERESTARTSYS; break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err) goto out_release; diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index c060095b27ce..c432d76f415e 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -65,7 +65,7 @@ static void rxrpc_write_space(struct sock *sk) read_lock(&sk->sk_callback_lock); if (rxrpc_writable(sk)) { if (sk_has_sleeper(sk)) - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible(sk_sleep(sk)); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } read_unlock(&sk->sk_callback_lock); @@ -589,7 +589,7 @@ static unsigned int rxrpc_poll(struct file *file, struct socket *sock, unsigned int mask; struct sock *sk = sock->sk; - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; /* the socket is readable if there are any messages waiting on the Rx diff --git a/net/sctp/socket.c b/net/sctp/socket.c index c1941276f6e3..f34adcca8a8c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5702,7 +5702,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) struct sctp_sock *sp = sctp_sk(sk); unsigned int mask; - poll_wait(file, sk->sk_sleep, wait); + poll_wait(file, sk_sleep(sk), wait); /* A TCP-style listening socket becomes readable when the accept queue * is not empty. @@ -5943,7 +5943,7 @@ static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p) int error; DEFINE_WAIT(wait); - prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); /* Socket errors? */ error = sock_error(sk); @@ -5980,14 +5980,14 @@ static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p) sctp_lock_sock(sk); ready: - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return 0; interrupted: error = sock_intr_errno(*timeo_p); out: - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); *err = error; return error; } @@ -6061,8 +6061,8 @@ static void __sctp_write_space(struct sctp_association *asoc) wake_up_interruptible(&asoc->wait); if (sctp_writeable(sk)) { - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); /* Note that we try to include the Async I/O support * here by modeling from the current TCP/UDP code. 
@@ -6296,7 +6296,7 @@ static int sctp_wait_for_accept(struct sock *sk, long timeo) for (;;) { - prepare_to_wait_exclusive(sk->sk_sleep, &wait, + prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (list_empty(&ep->asocs)) { @@ -6322,7 +6322,7 @@ static int sctp_wait_for_accept(struct sock *sk, long timeo) break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return err; } @@ -6332,7 +6332,7 @@ static void sctp_wait_for_close(struct sock *sk, long timeout) DEFINE_WAIT(wait); do { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (list_empty(&sctp_sk(sk)->ep->asocs)) break; sctp_release_sock(sk); @@ -6340,7 +6340,7 @@ static void sctp_wait_for_close(struct sock *sk, long timeout) sctp_lock_sock(sk); } while (!signal_pending(current) && timeout); - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); } static void sctp_skb_set_owner_r_frag(struct sk_buff *skb, struct sock *sk) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index a29f259204e6..ce0d5b35c2ac 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -419,8 +419,8 @@ static void svc_udp_data_ready(struct sock *sk, int count) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); } /* @@ -436,10 +436,10 @@ static void svc_write_space(struct sock *sk) svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) { + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) { dprintk("RPC svc_write_space: someone sleeping on %p\n", svsk); - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible(sk_sleep(sk)); } } @@ -757,8 +757,8 @@ static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused) printk("svc: socket %p: no user data\n", sk); } - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible_all(sk->sk_sleep); + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible_all(sk_sleep(sk)); } /* @@ -777,8 +777,8 @@ static void svc_tcp_state_change(struct sock *sk) set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible_all(sk->sk_sleep); + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible_all(sk_sleep(sk)); } static void svc_tcp_data_ready(struct sock *sk, int count) @@ -791,8 +791,8 @@ static void svc_tcp_data_ready(struct sock *sk, int count) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); } /* @@ -1494,8 +1494,8 @@ static void svc_sock_detach(struct svc_xprt *xprt) sk->sk_data_ready = svsk->sk_odata; sk->sk_write_space = svsk->sk_owspace; - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); } /* diff --git a/net/tipc/socket.c b/net/tipc/socket.c index cfb20b80b3a1..66e889ba48fd 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -446,7 +446,7 @@ static unsigned int poll(struct file *file, struct socket *sock, struct 
sock *sk = sock->sk; u32 mask; - poll_wait(file, sk->sk_sleep, wait); + poll_wait(file, sk_sleep(sk), wait); if (!skb_queue_empty(&sk->sk_receive_queue) || (sock->state == SS_UNCONNECTED) || @@ -591,7 +591,7 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, break; } release_sock(sk); - res = wait_event_interruptible(*sk->sk_sleep, + res = wait_event_interruptible(*sk_sleep(sk), !tport->congested); lock_sock(sk); if (res) @@ -650,7 +650,7 @@ static int send_packet(struct kiocb *iocb, struct socket *sock, break; } release_sock(sk); - res = wait_event_interruptible(*sk->sk_sleep, + res = wait_event_interruptible(*sk_sleep(sk), (!tport->congested || !tport->connected)); lock_sock(sk); if (res) @@ -931,7 +931,7 @@ restart: goto exit; } release_sock(sk); - res = wait_event_interruptible(*sk->sk_sleep, + res = wait_event_interruptible(*sk_sleep(sk), (!skb_queue_empty(&sk->sk_receive_queue) || (sock->state == SS_DISCONNECTING))); lock_sock(sk); @@ -1064,7 +1064,7 @@ restart: goto exit; } release_sock(sk); - res = wait_event_interruptible(*sk->sk_sleep, + res = wait_event_interruptible(*sk_sleep(sk), (!skb_queue_empty(&sk->sk_receive_queue) || (sock->state == SS_DISCONNECTING))); lock_sock(sk); @@ -1271,8 +1271,8 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) tipc_disconnect_port(tipc_sk_port(sk)); } - if (waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + if (waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); return TIPC_OK; } @@ -1343,8 +1343,8 @@ static void wakeupdispatch(struct tipc_port *tport) { struct sock *sk = (struct sock *)tport->usr_handle; - if (waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + if (waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); } /** @@ -1426,7 +1426,7 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen, /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */ release_sock(sk); - res = wait_event_interruptible_timeout(*sk->sk_sleep, + res = wait_event_interruptible_timeout(*sk_sleep(sk), (!skb_queue_empty(&sk->sk_receive_queue) || (sock->state != SS_CONNECTING)), sk->sk_rcvtimeo); @@ -1521,7 +1521,7 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) goto exit; } release_sock(sk); - res = wait_event_interruptible(*sk->sk_sleep, + res = wait_event_interruptible(*sk_sleep(sk), (!skb_queue_empty(&sk->sk_receive_queue))); lock_sock(sk); if (res) @@ -1632,8 +1632,8 @@ restart: /* Discard any unreceived messages; wake up sleeping tasks */ discard_rx_queue(sk); - if (waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + if (waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); res = 0; break; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3d9122e78f41..87c0360eaa25 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -316,7 +316,7 @@ static void unix_write_space(struct sock *sk) read_lock(&sk->sk_callback_lock); if (unix_writable(sk)) { if (sk_has_sleeper(sk)) - wake_up_interruptible_sync(sk->sk_sleep); + wake_up_interruptible_sync(sk_sleep(sk)); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } read_unlock(&sk->sk_callback_lock); @@ -1736,7 +1736,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo) unix_state_lock(sk); for (;;) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (!skb_queue_empty(&sk->sk_receive_queue) || sk->sk_err || @@ -1752,7 +1752,7 @@ 
static long unix_stream_data_wait(struct sock *sk, long timeo) clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); unix_state_unlock(sk); return timeo; } @@ -1991,7 +1991,7 @@ static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table struct sock *sk = sock->sk; unsigned int mask; - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; /* exceptional events? */ @@ -2028,7 +2028,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk, *other; unsigned int mask, writable; - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; /* exceptional events? */ diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index cbddd0cb83f1..6cffbc4da029 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -718,7 +718,7 @@ static int x25_wait_for_connection_establishment(struct sock *sk) DECLARE_WAITQUEUE(wait, current); int rc; - add_wait_queue_exclusive(sk->sk_sleep, &wait); + add_wait_queue_exclusive(sk_sleep(sk), &wait); for (;;) { __set_current_state(TASK_INTERRUPTIBLE); rc = -ERESTARTSYS; @@ -738,7 +738,7 @@ static int x25_wait_for_connection_establishment(struct sock *sk) break; } __set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); return rc; } @@ -838,7 +838,7 @@ static int x25_wait_for_data(struct sock *sk, long timeout) DECLARE_WAITQUEUE(wait, current); int rc = 0; - add_wait_queue_exclusive(sk->sk_sleep, &wait); + add_wait_queue_exclusive(sk_sleep(sk), &wait); for (;;) { __set_current_state(TASK_INTERRUPTIBLE); if (sk->sk_shutdown & RCV_SHUTDOWN) @@ -858,7 +858,7 @@ static int x25_wait_for_data(struct sock *sk, long timeout) break; } __set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); return rc; } -- cgit v1.2.2 From 8eabf95cb17253a3ac72b1a62ce8a80b3efecd62 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Apr 2010 03:20:05 +0000 Subject: bridge: add a missing ntohs() grec_nsrcs is in network order, we should convert to host horder in br_multicast_igmp3_report() Signed-off-by: Eric Dumazet Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index f29ada827a6a..386c15369d91 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -727,7 +727,7 @@ static int br_multicast_igmp3_report(struct net_bridge *br, group = grec->grec_mca; type = grec->grec_type; - len += grec->grec_nsrcs * 4; + len += ntohs(grec->grec_nsrcs) * 4; if (!pskb_may_pull(skb, len)) return -EINVAL; -- cgit v1.2.2 From 0eae88f31ca2b88911ce843452054139e028771f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Apr 2010 19:06:52 -0700 Subject: net: Fix various endianness glitches Sparse can help us find endianness bugs, but we need to make some cleanups to be able to more easily spot real bugs. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/bridge/br_multicast.c | 2 +- net/bridge/br_private.h | 15 ++++++++------- net/ethernet/eth.c | 2 +- net/ipv4/af_inet.c | 8 ++++---- net/ipv4/ipmr.c | 10 +++++----- net/ipv4/route.c | 29 ++++++++++++++--------------- net/ipv4/tcp.c | 15 ++++++++------- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv4/tcp_output.c | 4 ++-- net/ipv4/udp.c | 8 ++++---- net/ipv6/addrconf.c | 3 ++- net/ipv6/ip6_fib.c | 3 ++- net/ipv6/tcp_ipv6.c | 4 ++-- net/ipv6/udp.c | 4 ++-- net/sched/sch_sfq.c | 10 +++++----- net/sunrpc/xprt.c | 2 +- net/xfrm/xfrm_hash.h | 3 ++- 17 files changed, 65 insertions(+), 61 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 3fe86ffc069c..61e1d1094b85 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -29,7 +29,7 @@ static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb, __be32 ip) { - return jhash_1word(mdb->secret, (u32)ip) & (mdb->max - 1); + return jhash_1word(mdb->secret, (__force u32)ip) & (mdb->max - 1); } static struct net_bridge_mdb_entry *__br_mdb_ip_get( diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 791d4ab0fd4d..63181e4a2a67 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -130,19 +130,20 @@ struct net_bridge_port #endif }; +struct br_cpu_netstats { + unsigned long rx_packets; + unsigned long rx_bytes; + unsigned long tx_packets; + unsigned long tx_bytes; +}; + struct net_bridge { spinlock_t lock; struct list_head port_list; struct net_device *dev; - struct br_cpu_netstats __percpu { - unsigned long rx_packets; - unsigned long rx_bytes; - unsigned long tx_packets; - unsigned long tx_bytes; - } *stats; - + struct br_cpu_netstats __percpu *stats; spinlock_t hash_lock; struct hlist_head hash[BR_HASH_SIZE]; unsigned long feature_mask; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 205a1c12f3c0..35846964082c 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -136,7 +136,7 @@ int eth_rebuild_header(struct sk_buff *skb) default: printk(KERN_DEBUG "%s: unable to resolve type %X addresses.\n", - dev->name, (int)eth->h_proto); + dev->name, (__force int)eth->h_proto); memcpy(eth->h_source, dev->dev_addr, ETH_ALEN); break; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 5ca7290c2e61..9f52880fae10 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1323,8 +1323,8 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) goto out_unlock; - id = ntohl(*(u32 *)&iph->id); - flush = (u16)((ntohl(*(u32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF)); + id = ntohl(*(__be32 *)&iph->id); + flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF)); id >>= 16; for (p = *head; p; p = p->next) { @@ -1337,8 +1337,8 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, if ((iph->protocol ^ iph2->protocol) | (iph->tos ^ iph2->tos) | - (iph->saddr ^ iph2->saddr) | - (iph->daddr ^ iph2->daddr)) { + ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) | + ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) { NAPI_GRO_CB(p)->same_flow = 0; continue; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 7d8a2bcecb76..a2df5012a1d0 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1772,10 +1772,10 @@ int ip_mr_input(struct sk_buff *skb) vif = ipmr_find_vif(mrt, skb->dev); if (vif >= 0) { - int err = ipmr_cache_unresolved(mrt, vif, skb); + int err2 = ipmr_cache_unresolved(mrt, vif, skb); read_unlock(&mrt_lock); - return err; + return err2; } 
read_unlock(&mrt_lock); kfree_skb(skb); @@ -2227,9 +2227,9 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) const struct ipmr_mfc_iter *it = seq->private; const struct mr_table *mrt = it->mrt; - seq_printf(seq, "%08lX %08lX %-3hd", - (unsigned long) mfc->mfc_mcastgrp, - (unsigned long) mfc->mfc_origin, + seq_printf(seq, "%08X %08X %-3hd", + (__force u32) mfc->mfc_mcastgrp, + (__force u32) mfc->mfc_origin, mfc->mfc_parent); if (it->cache != &mrt->mfc_unres_queue) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index cb562fdd9b9a..a947428ef0ae 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -258,10 +258,9 @@ static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); (__raw_get_cpu_var(rt_cache_stat).field++) static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx, - int genid) + int genid) { - return jhash_3words((__force u32)(__be32)(daddr), - (__force u32)(__be32)(saddr), + return jhash_3words((__force u32)daddr, (__force u32)saddr, idx, genid) & rt_hash_mask; } @@ -378,12 +377,13 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) struct rtable *r = v; int len; - seq_printf(seq, "%s\t%08lX\t%08lX\t%8X\t%d\t%u\t%d\t" - "%08lX\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", + seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" + "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", r->u.dst.dev ? r->u.dst.dev->name : "*", - (unsigned long)r->rt_dst, (unsigned long)r->rt_gateway, + (__force u32)r->rt_dst, + (__force u32)r->rt_gateway, r->rt_flags, atomic_read(&r->u.dst.__refcnt), - r->u.dst.__use, 0, (unsigned long)r->rt_src, + r->u.dst.__use, 0, (__force u32)r->rt_src, (dst_metric(&r->u.dst, RTAX_ADVMSS) ? (int)dst_metric(&r->u.dst, RTAX_ADVMSS) + 40 : 0), dst_metric(&r->u.dst, RTAX_WINDOW), @@ -685,18 +685,17 @@ static inline bool rt_caching(const struct net *net) static inline bool compare_hash_inputs(const struct flowi *fl1, const struct flowi *fl2) { - return (__force u32)(((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | - (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr) | + return ((((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) | + ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) | (fl1->iif ^ fl2->iif)) == 0); } static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) { - return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | - (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr)) | + return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) | + ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) | (fl1->mark ^ fl2->mark) | - (*(u16 *)&fl1->nl_u.ip4_u.tos ^ - *(u16 *)&fl2->nl_u.ip4_u.tos) | + (*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) | (fl1->oif ^ fl2->oif) | (fl1->iif ^ fl2->iif)) == 0; } @@ -2319,8 +2318,8 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, rcu_read_lock(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; rth = rcu_dereference(rth->u.dst.rt_next)) { - if (((rth->fl.fl4_dst ^ daddr) | - (rth->fl.fl4_src ^ saddr) | + if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | + ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | (rth->fl.iif ^ iif) | rth->fl.oif | (rth->fl.fl4_tos ^ tos)) == 0 && diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 77208334a613..6689c61cab47 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2721,7 +2721,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) struct tcphdr *th2; unsigned int len; unsigned 
int thlen; - unsigned int flags; + __be32 flags; unsigned int mss = 1; unsigned int hlen; unsigned int off; @@ -2771,10 +2771,10 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) found: flush = NAPI_GRO_CB(p)->flush; - flush |= flags & TCP_FLAG_CWR; - flush |= (flags ^ tcp_flag_word(th2)) & - ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH); - flush |= th->ack_seq ^ th2->ack_seq; + flush |= (__force int)(flags & TCP_FLAG_CWR); + flush |= (__force int)((flags ^ tcp_flag_word(th2)) & + ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); + flush |= (__force int)(th->ack_seq ^ th2->ack_seq); for (i = sizeof(*th); i < thlen; i += 4) flush |= *(u32 *)((u8 *)th + i) ^ *(u32 *)((u8 *)th2 + i); @@ -2795,8 +2795,9 @@ found: out_check_final: flush = len < mss; - flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST | - TCP_FLAG_SYN | TCP_FLAG_FIN); + flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH | + TCP_FLAG_RST | TCP_FLAG_SYN | + TCP_FLAG_FIN)); if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) pp = head; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ad08392a738c..4d6717d1e61c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1286,8 +1286,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_release; /* Secret recipe starts with IP addresses */ - *mess++ ^= daddr; - *mess++ ^= saddr; + *mess++ ^= (__force u32)daddr; + *mess++ ^= (__force u32)saddr; /* plus variable length Initiator Cookie */ c = (u8 *)mess; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2b7d71fb8439..429ad9286efc 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -861,7 +861,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, th->urg_ptr = htons(tp->snd_up - tcb->seq); th->urg = 1; } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) { - th->urg_ptr = 0xFFFF; + th->urg_ptr = htons(0xFFFF); th->urg = 1; } } @@ -2485,7 +2485,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, *tail-- ^= TCP_SKB_CB(skb)->seq + 1; /* recommended */ - *tail-- ^= ((th->dest << 16) | th->source); + *tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source); *tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */ sha_transform((__u32 *)&xvp->cookie_bakery[0], diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 666b963496ff..1e18f9cc9247 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -307,13 +307,13 @@ static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr, unsigned int port) { - return jhash_1word(saddr, net_hash_mix(net)) ^ port; + return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port; } int udp_v4_get_port(struct sock *sk, unsigned short snum) { unsigned int hash2_nulladdr = - udp4_portaddr_hash(sock_net(sk), INADDR_ANY, snum); + udp4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum); unsigned int hash2_partial = udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0); @@ -466,14 +466,14 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, daddr, hnum, dif, hslot2, slot2); if (!result) { - hash2 = udp4_portaddr_hash(net, INADDR_ANY, hnum); + hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum); slot2 = hash2 & udptable->mask; hslot2 = &udptable->hash2[slot2]; if (hslot->count < hslot2->count) goto begin; result = udp4_lib_lookup2(net, saddr, sport, - INADDR_ANY, hnum, dif, + htonl(INADDR_ANY), hnum, dif, hslot2, slot2); } 
rcu_read_unlock(); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 7cba8845242f..34d2d649e396 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -588,7 +588,8 @@ static u32 ipv6_addr_hash(const struct in6_addr *addr) * We perform the hash function over the last 64 bits of the address * This will include the IEEE address token on links that support it. */ - return jhash_2words(addr->s6_addr32[2], addr->s6_addr32[3], 0) + return jhash_2words((__force u32)addr->s6_addr32[2], + (__force u32)addr->s6_addr32[3], 0) & (IN6_ADDR_HSIZE - 1); } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index dc6e0b8f260d..92a122b7795d 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -144,7 +144,8 @@ static __inline__ __be32 addr_bit_set(void *token, int fn_bit) * htonl(1 << ((~fn_bit)&0x1F)) * See include/asm-generic/bitops/le.h. */ - return (1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) & addr[fn_bit >> 5]; + return (__force __be32)(1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) & + addr[fn_bit >> 5]; } static __inline__ struct fib6_node * node_alloc(void) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index bd5ef7b6e48e..a92b4a5cd8bf 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1234,12 +1234,12 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; /* Secret recipe starts with IP addresses */ - d = &ipv6_hdr(skb)->daddr.s6_addr32[0]; + d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0]; *mess++ ^= *d++; *mess++ ^= *d++; *mess++ ^= *d++; *mess++ ^= *d++; - d = &ipv6_hdr(skb)->saddr.s6_addr32[0]; + d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0]; *mess++ ^= *d++; *mess++ ^= *d++; *mess++ ^= *d++; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 90824852f598..92bf9033e245 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -91,9 +91,9 @@ static unsigned int udp6_portaddr_hash(struct net *net, if (ipv6_addr_any(addr6)) hash = jhash_1word(0, mix); else if (ipv6_addr_v4mapped(addr6)) - hash = jhash_1word(addr6->s6_addr32[3], mix); + hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix); else - hash = jhash2(addr6->s6_addr32, 4, mix); + hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix); return hash ^ port; } diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index c5a9ac566007..c65762823f5e 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -123,8 +123,8 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) case htons(ETH_P_IP): { const struct iphdr *iph = ip_hdr(skb); - h = iph->daddr; - h2 = iph->saddr ^ iph->protocol; + h = (__force u32)iph->daddr; + h2 = (__force u32)iph->saddr ^ iph->protocol; if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP || @@ -138,8 +138,8 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) case htons(ETH_P_IPV6): { struct ipv6hdr *iph = ipv6_hdr(skb); - h = iph->daddr.s6_addr32[3]; - h2 = iph->saddr.s6_addr32[3] ^ iph->nexthdr; + h = (__force u32)iph->daddr.s6_addr32[3]; + h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr; if (iph->nexthdr == IPPROTO_TCP || iph->nexthdr == IPPROTO_UDP || iph->nexthdr == IPPROTO_UDPLITE || @@ -150,7 +150,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) break; } default: - h = (unsigned long)skb_dst(skb) ^ skb->protocol; + h = (unsigned long)skb_dst(skb) ^ (__force u32)skb->protocol; h2 = (unsigned long)skb->sk; } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 
42f09ade0044..699ade68aac1 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -974,7 +974,7 @@ void xprt_reserve(struct rpc_task *task) static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt) { - return xprt->xid++; + return (__force __be32)xprt->xid++; } static inline void xprt_init_xid(struct rpc_xprt *xprt) diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h index e5195c99f71e..1396572d2ade 100644 --- a/net/xfrm/xfrm_hash.h +++ b/net/xfrm/xfrm_hash.h @@ -16,7 +16,8 @@ static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr) static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) { - return ntohl(daddr->a4 + saddr->a4); + u32 sum = (__force u32)daddr->a4 + (__force u32)saddr->a4; + return ntohl((__force __be32)sum); } static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) -- cgit v1.2.2 From ccb7c7732e2ceb4e81a7806faf1670be9681ccd2 Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Tue, 20 Apr 2010 22:39:53 -0700 Subject: net: Remove two unnecessary exports (skbuff). There is no need to export skb_under_panic() and skb_over_panic() in skbuff.c, since these methods are used only in skbuff.c ; this patch removes these two exports. It also marks these functions as 'static' and removeS the extern declarations of them from include/linux/skbuff.h Signed-off-by: Rami Rosen Signed-off-by: David S. Miller --- net/core/skbuff.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index bdea0efdf8cb..4218ff49bf13 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -117,7 +117,7 @@ static const struct pipe_buf_operations sock_pipe_buf_ops = { * * Out of line support code for skb_put(). Not user callable. */ -void skb_over_panic(struct sk_buff *skb, int sz, void *here) +static void skb_over_panic(struct sk_buff *skb, int sz, void *here) { printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p " "data:%p tail:%#lx end:%#lx dev:%s\n", @@ -126,7 +126,6 @@ void skb_over_panic(struct sk_buff *skb, int sz, void *here) skb->dev ? skb->dev->name : ""); BUG(); } -EXPORT_SYMBOL(skb_over_panic); /** * skb_under_panic - private function @@ -137,7 +136,7 @@ EXPORT_SYMBOL(skb_over_panic); * Out of line support code for skb_push(). Not user callable. */ -void skb_under_panic(struct sk_buff *skb, int sz, void *here) +static void skb_under_panic(struct sk_buff *skb, int sz, void *here) { printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p " "data:%p tail:%#lx end:%#lx dev:%s\n", @@ -146,7 +145,6 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) skb->dev ? skb->dev->name : ""); BUG(); } -EXPORT_SYMBOL(skb_under_panic); /* Allocate a new skbuff. We do this ourselves so we can fill in a few * 'private' fields and also do memory statistics to find all the -- cgit v1.2.2 From 6651ffc8e8bdd5fb4b7d1867c6cfebb4f309512c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 21 Apr 2010 00:47:15 -0700 Subject: ipv6: Fix tcp_v6_send_response transport header setting. My recent patch to remove the open-coded checksum sequence in tcp_v6_send_response broke it as we did not set the transport header pointer on the new packet. Actually, there is code there trying to set the transport header properly, but it sets it for the wrong skb ('skb' instead of 'buff'). 
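For context, skb_reset_transport_header() only records a per-skb offset, roughly as in the helper sketched below (a sketch of the ~2.6.34 inline; the exact body depends on NET_SKBUFF_DATA_USES_OFFSET), so calling it on the old 'skb' records nothing for the freshly allocated 'buff':

	static inline void skb_reset_transport_header(struct sk_buff *skb)
	{
		/* transport header kept as an offset from skb->head */
		skb->transport_header = skb->data - skb->head;
	}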
This bug was introduced by commit a8fdf2b331b38d61fb5f11f3aec4a4f9fb2dedcb ("ipv6: Fix tcp_v6_send_response(): it didn't set skb transport header") Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c92ebe8f80d5..075f540ec197 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1015,7 +1015,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len); t1 = (struct tcphdr *) skb_push(buff, tot_len); - skb_reset_transport_header(skb); + skb_reset_transport_header(buff); /* Swap the send and the receive. */ memset(t1, 0, sizeof(*t1)); -- cgit v1.2.2 From 05d17608a69b3ae653ea5c9857283bef3439c733 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 20 Apr 2010 00:25:58 +0000 Subject: net: Fix an RCU warning in dev_pick_tx() Fix the following RCU warning in dev_pick_tx(): =================================================== [ INFO: suspicious rcu_dereference_check() usage. ] --------------------------------------------------- net/core/dev.c:1993 invoked rcu_dereference_check() without protection! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 2 locks held by swapper/0: #0: (&idev->mc_ifc_timer){+.-...}, at: [] run_timer_softirq+0x17b/0x278 #1: (rcu_read_lock_bh){.+....}, at: [] dev_queue_xmit+0x14e/0x4dc stack backtrace: Pid: 0, comm: swapper Not tainted 2.6.34-rc5-cachefs #4 Call Trace: [] lockdep_rcu_dereference+0xaa/0xb2 [] dev_queue_xmit+0x259/0x4dc [] ? dev_queue_xmit+0x14e/0x4dc [] ? trace_hardirqs_on+0xd/0xf [] ? local_bh_enable_ip+0xbc/0xc1 [] neigh_resolve_output+0x24b/0x27c [] ip6_output_finish+0x7c/0xb4 [] ip6_output2+0x256/0x261 [] ? trace_hardirqs_on+0xd/0xf [] ip6_output+0xbbc/0xbcb [] ? fib6_force_start_gc+0x2b/0x2d [] mld_sendpack+0x273/0x39d [] ? mld_sendpack+0x0/0x39d [] ? mark_held_locks+0x52/0x70 [] mld_ifc_timer_expire+0x24f/0x288 [] run_timer_softirq+0x1ec/0x278 [] ? run_timer_softirq+0x17b/0x278 [] ? mld_ifc_timer_expire+0x0/0x288 [] ? __do_softirq+0x69/0x140 [] __do_softirq+0xa2/0x140 [] call_softirq+0x1c/0x28 [] do_softirq+0x38/0x80 [] irq_exit+0x45/0x47 [] smp_apic_timer_interrupt+0x88/0x96 [] apic_timer_interrupt+0x13/0x20 [] ? __atomic_notifier_call_chain+0x0/0x86 [] ? mwait_idle+0x6e/0x78 [] ? mwait_idle+0x65/0x78 [] cpu_idle+0x4d/0x83 [] rest_init+0xb9/0xc0 [] ? rest_init+0x0/0xc0 [] start_kernel+0x392/0x39d [] x86_64_start_reservations+0xb3/0xb7 [] x86_64_start_kernel+0xe4/0xeb An rcu_dereference() should be an rcu_dereference_bh(). Signed-off-by: David Howells Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 92584bfef09b..f769098774b7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1990,7 +1990,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev, queue_index = skb_tx_hash(dev, skb); if (sk) { - struct dst_entry *dst = rcu_dereference(sk->sk_dst_cache); + struct dst_entry *dst = rcu_dereference_bh(sk->sk_dst_cache); if (dst && skb_dst(skb) == dst) sk_tx_queue_set(sk, queue_index); -- cgit v1.2.2 From f71b70e115dd0bb34eee4d281a4fb6416e88cfff Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 21 Apr 2010 01:57:01 -0700 Subject: tcp: Fix ipv6 checksumming on response packets for real. 
Commit 6651ffc8e8bdd5fb4b7d1867c6cfebb4f309512c ("ipv6: Fix tcp_v6_send_response transport header setting.") fixed one half of why ipv6 tcp response checksums were invalid, but it's not the whole story. If we're going to use CHECKSUM_PARTIAL for these things (which we are since commit 2e8e18ef52e7dd1af0a3bd1f7d990a1d0b249586 "tcp: Set CHECKSUM_UNNECESSARY in tcp_init_nondata_skb"), we can't be setting buff->csum as we always have been here in tcp_v6_send_response. We need to leave it at zero. Kill that line and checksums are good again. Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 78480f410a9b..5d2e4301e246 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1050,8 +1050,6 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, } #endif - buff->csum = csum_partial(t1, tot_len, 0); - memset(&fl, 0, sizeof(fl)); ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr); -- cgit v1.2.2 From d97a9e47ba148cfc41e354c5cd241f472273207c Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 21 Apr 2010 14:45:51 +0200 Subject: netfilter: x_tables: move sleeping allocation outside BH-disabled region The jumpstack allocation needs to be moved out of the critical region. Corrects this notice: BUG: sleeping function called from invalid context at mm/slub.c:1705 [ 428.295762] in_atomic(): 1, irqs_disabled(): 0, pid: 9111, name: iptables [ 428.295771] Pid: 9111, comm: iptables Not tainted 2.6.34-rc1 #2 [ 428.295776] Call Trace: [ 428.295791] [] __might_sleep+0xe5/0xed [ 428.295801] [] __kmalloc+0x92/0xfc [ 428.295825] [] ? xt_jumpstack_alloc+0x36/0xff [x_tables] Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/netfilter/x_tables.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 3ae32340d4df..445de702b8b7 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -801,6 +801,12 @@ xt_replace_table(struct xt_table *table, struct xt_table_info *private; int ret; + ret = xt_jumpstack_alloc(newinfo); + if (ret < 0) { + *error = ret; + return NULL; + } + /* Do the substitution. */ local_bh_disable(); private = table->private; @@ -814,12 +820,6 @@ xt_replace_table(struct xt_table *table, return NULL; } - ret = xt_jumpstack_alloc(newinfo); - if (ret < 0) { - *error = ret; - return NULL; - } - table->private = newinfo; newinfo->initial_entries = private->initial_entries; -- cgit v1.2.2 From e5700aff144fbbba46be40049f0c55fb57283777 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 21 Apr 2010 14:59:20 -0700 Subject: tcp: Mark v6 response packets as CHECKSUM_PARTIAL Otherwise we only get the checksum right for data-less TCP responses. Noticed by Herbert Xu. Signed-off-by: David S. 
Miller --- net/ipv6/tcp_ipv6.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5d2e4301e246..1ababbb41131 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1054,6 +1054,9 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr); + buff->ip_summed = CHECKSUM_PARTIAL; + buff->csum = 0; + __tcp_v6_send_check(buff, &fl.fl6_src, &fl.fl6_dst); fl.proto = IPPROTO_TCP; -- cgit v1.2.2 From 989a2979205dd34269382b357e6d4b4b6956b889 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 14 Apr 2010 09:55:35 +0000 Subject: fasync: RCU and fine grained locking kill_fasync() uses a central rwlock, candidate for RCU conversion, to avoid cache line ping pongs on SMP. fasync_remove_entry() and fasync_add_entry() can disable IRQS on a short section instead during whole list scan. Use a spinlock per fasync_struct to synchronize kill_fasync_rcu() and fasync_{remove|add}_entry(). This spinlock is IRQ safe, so sock_fasync() doesnt need its own implementation and can use fasync_helper(), to reduce code size and complexity. We can remove __kill_fasync() direct use in net/socket.c, and rename it to kill_fasync_rcu(). Signed-off-by: Eric Dumazet Cc: Paul E. McKenney Cc: Lai Jiangshan Signed-off-by: David S. Miller --- net/socket.c | 73 +++++++++--------------------------------------------------- 1 file changed, 11 insertions(+), 62 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 35bc198bbf68..9822081eab38 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1067,78 +1067,27 @@ static int sock_close(struct inode *inode, struct file *filp) * 1. fasync_list is modified only under process context socket lock * i.e. under semaphore. * 2. fasync_list is used under read_lock(&sk->sk_callback_lock) - * or under socket lock. - * 3. fasync_list can be used from softirq context, so that - * modification under socket lock have to be enhanced with - * write_lock_bh(&sk->sk_callback_lock). 
- * --ANK (990710) + * or under socket lock */ static int sock_fasync(int fd, struct file *filp, int on) { - struct fasync_struct *fa, *fna = NULL, **prev; - struct socket *sock; - struct sock *sk; - - if (on) { - fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL); - if (fna == NULL) - return -ENOMEM; - } - - sock = filp->private_data; + struct socket *sock = filp->private_data; + struct sock *sk = sock->sk; - sk = sock->sk; - if (sk == NULL) { - kfree(fna); + if (sk == NULL) return -EINVAL; - } lock_sock(sk); - spin_lock(&filp->f_lock); - if (on) - filp->f_flags |= FASYNC; - else - filp->f_flags &= ~FASYNC; - spin_unlock(&filp->f_lock); - - prev = &(sock->fasync_list); + fasync_helper(fd, filp, on, &sock->fasync_list); - for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev) - if (fa->fa_file == filp) - break; - - if (on) { - if (fa != NULL) { - write_lock_bh(&sk->sk_callback_lock); - fa->fa_fd = fd; - write_unlock_bh(&sk->sk_callback_lock); - - kfree(fna); - goto out; - } - fna->fa_file = filp; - fna->fa_fd = fd; - fna->magic = FASYNC_MAGIC; - fna->fa_next = sock->fasync_list; - write_lock_bh(&sk->sk_callback_lock); - sock->fasync_list = fna; + if (!sock->fasync_list) + sock_reset_flag(sk, SOCK_FASYNC); + else sock_set_flag(sk, SOCK_FASYNC); - write_unlock_bh(&sk->sk_callback_lock); - } else { - if (fa != NULL) { - write_lock_bh(&sk->sk_callback_lock); - *prev = fa->fa_next; - if (!sock->fasync_list) - sock_reset_flag(sk, SOCK_FASYNC); - write_unlock_bh(&sk->sk_callback_lock); - kfree(fa); - } - } -out: - release_sock(sock->sk); + release_sock(sk); return 0; } @@ -1159,10 +1108,10 @@ int sock_wake_async(struct socket *sock, int how, int band) /* fall through */ case SOCK_WAKE_IO: call_kill: - __kill_fasync(sock->fasync_list, SIGIO, band); + kill_fasync(&sock->fasync_list, SIGIO, band); break; case SOCK_WAKE_URG: - __kill_fasync(sock->fasync_list, SIGURG, band); + kill_fasync(&sock->fasync_list, SIGURG, band); } return 0; } -- cgit v1.2.2 From bc8e4b954e463716a57d8113dd50ae9d47b682a7 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 21 Apr 2010 16:25:30 -0700 Subject: xfrm6: ensure to use the same dev when building a bundle When building a bundle, we set dst.dev and rt6.rt6i_idev. We must ensure to set the same device for both fields. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/xfrm6_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index ae181651c75a..00bf7c962b7e 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -124,7 +124,7 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.dst.dev = dev; dev_hold(dev); - xdst->u.rt6.rt6i_idev = in6_dev_get(rt->u.dst.dev); + xdst->u.rt6.rt6i_idev = in6_dev_get(dev); if (!xdst->u.rt6.rt6i_idev) return -ENODEV; -- cgit v1.2.2 From 2cec6b014da6fb4a40ba1c6556cdf9681ed3f89e Mon Sep 17 00:00:00 2001 From: andrew hendry Date: Sat, 17 Apr 2010 14:17:32 +0000 Subject: X25 fix dead unaccepted sockets 1, An X25 program binds and listens 2, calls arrive waiting to be accepted 3, Program exits without accepting 4, Sockets time out but don't get correctly cleaned up 5, cat /proc/net/x25/socket shows the dead sockets with bad inode fields. This line borrowed from AX25 sets the dying socket so the timers clean up later. Signed-off-by: Andrew Hendry Signed-off-by: David S. 
Miller --- net/x25/af_x25.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index cbddd0cb83f1..36e84e13c6aa 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -402,6 +402,7 @@ static void __x25_destroy_socket(struct sock *sk) /* * Queue the unaccepted socket for death */ + skb->sk->sk_state = TCP_LISTEN; sock_set_flag(skb->sk, SOCK_DEAD); x25_start_heartbeat(skb->sk); x25_sk(skb->sk)->state = X25_STATE_0; -- cgit v1.2.2 From f2228f785a9d97307aa8ba709088cfda6c3df73f Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Sun, 18 Apr 2010 16:58:22 +0000 Subject: ipv6: allow to send packet after receiving ICMPv6 Too Big message with MTU field less than IPV6_MIN_MTU According to RFC2460, PMTU is set to the IPv6 Minimum Link MTU (1280) and a fragment header should always be included after a node receiving Too Big message reporting PMTU is less than the IPv6 Minimum Link MTU. After receiving a ICMPv6 Too Big message reporting PMTU is less than the IPv6 Minimum Link MTU, sctp *can't* send any data/control chunk that total length including IPv6 head and IPv6 extend head is less than IPV6_MIN_MTU(1280 bytes). The failure occured in p6_fragment(), about reason see following(take SHUTDOWN chunk for example): sctp_packet_transmit (SHUTDOWN chunk, len=16 byte) |------sctp_v6_xmit (local_df=0) |------ip6_xmit |------ip6_output (dst_allfrag is ture) |------ip6_fragment In ip6_fragment(), for local_df=0, drops the the packet and returns EMSGSIZE. The patch fixes it with adding check length of skb->len. In this case, Ipv6 not to fragment upper protocol data, just only add a fragment header before it. Signed-off-by: Shan Wei Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 65f9c379df38..75d5ef830097 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -629,7 +629,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) /* We must not fragment if the socket is set to force MTU discovery * or if the skb it not generated by a local socket. */ - if (!skb->local_df) { + if (!skb->local_df && skb->len > mtu) { skb->dev = skb_dst(skb)->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), -- cgit v1.2.2 From 9a20e3197e7f6097897c6d1f18335a326ee06299 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Apr 2010 20:08:36 +0000 Subject: net: Introduce skb_orphan_try() At this point, skb->destructor is not the original one (stored in DEV_GSO_CB(skb)->destructor) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index e904c476b112..9bf1cccb067e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1937,7 +1937,6 @@ gso: if (dev->priv_flags & IFF_XMIT_DST_RELEASE) skb_dst_drop(nskb); - skb_orphan_try(nskb); rc = ops->ndo_start_xmit(nskb, dev); if (unlikely(rc != NETDEV_TX_OK)) { if (rc & ~NETDEV_TX_MASK) -- cgit v1.2.2 From b002a861092b0db128800794a116cc3acc5ec239 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 20 Apr 2010 21:06:07 +0000 Subject: ethernet: print protocol in host byte order Eric's recent patch added __force, but this place would seem to require actually doing a byte order conversion so the printk is consistent across architectures. 
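As a rough illustration (hypothetical values, not part of the patch): h_proto is stored in network byte order, so printing the raw value gives an endian-dependent number, while ntohs() yields the conventional EtherType on every architecture:

	__be16 proto = htons(ETH_P_IP);			/* bytes 08 00 in memory */
	printk(KERN_DEBUG "%X\n", (__force int)proto);	/* "8" on little-endian, "800" on big-endian */
	printk(KERN_DEBUG "%X\n", ntohs(proto));	/* "800" everywhere */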
Cc: Eric Dumazet Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/ethernet/eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 35846964082c..0c0d272a9888 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -136,7 +136,7 @@ int eth_rebuild_header(struct sk_buff *skb) default: printk(KERN_DEBUG "%s: unable to resolve type %X addresses.\n", - dev->name, (__force int)eth->h_proto); + dev->name, ntohs(eth->h_proto)); memcpy(eth->h_source, dev->dev_addr, ETH_ALEN); break; -- cgit v1.2.2 From f4f914b58019f0e50d521bbbadfaee260d766f95 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 20 Apr 2010 21:21:26 +0000 Subject: net: ipv6 bind to device issue The issue raises when having 2 NICs both assigned the same IPv6 global address. If a sender binds to a particular NIC (SO_BINDTODEVICE), the outgoing traffic is being sent via the first found. The bonded device is thus not taken into an account during the routing. From the ip6_route_output function: If the binding address is multicast, linklocal or loopback, the RT6_LOOKUP_F_IFACE bit is set, but not for global address. So binding global address will neglect SO_BINDTODEVICE-binded device, because the fib6_rule_lookup function path won't check for the flowi::oif field and take first route that fits. Signed-off-by: Jiri Olsa Signed-off-by: Scott Otto Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c2438e8cb9d0..05ebd7833043 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -815,7 +815,7 @@ struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, { int flags = 0; - if (rt6_need_strict(&fl->fl6_dst)) + if (fl->oif || rt6_need_strict(&fl->fl6_dst)) flags |= RT6_LOOKUP_F_IFACE; if (!ipv6_addr_any(&fl->fl6_src)) -- cgit v1.2.2 From e326bed2f47d0365da5a8faaf8ee93ed2d86325b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Apr 2010 00:22:45 -0700 Subject: rps: immediate send IPI in process_backlog() If some skb are queued to our backlog, we are delaying IPI sending at the end of net_rx_action(), increasing latencies. This defeats the queueing, since we want to quickly dispatch packets to the pool of worker cpus, then eventually deeply process our packets. It's better to send IPI before processing our packets in upper layers, from process_backlog(). Change the _and_disable_irq suffix to _and_enable_irq(), since we enable local irq in net_rps_action(), sorry for the confusion. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 76 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 42 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 9bf1cccb067e..3ba774b6091c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3242,11 +3242,48 @@ gro_result_t napi_gro_frags(struct napi_struct *napi) } EXPORT_SYMBOL(napi_gro_frags); +/* + * net_rps_action sends any pending IPI's for rps. + * Note: called with local irq disabled, but exits with local irq enabled. + */ +static void net_rps_action_and_irq_enable(struct softnet_data *sd) +{ +#ifdef CONFIG_RPS + struct softnet_data *remsd = sd->rps_ipi_list; + + if (remsd) { + sd->rps_ipi_list = NULL; + + local_irq_enable(); + + /* Send pending IPI's to kick RPS processing on remote cpus. 
*/ + while (remsd) { + struct softnet_data *next = remsd->rps_ipi_next; + + if (cpu_online(remsd->cpu)) + __smp_call_function_single(remsd->cpu, + &remsd->csd, 0); + remsd = next; + } + } else +#endif + local_irq_enable(); +} + static int process_backlog(struct napi_struct *napi, int quota) { int work = 0; struct softnet_data *sd = &__get_cpu_var(softnet_data); +#ifdef CONFIG_RPS + /* Check if we have pending ipi, its better to send them now, + * not waiting net_rx_action() end. + */ + if (sd->rps_ipi_list) { + local_irq_disable(); + net_rps_action_and_irq_enable(sd); + } +#endif napi->weight = weight_p; do { struct sk_buff *skb; @@ -3353,45 +3390,16 @@ void netif_napi_del(struct napi_struct *napi) } EXPORT_SYMBOL(netif_napi_del); -/* - * net_rps_action sends any pending IPI's for rps. - * Note: called with local irq disabled, but exits with local irq enabled. - */ -static void net_rps_action_and_irq_disable(void) -{ -#ifdef CONFIG_RPS - struct softnet_data *sd = &__get_cpu_var(softnet_data); - struct softnet_data *remsd = sd->rps_ipi_list; - - if (remsd) { - sd->rps_ipi_list = NULL; - - local_irq_enable(); - - /* Send pending IPI's to kick RPS processing on remote cpus. */ - while (remsd) { - struct softnet_data *next = remsd->rps_ipi_next; - - if (cpu_online(remsd->cpu)) - __smp_call_function_single(remsd->cpu, - &remsd->csd, 0); - remsd = next; - } - } else -#endif - local_irq_enable(); -} - static void net_rx_action(struct softirq_action *h) { - struct list_head *list = &__get_cpu_var(softnet_data).poll_list; + struct softnet_data *sd = &__get_cpu_var(softnet_data); unsigned long time_limit = jiffies + 2; int budget = netdev_budget; void *have; local_irq_disable(); - while (!list_empty(list)) { + while (!list_empty(&sd->poll_list)) { struct napi_struct *n; int work, weight; @@ -3409,7 +3417,7 @@ static void net_rx_action(struct softirq_action *h) * entries to the tail of this list, and only ->poll() * calls can remove this head entry from the list. */ - n = list_first_entry(list, struct napi_struct, poll_list); + n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list); have = netpoll_poll_lock(n); @@ -3444,13 +3452,13 @@ static void net_rx_action(struct softirq_action *h) napi_complete(n); local_irq_disable(); } else - list_move_tail(&n->poll_list, list); + list_move_tail(&n->poll_list, &sd->poll_list); } netpoll_poll_unlock(have); } out: - net_rps_action_and_irq_disable(); + net_rps_action_and_irq_enable(sd); #ifdef CONFIG_NET_DMA /* -- cgit v1.2.2 From 9ccb8975940c4ee51161152e37058e3d9e06c62f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 22 Apr 2010 01:02:07 -0700 Subject: net: Orphan and de-dst skbs earlier in xmit path. This way GSO packets don't get handled differently. With help from Eric Dumazet. Signed-off-by: David S. 
Miller Signed-off-by: Eric Dumazet --- net/core/dev.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 3ba774b6091c..a4a7c36917d1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1902,13 +1902,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, if (!list_empty(&ptype_all)) dev_queue_xmit_nit(skb, dev); - if (netif_needs_gso(dev, skb)) { - if (unlikely(dev_gso_segment(skb))) - goto out_kfree_skb; - if (skb->next) - goto gso; - } - /* * If device doesnt need skb->dst, release it right now while * its hot in this cpu cache @@ -1917,6 +1910,14 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, skb_dst_drop(skb); skb_orphan_try(skb); + + if (netif_needs_gso(dev, skb)) { + if (unlikely(dev_gso_segment(skb))) + goto out_kfree_skb; + if (skb->next) + goto gso; + } + rc = ops->ndo_start_xmit(skb, dev); if (rc == NETDEV_TX_OK) txq_trans_update(txq); -- cgit v1.2.2 From cecc74de25d2cfb08e7702cd38e3f195950f1228 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 22 Apr 2010 13:03:24 +0200 Subject: netfilter: ip_tables: convert pr_devel() to pr_debug() We want to be able to use CONFIG_DYNAMIC_DEBUG in netfilter code, switch the few existing pr_devel() calls to pr_debug(). Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ip_tables.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index bb5e0d9b8137..3e6af1036fbc 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -363,7 +363,7 @@ ipt_do_table(struct sk_buff *skb, e = get_entry(table_base, private->hook_entry[hook]); - pr_devel("Entering %s(hook %u); sp at %u (UF %p)\n", + pr_debug("Entering %s(hook %u); sp at %u (UF %p)\n", table->name, hook, origptr, get_entry(table_base, private->underflow[hook])); @@ -409,11 +409,11 @@ ipt_do_table(struct sk_buff *skb, if (*stackptr == 0) { e = get_entry(table_base, private->underflow[hook]); - pr_devel("Underflow (this is normal) " + pr_debug("Underflow (this is normal) " "to %p\n", e); } else { e = jumpstack[--*stackptr]; - pr_devel("Pulled %p out from pos %u\n", + pr_debug("Pulled %p out from pos %u\n", e, *stackptr); e = ipt_next_entry(e); } @@ -426,7 +426,7 @@ ipt_do_table(struct sk_buff *skb, break; } jumpstack[(*stackptr)++] = e; - pr_devel("Pushed %p into pos %u\n", + pr_debug("Pushed %p into pos %u\n", e, *stackptr - 1); } @@ -448,7 +448,7 @@ ipt_do_table(struct sk_buff *skb, break; } while (!hotdrop); xt_info_rdunlock_bh(); - pr_devel("Exiting %s; resetting sp from %u to %u\n", + pr_debug("Exiting %s; resetting sp from %u to %u\n", __func__, *stackptr, origptr); *stackptr = origptr; #ifdef DEBUG_ALLOW_ALL -- cgit v1.2.2 From 3d7b08945e54a3a5358d5890240619a013cb7388 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Apr 2010 15:35:55 -0400 Subject: SUNRPC: Fix a bug in rpcauth_prune_expired Don't want to evict a credential if cred->cr_expire == jiffies, since that means that it was just placed on the cred_unused list. We therefore need to use time_in_range() rather than time_in_range_open(). 
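For reference, the two jiffies helpers (roughly as defined in include/linux/jiffies.h of this era) differ only in whether the upper bound is inclusive, which is exactly the cred->cr_expire == jiffies case:

	#define time_in_range(a,b,c) \
		(time_after_eq(a,b) && \
		 time_before_eq(a,c))

	#define time_in_range_open(a,b,c) \
		(time_after_eq(a,b) && \
		 time_before(a,c))

With the open-ended variant, a credential whose cr_expire equals jiffies falls outside [expired, jiffies) and is pruned immediately instead of being skipped for the 60 second moratorium.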
Signed-off-by: Trond Myklebust --- net/sunrpc/auth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index f394fc190a49..95afe79dd9d7 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -237,7 +237,7 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) { /* Enforce a 60 second garbage collection moratorium */ - if (time_in_range_open(cred->cr_expire, expired, jiffies) && + if (time_in_range(cred->cr_expire, expired, jiffies) && test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) continue; -- cgit v1.2.2 From e802af9cabb011f09b9c19a82faef3dd315f27eb Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 22 Apr 2010 15:24:53 -0700 Subject: IPv6: Generic TTL Security Mechanism (final version) This patch adds IPv6 support for RFC5082 Generalized TTL Security Mechanism. Not to users of mapped address; the IPV6 and IPV4 socket options are seperate. The server does have to deal with both IPv4 and IPv6 socket options and the client has to handle the different for each family. On client: int ttl = 255; getaddrinfo(argv[1], argv[2], &hint, &result); for (rp = result; rp != NULL; rp = rp->ai_next) { s = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol); if (s < 0) continue; if (rp->ai_family == AF_INET) { setsockopt(s, IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)); } else if (rp->ai_family == AF_INET6) { setsockopt(s, IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl))) } if (connect(s, rp->ai_addr, rp->ai_addrlen) == 0) { ... On server: int minttl = 255 - maxhops; getaddrinfo(NULL, port, &hints, &result); for (rp = result; rp != NULL; rp = rp->ai_next) { s = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol); if (s < 0) continue; if (rp->ai_family == AF_INET6) setsockopt(s, IPPROTO_IPV6, IPV6_MINHOPCOUNT, &minttl, sizeof(minttl)); setsockopt(s, IPPROTO_IP, IP_MINTTL, &minttl, sizeof(minttl)); if (bind(s, rp->ai_addr, rp->ai_addrlen) == 0) break ... Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/ipv6/ipv6_sockglue.c | 12 ++++++++++++ net/ipv6/tcp_ipv6.c | 14 +++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 1160400e9dbd..92295ad3487a 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -767,6 +767,14 @@ pref_skip_coa: break; } + case IPV6_MINHOPCOUNT: + if (optlen < sizeof(int)) + goto e_inval; + if (val < 0 || val > 255) + goto e_inval; + np->min_hopcount = val; + retv = 0; + break; } release_sock(sk); @@ -1116,6 +1124,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val |= IPV6_PREFER_SRC_HOME; break; + case IPV6_MINHOPCOUNT: + val = np->min_hopcount; + break; + default: return -ENOPROTOOPT; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1ababbb41131..6603511e3673 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -353,6 +353,11 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk->sk_state == TCP_CLOSE) goto out; + if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) { + NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); + goto out; + } + tp = tcp_sk(sk); seq = ntohl(th->seq); if (sk->sk_state != TCP_LISTEN && @@ -1678,6 +1683,7 @@ ipv6_pktoptions: static int tcp_v6_rcv(struct sk_buff *skb) { struct tcphdr *th; + struct ipv6hdr *hdr; struct sock *sk; int ret; struct net *net = dev_net(skb->dev); @@ -1704,12 +1710,13 @@ static int tcp_v6_rcv(struct sk_buff *skb) goto bad_packet; th = tcp_hdr(skb); + hdr = ipv6_hdr(skb); TCP_SKB_CB(skb)->seq = ntohl(th->seq); TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + skb->len - th->doff*4); TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); TCP_SKB_CB(skb)->when = 0; - TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb)); + TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(hdr); TCP_SKB_CB(skb)->sacked = 0; sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); @@ -1720,6 +1727,11 @@ process: if (sk->sk_state == TCP_TIME_WAIT) goto do_time_wait; + if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) { + NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); + goto discard_and_relse; + } + if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; -- cgit v1.2.2 From 80032cffb95edff4fc216b1cb21682257be326b7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 21 Apr 2010 23:53:27 +0000 Subject: rtnetlink: potential ERR_PTR dereference In the original code, if rtnl_create_link() returned an ERR_PTR then that would get passed to rtnl_configure_link() which dereferences it. Signed-off-by: Dan Carpenter Acked-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4568120d8533..fe776c9ddeca 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1270,10 +1270,11 @@ replay: err = ops->newlink(net, dev, tb, data); else err = register_netdevice(dev); - if (err < 0 && !IS_ERR(dev)) { + + if (err < 0 && !IS_ERR(dev)) free_netdev(dev); + if (err < 0) goto out; - } err = rtnl_configure_link(dev, ifm); if (err < 0) -- cgit v1.2.2 From 24acc6895616b373475e92e49925efc3ef591563 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 21 Apr 2010 23:55:27 +0000 Subject: rdma: potential ERR_PTR dereference In the original code, the "goto out" calls "rdma_destroy_id(cm_id);" That isn't needed here and would cause problems because "cm_id" is an ERR_PTR. The new code just returns directly. Signed-off-by: Dan Carpenter Acked-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/rdma_transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index 9ece910ea394..7b155081b4dc 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -134,7 +134,7 @@ static int __init rds_rdma_listen_init(void) ret = PTR_ERR(cm_id); printk(KERN_ERR "RDS/RDMA: failed to setup listener, " "rdma_create_id() returned %d\n", ret); - goto out; + return ret; } sin.sin_family = AF_INET, -- cgit v1.2.2 From aa2ea0586d9dbe56a334d835a43b45e8c2104e77 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 22 Apr 2010 07:00:24 +0000 Subject: tcp: fix outsegs stat for TSO segments Account for TSO segments of an skb in TCP_MIB_OUTSEGS counter. Without doing this, the counter can be off by orders of magnitude from the actual number of segments sent. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 429ad9286efc..5db3a2c6cb33 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -888,7 +888,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, tcp_event_data_sent(tp, skb, sk); if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) - TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); + TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, + tcp_skb_pcount(skb)); err = icsk->icsk_af_ops->queue_xmit(skb); if (likely(err <= 0)) @@ -2503,7 +2504,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, th->window = htons(min(req->rcv_wnd, 65535U)); tcp_options_write((__be32 *)(th + 1), tp, &opts); th->doff = (tcp_header_size >> 2); - TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); + TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb)); #ifdef CONFIG_TCP_MD5SIG /* Okay, we have all we need - do the md5 hash if needed */ -- cgit v1.2.2 From 40eaf96271526a9f71030dd1a199ce46c045752e Mon Sep 17 00:00:00 2001 From: Paul LeoNerd Evans Date: Thu, 22 Apr 2010 03:32:22 +0000 Subject: net: Socket filter ancilliary data access for skb->dev->type Add an SKF_AD_HATYPE field to the packet ancilliary data area, giving access to skb->dev->type, as reported in the sll_hatype field. When capturing packets on a PF_PACKET/SOCK_RAW socket bound to all interfaces, there doesn't appear to be a way for the filter program to actually find out the underlying hardware type the packet was captured on. 
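A hypothetical classic-BPF fragment using the new ancillary offset (illustrative sketch only, not taken from the patch) that accepts frames only when they arrived on an Ethernet device:

	struct sock_filter code[] = {
		/* A = skb->dev->type via the ancillary data area */
		BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + SKF_AD_HATYPE),
		/* keep the packet only if the device type is ARPHRD_ETHER */
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, ARPHRD_ETHER, 0, 1),
		BPF_STMT(BPF_RET | BPF_K, 0xffff),	/* accept */
		BPF_STMT(BPF_RET | BPF_K, 0),		/* drop */
	};

Such a program would be attached to the PF_PACKET socket in the usual way with setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &fprog, sizeof(fprog)).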
This patch adds such ability. This patch also handles the case where skb->dev can be NULL, such as on netlink sockets. Signed-off-by: Paul Evans Signed-off-by: David S. Miller --- net/core/filter.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index ff943bed21af..da69fb728d32 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -302,6 +302,8 @@ load_b: A = skb->pkt_type; continue; case SKF_AD_IFINDEX: + if (!skb->dev) + return 0; A = skb->dev->ifindex; continue; case SKF_AD_MARK: @@ -310,6 +312,11 @@ load_b: case SKF_AD_QUEUE: A = skb->queue_mapping; continue; + case SKF_AD_HATYPE: + if (!skb->dev) + return 0; + A = skb->dev->type; + continue; case SKF_AD_NLATTR: { struct nlattr *nla; -- cgit v1.2.2 From 5ebfbc06aae941484326c9e7e9c4d85330f63591 Mon Sep 17 00:00:00 2001 From: Andrew Hendry Date: Thu, 22 Apr 2010 16:12:36 -0700 Subject: X25: Add if_x25.h and x25 to device identifiers V2 Feedback from John Hughes. - Add header for userspace implementations such as xot/xoe to use - Use explicit values for interface stability - No changes to driver patches V1 - Use identifiers instead of magic numbers for X25 layer 3 to device interface. - Also fixed checkpatch notes on updated code. [ Add new user header to include/linux/Kbuild -DaveM ] Signed-off-by: Andrew Hendry Signed-off-by: David S. Miller --- net/x25/x25_dev.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index b9ef682230a0..9005f6daeab5 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -24,6 +24,7 @@ #include #include #include +#include static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb) { @@ -115,19 +116,22 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev, } switch (skb->data[0]) { - case 0x00: - skb_pull(skb, 1); - if (x25_receive_data(skb, nb)) { - x25_neigh_put(nb); - goto out; - } - break; - case 0x01: - x25_link_established(nb); - break; - case 0x02: - x25_link_terminated(nb); - break; + + case X25_IFACE_DATA: + skb_pull(skb, 1); + if (x25_receive_data(skb, nb)) { + x25_neigh_put(nb); + goto out; + } + break; + + case X25_IFACE_CONNECT: + x25_link_established(nb); + break; + + case X25_IFACE_DISCONNECT: + x25_link_terminated(nb); + break; } x25_neigh_put(nb); drop: @@ -148,7 +152,7 @@ void x25_establish_link(struct x25_neigh *nb) return; } ptr = skb_put(skb, 1); - *ptr = 0x01; + *ptr = X25_IFACE_CONNECT; break; #if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE) @@ -184,7 +188,7 @@ void x25_terminate_link(struct x25_neigh *nb) } ptr = skb_put(skb, 1); - *ptr = 0x02; + *ptr = X25_IFACE_DISCONNECT; skb->protocol = htons(ETH_P_X25); skb->dev = nb->dev; @@ -200,7 +204,7 @@ void x25_send_frame(struct sk_buff *skb, struct x25_neigh *nb) switch (nb->dev->type) { case ARPHRD_X25: dptr = skb_push(skb, 1); - *dptr = 0x00; + *dptr = X25_IFACE_DATA; break; #if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE) -- cgit v1.2.2 From fda48a0d7a8412cedacda46a9c0bf8ef9cd13559 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 21 Apr 2010 09:26:15 +0000 Subject: tcp: bind() fix when many ports are bound Port autoselection done by kernel only works when number of bound sockets is under a threshold (typically 30000). 
When this threshold is over, we must check if there is a conflict before exiting first loop in inet_csk_get_port() Change inet_csk_bind_conflict() to forbid two reuse-enabled sockets to bind on same (address,port) tuple (with a non ANY address) Same change for inet6_csk_bind_conflict() Reported-by: Gaspar Chilingarov Signed-off-by: Eric Dumazet Acked-by: Evgeniy Polyakov Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 16 +++++++++++----- net/ipv6/inet6_connection_sock.c | 15 ++++++++++----- 2 files changed, 21 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 8da6429269dd..14825eb09770 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -70,13 +70,17 @@ int inet_csk_bind_conflict(const struct sock *sk, (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { + const __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); + if (!reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN) { - const __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); if (!sk2_rcv_saddr || !sk_rcv_saddr || sk2_rcv_saddr == sk_rcv_saddr) break; - } + } else if (reuse && sk2->sk_reuse && + sk2_rcv_saddr && + sk2_rcv_saddr == sk_rcv_saddr) + break; } } return node != NULL; @@ -120,9 +124,11 @@ again: smallest_size = tb->num_owners; smallest_rover = rover; if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) { - spin_unlock(&head->lock); - snum = smallest_rover; - goto have_snum; + if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) { + spin_unlock(&head->lock); + snum = smallest_rover; + goto have_snum; + } } } goto next; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 628db24bcf22..b4b7d40a9c95 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -42,11 +42,16 @@ int inet6_csk_bind_conflict(const struct sock *sk, if (sk != sk2 && (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || - sk->sk_bound_dev_if == sk2->sk_bound_dev_if) && - (!sk->sk_reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) && - ipv6_rcv_saddr_equal(sk, sk2)) - break; + sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { + if ((!sk->sk_reuse || !sk2->sk_reuse || + sk2->sk_state == TCP_LISTEN) && + ipv6_rcv_saddr_equal(sk, sk2)) + break; + else if (sk->sk_reuse && sk2->sk_reuse && + !ipv6_addr_any(inet6_rcv_saddr(sk2)) && + ipv6_rcv_saddr_equal(sk, sk2)) + break; + } } return node != NULL; -- cgit v1.2.2 From 6e7cb8370760ec17e10098399822292def8d84f3 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Sun, 18 Apr 2010 12:42:05 +0900 Subject: ipv6 mcast: Introduce include/net/mld.h for MLD definitions. Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/mcast.c | 135 +++++++++++++++++-------------------------------------- 1 file changed, 40 insertions(+), 95 deletions(-) (limited to 'net') diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 62ed08213d91..006aee683a0f 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -71,54 +72,11 @@ #define MDBG(x) #endif -/* - * These header formats should be in a separate include file, but icmpv6.h - * doesn't have in6_addr defined in all cases, there is no __u128, and no - * other files reference these. 
- * - * +-DLS 4/14/03 - */ - -/* Multicast Listener Discovery version 2 headers */ - -struct mld2_grec { - __u8 grec_type; - __u8 grec_auxwords; - __be16 grec_nsrcs; - struct in6_addr grec_mca; - struct in6_addr grec_src[0]; -}; - -struct mld2_report { - __u8 type; - __u8 resv1; - __sum16 csum; - __be16 resv2; - __be16 ngrec; - struct mld2_grec grec[0]; -}; - -struct mld2_query { - __u8 type; - __u8 code; - __sum16 csum; - __be16 mrc; - __be16 resv1; - struct in6_addr mca; -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u8 qrv:3, - suppress:1, - resv2:4; -#elif defined(__BIG_ENDIAN_BITFIELD) - __u8 resv2:4, - suppress:1, - qrv:3; -#else -#error "Please fix " -#endif - __u8 qqic; - __be16 nsrcs; - struct in6_addr srcs[0]; +/* Ensure that we have struct in6_addr aligned on 32bit word. */ +static void *__mld2_query_bugs[] __attribute__((__unused__)) = { + BUILD_BUG_ON_NULL(offsetof(struct mld2_query, mld2q_srcs) % 4), + BUILD_BUG_ON_NULL(offsetof(struct mld2_report, mld2r_grec) % 4), + BUILD_BUG_ON_NULL(offsetof(struct mld2_grec, grec_mca) % 4) }; static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT; @@ -157,14 +115,6 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, ((idev)->mc_v1_seen && \ time_before(jiffies, (idev)->mc_v1_seen))) -#define MLDV2_MASK(value, nb) ((nb)>=32 ? (value) : ((1<<(nb))-1) & (value)) -#define MLDV2_EXP(thresh, nbmant, nbexp, value) \ - ((value) < (thresh) ? (value) : \ - ((MLDV2_MASK(value, nbmant) | (1<<(nbmant))) << \ - (MLDV2_MASK((value) >> (nbmant), nbexp) + (nbexp)))) - -#define MLDV2_MRC(value) MLDV2_EXP(0x8000, 12, 3, value) - #define IPV6_MLD_MAX_MSF 64 int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF; @@ -1161,7 +1111,7 @@ int igmp6_event_query(struct sk_buff *skb) struct in6_addr *group; unsigned long max_delay; struct inet6_dev *idev; - struct icmp6hdr *hdr; + struct mld_msg *mld; int group_type; int mark = 0; int len; @@ -1182,8 +1132,8 @@ int igmp6_event_query(struct sk_buff *skb) if (idev == NULL) return 0; - hdr = icmp6_hdr(skb); - group = (struct in6_addr *) (hdr + 1); + mld = (struct mld_msg *)icmp6_hdr(skb); + group = &mld->mld_mca; group_type = ipv6_addr_type(group); if (group_type != IPV6_ADDR_ANY && @@ -1197,7 +1147,7 @@ int igmp6_event_query(struct sk_buff *skb) /* MLDv1 router present */ /* Translate milliseconds to jiffies */ - max_delay = (ntohs(hdr->icmp6_maxdelay)*HZ)/1000; + max_delay = (ntohs(mld->mld_maxdelay)*HZ)/1000; switchback = (idev->mc_qrv + 1) * max_delay; idev->mc_v1_seen = jiffies + switchback; @@ -1216,14 +1166,14 @@ int igmp6_event_query(struct sk_buff *skb) return -EINVAL; } mlh2 = (struct mld2_query *)skb_transport_header(skb); - max_delay = (MLDV2_MRC(ntohs(mlh2->mrc))*HZ)/1000; + max_delay = (MLDV2_MRC(ntohs(mlh2->mld2q_mrc))*HZ)/1000; if (!max_delay) max_delay = 1; idev->mc_maxdelay = max_delay; - if (mlh2->qrv) - idev->mc_qrv = mlh2->qrv; + if (mlh2->mld2q_qrv) + idev->mc_qrv = mlh2->mld2q_qrv; if (group_type == IPV6_ADDR_ANY) { /* general query */ - if (mlh2->nsrcs) { + if (mlh2->mld2q_nsrcs) { in6_dev_put(idev); return -EINVAL; /* no sources allowed */ } @@ -1232,9 +1182,9 @@ int igmp6_event_query(struct sk_buff *skb) return 0; } /* mark sources to include, if group & source-specific */ - if (mlh2->nsrcs != 0) { + if (mlh2->mld2q_nsrcs != 0) { if (!pskb_may_pull(skb, srcs_offset + - ntohs(mlh2->nsrcs) * sizeof(struct in6_addr))) { + ntohs(mlh2->mld2q_nsrcs) * sizeof(struct in6_addr))) { in6_dev_put(idev); return -EINVAL; } @@ -1270,7 +1220,7 @@ int igmp6_event_query(struct sk_buff 
*skb) ma->mca_flags &= ~MAF_GSQUERY; } if (!(ma->mca_flags & MAF_GSQUERY) || - mld_marksources(ma, ntohs(mlh2->nsrcs), mlh2->srcs)) + mld_marksources(ma, ntohs(mlh2->mld2q_nsrcs), mlh2->mld2q_srcs)) igmp6_group_queried(ma, max_delay); spin_unlock_bh(&ma->mca_lock); break; @@ -1286,9 +1236,8 @@ int igmp6_event_query(struct sk_buff *skb) int igmp6_event_report(struct sk_buff *skb) { struct ifmcaddr6 *ma; - struct in6_addr *addrp; struct inet6_dev *idev; - struct icmp6hdr *hdr; + struct mld_msg *mld; int addr_type; /* Our own report looped back. Ignore it. */ @@ -1300,10 +1249,10 @@ int igmp6_event_report(struct sk_buff *skb) skb->pkt_type != PACKET_BROADCAST) return 0; - if (!pskb_may_pull(skb, sizeof(struct in6_addr))) + if (!pskb_may_pull(skb, sizeof(*mld) - sizeof(struct icmp6hdr))) return -EINVAL; - hdr = icmp6_hdr(skb); + mld = (struct mld_msg *)icmp6_hdr(skb); /* Drop reports with not link local source */ addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr); @@ -1311,8 +1260,6 @@ int igmp6_event_report(struct sk_buff *skb) !(addr_type&IPV6_ADDR_LINKLOCAL)) return -EINVAL; - addrp = (struct in6_addr *) (hdr + 1); - idev = in6_dev_get(skb->dev); if (idev == NULL) return -ENODEV; @@ -1323,7 +1270,7 @@ int igmp6_event_report(struct sk_buff *skb) read_lock_bh(&idev->lock); for (ma = idev->mc_list; ma; ma=ma->next) { - if (ipv6_addr_equal(&ma->mca_addr, addrp)) { + if (ipv6_addr_equal(&ma->mca_addr, &mld->mld_mca)) { spin_lock(&ma->mca_lock); if (del_timer(&ma->mca_timer)) atomic_dec(&ma->mca_refcnt); @@ -1432,11 +1379,11 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) skb_set_transport_header(skb, skb_tail_pointer(skb) - skb->data); skb_put(skb, sizeof(*pmr)); pmr = (struct mld2_report *)skb_transport_header(skb); - pmr->type = ICMPV6_MLD2_REPORT; - pmr->resv1 = 0; - pmr->csum = 0; - pmr->resv2 = 0; - pmr->ngrec = 0; + pmr->mld2r_type = ICMPV6_MLD2_REPORT; + pmr->mld2r_resv1 = 0; + pmr->mld2r_cksum = 0; + pmr->mld2r_resv2 = 0; + pmr->mld2r_ngrec = 0; return skb; } @@ -1458,9 +1405,10 @@ static void mld_sendpack(struct sk_buff *skb) mldlen = skb->tail - skb->transport_header; pip6->payload_len = htons(payload_len); - pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen, - IPPROTO_ICMPV6, csum_partial(skb_transport_header(skb), - mldlen, 0)); + pmr->mld2r_cksum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen, + IPPROTO_ICMPV6, + csum_partial(skb_transport_header(skb), + mldlen, 0)); dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr); @@ -1521,7 +1469,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, pgr->grec_nsrcs = 0; pgr->grec_mca = pmc->mca_addr; /* structure copy */ pmr = (struct mld2_report *)skb_transport_header(skb); - pmr->ngrec = htons(ntohs(pmr->ngrec)+1); + pmr->mld2r_ngrec = htons(ntohs(pmr->mld2r_ngrec)+1); *ppgr = pgr; return skb; } @@ -1557,7 +1505,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, /* EX and TO_EX get a fresh packet, if needed */ if (truncate) { - if (pmr && pmr->ngrec && + if (pmr && pmr->mld2r_ngrec && AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) { if (skb) mld_sendpack(skb); @@ -1770,9 +1718,8 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) struct sock *sk = net->ipv6.igmp_sk; struct inet6_dev *idev; struct sk_buff *skb; - struct icmp6hdr *hdr; + struct mld_msg *hdr; const struct in6_addr *snd_addr, *saddr; - struct in6_addr *addrp; struct in6_addr addr_buf; int err, len, payload_len, full_len; u8 
ra[8] = { IPPROTO_ICMPV6, 0, @@ -1820,16 +1767,14 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra)); - hdr = (struct icmp6hdr *) skb_put(skb, sizeof(struct icmp6hdr)); - memset(hdr, 0, sizeof(struct icmp6hdr)); - hdr->icmp6_type = type; + hdr = (struct mld_msg *) skb_put(skb, sizeof(struct mld_msg)); + memset(hdr, 0, sizeof(struct mld_msg)); + hdr->mld_type = type; + ipv6_addr_copy(&hdr->mld_mca, addr); - addrp = (struct in6_addr *) skb_put(skb, sizeof(struct in6_addr)); - ipv6_addr_copy(addrp, addr); - - hdr->icmp6_cksum = csum_ipv6_magic(saddr, snd_addr, len, - IPPROTO_ICMPV6, - csum_partial(hdr, len, 0)); + hdr->mld_cksum = csum_ipv6_magic(saddr, snd_addr, len, + IPPROTO_ICMPV6, + csum_partial(hdr, len, 0)); idev = in6_dev_get(skb->dev); -- cgit v1.2.2 From 8ef2a9a59854994bace13b5c4f7edc2c8d4d124e Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Sun, 18 Apr 2010 12:42:07 +0900 Subject: bridge br_multicast: Make functions less ipv4 dependent. Introduce struct br_ip{} to store ip address and protocol and make functions more generic so that we can support both IPv4 and IPv6 with less pain. Signed-off-by: YOSHIFUJI Hideaki --- net/bridge/br_multicast.c | 197 +++++++++++++++++++++++++++++++++------------- net/bridge/br_private.h | 12 ++- 2 files changed, 151 insertions(+), 58 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 81bfdfe14ce5..64a3e4f74348 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -27,48 +27,86 @@ #include "br_private.h" -static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb, __be32 ip) +static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b) +{ + if (a->proto != b->proto) + return 0; + switch (a->proto) { + case htons(ETH_P_IP): + return a->u.ip4 == b->u.ip4; + } + return 0; +} + +static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip) { return jhash_1word(mdb->secret, (__force u32)ip) & (mdb->max - 1); } +static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb, + struct br_ip *ip) +{ + switch (ip->proto) { + case htons(ETH_P_IP): + return __br_ip4_hash(mdb, ip->u.ip4); + } + return 0; +} + static struct net_bridge_mdb_entry *__br_mdb_ip_get( - struct net_bridge_mdb_htable *mdb, __be32 dst, int hash) + struct net_bridge_mdb_htable *mdb, struct br_ip *dst, int hash) { struct net_bridge_mdb_entry *mp; struct hlist_node *p; hlist_for_each_entry_rcu(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { - if (dst == mp->addr) + if (br_ip_equal(&mp->addr, dst)) return mp; } return NULL; } -static struct net_bridge_mdb_entry *br_mdb_ip_get( +static struct net_bridge_mdb_entry *br_mdb_ip4_get( struct net_bridge_mdb_htable *mdb, __be32 dst) { - if (!mdb) - return NULL; + struct br_ip br_dst; + + br_dst.u.ip4 = dst; + br_dst.proto = htons(ETH_P_IP); + return __br_mdb_ip_get(mdb, &br_dst, __br_ip4_hash(mdb, dst)); +} + +static struct net_bridge_mdb_entry *br_mdb_ip_get( + struct net_bridge_mdb_htable *mdb, struct br_ip *dst) +{ return __br_mdb_ip_get(mdb, dst, br_ip_hash(mdb, dst)); } struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, struct sk_buff *skb) { - if (br->multicast_disabled) + struct net_bridge_mdb_htable *mdb = br->mdb; + struct br_ip ip; + + if (!mdb || br->multicast_disabled) + return NULL; + + if (BR_INPUT_SKB_CB(skb)->igmp) return NULL; + ip.proto = skb->protocol; + switch (skb->protocol) { case htons(ETH_P_IP): - if 
(BR_INPUT_SKB_CB(skb)->igmp) - break; - return br_mdb_ip_get(br->mdb, ip_hdr(skb)->daddr); + ip.u.ip4 = ip_hdr(skb)->daddr; + break; + default: + return NULL; } - return NULL; + return br_mdb_ip_get(mdb, &ip); } static void br_mdb_free(struct rcu_head *head) @@ -95,7 +133,7 @@ static int br_mdb_copy(struct net_bridge_mdb_htable *new, for (i = 0; i < old->max; i++) hlist_for_each_entry(mp, p, &old->mhash[i], hlist[old->ver]) hlist_add_head(&mp->hlist[new->ver], - &new->mhash[br_ip_hash(new, mp->addr)]); + &new->mhash[br_ip_hash(new, &mp->addr)]); if (!elasticity) return 0; @@ -163,7 +201,7 @@ static void br_multicast_del_pg(struct net_bridge *br, struct net_bridge_port_group *p; struct net_bridge_port_group **pp; - mp = br_mdb_ip_get(mdb, pg->addr); + mp = br_mdb_ip_get(mdb, &pg->addr); if (WARN_ON(!mp)) return; @@ -249,8 +287,8 @@ out: return 0; } -static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br, - __be32 group) +static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, + __be32 group) { struct sk_buff *skb; struct igmphdr *ih; @@ -314,12 +352,22 @@ out: return skb; } +static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br, + struct br_ip *addr) +{ + switch (addr->proto) { + case htons(ETH_P_IP): + return br_ip4_multicast_alloc_query(br, addr->u.ip4); + } + return NULL; +} + static void br_multicast_send_group_query(struct net_bridge_mdb_entry *mp) { struct net_bridge *br = mp->br; struct sk_buff *skb; - skb = br_multicast_alloc_query(br, mp->addr); + skb = br_multicast_alloc_query(br, &mp->addr); if (!skb) goto timer; @@ -353,7 +401,7 @@ static void br_multicast_send_port_group_query(struct net_bridge_port_group *pg) struct net_bridge *br = port->br; struct sk_buff *skb; - skb = br_multicast_alloc_query(br, pg->addr); + skb = br_multicast_alloc_query(br, &pg->addr); if (!skb) goto timer; @@ -383,8 +431,8 @@ out: } static struct net_bridge_mdb_entry *br_multicast_get_group( - struct net_bridge *br, struct net_bridge_port *port, __be32 group, - int hash) + struct net_bridge *br, struct net_bridge_port *port, + struct br_ip *group, int hash) { struct net_bridge_mdb_htable *mdb = br->mdb; struct net_bridge_mdb_entry *mp; @@ -396,9 +444,8 @@ static struct net_bridge_mdb_entry *br_multicast_get_group( hlist_for_each_entry(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { count++; - if (unlikely(group == mp->addr)) { + if (unlikely(br_ip_equal(group, &mp->addr))) return mp; - } } elasticity = 0; @@ -463,7 +510,8 @@ err: } static struct net_bridge_mdb_entry *br_multicast_new_group( - struct net_bridge *br, struct net_bridge_port *port, __be32 group) + struct net_bridge *br, struct net_bridge_port *port, + struct br_ip *group) { struct net_bridge_mdb_htable *mdb = br->mdb; struct net_bridge_mdb_entry *mp; @@ -496,7 +544,7 @@ rehash: goto out; mp->br = br; - mp->addr = group; + mp->addr = *group; setup_timer(&mp->timer, br_multicast_group_expired, (unsigned long)mp); setup_timer(&mp->query_timer, br_multicast_group_query_expired, @@ -510,7 +558,8 @@ out: } static int br_multicast_add_group(struct net_bridge *br, - struct net_bridge_port *port, __be32 group) + struct net_bridge_port *port, + struct br_ip *group) { struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; @@ -518,9 +567,6 @@ static int br_multicast_add_group(struct net_bridge *br, unsigned long now = jiffies; int err; - if (ipv4_is_local_multicast(group)) - return 0; - spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || (port && port->state == BR_STATE_DISABLED)) @@ 
-549,7 +595,7 @@ static int br_multicast_add_group(struct net_bridge *br, if (unlikely(!p)) goto err; - p->addr = group; + p->addr = *group; p->port = port; p->next = *pp; hlist_add_head(&p->mglist, &port->mglist); @@ -570,6 +616,21 @@ err: return err; } +static int br_ip4_multicast_add_group(struct net_bridge *br, + struct net_bridge_port *port, + __be32 group) +{ + struct br_ip br_group; + + if (ipv4_is_local_multicast(group)) + return 0; + + br_group.u.ip4 = group; + br_group.proto = htons(ETH_P_IP); + + return br_multicast_add_group(br, port, &br_group); +} + static void br_multicast_router_expired(unsigned long data) { struct net_bridge_port *port = (void *)data; @@ -591,19 +652,15 @@ static void br_multicast_local_router_expired(unsigned long data) { } -static void br_multicast_send_query(struct net_bridge *br, - struct net_bridge_port *port, u32 sent) +static void __br_multicast_send_query(struct net_bridge *br, + struct net_bridge_port *port, + struct br_ip *ip) { - unsigned long time; struct sk_buff *skb; - if (!netif_running(br->dev) || br->multicast_disabled || - timer_pending(&br->multicast_querier_timer)) - return; - - skb = br_multicast_alloc_query(br, 0); + skb = br_multicast_alloc_query(br, ip); if (!skb) - goto timer; + return; if (port) { __skb_push(skb, sizeof(struct ethhdr)); @@ -612,8 +669,23 @@ static void br_multicast_send_query(struct net_bridge *br, dev_queue_xmit); } else netif_rx(skb); +} + +static void br_multicast_send_query(struct net_bridge *br, + struct net_bridge_port *port, u32 sent) +{ + unsigned long time; + struct br_ip br_group; + + if (!netif_running(br->dev) || br->multicast_disabled || + timer_pending(&br->multicast_querier_timer)) + return; + + br_group.u.ip4 = 0; + br_group.proto = htons(ETH_P_IP); + + __br_multicast_send_query(br, port, &br_group); -timer: time = jiffies; time += sent < br->multicast_startup_query_count ? 
br->multicast_startup_query_interval : @@ -698,9 +770,9 @@ void br_multicast_disable_port(struct net_bridge_port *port) spin_unlock(&br->multicast_lock); } -static int br_multicast_igmp3_report(struct net_bridge *br, - struct net_bridge_port *port, - struct sk_buff *skb) +static int br_ip4_multicast_igmp3_report(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb) { struct igmpv3_report *ih; struct igmpv3_grec *grec; @@ -745,7 +817,7 @@ static int br_multicast_igmp3_report(struct net_bridge *br, continue; } - err = br_multicast_add_group(br, port, group); + err = br_ip4_multicast_add_group(br, port, group); if (err) break; } @@ -800,7 +872,7 @@ timer: static void br_multicast_query_received(struct net_bridge *br, struct net_bridge_port *port, - __be32 saddr) + int saddr) { if (saddr) mod_timer(&br->multicast_querier_timer, @@ -811,9 +883,9 @@ static void br_multicast_query_received(struct net_bridge *br, br_multicast_mark_router(br, port); } -static int br_multicast_query(struct net_bridge *br, - struct net_bridge_port *port, - struct sk_buff *skb) +static int br_ip4_multicast_query(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb) { struct iphdr *iph = ip_hdr(skb); struct igmphdr *ih = igmp_hdr(skb); @@ -831,7 +903,7 @@ static int br_multicast_query(struct net_bridge *br, (port && port->state == BR_STATE_DISABLED)) goto out; - br_multicast_query_received(br, port, iph->saddr); + br_multicast_query_received(br, port, !!iph->saddr); group = ih->group; @@ -859,7 +931,7 @@ static int br_multicast_query(struct net_bridge *br, if (!group) goto out; - mp = br_mdb_ip_get(br->mdb, group); + mp = br_mdb_ip4_get(br->mdb, group); if (!mp) goto out; @@ -885,7 +957,7 @@ out: static void br_multicast_leave_group(struct net_bridge *br, struct net_bridge_port *port, - __be32 group) + struct br_ip *group) { struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; @@ -893,9 +965,6 @@ static void br_multicast_leave_group(struct net_bridge *br, unsigned long now; unsigned long time; - if (ipv4_is_local_multicast(group)) - return; - spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || (port && port->state == BR_STATE_DISABLED) || @@ -946,6 +1015,22 @@ out: spin_unlock(&br->multicast_lock); } +static void br_ip4_multicast_leave_group(struct net_bridge *br, + struct net_bridge_port *port, + __be32 group) +{ + struct br_ip br_group; + + if (ipv4_is_local_multicast(group)) + return; + + br_group.u.ip4 = group; + br_group.proto = htons(ETH_P_IP); + + br_multicast_leave_group(br, port, &br_group); +} + + static int br_multicast_ipv4_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb) @@ -1023,16 +1108,16 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, case IGMP_HOST_MEMBERSHIP_REPORT: case IGMPV2_HOST_MEMBERSHIP_REPORT: BR_INPUT_SKB_CB(skb2)->mrouters_only = 1; - err = br_multicast_add_group(br, port, ih->group); + err = br_ip4_multicast_add_group(br, port, ih->group); break; case IGMPV3_HOST_MEMBERSHIP_REPORT: - err = br_multicast_igmp3_report(br, port, skb2); + err = br_ip4_multicast_igmp3_report(br, port, skb2); break; case IGMP_HOST_MEMBERSHIP_QUERY: - err = br_multicast_query(br, port, skb2); + err = br_ip4_multicast_query(br, port, skb2); break; case IGMP_HOST_LEAVE_MESSAGE: - br_multicast_leave_group(br, port, ih->group); + br_ip4_multicast_leave_group(br, port, ih->group); break; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 63181e4a2a67..45d11e49fbbb 100644 --- 
a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -45,6 +45,14 @@ struct mac_addr unsigned char addr[6]; }; +struct br_ip +{ + union { + __be32 ip4; + } u; + __be16 proto; +}; + struct net_bridge_fdb_entry { struct hlist_node hlist; @@ -64,7 +72,7 @@ struct net_bridge_port_group { struct rcu_head rcu; struct timer_list timer; struct timer_list query_timer; - __be32 addr; + struct br_ip addr; u32 queries_sent; }; @@ -77,7 +85,7 @@ struct net_bridge_mdb_entry struct rcu_head rcu; struct timer_list timer; struct timer_list query_timer; - __be32 addr; + struct br_ip addr; u32 queries_sent; }; -- cgit v1.2.2 From 08b202b6726459626c73ecfa08fcdc8c3efc76c2 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Fri, 23 Apr 2010 01:54:22 +0900 Subject: bridge br_multicast: IPv6 MLD support. Signed-off-by: YOSHIFUJI Hideaki --- net/bridge/Kconfig | 6 +- net/bridge/br_multicast.c | 424 +++++++++++++++++++++++++++++++++++++++++++++- net/bridge/br_private.h | 3 + 3 files changed, 429 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig index d115d5cea5b6..9190ae462cb4 100644 --- a/net/bridge/Kconfig +++ b/net/bridge/Kconfig @@ -33,14 +33,14 @@ config BRIDGE If unsure, say N. config BRIDGE_IGMP_SNOOPING - bool "IGMP snooping" + bool "IGMP/MLD snooping" depends on BRIDGE depends on INET default y ---help--- If you say Y here, then the Ethernet bridge will be able selectively - forward multicast traffic based on IGMP traffic received from each - port. + forward multicast traffic based on IGMP/MLD traffic received from + each port. Say N to exclude this support and reduce the binary size. diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 64a3e4f74348..38d1fbde5fb8 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -24,9 +24,24 @@ #include #include #include +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#include +#include +#include +#endif #include "br_private.h" +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static inline int ipv6_is_local_multicast(const struct in6_addr *addr) +{ + if (ipv6_addr_is_multicast(addr) && + IPV6_ADDR_MC_SCOPE(addr) <= IPV6_ADDR_SCOPE_LINKLOCAL) + return 1; + return 0; +} +#endif + static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b) { if (a->proto != b->proto) @@ -34,6 +49,10 @@ static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b) switch (a->proto) { case htons(ETH_P_IP): return a->u.ip4 == b->u.ip4; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case htons(ETH_P_IPV6): + return ipv6_addr_equal(&a->u.ip6, &b->u.ip6); +#endif } return 0; } @@ -43,12 +62,24 @@ static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip) return jhash_1word(mdb->secret, (__force u32)ip) & (mdb->max - 1); } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static inline int __br_ip6_hash(struct net_bridge_mdb_htable *mdb, + const struct in6_addr *ip) +{ + return jhash2((__force u32 *)ip->s6_addr32, 4, mdb->secret) & (mdb->max - 1); +} +#endif + static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb, struct br_ip *ip) { switch (ip->proto) { case htons(ETH_P_IP): return __br_ip4_hash(mdb, ip->u.ip4); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case htons(ETH_P_IPV6): + return __br_ip6_hash(mdb, &ip->u.ip6); +#endif } return 0; } @@ -78,6 +109,19 @@ static struct net_bridge_mdb_entry *br_mdb_ip4_get( return __br_mdb_ip_get(mdb, &br_dst, __br_ip4_hash(mdb, dst)); } +#if 
defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static struct net_bridge_mdb_entry *br_mdb_ip6_get( + struct net_bridge_mdb_htable *mdb, const struct in6_addr *dst) +{ + struct br_ip br_dst; + + ipv6_addr_copy(&br_dst.u.ip6, dst); + br_dst.proto = htons(ETH_P_IPV6); + + return __br_mdb_ip_get(mdb, &br_dst, __br_ip6_hash(mdb, dst)); +} +#endif + static struct net_bridge_mdb_entry *br_mdb_ip_get( struct net_bridge_mdb_htable *mdb, struct br_ip *dst) { @@ -102,6 +146,11 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, case htons(ETH_P_IP): ip.u.ip4 = ip_hdr(skb)->daddr; break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case htons(ETH_P_IPV6): + ipv6_addr_copy(&ip.u.ip6, &ipv6_hdr(skb)->daddr); + break; +#endif default: return NULL; } @@ -352,12 +401,94 @@ out: return skb; } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, + struct in6_addr *group) +{ + struct sk_buff *skb; + struct ipv6hdr *ip6h; + struct mld_msg *mldq; + struct ethhdr *eth; + u8 *hopopt; + unsigned long interval; + + skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*ip6h) + + 8 + sizeof(*mldq)); + if (!skb) + goto out; + + skb->protocol = htons(ETH_P_IPV6); + + /* Ethernet header */ + skb_reset_mac_header(skb); + eth = eth_hdr(skb); + + memcpy(eth->h_source, br->dev->dev_addr, 6); + ipv6_eth_mc_map(group, eth->h_dest); + eth->h_proto = htons(ETH_P_IPV6); + skb_put(skb, sizeof(*eth)); + + /* IPv6 header + HbH option */ + skb_set_network_header(skb, skb->len); + ip6h = ipv6_hdr(skb); + + *(__force __be32 *)ip6h = htonl(0x60000000); + ip6h->payload_len = 8 + sizeof(*mldq); + ip6h->nexthdr = IPPROTO_HOPOPTS; + ip6h->hop_limit = 1; + ipv6_addr_set(&ip6h->saddr, 0, 0, 0, 0); + ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1)); + + hopopt = (u8 *)(ip6h + 1); + hopopt[0] = IPPROTO_ICMPV6; /* next hdr */ + hopopt[1] = 0; /* length of HbH */ + hopopt[2] = IPV6_TLV_ROUTERALERT; /* Router Alert */ + hopopt[3] = 2; /* Length of RA Option */ + hopopt[4] = 0; /* Type = 0x0000 (MLD) */ + hopopt[5] = 0; + hopopt[6] = IPV6_TLV_PAD0; /* Pad0 */ + hopopt[7] = IPV6_TLV_PAD0; /* Pad0 */ + + skb_put(skb, sizeof(*ip6h) + 8); + + /* ICMPv6 */ + skb_set_transport_header(skb, skb->len); + mldq = (struct mld_msg *) icmp6_hdr(skb); + + interval = ipv6_addr_any(group) ? 
br->multicast_last_member_interval : + br->multicast_query_response_interval; + + mldq->mld_type = ICMPV6_MGM_QUERY; + mldq->mld_code = 0; + mldq->mld_cksum = 0; + mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval)); + mldq->mld_reserved = 0; + ipv6_addr_copy(&mldq->mld_mca, group); + + /* checksum */ + mldq->mld_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, + sizeof(*mldq), IPPROTO_ICMPV6, + csum_partial(mldq, + sizeof(*mldq), 0)); + skb_put(skb, sizeof(*mldq)); + + __skb_pull(skb, sizeof(*eth)); + +out: + return skb; +} +#endif + static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br, struct br_ip *addr) { switch (addr->proto) { case htons(ETH_P_IP): return br_ip4_multicast_alloc_query(br, addr->u.ip4); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case htons(ETH_P_IPV6): + return br_ip6_multicast_alloc_query(br, &addr->u.ip6); +#endif } return NULL; } @@ -631,6 +762,23 @@ static int br_ip4_multicast_add_group(struct net_bridge *br, return br_multicast_add_group(br, port, &br_group); } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static int br_ip6_multicast_add_group(struct net_bridge *br, + struct net_bridge_port *port, + const struct in6_addr *group) +{ + struct br_ip br_group; + + if (ipv6_is_local_multicast(group)) + return 0; + + ipv6_addr_copy(&br_group.u.ip6, group); + br_group.proto = htons(ETH_P_IP); + + return br_multicast_add_group(br, port, &br_group); +} +#endif + static void br_multicast_router_expired(unsigned long data) { struct net_bridge_port *port = (void *)data; @@ -681,10 +829,15 @@ static void br_multicast_send_query(struct net_bridge *br, timer_pending(&br->multicast_querier_timer)) return; - br_group.u.ip4 = 0; + memset(&br_group.u, 0, sizeof(br_group.u)); + br_group.proto = htons(ETH_P_IP); + __br_multicast_send_query(br, port, &br_group); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + br_group.proto = htons(ETH_P_IPV6); __br_multicast_send_query(br, port, &br_group); +#endif time = jiffies; time += sent < br->multicast_startup_query_count ? @@ -825,6 +978,66 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, return err; } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static int br_ip6_multicast_mld2_report(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb) +{ + struct icmp6hdr *icmp6h; + struct mld2_grec *grec; + int i; + int len; + int num; + int err = 0; + + if (!pskb_may_pull(skb, sizeof(*icmp6h))) + return -EINVAL; + + icmp6h = icmp6_hdr(skb); + num = ntohs(icmp6h->icmp6_dataun.un_data16[1]); + len = sizeof(*icmp6h); + + for (i = 0; i < num; i++) { + __be16 *nsrcs, _nsrcs; + + nsrcs = skb_header_pointer(skb, + len + offsetof(struct mld2_grec, + grec_mca), + sizeof(_nsrcs), &_nsrcs); + if (!nsrcs) + return -EINVAL; + + if (!pskb_may_pull(skb, + len + sizeof(*grec) + + sizeof(struct in6_addr) * (*nsrcs))) + return -EINVAL; + + grec = (struct mld2_grec *)(skb->data + len); + len += sizeof(*grec) + sizeof(struct in6_addr) * (*nsrcs); + + /* We treat these as MLDv1 reports for now. 
*/ + switch (grec->grec_type) { + case MLD2_MODE_IS_INCLUDE: + case MLD2_MODE_IS_EXCLUDE: + case MLD2_CHANGE_TO_INCLUDE: + case MLD2_CHANGE_TO_EXCLUDE: + case MLD2_ALLOW_NEW_SOURCES: + case MLD2_BLOCK_OLD_SOURCES: + break; + + default: + continue; + } + + err = br_ip6_multicast_add_group(br, port, &grec->grec_mca); + if (!err) + break; + } + + return err; +} +#endif + static void br_multicast_add_router(struct net_bridge *br, struct net_bridge_port *port) { @@ -955,6 +1168,75 @@ out: return err; } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static int br_ip6_multicast_query(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb) +{ + struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct mld_msg *mld = (struct mld_msg *) icmp6_hdr(skb); + struct net_bridge_mdb_entry *mp; + struct mld2_query *mld2q; + struct net_bridge_port_group *p, **pp; + unsigned long max_delay; + unsigned long now = jiffies; + struct in6_addr *group = NULL; + int err = 0; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || + (port && port->state == BR_STATE_DISABLED)) + goto out; + + br_multicast_query_received(br, port, !ipv6_addr_any(&ip6h->saddr)); + + if (skb->len == sizeof(*mld)) { + if (!pskb_may_pull(skb, sizeof(*mld))) { + err = -EINVAL; + goto out; + } + mld = (struct mld_msg *) icmp6_hdr(skb); + max_delay = msecs_to_jiffies(htons(mld->mld_maxdelay)); + if (max_delay) + group = &mld->mld_mca; + } else if (skb->len >= sizeof(*mld2q)) { + if (!pskb_may_pull(skb, sizeof(*mld2q))) { + err = -EINVAL; + goto out; + } + mld2q = (struct mld2_query *)icmp6_hdr(skb); + if (!mld2q->mld2q_nsrcs) + group = &mld2q->mld2q_mca; + max_delay = mld2q->mld2q_mrc ? MLDV2_MRC(mld2q->mld2q_mrc) : 1; + } + + if (!group) + goto out; + + mp = br_mdb_ip6_get(br->mdb, group); + if (!mp) + goto out; + + max_delay *= br->multicast_last_member_count; + if (!hlist_unhashed(&mp->mglist) && + (timer_pending(&mp->timer) ? + time_after(mp->timer.expires, now + max_delay) : + try_to_del_timer_sync(&mp->timer) >= 0)) + mod_timer(&mp->timer, now + max_delay); + + for (pp = &mp->ports; (p = *pp); pp = &p->next) { + if (timer_pending(&p->timer) ? 
+ time_after(p->timer.expires, now + max_delay) : + try_to_del_timer_sync(&p->timer) >= 0) + mod_timer(&mp->timer, now + max_delay); + } + +out: + spin_unlock(&br->multicast_lock); + return err; +} +#endif + static void br_multicast_leave_group(struct net_bridge *br, struct net_bridge_port *port, struct br_ip *group) @@ -1030,6 +1312,22 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br, br_multicast_leave_group(br, port, &br_group); } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static void br_ip6_multicast_leave_group(struct net_bridge *br, + struct net_bridge_port *port, + const struct in6_addr *group) +{ + struct br_ip br_group; + + if (ipv6_is_local_multicast(group)) + return; + + ipv6_addr_copy(&br_group.u.ip6, group); + br_group.proto = htons(ETH_P_IPV6); + + br_multicast_leave_group(br, port, &br_group); +} +#endif static int br_multicast_ipv4_rcv(struct net_bridge *br, struct net_bridge_port *port, @@ -1129,6 +1427,126 @@ err_out: return err; } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static int br_multicast_ipv6_rcv(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb) +{ + struct sk_buff *skb2 = skb; + struct ipv6hdr *ip6h; + struct icmp6hdr *icmp6h; + u8 nexthdr; + unsigned len; + unsigned offset; + int err; + + BR_INPUT_SKB_CB(skb)->igmp = 0; + BR_INPUT_SKB_CB(skb)->mrouters_only = 0; + + if (!pskb_may_pull(skb, sizeof(*ip6h))) + return -EINVAL; + + ip6h = ipv6_hdr(skb); + + /* + * We're interested in MLD messages only. + * - Version is 6 + * - MLD has always Router Alert hop-by-hop option + * - But we do not support jumbrograms. + */ + if (ip6h->version != 6 || + ip6h->nexthdr != IPPROTO_HOPOPTS || + ip6h->payload_len == 0) + return 0; + + len = ntohs(ip6h->payload_len); + if (skb->len < len) + return -EINVAL; + + nexthdr = ip6h->nexthdr; + offset = ipv6_skip_exthdr(skb, sizeof(*ip6h), &nexthdr); + + if (offset < 0 || nexthdr != IPPROTO_ICMPV6) + return 0; + + /* Okay, we found ICMPv6 header */ + skb2 = skb_clone(skb, GFP_ATOMIC); + if (!skb2) + return -ENOMEM; + + len -= offset - skb_network_offset(skb2); + + __skb_pull(skb2, offset); + skb_reset_transport_header(skb2); + + err = -EINVAL; + if (!pskb_may_pull(skb2, sizeof(*icmp6h))) + goto out; + + icmp6h = icmp6_hdr(skb2); + + switch (icmp6h->icmp6_type) { + case ICMPV6_MGM_QUERY: + case ICMPV6_MGM_REPORT: + case ICMPV6_MGM_REDUCTION: + case ICMPV6_MLD2_REPORT: + break; + default: + err = 0; + goto out; + } + + /* Okay, we found MLD message. Check further. 
*/ + if (skb2->len > len) { + err = pskb_trim_rcsum(skb2, len); + if (err) + goto out; + } + + switch (skb2->ip_summed) { + case CHECKSUM_COMPLETE: + if (!csum_fold(skb2->csum)) + break; + /*FALLTHROUGH*/ + case CHECKSUM_NONE: + skb2->csum = 0; + if (skb_checksum_complete(skb2)) + goto out; + } + + err = 0; + + BR_INPUT_SKB_CB(skb)->igmp = 1; + + switch (icmp6h->icmp6_type) { + case ICMPV6_MGM_REPORT: + { + struct mld_msg *mld = (struct mld_msg *)icmp6h; + BR_INPUT_SKB_CB(skb2)->mrouters_only = 1; + err = br_ip6_multicast_add_group(br, port, &mld->mld_mca); + break; + } + case ICMPV6_MLD2_REPORT: + err = br_ip6_multicast_mld2_report(br, port, skb2); + break; + case ICMPV6_MGM_QUERY: + err = br_ip6_multicast_query(br, port, skb2); + break; + case ICMPV6_MGM_REDUCTION: + { + struct mld_msg *mld = (struct mld_msg *)icmp6h; + br_ip6_multicast_leave_group(br, port, &mld->mld_mca); + } + } + +out: + __skb_push(skb2, offset); + if (skb2 != skb) + kfree_skb(skb2); + return err; +} +#endif + int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb) { @@ -1138,6 +1556,10 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, switch (skb->protocol) { case htons(ETH_P_IP): return br_multicast_ipv4_rcv(br, port, skb); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case htons(ETH_P_IPV6): + return br_multicast_ipv6_rcv(br, port, skb); +#endif } return 0; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 45d11e49fbbb..018499ebe19d 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -49,6 +49,9 @@ struct br_ip { union { __be32 ip4; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct in6_addr ip6; +#endif } u; __be16 proto; }; -- cgit v1.2.2 From af740b2c8f4521e2c45698ee6040941a82d6349d Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 23 Apr 2010 12:34:56 +0200 Subject: netfilter: nf_conntrack: extend with extra stat counter I suspect an unfortunatly series of events occuring under a DDoS attack, in function __nf_conntrack_find() nf_contrack_core.c. Adding a stats counter to see if the search is restarted too often. 
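As a quick way to watch the new counter from user space, the sketch below is a minimal, self-contained C program (an illustration only, not part of this patch) that reads the per-CPU conntrack statistics, finds the column named search_restart by parsing the header line, and sums its hexadecimal per-CPU values. It assumes the statistics file lives at /proc/net/stat/nf_conntrack, the conventional location for the nf_conntrack_standalone.c seq file; adjust the path if your kernel exposes it elsewhere. Locating the column by name keeps the sketch independent of where the new field lands in the row.

/*
 * Illustrative sketch only: sum the "search_restart" column of
 * /proc/net/stat/nf_conntrack across all CPUs. The values in that file
 * are printed as %08x, i.e. hexadecimal, per the seq_printf format above.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	FILE *f = fopen("/proc/net/stat/nf_conntrack", "r");
	char line[1024];
	int col = -1;
	unsigned long long total = 0;

	if (!f) {
		perror("fopen");
		return 1;
	}

	/* Header line: find which column is named "search_restart". */
	if (fgets(line, sizeof(line), f)) {
		int idx = 0;
		char *tok = strtok(line, " \t\n");

		while (tok) {
			if (strcmp(tok, "search_restart") == 0)
				col = idx;
			tok = strtok(NULL, " \t\n");
			idx++;
		}
	}
	if (col < 0) {
		fprintf(stderr, "kernel does not export search_restart\n");
		fclose(f);
		return 1;
	}

	/* One line per CPU: pick the chosen column and accumulate it. */
	while (fgets(line, sizeof(line), f)) {
		int idx = 0;
		char *tok = strtok(line, " \t\n");

		while (tok) {
			if (idx == col)
				total += strtoull(tok, NULL, 16);
			tok = strtok(NULL, " \t\n");
			idx++;
		}
	}
	fclose(f);

	printf("search_restart (all CPUs): %llu\n", total);
	return 0;
}
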
Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 7 ++++--- net/netfilter/nf_conntrack_core.c | 4 +++- net/netfilter/nf_conntrack_standalone.c | 7 ++++--- 3 files changed, 11 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 2fb7b76da94f..244f7cb08d68 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -336,12 +336,12 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v) const struct ip_conntrack_stat *st = v; if (v == SEQ_START_TOKEN) { - seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n"); + seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n"); return 0; } seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x " - "%08x %08x %08x %08x %08x %08x %08x %08x \n", + "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", nr_conntracks, st->searched, st->found, @@ -358,7 +358,8 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v) st->expect_new, st->expect_create, - st->expect_delete + st->expect_delete, + st->search_restart ); return 0; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0c9bbe93cc16..3907efb97a7c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -319,8 +319,10 @@ begin: * not the expected one, we must restart lookup. * We probably met an item that was moved to another chain. */ - if (get_nulls_value(n) != hash) + if (get_nulls_value(n) != hash) { + NF_CT_STAT_INC(net, search_restart); goto begin; + } local_bh_enable(); return NULL; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index faa8eb3722b9..ea4a8d384234 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -252,12 +252,12 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v) const struct ip_conntrack_stat *st = v; if (v == SEQ_START_TOKEN) { - seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n"); + seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n"); return 0; } seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x " - "%08x %08x %08x %08x %08x %08x %08x %08x \n", + "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", nr_conntracks, st->searched, st->found, @@ -274,7 +274,8 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v) st->expect_new, st->expect_create, - st->expect_delete + st->expect_delete, + st->search_restart ); return 0; } -- cgit v1.2.2 From e773aaff8295e7f3428d9cf6f8a476a33de00716 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 23 Apr 2010 00:53:39 +0000 Subject: l2tp: fix memory allocation Since .size is set properly in "struct pernet_operations l2tp_net_ops", allocating space for "struct l2tp_net" by hand is not correct, even causes memory leakage. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/l2tp/l2tp_core.c | 29 +---------------------------- 1 file changed, 1 insertion(+), 28 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index ecc7aea9efe4..1712af1c7b3f 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1617,14 +1617,9 @@ EXPORT_SYMBOL_GPL(l2tp_session_create); static __net_init int l2tp_init_net(struct net *net) { - struct l2tp_net *pn; - int err; + struct l2tp_net *pn = net_generic(net, l2tp_net_id); int hash; - pn = kzalloc(sizeof(*pn), GFP_KERNEL); - if (!pn) - return -ENOMEM; - INIT_LIST_HEAD(&pn->l2tp_tunnel_list); spin_lock_init(&pn->l2tp_tunnel_list_lock); @@ -1633,33 +1628,11 @@ static __net_init int l2tp_init_net(struct net *net) spin_lock_init(&pn->l2tp_session_hlist_lock); - err = net_assign_generic(net, l2tp_net_id, pn); - if (err) - goto out; - return 0; - -out: - kfree(pn); - return err; -} - -static __net_exit void l2tp_exit_net(struct net *net) -{ - struct l2tp_net *pn; - - pn = net_generic(net, l2tp_net_id); - /* - * if someone has cached our net then - * further net_generic call will return NULL - */ - net_assign_generic(net, l2tp_net_id, NULL); - kfree(pn); } static struct pernet_operations l2tp_net_ops = { .init = l2tp_init_net, - .exit = l2tp_exit_net, .id = &l2tp_net_id, .size = sizeof(struct l2tp_net), }; -- cgit v1.2.2 From 3a737028630bb3c2b9efc38b9ddef2e09b06b808 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 23 Apr 2010 01:01:52 +0000 Subject: l2tp_eth: fix memory allocation Since .size is set properly in "struct pernet_operations l2tp_eth_net_ops", allocating space for "struct l2tp_eth_net" by hand is not correct, even causes memory leakage. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/l2tp/l2tp_eth.c | 29 +---------------------------- 1 file changed, 1 insertion(+), 28 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index ca1164afeb74..58c6c4cda73b 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -276,43 +276,16 @@ out: static __net_init int l2tp_eth_init_net(struct net *net) { - struct l2tp_eth_net *pn; - int err; - - pn = kzalloc(sizeof(*pn), GFP_KERNEL); - if (!pn) - return -ENOMEM; + struct l2tp_eth_net *pn = net_generic(net, l2tp_eth_net_id); INIT_LIST_HEAD(&pn->l2tp_eth_dev_list); spin_lock_init(&pn->l2tp_eth_lock); - err = net_assign_generic(net, l2tp_eth_net_id, pn); - if (err) - goto out; - return 0; - -out: - kfree(pn); - return err; -} - -static __net_exit void l2tp_eth_exit_net(struct net *net) -{ - struct l2tp_eth_net *pn; - - pn = net_generic(net, l2tp_eth_net_id); - /* - * if someone has cached our net then - * further net_generic call will return NULL - */ - net_assign_generic(net, l2tp_eth_net_id, NULL); - kfree(pn); } static __net_initdata struct pernet_operations l2tp_eth_net_ops = { .init = l2tp_eth_init_net, - .exit = l2tp_eth_exit_net, .id = &l2tp_eth_net_id, .size = sizeof(struct l2tp_eth_net), }; -- cgit v1.2.2 From 793b14731686595a741d9f47726ad8b9a235385a Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Fri, 23 Apr 2010 11:26:07 +0000 Subject: IPv6: data structure changes for new socket options Add underlying data structure changes and basic setsockopt() and getsockopt() support for IPV6_RECVPATHMTU, IPV6_PATHMTU, and IPV6_DONTFRAG. IPV6_PATHMTU is actually fully functional at this point. Signed-off-by: Brian Haley Signed-off-by: David S. 
Miller --- net/ipv6/ipv6_sockglue.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'net') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 92295ad3487a..2bf9eda72788 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -337,6 +337,13 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, retv = 0; break; + case IPV6_RECVPATHMTU: + if (optlen < sizeof(int)) + goto e_inval; + np->rxopt.bits.rxpmtu = valbool; + retv = 0; + break; + case IPV6_HOPOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_RTHDR: @@ -773,6 +780,9 @@ pref_skip_coa: if (val < 0 || val > 255) goto e_inval; np->min_hopcount = val; + break; + case IPV6_DONTFRAG: + np->dontfrag = valbool; retv = 0; break; } @@ -1063,6 +1073,38 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->rxopt.bits.rxflow; break; + case IPV6_RECVPATHMTU: + val = np->rxopt.bits.rxpmtu; + break; + + case IPV6_PATHMTU: + { + struct dst_entry *dst; + struct ip6_mtuinfo mtuinfo; + + if (len < sizeof(mtuinfo)) + return -EINVAL; + + len = sizeof(mtuinfo); + memset(&mtuinfo, 0, sizeof(mtuinfo)); + + rcu_read_lock(); + dst = __sk_dst_get(sk); + if (dst) + mtuinfo.ip6m_mtu = dst_mtu(dst); + rcu_read_unlock(); + if (!mtuinfo.ip6m_mtu) + return -ENOTCONN; + + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &mtuinfo, len)) + return -EFAULT; + + return 0; + break; + } + case IPV6_UNICAST_HOPS: case IPV6_MULTICAST_HOPS: { @@ -1128,6 +1170,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->min_hopcount; break; + case IPV6_DONTFRAG: + val = np->dontfrag; + break; + default: return -ENOPROTOOPT; } -- cgit v1.2.2 From 13b52cd44670e3359055e9918d0e766d89836425 Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Fri, 23 Apr 2010 11:26:08 +0000 Subject: IPv6: Add dontfrag argument to relevant functions Add dontfrag argument to relevant functions for IPV6_DONTFRAG support, as well as allowing the value to be passed-in via ancillary cmsg data. Signed-off-by: Brian Haley Signed-off-by: David S. 
Miller --- net/ipv6/datagram.c | 21 ++++++++++++++++++++- net/ipv6/icmp.c | 5 +++-- net/ipv6/ip6_flowlabel.c | 3 ++- net/ipv6/ip6_output.c | 2 +- net/ipv6/ipv6_sockglue.c | 3 ++- net/ipv6/raw.c | 9 +++++++-- net/ipv6/udp.c | 9 +++++++-- 7 files changed, 42 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 622dc7939a1b..f5076d349b18 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -497,7 +497,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) int datagram_send_ctl(struct net *net, struct msghdr *msg, struct flowi *fl, struct ipv6_txoptions *opt, - int *hlimit, int *tclass) + int *hlimit, int *tclass, int *dontfrag) { struct in6_pktinfo *src_info; struct cmsghdr *cmsg; @@ -735,6 +735,25 @@ int datagram_send_ctl(struct net *net, err = 0; *tclass = tc; + break; + } + + case IPV6_DONTFRAG: + { + int df; + + err = -EINVAL; + if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) { + goto exit_f; + } + + df = *(int *)CMSG_DATA(cmsg); + if (df < 0 || df > 1) + goto exit_f; + + err = 0; + *dontfrag = df; + break; } default: diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 12d2fa42657d..ce7992982557 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -481,7 +481,7 @@ route_done: len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl, (struct rt6_info*)dst, - MSG_DONTWAIT); + MSG_DONTWAIT, np->dontfrag); if (err) { ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); ip6_flush_pending_frames(sk); @@ -561,7 +561,8 @@ static void icmpv6_echo_reply(struct sk_buff *skb) err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl, - (struct rt6_info*)dst, MSG_DONTWAIT); + (struct rt6_info*)dst, MSG_DONTWAIT, + np->dontfrag); if (err) { ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 14e23216eb28..13654686aeab 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -360,7 +360,8 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, msg.msg_control = (void*)(fl->opt+1); flowi.oif = 0; - err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk, &junk); + err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk, + &junk, &junk); if (err) goto done; err = -EINVAL; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 263d4cf5a8de..54d43dd1f085 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1105,7 +1105,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl, - struct rt6_info *rt, unsigned int flags) + struct rt6_info *rt, unsigned int flags, int dontfrag) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 2bf9eda72788..bd43f0152c21 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -458,7 +458,8 @@ sticky_done: msg.msg_controllen = optlen; msg.msg_control = (void*)(opt+1); - retv = datagram_send_ctl(net, &msg, &fl, opt, &junk, &junk); + retv = datagram_send_ctl(net, &msg, &fl, opt, &junk, &junk, + &junk); if (retv) goto done; update: diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 8763b1a0814a..44a84ea9b3e8 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c 
@@ -733,6 +733,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, int addr_len = msg->msg_namelen; int hlimit = -1; int tclass = -1; + int dontfrag = -1; u16 proto; int err; @@ -811,7 +812,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); - err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass); + err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, + &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -880,6 +882,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (tclass < 0) tclass = np->tclass; + if (dontfrag < 0) + dontfrag = np->dontfrag; + if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; @@ -890,7 +895,7 @@ back_from_confirm: lock_sock(sk); err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst, - msg->msg_flags); + msg->msg_flags, dontfrag); if (err) ip6_flush_pending_frames(sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 92bf9033e245..39e3665d9460 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -919,6 +919,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, int ulen = len; int hlimit = -1; int tclass = -1; + int dontfrag = -1; int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; int err; int connected = 0; @@ -1049,7 +1050,8 @@ do_udp_sendmsg: memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); - err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass); + err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, + &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -1120,6 +1122,9 @@ do_udp_sendmsg: if (tclass < 0) tclass = np->tclass; + if (dontfrag < 0) + dontfrag = np->dontfrag; + if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; back_from_confirm: @@ -1143,7 +1148,7 @@ do_append_data: err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen, sizeof(struct udphdr), hlimit, tclass, opt, &fl, (struct rt6_info*)dst, - corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); + corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag); if (err) udp_v6_flush_pending_frames(sk); else if (!corkreq) -- cgit v1.2.2 From 4b340ae20d0e2366792abe70f46629e576adaf5e Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Fri, 23 Apr 2010 11:26:09 +0000 Subject: IPv6: Complete IPV6_DONTFRAG support Finally add support to detect a local IPV6_DONTFRAG event and return the relevant data to the user if they've enabled IPV6_RECVPATHMTU on the socket. The next recvmsg() will return no data, but have an IPV6_PATHMTU as ancillary data. Signed-off-by: Brian Haley Signed-off-by: David S. 
Miller --- net/ipv6/af_inet6.c | 3 ++ net/ipv6/datagram.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/ip6_output.c | 24 +++++++++----- net/ipv6/raw.c | 3 ++ net/ipv6/udp.c | 3 ++ 5 files changed, 112 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3192aa02ba5d..d2df3144429b 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -417,6 +417,9 @@ void inet6_destroy_sock(struct sock *sk) if ((skb = xchg(&np->pktoptions, NULL)) != NULL) kfree_skb(skb); + if ((skb = xchg(&np->rxpmtu, NULL)) != NULL) + kfree_skb(skb); + /* Free flowlabels */ fl6_free_socklist(sk); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index f5076d349b18..5959230bc6c1 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -278,6 +278,45 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info) kfree_skb(skb); } +void ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6hdr *iph; + struct sk_buff *skb; + struct ip6_mtuinfo *mtu_info; + + if (!np->rxopt.bits.rxpmtu) + return; + + skb = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC); + if (!skb) + return; + + skb_put(skb, sizeof(struct ipv6hdr)); + skb_reset_network_header(skb); + iph = ipv6_hdr(skb); + ipv6_addr_copy(&iph->daddr, &fl->fl6_dst); + + mtu_info = IP6CBMTU(skb); + if (!mtu_info) { + kfree_skb(skb); + return; + } + + mtu_info->ip6m_mtu = mtu; + mtu_info->ip6m_addr.sin6_family = AF_INET6; + mtu_info->ip6m_addr.sin6_port = 0; + mtu_info->ip6m_addr.sin6_flowinfo = 0; + mtu_info->ip6m_addr.sin6_scope_id = fl->oif; + ipv6_addr_copy(&mtu_info->ip6m_addr.sin6_addr, &ipv6_hdr(skb)->daddr); + + __skb_pull(skb, skb_tail_pointer(skb) - skb->data); + skb_reset_transport_header(skb); + + skb = xchg(&np->rxpmtu, skb); + kfree_skb(skb); +} + /* * Handle MSG_ERRQUEUE */ @@ -381,6 +420,54 @@ out: return err; } +/* + * Handle IPV6_RECVPATHMTU + */ +int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + struct sk_buff *skb; + struct sockaddr_in6 *sin; + struct ip6_mtuinfo mtu_info; + int err; + int copied; + + err = -EAGAIN; + skb = xchg(&np->rxpmtu, NULL); + if (skb == NULL) + goto out; + + copied = skb->len; + if (copied > len) { + msg->msg_flags |= MSG_TRUNC; + copied = len; + } + err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + if (err) + goto out_free_skb; + + sock_recv_timestamp(msg, sk, skb); + + memcpy(&mtu_info, IP6CBMTU(skb), sizeof(mtu_info)); + + sin = (struct sockaddr_in6 *)msg->msg_name; + if (sin) { + sin->sin6_family = AF_INET6; + sin->sin6_flowinfo = 0; + sin->sin6_port = 0; + sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id; + ipv6_addr_copy(&sin->sin6_addr, &mtu_info.ip6m_addr.sin6_addr); + } + + put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info); + + err = copied; + +out_free_skb: + kfree_skb(skb); +out: + return err; +} int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 54d43dd1f085..61e2bef56090 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1219,15 +1219,23 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, */ inet->cork.length += length; - if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) && - (rt->u.dst.dev->features & NETIF_F_UFO)) { + if (length > mtu) { + int proto = sk->sk_protocol; + if (dontfrag && (proto == IPPROTO_UDP || proto == 
IPPROTO_RAW)){ + ipv6_local_rxpmtu(sk, fl, mtu-exthdrlen); + return -EMSGSIZE; + } - err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len, - fragheaderlen, transhdrlen, mtu, - flags); - if (err) - goto error; - return 0; + if (proto == IPPROTO_UDP && + (rt->u.dst.dev->features & NETIF_F_UFO)) { + + err = ip6_ufo_append_data(sk, getfrag, from, length, + hh_len, fragheaderlen, + transhdrlen, mtu, flags); + if (err) + goto error; + return 0; + } } if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 44a84ea9b3e8..85627386cb02 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -461,6 +461,9 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, if (flags & MSG_ERRQUEUE) return ipv6_recv_error(sk, msg, len); + if (np->rxpmtu && np->rxopt.bits.rxpmtu) + return ipv6_recv_rxpmtu(sk, msg, len); + skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) goto out; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 39e3665d9460..2850e35cee3d 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -335,6 +335,9 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, if (flags & MSG_ERRQUEUE) return ipv6_recv_error(sk, msg, len); + if (np->rxpmtu && np->rxopt.bits.rxpmtu) + return ipv6_recv_rxpmtu(sk, msg, len); + try_again: skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), &peeked, &err); -- cgit v1.2.2 From 8c52d509e84bbf26cffb8b6e75b399689af67885 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sat, 24 Apr 2010 22:50:10 -0700 Subject: rps: optimize rps_get_cpu() optimize rps_get_cpu(). don't initialize ports when we can get the ports. one memory access for ports than two. Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- net/core/dev.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index a4a7c36917d1..4d43f1a80f74 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2229,7 +2229,11 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, int cpu = -1; u8 ip_proto; u16 tcpu; - u32 addr1, addr2, ports, ihl; + u32 addr1, addr2, ihl; + union { + u32 v32; + u16 v16[2]; + } ports; if (skb_rx_queue_recorded(skb)) { u16 index = skb_get_rx_queue(skb); @@ -2275,7 +2279,6 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, default: goto done; } - ports = 0; switch (ip_proto) { case IPPROTO_TCP: case IPPROTO_UDP: @@ -2285,25 +2288,20 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, case IPPROTO_SCTP: case IPPROTO_UDPLITE: if (pskb_may_pull(skb, (ihl * 4) + 4)) { - __be16 *hports = (__be16 *) (skb->data + (ihl * 4)); - u32 sport, dport; - - sport = (__force u16) hports[0]; - dport = (__force u16) hports[1]; - if (dport < sport) - swap(sport, dport); - ports = (sport << 16) + dport; + ports.v32 = * (__force u32 *) (skb->data + (ihl * 4)); + if (ports.v16[1] < ports.v16[0]) + swap(ports.v16[0], ports.v16[1]); + break; } - break; - default: + ports.v32 = 0; break; } /* get a consistent hash (same value on both flow directions) */ if (addr2 < addr1) swap(addr1, addr2); - skb->rxhash = jhash_3words(addr1, addr2, ports, hashrnd); + skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd); if (!skb->rxhash) skb->rxhash = 1; -- cgit v1.2.2 From b3c981d2bbbe889125169bd0bb482e64d3c028a1 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sun, 25 Apr 2010 00:49:56 -0700 Subject: netns: rename unregister_pernet_subsys parameter Stay consistent with other functions and with 
comment also and name pernet_operations parameter properly. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/net_namespace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index bd8c4712ea24..69a20bfc527c 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -469,10 +469,10 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys); * addition run the exit method for all existing network * namespaces. */ -void unregister_pernet_subsys(struct pernet_operations *module) +void unregister_pernet_subsys(struct pernet_operations *ops) { mutex_lock(&net_mutex); - unregister_pernet_operations(module); + unregister_pernet_operations(ops); mutex_unlock(&net_mutex); } EXPORT_SYMBOL_GPL(unregister_pernet_subsys); -- cgit v1.2.2 From 6443bb1fc2050ca2b6585a3fa77f7833b55329ed Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 25 Apr 2010 15:09:42 -0700 Subject: ipv6: Fix inet6_csk_bind_conflict() Commit fda48a0d7a84 (tcp: bind() fix when many ports are bound) introduced a bug on IPV6 part. We should not call ipv6_addr_any(inet6_rcv_saddr(sk2)) but ipv6_addr_any(inet6_rcv_saddr(sk)) because sk2 can be IPV4, while sk is IPV6. Reported-by: Michael S. Tsirkin Signed-off-by: Eric Dumazet Tested-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- net/ipv6/inet6_connection_sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index b4b7d40a9c95..3a4d92b5a83e 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -48,7 +48,7 @@ int inet6_csk_bind_conflict(const struct sock *sk, ipv6_rcv_saddr_equal(sk, sk2)) break; else if (sk->sk_reuse && sk2->sk_reuse && - !ipv6_addr_any(inet6_rcv_saddr(sk2)) && + !ipv6_addr_any(inet6_rcv_saddr(sk)) && ipv6_rcv_saddr_equal(sk, sk2)) break; } -- cgit v1.2.2 From 3d0c9c4eb2dbdcc461be4084abd87a9a9e70f713 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 26 Apr 2010 16:02:04 +0200 Subject: net: fib_rules: mark arguments to fib_rules_register const and __net_initdata fib_rules_register() duplicates the template passed to it without modification, mark the argument as const. Additionally the templates are only needed when instantiating a new namespace, so mark them as __net_initdata, which means they can be discarded when CONFIG_NET_NS=n. 
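The reason the added const is safe is that the registration routine works from a duplicate of the template rather than from the template itself, so the caller's copy is never written and is only needed at init time. The user-space sketch below shows that shape of API; all names in it are invented for illustration and are not kernel interfaces.

/*
 * Illustration only: a registration function that duplicates a read-only
 * template. The template can therefore be 'const' (and, in the kernel
 * case, placed in init-only memory), while all mutable per-instance
 * state lives in the duplicated copy.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct ops_template {
	int family;
	size_t rule_size;
	int refcount;		/* only ever touched on the duplicate */
};

static struct ops_template *register_ops(const struct ops_template *tmpl)
{
	struct ops_template *ops = malloc(sizeof(*ops));

	if (!ops)
		return NULL;
	memcpy(ops, tmpl, sizeof(*ops));	/* template is read, never written */
	ops->refcount = 1;			/* per-instance state in the copy */
	return ops;
}

int main(void)
{
	static const struct ops_template tmpl = {
		.family = 2,
		.rule_size = 64,
	};
	struct ops_template *ops = register_ops(&tmpl);

	if (ops)
		printf("registered family %d (rule size %zu)\n",
		       ops->family, ops->rule_size);
	free(ops);
	return 0;
}
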
Signed-off-by: Patrick McHardy --- net/core/fib_rules.c | 2 +- net/decnet/dn_rules.c | 2 +- net/ipv4/fib_rules.c | 2 +- net/ipv4/ipmr.c | 2 +- net/ipv6/fib6_rules.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 1bc66592453c..42e84e08a1be 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -122,7 +122,7 @@ errout: } struct fib_rules_ops * -fib_rules_register(struct fib_rules_ops *tmpl, struct net *net) +fib_rules_register(const struct fib_rules_ops *tmpl, struct net *net) { struct fib_rules_ops *ops; int err; diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index af28dcc21844..1226bcad776b 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -216,7 +216,7 @@ static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops) dn_rt_cache_flush(-1); } -static struct fib_rules_ops dn_fib_rules_ops_template = { +static const struct fib_rules_ops __net_initdata dn_fib_rules_ops_template = { .family = FIB_RULES_DECNET, .rule_size = sizeof(struct dn_fib_rule), .addr_size = sizeof(u16), diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 3ec84fea5b71..8ab62a56701c 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -245,7 +245,7 @@ static void fib4_rule_flush_cache(struct fib_rules_ops *ops) rt_cache_flush(ops->fro_net, -1); } -static struct fib_rules_ops fib4_rules_ops_template = { +static const struct fib_rules_ops __net_initdata fib4_rules_ops_template = { .family = FIB_RULES_IPV4, .rule_size = sizeof(struct fib4_rule), .addr_size = sizeof(u32), diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a2df5012a1d0..7d3e382aed64 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -216,7 +216,7 @@ static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, return 0; } -static struct fib_rules_ops ipmr_rules_ops_template = { +static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = { .family = FIB_RULES_IPMR, .rule_size = sizeof(struct ipmr_rule), .addr_size = sizeof(u32), diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 8124f16f2ac2..35f6949446f0 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -237,7 +237,7 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule) + nla_total_size(16); /* src */ } -static struct fib_rules_ops fib6_rules_ops_template = { +static const struct fib_rules_ops __net_initdata fib6_rules_ops_template = { .family = FIB_RULES_IPV6, .rule_size = sizeof(struct fib6_rule), .addr_size = sizeof(struct in6_addr), -- cgit v1.2.2 From 25239cee7e8732dbdc9f5d324f1c22a3bdec1d1f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 26 Apr 2010 16:02:05 +0200 Subject: net: rtnetlink: decouple rtnetlink address families from real address families Decouple rtnetlink address families from real address families in socket.h to be able to add rtnetlink interfaces to code that is not a real address family without increasing AF_MAX/NPROTO. This will be used to add support for multicast route dumping from all tables as the proc interface can't be extended to support anything but the main table without breaking compatibility. This partialy undoes the patch to introduce independant families for routing rules and converts ipmr routing rules to a new rtnetlink family. Similar to that patch, values up to 127 are reserved for real address families, values above that may be used arbitrarily. 
Signed-off-by: Patrick McHardy --- net/core/rtnetlink.c | 14 +++++++------- net/decnet/dn_rules.c | 2 +- net/ipv4/fib_rules.c | 2 +- net/ipv4/ipmr.c | 2 +- net/ipv6/fib6_rules.c | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 78c85985cb30..fd781b62fa7f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -98,7 +98,7 @@ int lockdep_rtnl_is_held(void) EXPORT_SYMBOL(lockdep_rtnl_is_held); #endif /* #ifdef CONFIG_PROVE_LOCKING */ -static struct rtnl_link *rtnl_msg_handlers[NPROTO]; +static struct rtnl_link *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1]; static inline int rtm_msgindex(int msgtype) { @@ -118,7 +118,7 @@ static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex) { struct rtnl_link *tab; - if (protocol < NPROTO) + if (protocol <= RTNL_FAMILY_MAX) tab = rtnl_msg_handlers[protocol]; else tab = NULL; @@ -133,7 +133,7 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex) { struct rtnl_link *tab; - if (protocol < NPROTO) + if (protocol <= RTNL_FAMILY_MAX) tab = rtnl_msg_handlers[protocol]; else tab = NULL; @@ -167,7 +167,7 @@ int __rtnl_register(int protocol, int msgtype, struct rtnl_link *tab; int msgindex; - BUG_ON(protocol < 0 || protocol >= NPROTO); + BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); msgindex = rtm_msgindex(msgtype); tab = rtnl_msg_handlers[protocol]; @@ -219,7 +219,7 @@ int rtnl_unregister(int protocol, int msgtype) { int msgindex; - BUG_ON(protocol < 0 || protocol >= NPROTO); + BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); msgindex = rtm_msgindex(msgtype); if (rtnl_msg_handlers[protocol] == NULL) @@ -241,7 +241,7 @@ EXPORT_SYMBOL_GPL(rtnl_unregister); */ void rtnl_unregister_all(int protocol) { - BUG_ON(protocol < 0 || protocol >= NPROTO); + BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); kfree(rtnl_msg_handlers[protocol]); rtnl_msg_handlers[protocol] = NULL; @@ -1384,7 +1384,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) if (s_idx == 0) s_idx = 1; - for (idx = 1; idx < NPROTO; idx++) { + for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) { int type = cb->nlh->nlmsg_type-RTM_BASE; if (idx < s_idx || idx == PF_PACKET) continue; diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 1226bcad776b..48fdf10be7a1 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -217,7 +217,7 @@ static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops) } static const struct fib_rules_ops __net_initdata dn_fib_rules_ops_template = { - .family = FIB_RULES_DECNET, + .family = AF_DECnet, .rule_size = sizeof(struct dn_fib_rule), .addr_size = sizeof(u16), .action = dn_fib_rule_action, diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 8ab62a56701c..76daeb5ff564 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -246,7 +246,7 @@ static void fib4_rule_flush_cache(struct fib_rules_ops *ops) } static const struct fib_rules_ops __net_initdata fib4_rules_ops_template = { - .family = FIB_RULES_IPV4, + .family = AF_INET, .rule_size = sizeof(struct fib4_rule), .addr_size = sizeof(u32), .action = fib4_rule_action, diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 7d3e382aed64..41e8fc0ce8b3 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -217,7 +217,7 @@ static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, } static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = { - .family = FIB_RULES_IPMR, + .family = RTNL_FAMILY_IPMR, .rule_size = sizeof(struct 
ipmr_rule), .addr_size = sizeof(u32), .action = ipmr_rule_action, diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 35f6949446f0..8e44f8f9c188 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -238,7 +238,7 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule) } static const struct fib_rules_ops __net_initdata fib6_rules_ops_template = { - .family = FIB_RULES_IPV6, + .family = AF_INET6, .rule_size = sizeof(struct fib6_rule), .addr_size = sizeof(struct in6_addr), .action = fib6_rule_action, -- cgit v1.2.2 From cb6a4e461fb427689920472bd7335f926d521747 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 26 Apr 2010 16:02:08 +0200 Subject: net: ipmr: add support for dumping routing tables over netlink The ipmr /proc interface (ip_mr_cache) can't be extended to dump routes from any tables but the main table in a backwards compatible fashion since the output format ends in a variable amount of output interfaces. Introduce a new netlink interface to dump multicast routes from all tables, similar to the netlink interface for regular routes. Signed-off-by: Patrick McHardy --- net/ipv4/ipmr.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 89 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 41e8fc0ce8b3..eddfd12f55b8 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -128,8 +128,8 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt, int local); static int ipmr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert); -static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, - struct mfc_cache *c, struct rtmsg *rtm); +static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, + struct mfc_cache *c, struct rtmsg *rtm); static void ipmr_expire_process(unsigned long arg); #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES @@ -831,7 +831,7 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); - if (ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { + if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { nlh->nlmsg_len = (skb_tail_pointer(skb) - (u8 *)nlh); } else { @@ -1904,9 +1904,8 @@ drop: } #endif -static int -ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c, - struct rtmsg *rtm) +static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, + struct mfc_cache *c, struct rtmsg *rtm) { int ct; struct rtnexthop *nhp; @@ -1994,11 +1993,93 @@ int ipmr_get_route(struct net *net, if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) cache->mfc_flags |= MFC_NOTIFY; - err = ipmr_fill_mroute(mrt, skb, cache, rtm); + err = __ipmr_fill_mroute(mrt, skb, cache, rtm); read_unlock(&mrt_lock); return err; } +static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, + u32 pid, u32 seq, struct mfc_cache *c) +{ + struct nlmsghdr *nlh; + struct rtmsg *rtm; + + nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); + if (nlh == NULL) + return -EMSGSIZE; + + rtm = nlmsg_data(nlh); + rtm->rtm_family = RTNL_FAMILY_IPMR; + rtm->rtm_dst_len = 32; + rtm->rtm_src_len = 32; + rtm->rtm_tos = 0; + rtm->rtm_table = mrt->id; + NLA_PUT_U32(skb, RTA_TABLE, mrt->id); + rtm->rtm_type = RTN_MULTICAST; + rtm->rtm_scope = RT_SCOPE_UNIVERSE; + rtm->rtm_protocol = RTPROT_UNSPEC; + rtm->rtm_flags = 0; + + NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin); + 
NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp); + + if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0) + goto nla_put_failure; + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct mr_table *mrt; + struct mfc_cache *mfc; + unsigned int t = 0, s_t; + unsigned int h = 0, s_h; + unsigned int e = 0, s_e; + + s_t = cb->args[0]; + s_h = cb->args[1]; + s_e = cb->args[2]; + + read_lock(&mrt_lock); + ipmr_for_each_table(mrt, net) { + if (t < s_t) + goto next_table; + if (t > s_t) + s_h = 0; + for (h = s_h; h < MFC_LINES; h++) { + list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) { + if (e < s_e) + goto next_entry; + if (ipmr_fill_mroute(mrt, skb, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + mfc) < 0) + goto done; +next_entry: + e++; + } + e = s_e = 0; + } + s_h = 0; +next_table: + t++; + } +done: + read_unlock(&mrt_lock); + + cb->args[2] = e; + cb->args[1] = h; + cb->args[0] = t; + + return skb->len; +} + #ifdef CONFIG_PROC_FS /* * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif @@ -2355,6 +2436,7 @@ int __init ip_mr_init(void) goto add_proto_fail; } #endif + rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute); return 0; #ifdef CONFIG_IP_PIMSM_V2 -- cgit v1.2.2 From 0c86980817853e4166f66c7cd18bc5fe1adeb5f7 Mon Sep 17 00:00:00 2001 From: Juuso Oikarinen Date: Thu, 22 Apr 2010 10:27:48 +0300 Subject: mac80211: Fix sta->last_tx_rate setting with no-op rate control devices The sta->last_tx_rate is traditionally updated just before transmitting a frame based on information from the rate control algorithm. However, for hardware drivers with IEEE80211_HW_HAS_RATE_CONTROL this is not performed, as the rate control algorithm is not executed, and because the used rate is not known before the frame has actually been transmitted. This causes atleast a fixed 1Mb/s to be reported to user space. A few other instances of code also rely on this information. Fix this by setting the sta->last_tx_rate in tx_status handling. There, look for last rates entry set by the driver, and use that as value for sta->last_tx_rate. Signed-off-by: Juuso Oikarinen Signed-off-by: John W. 
Linville --- net/mac80211/status.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 11805a3a626f..94613af009f3 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -171,6 +171,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) struct net_device *prev_dev = NULL; struct sta_info *sta, *tmp; int retry_count = -1, i; + int rates_idx = -1; bool send_to_cooked; for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { @@ -178,6 +179,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (i >= hw->max_rates) { info->status.rates[i].idx = -1; info->status.rates[i].count = 0; + } else if (info->status.rates[i].idx >= 0) { + rates_idx = i; } retry_count += info->status.rates[i].count; @@ -206,6 +209,10 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) return; } + if ((local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) && + (rates_idx != -1)) + sta->last_tx_rate = info->status.rates[rates_idx]; + if ((info->flags & IEEE80211_TX_STAT_AMPDU_NO_BACK) && (ieee80211_is_data_qos(fc))) { u16 tid, ssn; -- cgit v1.2.2 From 0db3f0f49a99db33a411af1c4352839c0296eff3 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 26 Apr 2010 03:41:00 +0000 Subject: phonet: use phonet_pernet instead of directly net_generic As in for example pppoe introduce phonet_pernet and use it instead of calling net_generic directly. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/phonet/pn_dev.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 9b4ced6e0968..c33da6576942 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -46,9 +46,16 @@ struct phonet_net { int phonet_net_id __read_mostly; +static struct phonet_net *phonet_pernet(struct net *net) +{ + BUG_ON(!net); + + return net_generic(net, phonet_net_id); +} + struct phonet_device_list *phonet_device_list(struct net *net) { - struct phonet_net *pnn = net_generic(net, phonet_net_id); + struct phonet_net *pnn = phonet_pernet(net); return &pnn->pndevs; } @@ -261,7 +268,7 @@ static int phonet_device_autoconf(struct net_device *dev) static void phonet_route_autodel(struct net_device *dev) { - struct phonet_net *pnn = net_generic(dev_net(dev), phonet_net_id); + struct phonet_net *pnn = phonet_pernet(dev_net(dev)); unsigned i; DECLARE_BITMAP(deleted, 64); @@ -313,7 +320,7 @@ static struct notifier_block phonet_device_notifier = { /* Per-namespace Phonet devices handling */ static int __net_init phonet_init_net(struct net *net) { - struct phonet_net *pnn = net_generic(net, phonet_net_id); + struct phonet_net *pnn = phonet_pernet(net); if (!proc_net_fops_create(net, "phonet", 0, &pn_sock_seq_fops)) return -ENOMEM; @@ -326,7 +333,7 @@ static int __net_init phonet_init_net(struct net *net) static void __net_exit phonet_exit_net(struct net *net) { - struct phonet_net *pnn = net_generic(net, phonet_net_id); + struct phonet_net *pnn = phonet_pernet(net); struct net_device *dev; unsigned i; @@ -376,7 +383,7 @@ void phonet_device_exit(void) int phonet_route_add(struct net_device *dev, u8 daddr) { - struct phonet_net *pnn = net_generic(dev_net(dev), phonet_net_id); + struct phonet_net *pnn = phonet_pernet(dev_net(dev)); struct phonet_routes *routes = &pnn->routes; int err = -EEXIST; @@ -393,7 +400,7 @@ int phonet_route_add(struct net_device *dev, u8 daddr) int phonet_route_del(struct net_device *dev, u8 daddr) { - 
struct phonet_net *pnn = net_generic(dev_net(dev), phonet_net_id); + struct phonet_net *pnn = phonet_pernet(dev_net(dev)); struct phonet_routes *routes = &pnn->routes; daddr = daddr >> 2; @@ -413,7 +420,7 @@ int phonet_route_del(struct net_device *dev, u8 daddr) struct net_device *phonet_route_get(struct net *net, u8 daddr) { - struct phonet_net *pnn = net_generic(net, phonet_net_id); + struct phonet_net *pnn = phonet_pernet(net); struct phonet_routes *routes = &pnn->routes; struct net_device *dev; @@ -428,7 +435,7 @@ struct net_device *phonet_route_get(struct net *net, u8 daddr) struct net_device *phonet_route_output(struct net *net, u8 daddr) { - struct phonet_net *pnn = net_generic(net, phonet_net_id); + struct phonet_net *pnn = phonet_pernet(net); struct phonet_routes *routes = &pnn->routes; struct net_device *dev; -- cgit v1.2.2 From 4a4771a58e13b46bfdc999fe481e550f8c6937ff Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 25 Apr 2010 22:20:06 +0000 Subject: net: use sk_sleep() Commit aa395145 (net: sk_sleep() helper) missed three files in the conversion. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/caif/caif_socket.c | 30 +++++++++++++++--------------- net/rxrpc/ar-recvmsg.c | 6 +++--- 2 files changed, 18 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 90317e7d10b4..d455375789fb 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -169,7 +169,7 @@ static int caif_sktrecv_cb(struct cflayer *layr, struct cfpkt *pkt) /* Signal reader that data is available. */ - wake_up_interruptible(cf_sk->sk.sk_sleep); + wake_up_interruptible(sk_sleep(&cf_sk->sk)); return 0; } @@ -203,7 +203,7 @@ static void caif_sktflowctrl_cb(struct cflayer *layr, dbfs_atomic_inc(&cnt.num_tx_flow_on_ind); /* Signal reader that data is available. 
*/ SET_TX_FLOW_ON(cf_sk); - wake_up_interruptible(cf_sk->sk.sk_sleep); + wake_up_interruptible(sk_sleep(&cf_sk->sk)); break; case CAIF_CTRLCMD_FLOW_OFF_IND: @@ -217,7 +217,7 @@ static void caif_sktflowctrl_cb(struct cflayer *layr, caif_assert(STATE_IS_OPEN(cf_sk)); SET_PENDING_OFF(cf_sk); SET_TX_FLOW_ON(cf_sk); - wake_up_interruptible(cf_sk->sk.sk_sleep); + wake_up_interruptible(sk_sleep(&cf_sk->sk)); break; case CAIF_CTRLCMD_DEINIT_RSP: @@ -225,8 +225,8 @@ static void caif_sktflowctrl_cb(struct cflayer *layr, caif_assert(!STATE_IS_OPEN(cf_sk)); SET_PENDING_OFF(cf_sk); if (!STATE_IS_PENDING_DESTROY(cf_sk)) { - if (cf_sk->sk.sk_sleep != NULL) - wake_up_interruptible(cf_sk->sk.sk_sleep); + if (sk_sleep(&cf_sk->sk) != NULL) + wake_up_interruptible(sk_sleep(&cf_sk->sk)); } dbfs_atomic_inc(&cnt.num_deinit); sock_put(&cf_sk->sk); @@ -238,7 +238,7 @@ static void caif_sktflowctrl_cb(struct cflayer *layr, SET_STATE_CLOSED(cf_sk); SET_PENDING_OFF(cf_sk); SET_TX_FLOW_OFF(cf_sk); - wake_up_interruptible(cf_sk->sk.sk_sleep); + wake_up_interruptible(sk_sleep(&cf_sk->sk)); break; case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND: @@ -247,7 +247,7 @@ static void caif_sktflowctrl_cb(struct cflayer *layr, /* Use sk_shutdown to indicate remote shutdown indication */ cf_sk->sk.sk_shutdown |= RCV_SHUTDOWN; cf_sk->file_mode = 0; - wake_up_interruptible(cf_sk->sk.sk_sleep); + wake_up_interruptible(sk_sleep(&cf_sk->sk)); break; default: @@ -325,7 +325,7 @@ static int caif_recvmsg(struct kiocb *iocb, struct socket *sock, release_sock(&cf_sk->sk); result = - wait_event_interruptible(*cf_sk->sk.sk_sleep, + wait_event_interruptible(*sk_sleep(&cf_sk->sk), !STATE_IS_PENDING(cf_sk)); lock_sock(&(cf_sk->sk)); @@ -365,7 +365,7 @@ static int caif_recvmsg(struct kiocb *iocb, struct socket *sock, release_sock(&cf_sk->sk); /* Block reader until data arrives or socket is closed. */ - if (wait_event_interruptible(*cf_sk->sk.sk_sleep, + if (wait_event_interruptible(*sk_sleep(&cf_sk->sk), cfpkt_qpeek(cf_sk->pktq) || STATE_IS_REMOTE_SHUTDOWN(cf_sk) || !STATE_IS_OPEN(cf_sk)) == @@ -537,7 +537,7 @@ static int caif_sendmsg(struct kiocb *kiocb, struct socket *sock, * for its conclusion. */ result = - wait_event_interruptible(*cf_sk->sk.sk_sleep, + wait_event_interruptible(*sk_sleep(&cf_sk->sk), !STATE_IS_PENDING(cf_sk)); /* I want to be alone on cf_sk (except status and queue) */ lock_sock(&(cf_sk->sk)); @@ -573,7 +573,7 @@ static int caif_sendmsg(struct kiocb *kiocb, struct socket *sock, release_sock(&cf_sk->sk); /* Wait until flow is on or socket is closed */ - if (wait_event_interruptible(*cf_sk->sk.sk_sleep, + if (wait_event_interruptible(*sk_sleep(&cf_sk->sk), TX_FLOW_IS_ON(cf_sk) || !STATE_IS_OPEN(cf_sk) || STATE_IS_REMOTE_SHUTDOWN(cf_sk) @@ -650,7 +650,7 @@ static int caif_sendmsg(struct kiocb *kiocb, struct socket *sock, release_sock(&cf_sk->sk); /* Wait until flow is on or socket is closed */ - if (wait_event_interruptible(*cf_sk->sk.sk_sleep, + if (wait_event_interruptible(*sk_sleep(&cf_sk->sk), TX_FLOW_IS_ON(cf_sk) || !STATE_IS_OPEN(cf_sk) || STATE_IS_REMOTE_SHUTDOWN(cf_sk) @@ -898,7 +898,7 @@ static int caif_connect(struct socket *sock, struct sockaddr *uservaddr, * for its conclusion. 
*/ result = - wait_event_interruptible(*cf_sk->sk.sk_sleep, + wait_event_interruptible(*sk_sleep(&cf_sk->sk), !STATE_IS_PENDING(cf_sk)); lock_sock(&(cf_sk->sk)); @@ -965,7 +965,7 @@ static int caif_connect(struct socket *sock, struct sockaddr *uservaddr, release_sock(&cf_sk->sk); result = - wait_event_interruptible(*cf_sk->sk.sk_sleep, + wait_event_interruptible(*sk_sleep(&cf_sk->sk), !STATE_IS_PENDING(cf_sk)); lock_sock(&(cf_sk->sk)); @@ -1107,7 +1107,7 @@ static int caif_release(struct socket *sock) * CAIF stack. */ if (!(sock->file->f_flags & O_NONBLOCK)) { - res = wait_event_interruptible(*cf_sk->sk.sk_sleep, + res = wait_event_interruptible(*sk_sleep(&cf_sk->sk), !STATE_IS_PENDING(cf_sk)); if (res == -ERESTARTSYS) { diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c index 60c2b94e6b54..0c65013e3bfe 100644 --- a/net/rxrpc/ar-recvmsg.c +++ b/net/rxrpc/ar-recvmsg.c @@ -91,7 +91,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, /* wait for a message to turn up */ release_sock(&rx->sk); - prepare_to_wait_exclusive(rx->sk.sk_sleep, &wait, + prepare_to_wait_exclusive(sk_sleep(&rx->sk), &wait, TASK_INTERRUPTIBLE); ret = sock_error(&rx->sk); if (ret) @@ -102,7 +102,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, goto wait_interrupted; timeo = schedule_timeout(timeo); } - finish_wait(rx->sk.sk_sleep, &wait); + finish_wait(sk_sleep(&rx->sk), &wait); lock_sock(&rx->sk); continue; } @@ -356,7 +356,7 @@ csum_copy_error: wait_interrupted: ret = sock_intr_errno(timeo); wait_error: - finish_wait(rx->sk.sk_sleep, &wait); + finish_wait(sk_sleep(&rx->sk), &wait); if (continue_call) rxrpc_put_call(continue_call); if (copied) -- cgit v1.2.2 From 93c0c8b4a5a174645550d444bd5c3ff0cccf74cb Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Mon, 26 Apr 2010 11:20:32 -0700 Subject: ieee802154: Fix oops during ieee802154_sock_ioctl Trying to run izlisten (from lowpan-tools tests) on a device that does not exists I got the oops below. The problem is that we are using get_dev_by_name without checking if we really get a device back. We don't in this case and writing to dev->type generates this oops. [Oops code removed by Dmitry Eremin-Solenikov] If possible this patch should be applied to the current -rc fixes branch. Signed-off-by: Stefan Schmidt Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: David S. Miller --- net/ieee802154/af_ieee802154.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c index c7da600750bb..93c91b633a56 100644 --- a/net/ieee802154/af_ieee802154.c +++ b/net/ieee802154/af_ieee802154.c @@ -151,6 +151,9 @@ static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg, dev_load(sock_net(sk), ifr.ifr_name); dev = dev_get_by_name(sock_net(sk), ifr.ifr_name); + if (!dev) + return -ENODEV; + if (dev->type == ARPHRD_IEEE802154 && dev->netdev_ops->ndo_do_ioctl) ret = dev->netdev_ops->ndo_do_ioctl(dev, &ifr, cmd); -- cgit v1.2.2 From 4eb8b9031a0314539605733597b1e30222d4da70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?YOSHIFUJI=20Hideaki=20/=20=E5=90=89=E8=97=A4=E8=8B=B1?= =?UTF-8?q?=E6=98=8E?= Date: Sun, 25 Apr 2010 08:59:07 +0000 Subject: bridge br_multicast: Ensure to initialize BR_INPUT_SKB_CB(skb)->mrouters_only. 
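The pattern applied by the fix below is worth spelling out: clear per-skb control-block state once, at the entry point every bridged frame passes through, instead of inside each protocol handler. The following is a reduced, self-contained C sketch of that idea using simplified stand-in types; it is illustrative only and not the bridge code itself.

	/* Illustrative sketch only: simplified stand-ins, not the kernel's
	 * bridge types or functions. */
	struct br_cb {
		unsigned char igmp;
		unsigned char mrouters_only;
	};

	static int handle_igmp(struct br_cb *cb)
	{
		/* a protocol handler only sets the flags it knows about */
		cb->igmp = 1;
		return 0;
	}

	static int multicast_rcv(struct br_cb *cb, int multicast_disabled,
				 int is_igmp)
	{
		/* initialise before any early return, for every frame */
		cb->igmp = 0;
		cb->mrouters_only = 0;

		if (multicast_disabled)
			return 0;	/* this path used to skip the init */

		return is_igmp ? handle_igmp(cb) : 0;
	}

With the assignments at the top of the common entry function, the early return for a disabled feature can no longer hand stale control-block values to later code.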
Even with commit 32dec5dd0233ebffa9cae25ce7ba6daeb7df4467 ("bridge br_multicast: Don't refer to BR_INPUT_SKB_CB(skb)->mrouters_only without IGMP snooping."), BR_INPUT_SKB_CB(skb)->mrouters_only is not appropriately initialized if IGMP snooping support is compiled and disabled, so we can see garbage. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 386c15369d91..eaa0e1bae49b 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -957,9 +957,6 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, unsigned offset; int err; - BR_INPUT_SKB_CB(skb)->igmp = 0; - BR_INPUT_SKB_CB(skb)->mrouters_only = 0; - /* We treat OOM as packet loss for now. */ if (!pskb_may_pull(skb, sizeof(*iph))) return -EINVAL; @@ -1049,6 +1046,9 @@ err_out: int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb) { + BR_INPUT_SKB_CB(skb)->igmp = 0; + BR_INPUT_SKB_CB(skb)->mrouters_only = 0; + if (br->multicast_disabled) return 0; -- cgit v1.2.2 From 1fafc7a9353ef68e1b8d4bb130cb6402cf7dfd5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?YOSHIFUJI=20Hideaki=20/=20=E5=90=89=E8=97=A4=E8=8B=B1?= =?UTF-8?q?=E6=98=8E?= Date: Sun, 25 Apr 2010 08:06:40 +0000 Subject: bridge br_multicast: Ensure to initialize BR_INPUT_SKB_CB(skb)->mrouters_only. Even with commit 32dec5dd0233ebffa9cae25ce7ba6daeb7df4467 ("bridge br_multicast: Don't refer to BR_INPUT_SKB_CB(skb)->mrouters_only without IGMP snooping."), BR_INPUT_SKB_CB(skb)->mrouters_only is not appropriately initialized if IGMP/MLD snooping support is compiled and disabled, so we can see garbage. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 38d1fbde5fb8..e481dbd19495 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1340,9 +1340,6 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, unsigned offset; int err; - BR_INPUT_SKB_CB(skb)->igmp = 0; - BR_INPUT_SKB_CB(skb)->mrouters_only = 0; - /* We treat OOM as packet loss for now. */ if (!pskb_may_pull(skb, sizeof(*iph))) return -EINVAL; @@ -1440,9 +1437,6 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, unsigned offset; int err; - BR_INPUT_SKB_CB(skb)->igmp = 0; - BR_INPUT_SKB_CB(skb)->mrouters_only = 0; - if (!pskb_may_pull(skb, sizeof(*ip6h))) return -EINVAL; @@ -1550,6 +1544,9 @@ out: int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb) { + BR_INPUT_SKB_CB(skb)->igmp = 0; + BR_INPUT_SKB_CB(skb)->mrouters_only = 0; + if (br->multicast_disabled) return 0; -- cgit v1.2.2 From d4c4f07df16c767b8efbc44e7cdf795fac326b33 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 27 Apr 2010 10:16:54 -0700 Subject: bridge: Fix build of ipv6 multicast code. Based upon a report from Stephen Rothwell: -------------------- net/bridge/br_multicast.c: In function 'br_ip6_multicast_alloc_query': net/bridge/br_multicast.c:469: error: implicit declaration of function 'csum_ipv6_magic' Introduced by commit 08b202b6726459626c73ecfa08fcdc8c3efc76c2 ("bridge br_multicast: IPv6 MLD support") from the net tree. csum_ipv6_magic is declared in net/ip6_checksum.h ... -------------------- Signed-off-by: David S. 
Miller --- net/bridge/br_multicast.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index e481dbd19495..2048ef0f9be5 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -28,6 +28,7 @@ #include #include #include +#include #endif #include "br_private.h" -- cgit v1.2.2 From 7180f7751d5a1b6f1ef40285b5e928970cdd5306 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 27 Apr 2010 07:13:06 +0000 Subject: bridge: use is_multicast_ether_addr Use existing inline function. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 5b8a6e73b02f..82599405dc15 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -36,7 +36,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN); - if (dest[0] & 1) { + if (is_multicast_ether_addr(dest)) { if (br_multicast_rcv(br, NULL, skb)) goto out; -- cgit v1.2.2 From dcdca2c49bb6328bbc7cd8d73434c308b5dd0df2 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 27 Apr 2010 07:13:11 +0000 Subject: bridge: multicast router list manipulation I prefer that the hlist be only accessed through the hlist macro objects. Explicit twiddling of links (especially with RCU) exposes the code to future bugs. Compile tested only. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 2048ef0f9be5..fcba313f1894 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1042,21 +1042,21 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, static void br_multicast_add_router(struct net_bridge *br, struct net_bridge_port *port) { - struct hlist_node *p; - struct hlist_node **h; - - for (h = &br->router_list.first; - (p = *h) && - (unsigned long)container_of(p, struct net_bridge_port, rlist) > - (unsigned long)port; - h = &p->next) - ; - - port->rlist.pprev = h; - port->rlist.next = p; - rcu_assign_pointer(*h, &port->rlist); - if (p) - p->pprev = &port->rlist.next; + struct net_bridge_port *p; + struct hlist_node *n, *last = NULL; + + hlist_for_each_entry(p, n, &br->router_list, rlist) { + if ((unsigned long) port >= (unsigned long) p) { + hlist_add_before_rcu(n, &port->rlist); + return; + } + last = n; + } + + if (last) + hlist_add_after_rcu(last, &port->rlist); + else + hlist_add_head_rcu(&port->rlist, &br->router_list); } static void br_multicast_mark_router(struct net_bridge *br, -- cgit v1.2.2 From 6c37e5de456987f5bc80879afde05aa120784095 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Mon, 26 Apr 2010 18:33:27 +0000 Subject: TCP: avoid to send keepalive probes if receiving data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RFC 1122 says the following: ... Keep-alive packets MUST only be sent when no data or acknowledgement packets have been received for the connection within an interval. ... The acknowledgement packet is reseting the keepalive timer but the data packet isn't. This patch fixes it by checking the timestamp of the last received data packet too when the keepalive timer expires. 
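The helper relied on here, keepalive_time_elapsed(), is added in include/net/tcp.h and therefore does not appear in this 'net'-limited diff; the sketch below is an approximation of its intent rather than a quote of the header change. The point is to measure idle time against the most recent receive event of either kind, the last processed ACK (rcv_tstamp) or the last received data (icsk_ack.lrcvtime), so that incoming data also holds keepalive probes off.

	/* Sketch; the real definition lives in include/net/tcp.h and may
	 * differ in detail. */
	static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
	{
		const struct inet_connection_sock *icsk = &tp->inet_conn;

		return min_t(u32, tcp_time_stamp - tp->rcv_tstamp,
				  tcp_time_stamp - icsk->icsk_ack.lrcvtime);
	}

Both call sites in the diff below then compare this elapsed value against the configured keepalive idle time exactly as before; only the notion of "last activity" changes.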
Signed-off-by: Flavio Leitner Signed-off-by: Eric Dumazet Acked-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_timer.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6689c61cab47..8ce29747ad9b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2298,7 +2298,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, if (sock_flag(sk, SOCK_KEEPOPEN) && !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { - __u32 elapsed = tcp_time_stamp - tp->rcv_tstamp; + u32 elapsed = keepalive_time_elapsed(tp); if (tp->keepalive_time > elapsed) elapsed = tp->keepalive_time - elapsed; else diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index c732be00606b..440a5c6004f6 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -517,7 +517,7 @@ static void tcp_keepalive_timer (unsigned long data) struct sock *sk = (struct sock *) data; struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - __u32 elapsed; + u32 elapsed; /* Only process if socket is not in use. */ bh_lock_sock(sk); @@ -554,7 +554,7 @@ static void tcp_keepalive_timer (unsigned long data) if (tp->packets_out || tcp_send_head(sk)) goto resched; - elapsed = tcp_time_stamp - tp->rcv_tstamp; + elapsed = keepalive_time_elapsed(tp); if (elapsed >= keepalive_time_when(tp)) { if (icsk->icsk_probes_out >= keepalive_probes(tp)) { -- cgit v1.2.2 From f7917af92024d43bc20bc1afc92de27b0bd0f50b Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 27 Apr 2010 00:26:34 +0200 Subject: mac80211: fix handling of 4-address-mode in ieee80211_change_iface A misplaced interface type check bails out too early if the interface is not in monitor mode. This patch moves it to the right place, so that it only covers changes to the monitor flags. Signed-off-by: Felix Fietkau Cc: stable@kernel.org Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index f97dda735cbb..592f07d2ae5c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -96,9 +96,6 @@ static int ieee80211_change_iface(struct wiphy *wiphy, params->mesh_id_len, params->mesh_id); - if (sdata->vif.type != NL80211_IFTYPE_MONITOR || !flags) - return 0; - if (type == NL80211_IFTYPE_AP_VLAN && params && params->use_4addr == 0) rcu_assign_pointer(sdata->u.vlan.sta, NULL); @@ -106,7 +103,9 @@ static int ieee80211_change_iface(struct wiphy *wiphy, params && params->use_4addr >= 0) sdata->u.mgd.use_4addr = params->use_4addr; - sdata->u.mntr_flags = *flags; + if (sdata->vif.type == NL80211_IFTYPE_MONITOR && flags) + sdata->u.mntr_flags = *flags; + return 0; } -- cgit v1.2.2 From fd8aaaf3519f3fd3c82594e90bc6808072b94d54 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 27 Apr 2010 01:23:35 +0200 Subject: cfg80211: add ap isolation support This is used to configure APs to not bridge traffic between connected stations. Signed-off-by: Felix Fietkau Signed-off-by: John W. 
Linville --- net/wireless/nl80211.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index df5505b3930c..c27bef8e0c11 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -151,6 +151,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_PS_STATE] = { .type = NLA_U32 }, [NL80211_ATTR_CQM] = { .type = NLA_NESTED, }, [NL80211_ATTR_LOCAL_STATE_CHANGE] = { .type = NLA_FLAG }, + [NL80211_ATTR_AP_ISOLATE] = { .type = NLA_U8 }, }; /* policy for the attributes */ @@ -2441,6 +2442,7 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) params.use_cts_prot = -1; params.use_short_preamble = -1; params.use_short_slot_time = -1; + params.ap_isolate = -1; if (info->attrs[NL80211_ATTR_BSS_CTS_PROT]) params.use_cts_prot = @@ -2457,6 +2459,8 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) params.basic_rates_len = nla_len(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]); } + if (info->attrs[NL80211_ATTR_AP_ISOLATE]) + params.ap_isolate = !!nla_get_u8(info->attrs[NL80211_ATTR_AP_ISOLATE]); rtnl_lock(); -- cgit v1.2.2 From 7b7b5e56d7bdfdd0eb5ea13e6c9613b16b8eac46 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 27 Apr 2010 01:23:36 +0200 Subject: mac80211: implement ap isolation support Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 592f07d2ae5c..e13fb3a62239 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1113,6 +1113,13 @@ static int ieee80211_change_bss(struct wiphy *wiphy, changed |= BSS_CHANGED_BASIC_RATES; } + if (params->ap_isolate >= 0) { + if (params->ap_isolate) + sdata->flags |= IEEE80211_SDATA_DONT_BRIDGE_PACKETS; + else + sdata->flags &= ~IEEE80211_SDATA_DONT_BRIDGE_PACKETS; + } + ieee80211_bss_info_change_notify(sdata, changed); return 0; -- cgit v1.2.2 From 195e294d21e88af879da4f88db2ceeb4ec28a755 Mon Sep 17 00:00:00 2001 From: Juuso Oikarinen Date: Tue, 27 Apr 2010 12:47:40 +0300 Subject: mac80211: Determine dynamic PS timeout based on ps-qos network latency Determine the dynamic PS timeout based on the configured ps-qos network latency. For backwards wext compatibility, allow the dynamic PS timeout configured by the cfg80211 to overrule the automatically determined value. Signed-off-by: Juuso Oikarinen Signed-off-by: John W. 
Linville --- net/mac80211/cfg.c | 4 ++-- net/mac80211/main.c | 2 ++ net/mac80211/mlme.c | 21 +++++++++++++++++++++ 3 files changed, 25 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index e13fb3a62239..b575a5066219 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1404,11 +1404,11 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, return -EOPNOTSUPP; if (enabled == sdata->u.mgd.powersave && - timeout == conf->dynamic_ps_timeout) + timeout == conf->dynamic_ps_forced_timeout) return 0; sdata->u.mgd.powersave = enabled; - conf->dynamic_ps_timeout = timeout; + conf->dynamic_ps_forced_timeout = timeout; /* no change, but if automatic follow powersave */ mutex_lock(&sdata->u.mgd.mtx); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 4afe851cf8dc..ebcca0eaf1dc 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -569,6 +569,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) local->hw.conf.listen_interval = local->hw.max_listen_interval; + local->hw.conf.dynamic_ps_forced_timeout = -1; + result = sta_info_start(local); if (result < 0) goto fail_sta_info; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index d811e3fa1d75..2d1a2bef366d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -475,6 +475,7 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) { struct ieee80211_sub_if_data *sdata, *found = NULL; int count = 0; + int timeout; if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS)) { local->ps_sdata = NULL; @@ -508,6 +509,26 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) beaconint_us = ieee80211_tu_to_usec( found->vif.bss_conf.beacon_int); + timeout = local->hw.conf.dynamic_ps_forced_timeout; + if (timeout < 0) { + /* + * The 2 second value is there for compatibility until + * the PM_QOS_NETWORK_LATENCY is configured with real + * values. + */ + if (latency == 2000000000) + timeout = 100; + else if (latency <= 50000) + timeout = 300; + else if (latency <= 100000) + timeout = 100; + else if (latency <= 500000) + timeout = 50; + else + timeout = 0; + } + local->hw.conf.dynamic_ps_timeout = timeout; + if (beaconint_us > latency) { local->ps_sdata = NULL; } else { -- cgit v1.2.2 From 9043f3b89abebfbfe4b8d64c7b71b9ac0b9eaa0b Mon Sep 17 00:00:00 2001 From: Juuso Oikarinen Date: Tue, 27 Apr 2010 12:47:41 +0300 Subject: cfg80211: Remove default dynamic PS timeout value Now that the mac80211 is choosing dynamic ps timeouts based on the ps-qos network latency configuration, configure a default value of -1 as the dynamic ps timeout in cfg80211. This value allows the mac80211 to determine the value to be used. Signed-off-by: Juuso Oikarinen Signed-off-by: John W. 
Linville --- net/wireless/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 7fdb9409ad2a..40cbbbfbccbf 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -704,7 +704,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, wdev->ps = true; else wdev->ps = false; - wdev->ps_timeout = 100; + /* allow mac80211 to determine the timeout */ + wdev->ps_timeout = -1; if (rdev->ops->set_power_mgmt) if (rdev->ops->set_power_mgmt(wdev->wiphy, dev, wdev->ps, -- cgit v1.2.2 From a060bbfe4ee95d115e8f9705a66894ac34e2c475 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 27 Apr 2010 11:59:34 +0200 Subject: mac80211: give virtual interface to hw_scan When scanning, it is somewhat important to scan on the correct virtual interface. All drivers that currently implement hw_scan only support a single virtual interface, but that may change and then we'd want to be ready. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/driver-ops.h | 5 +++-- net/mac80211/driver-trace.h | 9 ++++++--- net/mac80211/scan.c | 4 ++-- 3 files changed, 11 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index d1f8a7c2225a..997008e236ff 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -154,14 +154,15 @@ static inline void drv_update_tkip_key(struct ieee80211_local *local, } static inline int drv_hw_scan(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, struct cfg80211_scan_request *req) { int ret; might_sleep(); - ret = local->ops->hw_scan(&local->hw, req); - trace_drv_hw_scan(local, req, ret); + ret = local->ops->hw_scan(&local->hw, &sdata->vif, req); + trace_drv_hw_scan(local, sdata, req, ret); return ret; } diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index e209cb82ff29..ce734b58d07a 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -363,23 +363,26 @@ TRACE_EVENT(drv_update_tkip_key, TRACE_EVENT(drv_hw_scan, TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, struct cfg80211_scan_request *req, int ret), - TP_ARGS(local, req, ret), + TP_ARGS(local, sdata, req, ret), TP_STRUCT__entry( LOCAL_ENTRY + VIF_ENTRY __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; + VIF_ASSIGN; __entry->ret = ret; ), TP_printk( - LOCAL_PR_FMT " ret:%d", - LOCAL_PR_ARG, __entry->ret + LOCAL_PR_FMT VIF_PR_FMT " ret:%d", + LOCAL_PR_ARG,VIF_PR_ARG, __entry->ret ) ); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index eb86a5f6e645..2b1f1f3d6a58 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -410,7 +410,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, if (local->ops->hw_scan) { WARN_ON(!ieee80211_prep_hw_scan(local)); - rc = drv_hw_scan(local, local->hw_scan_req); + rc = drv_hw_scan(local, sdata, local->hw_scan_req); } else rc = ieee80211_start_sw_scan(local); @@ -654,7 +654,7 @@ void ieee80211_scan_work(struct work_struct *work) } if (local->hw_scan_req) { - int rc = drv_hw_scan(local, local->hw_scan_req); + int rc = drv_hw_scan(local, sdata, local->hw_scan_req); mutex_unlock(&local->scan_mtx); if (rc) ieee80211_scan_completed(&local->hw, true); -- cgit v1.2.2 From a2c40249a36d0b4d76d1caf6bf806e4ae5b06e8a Mon Sep 17 00:00:00 2001 From: Shanyu Zhao Date: Tue, 27 Apr 2010 11:15:12 -0700 Subject: mac80211: fix rts threshold check Currently whenever rts thresold is set, every 
packet will use RTS protection no matter its size exceeds the threshold or not. This is due to a bug in the rts threshold check. if (len > tx->local->hw.wiphy->rts_threshold) { txrc.rts = rts = true; } Basically it is comparing an int (len) and a u32 (rts_threshold), and the variable len is assigned as: len = min_t(int, tx->skb->len + FCS_LEN, tx->local->hw.wiphy->frag_threshold); However, when frag_threshold is "-1", len is always "-1", which is 0xffffffff therefore rts is always set to true. CC: stable@kernel.org Signed-off-by: Shanyu Zhao Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/tx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index e2aa972d584f..f3841f43249e 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -593,7 +593,8 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) struct ieee80211_hdr *hdr = (void *)tx->skb->data; struct ieee80211_supported_band *sband; struct ieee80211_rate *rate; - int i, len; + int i; + u32 len; bool inval = false, rts = false, short_preamble = false; struct ieee80211_tx_rate_control txrc; u32 sta_flags; @@ -602,7 +603,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) sband = tx->local->hw.wiphy->bands[tx->channel->band]; - len = min_t(int, tx->skb->len + FCS_LEN, + len = min_t(u32, tx->skb->len + FCS_LEN, tx->local->hw.wiphy->frag_threshold); /* set up the tx rate control struct we give the RC algo */ -- cgit v1.2.2 From a9cbd588fdb71ea415754c885e2f9f03e6bf1ba0 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Mon, 26 Apr 2010 23:06:24 +0000 Subject: net: reimplement softnet_data.output_queue as a FIFO queue reimplement softnet_data.output_queue as a FIFO queue to keep the fairness among the qdiscs rescheduled. Signed-off-by: Changli Gao Acked-by: Eric Dumazet ---- include/linux/netdevice.h | 1 + net/core/dev.c | 22 ++++++++++++---------- 2 files changed, 13 insertions(+), 10 deletions(-) Signed-off-by: David S. Miller --- net/core/dev.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 4d43f1a80f74..3d314919a2cf 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1557,8 +1557,9 @@ static inline void __netif_reschedule(struct Qdisc *q) local_irq_save(flags); sd = &__get_cpu_var(softnet_data); - q->next_sched = sd->output_queue; - sd->output_queue = q; + q->next_sched = NULL; + *sd->output_queue_tailp = q; + sd->output_queue_tailp = &q->next_sched; raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); } @@ -2529,6 +2530,7 @@ static void net_tx_action(struct softirq_action *h) local_irq_disable(); head = sd->output_queue; sd->output_queue = NULL; + sd->output_queue_tailp = &sd->output_queue; local_irq_enable(); while (head) { @@ -5594,7 +5596,6 @@ static int dev_cpu_callback(struct notifier_block *nfb, void *ocpu) { struct sk_buff **list_skb; - struct Qdisc **list_net; struct sk_buff *skb; unsigned int cpu, oldcpu = (unsigned long)ocpu; struct softnet_data *sd, *oldsd; @@ -5615,13 +5616,13 @@ static int dev_cpu_callback(struct notifier_block *nfb, *list_skb = oldsd->completion_queue; oldsd->completion_queue = NULL; - /* Find end of our output_queue. */ - list_net = &sd->output_queue; - while (*list_net) - list_net = &(*list_net)->next_sched; /* Append output queue from offline CPU. 
*/ - *list_net = oldsd->output_queue; - oldsd->output_queue = NULL; + if (oldsd->output_queue) { + *sd->output_queue_tailp = oldsd->output_queue; + sd->output_queue_tailp = oldsd->output_queue_tailp; + oldsd->output_queue = NULL; + oldsd->output_queue_tailp = &oldsd->output_queue; + } raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_enable(); @@ -5851,7 +5852,8 @@ static int __init net_dev_init(void) skb_queue_head_init(&sd->input_pkt_queue); sd->completion_queue = NULL; INIT_LIST_HEAD(&sd->poll_list); - + sd->output_queue = NULL; + sd->output_queue_tailp = &sd->output_queue; #ifdef CONFIG_RPS sd->csd.func = rps_trigger_softirq; sd->csd.info = sd; -- cgit v1.2.2 From c58dc01babfd58ec9e71a6ce080150dc27755d88 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 27 Apr 2010 15:05:31 -0700 Subject: net: Make RFS socket operations not be inet specific. Idea from Eric Dumazet. As for placement inside of struct sock, I tried to choose a place that otherwise has a 32-bit hole on 64-bit systems. Signed-off-by: David S. Miller Acked-by: Eric Dumazet --- net/ipv4/af_inet.c | 8 ++++---- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/udp.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 9f52880fae10..c6c43bcd1c6f 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -419,7 +419,7 @@ int inet_release(struct socket *sock) if (sk) { long timeout; - inet_rps_reset_flow(sk); + sock_rps_reset_flow(sk); /* Applications forget to leave groups before exiting */ ip_mc_drop_socket(sk); @@ -722,7 +722,7 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, { struct sock *sk = sock->sk; - inet_rps_record_flow(sk); + sock_rps_record_flow(sk); /* We may need to bind the socket. */ if (!inet_sk(sk)->inet_num && inet_autobind(sk)) @@ -737,7 +737,7 @@ static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, { struct sock *sk = sock->sk; - inet_rps_record_flow(sk); + sock_rps_record_flow(sk); /* We may need to bind the socket. 
*/ if (!inet_sk(sk)->inet_num && inet_autobind(sk)) @@ -755,7 +755,7 @@ int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, int addr_len = 0; int err; - inet_rps_record_flow(sk); + sock_rps_record_flow(sk); err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT, flags & ~MSG_DONTWAIT, &addr_len); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4d6717d1e61c..771f8146a2e5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1672,7 +1672,7 @@ process: skb->dev = NULL; - inet_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb->rxhash); bh_lock_sock_nested(sk); ret = 0; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1e18f9cc9247..fa3d2874db41 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1217,7 +1217,7 @@ int udp_disconnect(struct sock *sk, int flags) sk->sk_state = TCP_CLOSE; inet->inet_daddr = 0; inet->inet_dport = 0; - inet_rps_save_rxhash(sk, 0); + sock_rps_save_rxhash(sk, 0); sk->sk_bound_dev_if = 0; if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) inet_reset_saddr(sk); @@ -1262,7 +1262,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) int rc; if (inet_sk(sk)->inet_daddr) - inet_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb->rxhash); rc = sock_queue_rcv_skb(sk, skb); if (rc < 0) { -- cgit v1.2.2 From 6e7676c1a76aed6e957611d8d7a9e5592e23aeba Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Tue, 27 Apr 2010 15:07:33 -0700 Subject: net: batch skb dequeueing from softnet input_pkt_queue batch skb dequeueing from softnet input_pkt_queue to reduce potential lock contention when RPS is enabled. Note: in the worst case, the number of packets in a softnet_data may be double of netdev_max_backlog. Signed-off-by: Changli Gao Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/dev.c | 57 +++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 3d314919a2cf..100dcbd29739 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2408,12 +2408,13 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, __get_cpu_var(netdev_rx_stat).total++; rps_lock(sd); - if (sd->input_pkt_queue.qlen <= netdev_max_backlog) { - if (sd->input_pkt_queue.qlen) { + if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) { + if (skb_queue_len(&sd->input_pkt_queue)) { enqueue: __skb_queue_tail(&sd->input_pkt_queue, skb); #ifdef CONFIG_RPS - *qtail = sd->input_queue_head + sd->input_pkt_queue.qlen; + *qtail = sd->input_queue_head + + skb_queue_len(&sd->input_pkt_queue); #endif rps_unlock(sd); local_irq_restore(flags); @@ -2934,13 +2935,21 @@ static void flush_backlog(void *arg) struct sk_buff *skb, *tmp; rps_lock(sd); - skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) + skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { if (skb->dev == dev) { __skb_unlink(skb, &sd->input_pkt_queue); kfree_skb(skb); - input_queue_head_incr(sd); + input_queue_head_add(sd, 1); } + } rps_unlock(sd); + + skb_queue_walk_safe(&sd->process_queue, skb, tmp) { + if (skb->dev == dev) { + __skb_unlink(skb, &sd->process_queue); + kfree_skb(skb); + } + } } static int napi_gro_complete(struct sk_buff *skb) @@ -3286,24 +3295,33 @@ static int process_backlog(struct napi_struct *napi, int quota) } #endif napi->weight = weight_p; - do { + local_irq_disable(); + while (work < quota) { struct sk_buff *skb; + unsigned int qlen; + + while ((skb = __skb_dequeue(&sd->process_queue))) { + local_irq_enable(); + __netif_receive_skb(skb); + if (++work >= quota) + return work; + local_irq_disable(); + } - local_irq_disable(); rps_lock(sd); - skb = __skb_dequeue(&sd->input_pkt_queue); - if (!skb) { + qlen = skb_queue_len(&sd->input_pkt_queue); + if (qlen) { + input_queue_head_add(sd, qlen); + skb_queue_splice_tail_init(&sd->input_pkt_queue, + &sd->process_queue); + } + if (qlen < quota - work) { __napi_complete(napi); - rps_unlock(sd); - local_irq_enable(); - break; + quota = work + qlen; } - input_queue_head_incr(sd); rps_unlock(sd); - local_irq_enable(); - - __netif_receive_skb(skb); - } while (++work < quota); + } + local_irq_enable(); return work; } @@ -5630,8 +5648,10 @@ static int dev_cpu_callback(struct notifier_block *nfb, /* Process offline CPU's input_pkt_queue */ while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { netif_rx(skb); - input_queue_head_incr(oldsd); + input_queue_head_add(oldsd, 1); } + while ((skb = __skb_dequeue(&oldsd->process_queue))) + netif_rx(skb); return NOTIFY_OK; } @@ -5850,6 +5870,7 @@ static int __init net_dev_init(void) struct softnet_data *sd = &per_cpu(softnet_data, i); skb_queue_head_init(&sd->input_pkt_queue); + skb_queue_head_init(&sd->process_queue); sd->completion_queue = NULL; INIT_LIST_HEAD(&sd->poll_list); sd->output_queue = NULL; -- cgit v1.2.2 From c377411f2494a931ff7facdbb3a6839b1266bcf6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Apr 2010 15:13:20 -0700 Subject: net: sk_add_backlog() take rmem_alloc into account Current socket backlog limit is not enough to really stop DDOS attacks, because user thread spend many time to process a full backlog each round, and user might crazy spin on socket lock. 
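Concretely, the pacing described next is implemented by a new helper, sk_rcvqueues_full(); it is defined in include/net/sock.h, outside the 'net' directory this log is limited to, so the version below is a sketch of the idea rather than the exact header change. It sums what is already charged to the receive queue (sk_rmem_alloc) with the current backlog length and refuses the new skb once the total would exceed sk_rcvbuf, and it can be evaluated without taking the socket lock.

	/* Sketch; the real helper lives in include/net/sock.h and may
	 * differ in detail. */
	static inline bool sk_rcvqueues_full(const struct sock *sk,
					     const struct sk_buff *skb)
	{
		unsigned int qsize = sk->sk_backlog.len +
				     atomic_read(&sk->sk_rmem_alloc);

		return qsize + skb->truesize > (unsigned int)sk->sk_rcvbuf;
	}

Sizing the drop decision off sk_rcvbuf this way is consistent with the removal of the fixed sk->sk_backlog.limit assignments in sock_init_data() and sctp_init_sock() in the diff that follows.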
We should add backlog size and receive_queue size (aka rmem_alloc) to pace writers, and let user run without being slow down too much. Introduce a sk_rcvqueues_full() helper, to avoid taking socket lock in stress situations. Under huge stress from a multiqueue/RPS enabled NIC, a single flow udp receiver can now process ~200.000 pps (instead of ~100 pps before the patch) on a 8 core machine. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock.c | 5 ++++- net/ipv4/udp.c | 4 ++++ net/ipv6/udp.c | 8 ++++++++ net/sctp/socket.c | 3 --- 4 files changed, 16 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 58ebd146ce5a..51041759517e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -327,6 +327,10 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) skb->dev = NULL; + if (sk_rcvqueues_full(sk, skb)) { + atomic_inc(&sk->sk_drops); + goto discard_and_relse; + } if (nested) bh_lock_sock_nested(sk); else @@ -1885,7 +1889,6 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_allocation = GFP_KERNEL; sk->sk_rcvbuf = sysctl_rmem_default; sk->sk_sndbuf = sysctl_wmem_default; - sk->sk_backlog.limit = sk->sk_rcvbuf << 1; sk->sk_state = TCP_CLOSE; sk_set_socket(sk, sock); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index fa3d2874db41..63eb56b2d873 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1372,6 +1372,10 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) goto drop; } + + if (sk_rcvqueues_full(sk, skb)) + goto drop; + rc = 0; bh_lock_sock(sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 2850e35cee3d..3ead20ad9d07 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -584,6 +584,10 @@ static void flush_stack(struct sock **stack, unsigned int count, sk = stack[i]; if (skb1) { + if (sk_rcvqueues_full(sk, skb)) { + kfree_skb(skb1); + goto drop; + } bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb1); @@ -759,6 +763,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, /* deliver */ + if (sk_rcvqueues_full(sk, skb)) { + sock_put(sk); + goto discard; + } bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f34adcca8a8c..13d8229f3a9c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3721,9 +3721,6 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) SCTP_DBG_OBJCNT_INC(sock); percpu_counter_inc(&sctp_sockets_allocated); - /* Set socket backlog limit. */ - sk->sk_backlog.limit = sysctl_sctp_rmem[1]; - local_bh_disable(); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); local_bh_enable(); -- cgit v1.2.2 From 05fceb4ad7e8bf809a2a97061d6273d27d1a8449 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 23 Apr 2010 01:40:47 +0000 Subject: net: disallow to use net_assign_generic externally Now there's no need to use this fuction directly because it's handled by register_pernet_device. So to make this simple and easy to understand, make this static to do not tempt potentional users. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/core/net_namespace.c | 91 ++++++++++++++++++++++++------------------------ 1 file changed, 45 insertions(+), 46 deletions(-) (limited to 'net') diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 69a20bfc527c..c988e685433a 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -27,6 +27,51 @@ EXPORT_SYMBOL(init_net); #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ +static void net_generic_release(struct rcu_head *rcu) +{ + struct net_generic *ng; + + ng = container_of(rcu, struct net_generic, rcu); + kfree(ng); +} + +static int net_assign_generic(struct net *net, int id, void *data) +{ + struct net_generic *ng, *old_ng; + + BUG_ON(!mutex_is_locked(&net_mutex)); + BUG_ON(id == 0); + + ng = old_ng = net->gen; + if (old_ng->len >= id) + goto assign; + + ng = kzalloc(sizeof(struct net_generic) + + id * sizeof(void *), GFP_KERNEL); + if (ng == NULL) + return -ENOMEM; + + /* + * Some synchronisation notes: + * + * The net_generic explores the net->gen array inside rcu + * read section. Besides once set the net->gen->ptr[x] + * pointer never changes (see rules in netns/generic.h). + * + * That said, we simply duplicate this array and schedule + * the old copy for kfree after a grace period. + */ + + ng->len = id; + memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); + + rcu_assign_pointer(net->gen, ng); + call_rcu(&old_ng->rcu, net_generic_release); +assign: + ng->ptr[id - 1] = data; + return 0; +} + static int ops_init(const struct pernet_operations *ops, struct net *net) { int err; @@ -526,49 +571,3 @@ void unregister_pernet_device(struct pernet_operations *ops) mutex_unlock(&net_mutex); } EXPORT_SYMBOL_GPL(unregister_pernet_device); - -static void net_generic_release(struct rcu_head *rcu) -{ - struct net_generic *ng; - - ng = container_of(rcu, struct net_generic, rcu); - kfree(ng); -} - -int net_assign_generic(struct net *net, int id, void *data) -{ - struct net_generic *ng, *old_ng; - - BUG_ON(!mutex_is_locked(&net_mutex)); - BUG_ON(id == 0); - - ng = old_ng = net->gen; - if (old_ng->len >= id) - goto assign; - - ng = kzalloc(sizeof(struct net_generic) + - id * sizeof(void *), GFP_KERNEL); - if (ng == NULL) - return -ENOMEM; - - /* - * Some synchronisation notes: - * - * The net_generic explores the net->gen array inside rcu - * read section. Besides once set the net->gen->ptr[x] - * pointer never changes (see rules in netns/generic.h). - * - * That said, we simply duplicate this array and schedule - * the old copy for kfree after a grace period. - */ - - ng->len = id; - memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); - - rcu_assign_pointer(net->gen, ng); - call_rcu(&old_ng->rcu, net_generic_release); -assign: - ng->ptr[id - 1] = data; - return 0; -} -EXPORT_SYMBOL_GPL(net_assign_generic); -- cgit v1.2.2 From ff65e8275f6c96a5eda57493bd84c4555decf7b3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 27 Apr 2010 16:26:49 -0700 Subject: bridge: Use hlist_for_each_entry_rcu() in br_multicast_add_router() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Noticed by Michał Mirosław. Signed-off-by: David S. 
Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index fcba313f1894..e29c9b738796 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1045,7 +1045,7 @@ static void br_multicast_add_router(struct net_bridge *br, struct net_bridge_port *p; struct hlist_node *n, *last = NULL; - hlist_for_each_entry(p, n, &br->router_list, rlist) { + hlist_for_each_entry_rcu(p, n, &br->router_list, rlist) { if ((unsigned long) port >= (unsigned long) p) { hlist_add_before_rcu(n, &port->rlist); return; -- cgit v1.2.2 From 709b9326ef6fc1b7e379d47d5f39ebc27dad3f4d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 27 Apr 2010 16:49:58 -0700 Subject: Revert "bridge: Use hlist_for_each_entry_rcu() in br_multicast_add_router()" This reverts commit ff65e8275f6c96a5eda57493bd84c4555decf7b3. As explained by Stephen Hemminger, the traversal doesn't require RCU handling as we hold a lock. The list addition et al. calls, on the other hand, do. Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index e29c9b738796..fcba313f1894 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1045,7 +1045,7 @@ static void br_multicast_add_router(struct net_bridge *br, struct net_bridge_port *p; struct hlist_node *n, *last = NULL; - hlist_for_each_entry_rcu(p, n, &br->router_list, rlist) { + hlist_for_each_entry(p, n, &br->router_list, rlist) { if ((unsigned long) port >= (unsigned long) p) { hlist_add_before_rcu(n, &port->rlist); return; -- cgit v1.2.2 From 477fffb082920476cc26f238d65538ccb8d601e1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 21 Apr 2010 23:52:01 +0000 Subject: bluetooth: handle l2cap_create_connless_pdu() errors l2cap_create_connless_pdu() can sometimes return ERR_PTR(-ENOMEM) or ERR_PTR(-EFAULT). Signed-off-by: Dan Carpenter Acked-by: Marcel Holtmann Signed-off-by: David S. Miller --- net/bluetooth/l2cap.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 99d68c34e4f1..9753b690a8b3 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1626,7 +1626,10 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms /* Connectionless channel */ if (sk->sk_type == SOCK_DGRAM) { skb = l2cap_create_connless_pdu(sk, msg, len); - err = l2cap_do_send(sk, skb); + if (IS_ERR(skb)) + err = PTR_ERR(skb); + else + err = l2cap_do_send(sk, skb); goto done; } -- cgit v1.2.2 From 7e80c124485b73146deadce14fd4da2054581806 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 27 Apr 2010 15:01:04 +0000 Subject: bridge: simplify multicast_add_router By coding slightly differently, there are only two cases to deal with: add at head and add after previous entry. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/bridge/br_multicast.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index fcba313f1894..d63868c9b2c0 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1039,22 +1039,25 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, } #endif +/* + * Add port to rotuer_list + * list is maintained ordered by pointer value + * and locked by br->multicast_lock and RCU + */ static void br_multicast_add_router(struct net_bridge *br, struct net_bridge_port *port) { struct net_bridge_port *p; - struct hlist_node *n, *last = NULL; + struct hlist_node *n, *slot = NULL; hlist_for_each_entry(p, n, &br->router_list, rlist) { - if ((unsigned long) port >= (unsigned long) p) { - hlist_add_before_rcu(n, &port->rlist); - return; - } - last = n; + if ((unsigned long) port >= (unsigned long) p) + break; + slot = n; } - if (last) - hlist_add_after_rcu(last, &port->rlist); + if (slot) + hlist_add_after_rcu(slot, &port->rlist); else hlist_add_head_rcu(&port->rlist, &br->router_list); } -- cgit v1.2.2 From 168d40ee3d147ae20860e7916bd79b636cbe8fd5 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 27 Apr 2010 15:01:05 +0000 Subject: bridge: multicast flood Fix unsafe usage of RCU. Would never work on Alpha SMP because of lack of rcu_dereference() Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_forward.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 7a241c396981..5b70fc012e40 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -216,7 +216,7 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst, prev = NULL; - rp = br->router_list.first; + rp = rcu_dereference(br->router_list.first); p = mdst ? mdst->ports : NULL; while (p || rp) { lport = p ? p->port : NULL; @@ -233,7 +233,7 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst, if ((unsigned long)lport >= (unsigned long)port) p = p->next; if ((unsigned long)rport >= (unsigned long)port) - rp = rp->next; + rp = rcu_dereference(rp->next); } if (!prev) -- cgit v1.2.2 From 83f6a740b4e52f88e312223df2fc94016a208618 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 27 Apr 2010 15:01:06 +0000 Subject: bridge: multicast port group RCU fix The recently introduced bridge mulitcast port group list was only partially using RCU correctly. It was missing rcu_dereference() and missing the necessary barrier on deletion. The code should have used one of the standard list methods (list or hlist) instead of open coding a RCU based link list. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_forward.c | 4 ++-- net/bridge/br_multicast.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 5b70fc012e40..5f9988a3f06a 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -217,7 +217,7 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst, prev = NULL; rp = rcu_dereference(br->router_list.first); - p = mdst ? mdst->ports : NULL; + p = mdst ? rcu_dereference(mdst->ports) : NULL; while (p || rp) { lport = p ? p->port : NULL; rport = rp ? 
hlist_entry(rp, struct net_bridge_port, rlist) : @@ -231,7 +231,7 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst, goto out; if ((unsigned long)lport >= (unsigned long)port) - p = p->next; + p = rcu_dereference(p->next); if ((unsigned long)rport >= (unsigned long)port) rp = rcu_dereference(rp->next); } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index d63868c9b2c0..7128abdce45f 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -259,7 +259,7 @@ static void br_multicast_del_pg(struct net_bridge *br, if (p != pg) continue; - *pp = p->next; + rcu_assign_pointer(*pp, p->next); hlist_del_init(&p->mglist); del_timer(&p->timer); del_timer(&p->query_timer); -- cgit v1.2.2 From afe0159d935ab731c682e811356914bb2be9470c Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 27 Apr 2010 15:01:07 +0000 Subject: bridge: multicast_flood cleanup Move some declarations around to make it clearer which variables are being used inside loop. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_forward.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 5f9988a3f06a..396f077216a3 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -208,17 +208,15 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst, { struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; struct net_bridge *br = netdev_priv(dev); - struct net_bridge_port *port; - struct net_bridge_port *lport, *rport; - struct net_bridge_port *prev; + struct net_bridge_port *prev = NULL; struct net_bridge_port_group *p; struct hlist_node *rp; - prev = NULL; - rp = rcu_dereference(br->router_list.first); p = mdst ? rcu_dereference(mdst->ports) : NULL; while (p || rp) { + struct net_bridge_port *port, *lport, *rport; + lport = p ? p->port : NULL; rport = rp ? hlist_entry(rp, struct net_bridge_port, rlist) : NULL; -- cgit v1.2.2 From 8d238b25b1ec22a73b1c2206f111df2faaff8285 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 28 Apr 2010 11:25:59 -0700 Subject: Revert "tcp: bind() fix when many ports are bound" This reverts two commits: fda48a0d7a8412cedacda46a9c0bf8ef9cd13559 tcp: bind() fix when many ports are bound and a follow-on fix for it: 6443bb1fc2050ca2b6585a3fa77f7833b55329ed ipv6: Fix inet6_csk_bind_conflict() It causes problems with binding listening sockets when time-wait sockets from a previous instance still are alive. It's too late to keep fiddling with this so late in the -rc series, and we'll deal with it in net-next-2.6 instead. Signed-off-by: David S. 
Miller --- net/ipv4/inet_connection_sock.c | 16 +++++----------- net/ipv6/inet6_connection_sock.c | 15 +++++---------- 2 files changed, 10 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 14825eb09770..8da6429269dd 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -70,17 +70,13 @@ int inet_csk_bind_conflict(const struct sock *sk, (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { - const __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); - if (!reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN) { + const __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); if (!sk2_rcv_saddr || !sk_rcv_saddr || sk2_rcv_saddr == sk_rcv_saddr) break; - } else if (reuse && sk2->sk_reuse && - sk2_rcv_saddr && - sk2_rcv_saddr == sk_rcv_saddr) - break; + } } } return node != NULL; @@ -124,11 +120,9 @@ again: smallest_size = tb->num_owners; smallest_rover = rover; if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) { - if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) { - spin_unlock(&head->lock); - snum = smallest_rover; - goto have_snum; - } + spin_unlock(&head->lock); + snum = smallest_rover; + goto have_snum; } } goto next; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 3a4d92b5a83e..628db24bcf22 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -42,16 +42,11 @@ int inet6_csk_bind_conflict(const struct sock *sk, if (sk != sk2 && (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || - sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { - if ((!sk->sk_reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) && - ipv6_rcv_saddr_equal(sk, sk2)) - break; - else if (sk->sk_reuse && sk2->sk_reuse && - !ipv6_addr_any(inet6_rcv_saddr(sk)) && - ipv6_rcv_saddr_equal(sk, sk2)) - break; - } + sk->sk_bound_dev_if == sk2->sk_bound_dev_if) && + (!sk->sk_reuse || !sk2->sk_reuse || + sk2->sk_state == TCP_LISTEN) && + ipv6_rcv_saddr_equal(sk, sk2)) + break; } return node != NULL; -- cgit v1.2.2 From 561b1733a465cf9677356b40c27653dd45f1ac56 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 28 Apr 2010 08:47:18 +0000 Subject: sctp: avoid irq lock inversion while call sk->sk_data_ready() sk->sk_data_ready() of sctp socket can be called from both BH and non-BH contexts, but the default sk->sk_data_ready(), sock_def_readable(), can not be used in this case. Therefore, we have to make a new function sctp_data_ready() to grab sk->sk_data_ready() with BH disabling. ========================================================= [ INFO: possible irq lock inversion dependency detected ] 2.6.33-rc6 #129 --------------------------------------------------------- sctp_darn/1517 just changed the state of lock: (clock-AF_INET){++.?..}, at: [] sock_def_readable+0x20/0x80 but this lock took another, SOFTIRQ-unsafe lock in the past: (slock-AF_INET){+.-...} and interrupts could create inverse lock ordering between them. other info that might help us debug this: 1 lock held by sctp_darn/1517: #0: (sk_lock-AF_INET){+.+.+.}, at: [] sctp_sendmsg+0x23d/0xc00 [sctp] Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/sctp/endpointola.c | 1 + net/sctp/socket.c | 10 ++++++++++ 2 files changed, 11 insertions(+) (limited to 'net') diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 905fda582b92..7ec09ba03a1c 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -144,6 +144,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Use SCTP specific send buffer space queues. */ ep->sndbuf_policy = sctp_sndbuf_policy; + sk->sk_data_ready = sctp_data_ready; sk->sk_write_space = sctp_write_space; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 007e8baba089..efa2bc3f0028 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6189,6 +6189,16 @@ do_nonblock: goto out; } +void sctp_data_ready(struct sock *sk, int len) +{ + read_lock_bh(&sk->sk_callback_lock); + if (sk_has_sleeper(sk)) + wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN | + POLLRDNORM | POLLRDBAND); + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); + read_unlock_bh(&sk->sk_callback_lock); +} + /* If socket sndbuf has changed, wake up all per association waiters. */ void sctp_write_space(struct sock *sk) { -- cgit v1.2.2 From 0c42749cffbb4a06be86c5e5db6c7ebad548781f Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 28 Apr 2010 08:47:19 +0000 Subject: sctp: fix potential reference of a freed pointer When sctp attempts to update an association, it removes any addresses that were not in the updated INITs. However, the loop may attempt to reference a transport after it has been removed. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/associola.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index df5abbff63e2..99c93ee98ad9 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1194,8 +1194,10 @@ void sctp_assoc_update(struct sctp_association *asoc, /* Remove any peer addresses not present in the new association. */ list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { trans = list_entry(pos, struct sctp_transport, transports); - if (!sctp_assoc_lookup_paddr(new, &trans->ipaddr)) - sctp_assoc_del_peer(asoc, &trans->ipaddr); + if (!sctp_assoc_lookup_paddr(new, &trans->ipaddr)) { + sctp_assoc_rm_peer(asoc, trans); + continue; + } if (asoc->state >= SCTP_STATE_ESTABLISHED) sctp_transport_reset(trans); -- cgit v1.2.2 From 81419d862db743fe4450a021893f24bab4698c1d Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 28 Apr 2010 08:47:20 +0000 Subject: sctp: per_cpu variables should be in bh_disabled section Since the change of the atomics to percpu variables, we now have to disable BH in process context when touching percpu variables. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index efa2bc3f0028..44a1ab03a3f0 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3719,12 +3719,12 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->hmac = NULL; SCTP_DBG_OBJCNT_INC(sock); - percpu_counter_inc(&sctp_sockets_allocated); /* Set socket backlog limit. 
*/ sk->sk_backlog.limit = sysctl_sctp_rmem[1]; local_bh_disable(); + percpu_counter_inc(&sctp_sockets_allocated); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); local_bh_enable(); @@ -3741,8 +3741,8 @@ SCTP_STATIC void sctp_destroy_sock(struct sock *sk) /* Release our hold on the endpoint. */ ep = sctp_sk(sk)->ep; sctp_endpoint_free(ep); - percpu_counter_dec(&sctp_sockets_allocated); local_bh_disable(); + percpu_counter_dec(&sctp_sockets_allocated); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); local_bh_enable(); } -- cgit v1.2.2 From a8170c35e738d62e9919ce5b109cf4ed66e95bde Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 28 Apr 2010 08:47:21 +0000 Subject: sctp: fix to calc the INIT/INIT-ACK chunk length correctly is set When calculating the INIT/INIT-ACK chunk length, we should not only account the length of parameters, but also the parameters zero padding length, such as AUTH HMACS parameter and CHUNKS parameter. Without the parameters zero padding length we may get following oops. skb_over_panic: text:ce2068d2 len:130 put:6 head:cac3fe00 data:cac3fe00 tail:0xcac3fe82 end:0xcac3fe80 dev: ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:127! invalid opcode: 0000 [#2] SMP last sysfs file: /sys/module/aes_generic/initstate Modules linked in: authenc ...... Pid: 4102, comm: sctp_darn Tainted: G D 2.6.34-rc2 #6 EIP: 0060:[] EFLAGS: 00010282 CPU: 0 EIP is at skb_over_panic+0x37/0x3e EAX: 00000078 EBX: c07c024b ECX: c07c02b9 EDX: cb607b78 ESI: 00000000 EDI: cac3fe7a EBP: 00000002 ESP: cb607b74 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 Process sctp_darn (pid: 4102, ti=cb607000 task=cabdc990 task.ti=cb607000) Stack: c07c02b9 ce2068d2 00000082 00000006 cac3fe00 cac3fe00 cac3fe82 cac3fe80 <0> c07c024b cac3fe7c cac3fe7a c0608dec ca986e80 ce2068d2 00000006 0000007a <0> cb8120ca ca986e80 cb812000 00000003 cb8120c4 ce208a25 cb8120ca cadd9400 Call Trace: [] ? sctp_addto_chunk+0x45/0x85 [sctp] [] ? skb_put+0x2e/0x32 [] ? sctp_addto_chunk+0x45/0x85 [sctp] [] ? sctp_make_init+0x279/0x28c [sctp] [] ? apic_timer_interrupt+0x2a/0x30 [] ? sctp_sf_do_prm_asoc+0x2b/0x7b [sctp] [] ? sctp_do_sm+0xa0/0x14a [sctp] [] ? sctp_pname+0x0/0x14 [sctp] [] ? sctp_primitive_ASSOCIATE+0x2b/0x31 [sctp] [] ? sctp_sendmsg+0x7a0/0x9eb [sctp] [] ? inet_sendmsg+0x3b/0x43 [] ? task_tick_fair+0x2d/0xd9 [] ? sock_sendmsg+0xa7/0xc1 [] ? smp_apic_timer_interrupt+0x6b/0x75 [] ? dequeue_task_fair+0x34/0x19b [] ? sched_clock_local+0x17/0x11e [] ? _copy_from_user+0x2b/0x10c [] ? verify_iovec+0x3c/0x6a [] ? sys_sendmsg+0x186/0x1e2 [] ? __wake_up_common+0x34/0x5b [] ? __wake_up+0x2c/0x3b [] ? tty_wakeup+0x43/0x47 [] ? remove_wait_queue+0x16/0x24 [] ? n_tty_read+0x5b8/0x65e [] ? default_wake_function+0x0/0x8 [] ? sys_socketcall+0x17f/0x1cd [] ? sysenter_do_call+0x12/0x22 Code: 0f 45 de 53 ff b0 98 00 00 00 ff b0 94 ...... EIP: [] skb_over_panic+0x37/0x3e SS:ESP 0068:cb607b74 To reproduce: # modprobe sctp # echo 1 > /proc/sys/net/sctp/addip_enable # echo 1 > /proc/sys/net/sctp/auth_enable # sctp_test -H 3ffe:501:ffff:100:20c:29ff:fe4d:f37e -P 800 -l # sctp_darn -H 3ffe:501:ffff:100:20c:29ff:fe4d:f37e -P 900 -h 192.168.0.21 -p 800 -I -s -t sctp_darn ready to send... 
3ffe:501:ffff:100:20c:29ff:fe4d:f37e:900-192.168.0.21:800 Interactive mode> bindx-add=192.168.0.21 3ffe:501:ffff:100:20c:29ff:fe4d:f37e:900-192.168.0.21:800 Interactive mode> bindx-add=192.168.1.21 3ffe:501:ffff:100:20c:29ff:fe4d:f37e:900-192.168.0.21:800 Interactive mode> snd=10 ------------------------------------------------------------------ eth0 has addresses: 3ffe:501:ffff:100:20c:29ff:fe4d:f37e and 192.168.0.21 eth1 has addresses: 192.168.1.21 ------------------------------------------------------------------ Reported-by: George Cheimonidis Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sm_make_chunk.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 17cb400ecd6a..f6fc5c1a4078 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -208,7 +208,8 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, sp = sctp_sk(asoc->base.sk); num_types = sp->pf->supported_addrs(sp, types); - chunksize = sizeof(init) + addrs_len + SCTP_SAT_LEN(num_types); + chunksize = sizeof(init) + addrs_len; + chunksize += WORD_ROUND(SCTP_SAT_LEN(num_types)); chunksize += sizeof(ecap_param); if (sctp_prsctp_enable) @@ -238,14 +239,14 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, /* Add HMACS parameter length if any were defined */ auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs; if (auth_hmacs->length) - chunksize += ntohs(auth_hmacs->length); + chunksize += WORD_ROUND(ntohs(auth_hmacs->length)); else auth_hmacs = NULL; /* Add CHUNKS parameter length */ auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks; if (auth_chunks->length) - chunksize += ntohs(auth_chunks->length); + chunksize += WORD_ROUND(ntohs(auth_chunks->length)); else auth_chunks = NULL; @@ -255,7 +256,8 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, /* If we have any extensions to report, account for that */ if (num_ext) - chunksize += sizeof(sctp_supported_ext_param_t) + num_ext; + chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) + + num_ext); /* RFC 2960 3.3.2 Initiation (INIT) (1) * @@ -397,13 +399,13 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs; if (auth_hmacs->length) - chunksize += ntohs(auth_hmacs->length); + chunksize += WORD_ROUND(ntohs(auth_hmacs->length)); else auth_hmacs = NULL; auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks; if (auth_chunks->length) - chunksize += ntohs(auth_chunks->length); + chunksize += WORD_ROUND(ntohs(auth_chunks->length)); else auth_chunks = NULL; @@ -412,7 +414,8 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, } if (num_ext) - chunksize += sizeof(sctp_supported_ext_param_t) + num_ext; + chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) + + num_ext); /* Now allocate and fill out the chunk. */ retval = sctp_make_chunk(asoc, SCTP_CID_INIT_ACK, 0, chunksize); -- cgit v1.2.2 From c0786693404cffd80ca3cb6e75ee7b35186b2825 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 28 Apr 2010 08:47:22 +0000 Subject: sctp: Fix oops when sending queued ASCONF chunks When we finish processing ASCONF_ACK chunk, we try to send the next queued ASCONF. This action runs the sctp state machine recursively and it's not prepared to do so. kernel BUG at kernel/timer.c:790! 
invalid opcode: 0000 [#1] SMP last sysfs file: /sys/module/ipv6/initstate Modules linked in: sha256_generic sctp libcrc32c ipv6 dm_multipath uinput 8139too i2c_piix4 8139cp mii i2c_core pcspkr virtio_net joydev floppy virtio_blk virtio_pci [last unloaded: scsi_wait_scan] Pid: 0, comm: swapper Not tainted 2.6.34-rc4 #15 /Bochs EIP: 0060:[] EFLAGS: 00010286 CPU: 0 EIP is at add_timer+0xd/0x1b EAX: cecbab14 EBX: 000000f0 ECX: c0957b1c EDX: 03595cf4 ESI: cecba800 EDI: cf276f00 EBP: c0957aa0 ESP: c0957aa0 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 Process swapper (pid: 0, ti=c0956000 task=c0988ba0 task.ti=c0956000) Stack: c0957ae0 d1851214 c0ab62e4 c0ab5f26 0500ffff 00000004 00000005 00000004 <0> 00000000 d18694fd 00000004 1666b892 cecba800 cecba800 c0957b14 00000004 <0> c0957b94 d1851b11 ceda8b00 cecba800 cf276f00 00000001 c0957b14 000000d0 Call Trace: [] ? sctp_side_effects+0x607/0xdfc [sctp] [] ? sctp_do_sm+0x108/0x159 [sctp] [] ? sctp_pname+0x0/0x1d [sctp] [] ? sctp_primitive_ASCONF+0x36/0x3b [sctp] [] ? sctp_process_asconf_ack+0x2a4/0x2d3 [sctp] [] ? sctp_sf_do_asconf_ack+0x1dd/0x2b4 [sctp] [] ? sctp_do_sm+0xb8/0x159 [sctp] [] ? sctp_cname+0x0/0x52 [sctp] [] ? sctp_assoc_bh_rcv+0xac/0xe1 [sctp] [] ? sctp_inq_push+0x2d/0x30 [sctp] [] ? sctp_rcv+0x797/0x82e [sctp] Tested-by: Wei Yongjun Signed-off-by: Yuansong Qiao Signed-off-by: Shuaijun Zhang Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sm_make_chunk.c | 15 --------------- net/sctp/sm_sideeffect.c | 26 ++++++++++++++++++++++++++ net/sctp/sm_statefuns.c | 8 +++++++- 3 files changed, 33 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index f6fc5c1a4078..0fd5b4c88358 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3318,21 +3318,6 @@ int sctp_process_asconf_ack(struct sctp_association *asoc, sctp_chunk_free(asconf); asoc->addip_last_asconf = NULL; - /* Send the next asconf chunk from the addip chunk queue. */ - if (!list_empty(&asoc->addip_chunk_list)) { - struct list_head *entry = asoc->addip_chunk_list.next; - asconf = list_entry(entry, struct sctp_chunk, list); - - list_del_init(entry); - - /* Hold the chunk until an ASCONF_ACK is received. */ - sctp_chunk_hold(asconf); - if (sctp_primitive_ASCONF(asoc, asconf)) - sctp_chunk_free(asconf); - else - asoc->addip_last_asconf = asconf; - } - return retval; } diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 4c5bed9af4e3..d5ae450b6f02 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -962,6 +962,29 @@ static int sctp_cmd_send_msg(struct sctp_association *asoc, } +/* Sent the next ASCONF packet currently stored in the association. + * This happens after the ASCONF_ACK was succeffully processed. + */ +static void sctp_cmd_send_asconf(struct sctp_association *asoc) +{ + /* Send the next asconf chunk from the addip chunk + * queue. + */ + if (!list_empty(&asoc->addip_chunk_list)) { + struct list_head *entry = asoc->addip_chunk_list.next; + struct sctp_chunk *asconf = list_entry(entry, + struct sctp_chunk, list); + list_del_init(entry); + + /* Hold the chunk until an ASCONF_ACK is received. 
*/ + sctp_chunk_hold(asconf); + if (sctp_primitive_ASCONF(asoc, asconf)) + sctp_chunk_free(asconf); + else + asoc->addip_last_asconf = asconf; + } +} + /* These three macros allow us to pull the debugging code out of the * main flow of sctp_do_sm() to keep attention focused on the real @@ -1617,6 +1640,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, } error = sctp_cmd_send_msg(asoc, cmd->obj.msg); break; + case SCTP_CMD_SEND_NEXT_ASCONF: + sctp_cmd_send_asconf(asoc); + break; default: printk(KERN_WARNING "Impossible command: %u, %p\n", cmd->verb, cmd->obj.ptr); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index abf601a1b847..24b2cd555637 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3676,8 +3676,14 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO)); if (!sctp_process_asconf_ack((struct sctp_association *)asoc, - asconf_ack)) + asconf_ack)) { + /* Successfully processed ASCONF_ACK. We can + * release the next asconf if we have one. + */ + sctp_add_cmd_sf(commands, SCTP_CMD_SEND_NEXT_ASCONF, + SCTP_NULL()); return SCTP_DISPOSITION_CONSUME; + } abort = sctp_make_abort(asoc, asconf_ack, sizeof(sctp_errhdr_t)); -- cgit v1.2.2 From e539d83cc8a4fa581cbf8ed288fdadb19a692cb0 Mon Sep 17 00:00:00 2001 From: Sjur Braendeland Date: Wed, 28 Apr 2010 08:54:35 +0000 Subject: caif: Rename functions in cfcnfg and caif_dev Changes: o Renamed cfcnfg_del_adapt_layer to cfcnfg_disconn_adapt_layer o Fixed typo cfcfg to cfcnfg o Renamed linkid to channel_id o Updated documentation in caif_dev.h o Minor formatting changes Signed-off-by: Sjur Braendeland Signed-off-by: David S. Miller --- net/caif/caif_dev.c | 15 +++++++-------- net/caif/cfcnfg.c | 51 +++++++++++++++++++++++++-------------------------- 2 files changed, 32 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index e84837e1bc86..be1f674a3b67 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -330,20 +330,19 @@ int caif_connect_client(struct caif_connect_request *conn_req, struct cflayer *client_layer) { struct cfctrl_link_param param; - if (connect_req_to_link_param(get_caif_conf(), conn_req, ¶m) == 0) - /* Hook up the adaptation layer. */ - return cfcnfg_add_adaptation_layer(get_caif_conf(), + int ret; + ret = connect_req_to_link_param(get_caif_conf(), conn_req, ¶m); + if (ret) + return ret; + /* Hook up the adaptation layer. 
*/ + return cfcnfg_add_adaptation_layer(get_caif_conf(), ¶m, client_layer); - - return -EINVAL; - - caif_assert(0); } EXPORT_SYMBOL(caif_connect_client); int caif_disconnect_client(struct cflayer *adap_layer) { - return cfcnfg_del_adapt_layer(get_caif_conf(), adap_layer); + return cfcnfg_disconn_adapt_layer(get_caif_conf(), adap_layer); } EXPORT_SYMBOL(caif_disconnect_client); diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index c873e3d4387c..d52f2566916e 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -51,12 +51,12 @@ struct cfcnfg { struct cfcnfg_phyinfo phy_layers[MAX_PHY_LAYERS]; }; -static void cncfg_linkup_rsp(struct cflayer *layer, u8 linkid, +static void cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv, u8 phyid, struct cflayer *adapt_layer); -static void cncfg_linkdestroy_rsp(struct cflayer *layer, u8 linkid, +static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id, struct cflayer *client_layer); -static void cncfg_reject_rsp(struct cflayer *layer, u8 linkid, +static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id, struct cflayer *adapt_layer); static void cfctrl_resp_func(void); static void cfctrl_enum_resp(void); @@ -82,13 +82,13 @@ struct cfcnfg *cfcnfg_create(void) resp = cfctrl_get_respfuncs(this->ctrl); resp->enum_rsp = cfctrl_enum_resp; resp->linkerror_ind = cfctrl_resp_func; - resp->linkdestroy_rsp = cncfg_linkdestroy_rsp; + resp->linkdestroy_rsp = cfcnfg_linkdestroy_rsp; resp->sleep_rsp = cfctrl_resp_func; resp->wake_rsp = cfctrl_resp_func; resp->restart_rsp = cfctrl_resp_func; resp->radioset_rsp = cfctrl_resp_func; - resp->linksetup_rsp = cncfg_linkup_rsp; - resp->reject_rsp = cncfg_reject_rsp; + resp->linksetup_rsp = cfcnfg_linkup_rsp; + resp->reject_rsp = cfcnfg_reject_rsp; this->last_phyid = 1; @@ -191,8 +191,7 @@ int cfcnfg_get_named(struct cfcnfg *cnfg, char *name) * 4) Link-Error - (no response) * Not handled, but this should be a CAIF PROTOCOL ERROR */ - -int cfcnfg_del_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer) +int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer) { u8 channel_id = 0; int ret = 0; @@ -246,9 +245,9 @@ end: return ret; } -EXPORT_SYMBOL(cfcnfg_del_adapt_layer); +EXPORT_SYMBOL(cfcnfg_disconn_adapt_layer); -static void cncfg_linkdestroy_rsp(struct cflayer *layer, u8 linkid, +static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id, struct cflayer *client_layer) { struct cfcnfg *cnfg = container_obj(layer); @@ -258,20 +257,20 @@ static void cncfg_linkdestroy_rsp(struct cflayer *layer, u8 linkid, * 1) Remove service from the MUX layer. 
The MUX must * guarante that no more payload sent "upwards" (receive) */ - servl = cfmuxl_remove_uplayer(cnfg->mux, linkid); + servl = cfmuxl_remove_uplayer(cnfg->mux, channel_id); if (servl == NULL) { pr_err("CAIF: %s(): PROTOCOL ERROR " - "- Error removing service_layer Linkid(%d)", - __func__, linkid); + "- Error removing service_layer Channel_Id(%d)", + __func__, channel_id); return; } - caif_assert(linkid == servl->id); + caif_assert(channel_id == servl->id); if (servl != client_layer && servl->up != client_layer) { pr_err("CAIF: %s(): Error removing service_layer " - "Linkid(%d) %p %p", - __func__, linkid, (void *) servl, + "Channel_Id(%d) %p %p", + __func__, channel_id, (void *) servl, (void *) client_layer); return; } @@ -345,7 +344,7 @@ cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg, } EXPORT_SYMBOL(cfcnfg_add_adaptation_layer); -static void cncfg_reject_rsp(struct cflayer *layer, u8 linkid, +static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id, struct cflayer *adapt_layer) { if (adapt_layer != NULL && adapt_layer->ctrlcmd != NULL) @@ -354,7 +353,7 @@ static void cncfg_reject_rsp(struct cflayer *layer, u8 linkid, } static void -cncfg_linkup_rsp(struct cflayer *layer, u8 linkid, enum cfctrl_srv serv, +cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv, u8 phyid, struct cflayer *adapt_layer) { struct cfcnfg *cnfg = container_obj(layer); @@ -383,26 +382,26 @@ cncfg_linkup_rsp(struct cflayer *layer, u8 linkid, enum cfctrl_srv serv, _CAIF_MODEMCMD_PHYIF_USEFULL); } - adapt_layer->id = linkid; + adapt_layer->id = channel_id; switch (serv) { case CFCTRL_SRV_VEI: - servicel = cfvei_create(linkid, &phyinfo->dev_info); + servicel = cfvei_create(channel_id, &phyinfo->dev_info); break; case CFCTRL_SRV_DATAGRAM: - servicel = cfdgml_create(linkid, &phyinfo->dev_info); + servicel = cfdgml_create(channel_id, &phyinfo->dev_info); break; case CFCTRL_SRV_RFM: - servicel = cfrfml_create(linkid, &phyinfo->dev_info); + servicel = cfrfml_create(channel_id, &phyinfo->dev_info); break; case CFCTRL_SRV_UTIL: - servicel = cfutill_create(linkid, &phyinfo->dev_info); + servicel = cfutill_create(channel_id, &phyinfo->dev_info); break; case CFCTRL_SRV_VIDEO: - servicel = cfvidl_create(linkid, &phyinfo->dev_info); + servicel = cfvidl_create(channel_id, &phyinfo->dev_info); break; case CFCTRL_SRV_DBG: - servicel = cfdbgl_create(linkid, &phyinfo->dev_info); + servicel = cfdbgl_create(channel_id, &phyinfo->dev_info); break; default: pr_err("CAIF: %s(): Protocol error. " @@ -415,7 +414,7 @@ cncfg_linkup_rsp(struct cflayer *layer, u8 linkid, enum cfctrl_srv serv, return; } layer_set_dn(servicel, cnfg->mux); - cfmuxl_set_uplayer(cnfg->mux, servicel, linkid); + cfmuxl_set_uplayer(cnfg->mux, servicel, channel_id); layer_set_up(servicel, adapt_layer); layer_set_dn(adapt_layer, servicel); servicel->ctrlcmd(servicel, CAIF_CTRLCMD_INIT_RSP, 0); -- cgit v1.2.2 From 5b2086567503f9b55136642031ec0067319f58e0 Mon Sep 17 00:00:00 2001 From: Sjur Braendeland Date: Wed, 28 Apr 2010 08:54:36 +0000 Subject: caif: Add reference counting to service layer Changes: o Added functions cfsrvl_get and cfsrvl_put. o Added support release_client to use by socket and net device. o Increase reference counting for in-flight packets from cfmuxl Signed-off-by: Sjur Braendeland Signed-off-by: David S. 
Miller --- net/caif/caif_dev.c | 6 ++++++ net/caif/cfcnfg.c | 7 +++++++ net/caif/cfmuxl.c | 7 ++++++- net/caif/cfsrvl.c | 7 +++++++ 4 files changed, 26 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index be1f674a3b67..0145bae0274f 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -346,6 +346,12 @@ int caif_disconnect_client(struct cflayer *adap_layer) } EXPORT_SYMBOL(caif_disconnect_client); +void caif_release_client(struct cflayer *adap_layer) +{ + cfcnfg_release_adap_layer(adap_layer); +} +EXPORT_SYMBOL(caif_release_client); + /* Per-namespace Caif devices handling */ static int caif_init_net(struct net *net) { diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index d52f2566916e..f94f3dfe85c1 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -247,6 +247,13 @@ end: } EXPORT_SYMBOL(cfcnfg_disconn_adapt_layer); +void cfcnfg_release_adap_layer(struct cflayer *adap_layer) +{ + if (adap_layer->dn) + cfsrvl_put(adap_layer->dn); +} +EXPORT_SYMBOL(cfcnfg_release_adap_layer); + static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id, struct cflayer *client_layer) { diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c index 6fb9f9e96cf8..7372f27f1d32 100644 --- a/net/caif/cfmuxl.c +++ b/net/caif/cfmuxl.c @@ -62,6 +62,7 @@ int cfmuxl_set_uplayer(struct cflayer *layr, struct cflayer *up, u8 linkid) { struct cfmuxl *muxl = container_obj(layr); spin_lock(&muxl->receive_lock); + cfsrvl_get(up); list_add(&up->node, &muxl->srvl_list); spin_unlock(&muxl->receive_lock); return 0; @@ -172,8 +173,11 @@ struct cflayer *cfmuxl_remove_uplayer(struct cflayer *layr, u8 id) struct cfmuxl *muxl = container_obj(layr); spin_lock(&muxl->receive_lock); up = get_up(muxl, id); + if (up == NULL) + return NULL; memset(muxl->up_cache, 0, sizeof(muxl->up_cache)); list_del(&up->node); + cfsrvl_put(up); spin_unlock(&muxl->receive_lock); return up; } @@ -203,8 +207,9 @@ static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt) */ return /* CFGLU_EPROT; */ 0; } - + cfsrvl_get(up); ret = up->receive(up, pkt); + cfsrvl_put(up); return ret; } diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c index d470c51c6431..aff31f34528f 100644 --- a/net/caif/cfsrvl.c +++ b/net/caif/cfsrvl.c @@ -158,6 +158,13 @@ void cfsrvl_init(struct cfsrvl *service, service->layer.ctrlcmd = cfservl_ctrlcmd; service->layer.modemcmd = cfservl_modemcmd; service->dev_info = *dev_info; + kref_init(&service->ref); +} + +void cfsrvl_release(struct kref *kref) +{ + struct cfsrvl *service = container_of(kref, struct cfsrvl, ref); + kfree(service); } bool cfsrvl_ready(struct cfsrvl *service, int *err) -- cgit v1.2.2 From 8d545c8f958f5f433c50a00762ce1f231ed56eee Mon Sep 17 00:00:00 2001 From: Sjur Braendeland Date: Wed, 28 Apr 2010 08:54:37 +0000 Subject: caif: Disconnect without waiting for response Changes: o Function cfcnfg_disconn_adapt_layer is changed to do asynchronous disconnect, not waiting for any response from the modem. Due to this the function cfcnfg_linkdestroy_rsp does nothing anymore. o Because disconnect may take down a connection before a connect response is received the function cfcnfg_linkup_rsp is checking if the client is still waiting for the response, if not a disconnect request is sent to the modem. o cfctrl is no longer keeping track of pending disconnect requests. o Added function cfctrl_cancel_req, which is used for deleting a pending connect request if disconnect is done before connect response is received. 
o Removed unused function cfctrl_insert_req2 o Added better handling of connect reject from modem. Signed-off-by: Sjur Braendeland Signed-off-by: David S. Miller --- net/caif/cfcnfg.c | 155 ++++++++++++++++-------------------------------------- net/caif/cfctrl.c | 95 +++++++++++++++++++++------------ 2 files changed, 107 insertions(+), 143 deletions(-) (limited to 'net') diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index f94f3dfe85c1..471c62939fad 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -54,8 +54,7 @@ struct cfcnfg { static void cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv, u8 phyid, struct cflayer *adapt_layer); -static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id, - struct cflayer *client_layer); +static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id); static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id, struct cflayer *adapt_layer); static void cfctrl_resp_func(void); @@ -175,73 +174,65 @@ int cfcnfg_get_named(struct cfcnfg *cnfg, char *name) return 0; } -/* - * NOTE: What happens on destroy failure: - * 1a) No response - Too early - * This will not happen because enumerate has already - * completed. - * 1b) No response - FATAL - * Not handled, but this should be a CAIF PROTOCOL ERROR - * Modem error, response is really expected - this - * case is not really handled. - * 2) O/E-bit indicate error - * Ignored - this link is destroyed anyway. - * 3) Not able to match on request - * Not handled, but this should be a CAIF PROTOCOL ERROR - * 4) Link-Error - (no response) - * Not handled, but this should be a CAIF PROTOCOL ERROR - */ int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer) { u8 channel_id = 0; int ret = 0; + struct cflayer *servl = NULL; struct cfcnfg_phyinfo *phyinfo = NULL; u8 phyid = 0; - caif_assert(adap_layer != NULL); channel_id = adap_layer->id; - if (channel_id == 0) { + if (adap_layer->dn == NULL || channel_id == 0) { pr_err("CAIF: %s():adap_layer->id is 0\n", __func__); ret = -ENOTCONN; goto end; } - - if (adap_layer->dn == NULL) { - pr_err("CAIF: %s():adap_layer->dn is NULL\n", __func__); - ret = -ENODEV; - goto end; - } - - if (adap_layer->dn != NULL) - phyid = cfsrvl_getphyid(adap_layer->dn); - - phyinfo = cfcnfg_get_phyinfo(cnfg, phyid); - if (phyinfo == NULL) { - pr_warning("CAIF: %s(): No interface to send disconnect to\n", - __func__); - ret = -ENODEV; + servl = cfmuxl_remove_uplayer(cnfg->mux, channel_id); + if (servl == NULL) goto end; - } - - if (phyinfo->id != phyid - || phyinfo->phy_layer->id != phyid - || phyinfo->frm_layer->id != phyid) { - - pr_err("CAIF: %s(): Inconsistency in phy registration\n", - __func__); + layer_set_up(servl, NULL); + ret = cfctrl_linkdown_req(cnfg->ctrl, channel_id, adap_layer); + if (servl == NULL) { + pr_err("CAIF: %s(): PROTOCOL ERROR " + "- Error removing service_layer Channel_Id(%d)", + __func__, channel_id); ret = -EINVAL; goto end; } + caif_assert(channel_id == servl->id); + if (adap_layer->dn != NULL) { + phyid = cfsrvl_getphyid(adap_layer->dn); - ret = cfctrl_linkdown_req(cnfg->ctrl, channel_id, adap_layer); - -end: + phyinfo = cfcnfg_get_phyinfo(cnfg, phyid); + if (phyinfo == NULL) { + pr_warning("CAIF: %s(): " + "No interface to send disconnect to\n", + __func__); + ret = -ENODEV; + goto end; + } + if (phyinfo->id != phyid || + phyinfo->phy_layer->id != phyid || + phyinfo->frm_layer->id != phyid) { + pr_err("CAIF: %s(): " + "Inconsistency in phy registration\n", + __func__); + ret = 
-EINVAL; + goto end; + } + } if (phyinfo != NULL && --phyinfo->phy_ref_count == 0 && phyinfo->phy_layer != NULL && phyinfo->phy_layer->modemcmd != NULL) { phyinfo->phy_layer->modemcmd(phyinfo->phy_layer, _CAIF_MODEMCMD_PHYIF_USELESS); } +end: + cfsrvl_put(servl); + cfctrl_cancel_req(cnfg->ctrl, adap_layer); + if (adap_layer->ctrlcmd != NULL) + adap_layer->ctrlcmd(adap_layer, CAIF_CTRLCMD_DEINIT_RSP, 0); return ret; } @@ -254,69 +245,11 @@ void cfcnfg_release_adap_layer(struct cflayer *adap_layer) } EXPORT_SYMBOL(cfcnfg_release_adap_layer); -static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id, - struct cflayer *client_layer) +static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id) { - struct cfcnfg *cnfg = container_obj(layer); - struct cflayer *servl; - - /* - * 1) Remove service from the MUX layer. The MUX must - * guarante that no more payload sent "upwards" (receive) - */ - servl = cfmuxl_remove_uplayer(cnfg->mux, channel_id); - - if (servl == NULL) { - pr_err("CAIF: %s(): PROTOCOL ERROR " - "- Error removing service_layer Channel_Id(%d)", - __func__, channel_id); - return; - } - caif_assert(channel_id == servl->id); - - if (servl != client_layer && servl->up != client_layer) { - pr_err("CAIF: %s(): Error removing service_layer " - "Channel_Id(%d) %p %p", - __func__, channel_id, (void *) servl, - (void *) client_layer); - return; - } - - /* - * 2) DEINIT_RSP must guarantee that no more packets are transmitted - * from client (adap_layer) when it returns. - */ - - if (servl->ctrlcmd == NULL) { - pr_err("CAIF: %s(): Error servl->ctrlcmd == NULL", __func__); - return; - } - - servl->ctrlcmd(servl, CAIF_CTRLCMD_DEINIT_RSP, 0); - - /* 3) It is now safe to destroy the service layer. */ - cfservl_destroy(servl); } -/* - * NOTE: What happens on linksetup failure: - * 1a) No response - Too early - * This will not happen because enumerate is secured - * before using interface. - * 1b) No response - FATAL - * Not handled, but this should be a CAIF PROTOCOL ERROR - * Modem error, response is really expected - this case is - * not really handled. 
- * 2) O/E-bit indicate error - * Handled in cnfg_reject_rsp - * 3) Not able to match on request - * Not handled, but this should be a CAIF PROTOCOL ERROR - * 4) Link-Error - (no response) - * Not handled, but this should be a CAIF PROTOCOL ERROR - */ - -int -cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg, +int cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg, struct cfctrl_link_param *param, struct cflayer *adap_layer) { @@ -346,8 +279,7 @@ cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg, param->phyid); /* FIXME: ENUMERATE INITIALLY WHEN ACTIVATING PHYSICAL INTERFACE */ cfctrl_enum_req(cnfg->ctrl, param->phyid); - cfctrl_linkup_request(cnfg->ctrl, param, adap_layer); - return 0; + return cfctrl_linkup_request(cnfg->ctrl, param, adap_layer); } EXPORT_SYMBOL(cfcnfg_add_adaptation_layer); @@ -367,8 +299,10 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv, struct cflayer *servicel = NULL; struct cfcnfg_phyinfo *phyinfo; if (adapt_layer == NULL) { - pr_err("CAIF: %s(): PROTOCOL ERROR " - "- LinkUp Request/Response did not match\n", __func__); + pr_debug("CAIF: %s(): link setup response " + "but no client exist, send linkdown back\n", + __func__); + cfctrl_linkdown_req(cnfg->ctrl, channel_id, NULL); return; } @@ -424,6 +358,7 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv, cfmuxl_set_uplayer(cnfg->mux, servicel, channel_id); layer_set_up(servicel, adapt_layer); layer_set_dn(adapt_layer, servicel); + cfsrvl_get(servicel); servicel->ctrlcmd(servicel, CAIF_CTRLCMD_INIT_RSP, 0); } diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index 11f80140f3cb..a521d32cfe56 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -32,6 +32,7 @@ static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, struct cflayer *cfctrl_create(void) { + struct dev_info dev_info; struct cfctrl *this = kmalloc(sizeof(struct cfctrl), GFP_ATOMIC); if (!this) { @@ -39,12 +40,13 @@ struct cflayer *cfctrl_create(void) return NULL; } caif_assert(offsetof(struct cfctrl, serv.layer) == 0); + memset(&dev_info, 0, sizeof(dev_info)); + dev_info.id = 0xff; memset(this, 0, sizeof(*this)); + cfsrvl_init(&this->serv, 0, &dev_info); spin_lock_init(&this->info_list_lock); atomic_set(&this->req_seq_no, 1); atomic_set(&this->rsp_seq_no, 1); - this->serv.dev_info.id = 0xff; - this->serv.layer.id = 0; this->serv.layer.receive = cfctrl_recv; sprintf(this->serv.layer.name, "ctrl"); this->serv.layer.ctrlcmd = cfctrl_ctrlcmd; @@ -127,20 +129,6 @@ void cfctrl_insert_req(struct cfctrl *ctrl, spin_unlock(&ctrl->info_list_lock); } -static void cfctrl_insert_req2(struct cfctrl *ctrl, enum cfctrl_cmd cmd, - u8 linkid, struct cflayer *user_layer) -{ - struct cfctrl_request_info *req = kmalloc(sizeof(*req), GFP_KERNEL); - if (!req) { - pr_warning("CAIF: %s(): Out of memory\n", __func__); - return; - } - req->client_layer = user_layer; - req->cmd = cmd; - req->channel_id = linkid; - cfctrl_insert_req(ctrl, req); -} - /* Compare and remove request */ struct cfctrl_request_info *cfctrl_remove_req(struct cfctrl *ctrl, struct cfctrl_request_info *req) @@ -234,7 +222,7 @@ void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid) } } -void cfctrl_linkup_request(struct cflayer *layer, +int cfctrl_linkup_request(struct cflayer *layer, struct cfctrl_link_param *param, struct cflayer *user_layer) { @@ -248,7 +236,7 @@ void cfctrl_linkup_request(struct cflayer *layer, struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); if (!pkt) { pr_warning("CAIF: %s(): Out of memory\n", __func__); - 
return; + return -ENOMEM; } cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_SETUP); cfpkt_addbdy(pkt, (param->chtype << 4) + param->linktype); @@ -294,11 +282,12 @@ void cfctrl_linkup_request(struct cflayer *layer, default: pr_warning("CAIF: %s():Request setup of bad link type = %d\n", __func__, param->linktype); + return -EINVAL; } req = kmalloc(sizeof(*req), GFP_KERNEL); if (!req) { pr_warning("CAIF: %s(): Out of memory\n", __func__); - return; + return -ENOMEM; } memset(req, 0, sizeof(*req)); req->client_layer = user_layer; @@ -306,6 +295,11 @@ void cfctrl_linkup_request(struct cflayer *layer, req->param = *param; cfctrl_insert_req(cfctrl, req); init_info(cfpkt_info(pkt), cfctrl); + /* + * NOTE:Always send linkup and linkdown request on the same + * device as the payload. Otherwise old queued up payload + * might arrive with the newly allocated channel ID. + */ cfpkt_info(pkt)->dev_info->id = param->phyid; ret = cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt); @@ -313,7 +307,9 @@ void cfctrl_linkup_request(struct cflayer *layer, pr_err("CAIF: %s(): Could not transmit linksetup request\n", __func__); cfpkt_destroy(pkt); + return -ENODEV; } + return 0; } int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid, @@ -326,7 +322,6 @@ int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid, pr_warning("CAIF: %s(): Out of memory\n", __func__); return -ENOMEM; } - cfctrl_insert_req2(cfctrl, CFCTRL_CMD_LINK_DESTROY, channelid, client); cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_DESTROY); cfpkt_addbdy(pkt, channelid); init_info(cfpkt_info(pkt), cfctrl); @@ -392,6 +387,38 @@ void cfctrl_getstartreason_req(struct cflayer *layer) } +void cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer) +{ + struct cfctrl_request_info *p, *req; + struct cfctrl *ctrl = container_obj(layr); + spin_lock(&ctrl->info_list_lock); + + if (ctrl->first_req == NULL) { + spin_unlock(&ctrl->info_list_lock); + return; + } + + if (ctrl->first_req->client_layer == adap_layer) { + + req = ctrl->first_req; + ctrl->first_req = ctrl->first_req->next; + kfree(req); + } + + p = ctrl->first_req; + while (p != NULL && p->next != NULL) { + if (p->next->client_layer == adap_layer) { + + req = p->next; + p->next = p->next->next; + kfree(p->next); + } + p = p->next; + } + + spin_unlock(&ctrl->info_list_lock); +} + static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) { u8 cmdrsp; @@ -409,11 +436,8 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) cmd = cmdrsp & CFCTRL_CMD_MASK; if (cmd != CFCTRL_CMD_LINK_ERR && CFCTRL_RSP_BIT != (CFCTRL_RSP_BIT & cmdrsp)) { - if (handle_loop(cfctrl, cmd, pkt) == CAIF_FAILURE) { - pr_info("CAIF: %s() CAIF Protocol error:" - "Response bit not set\n", __func__); - goto error; - } + if (handle_loop(cfctrl, cmd, pkt) == CAIF_FAILURE) + cmdrsp |= CFCTRL_ERR_BIT; } switch (cmd) { @@ -451,12 +475,16 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) switch (serv) { case CFCTRL_SRV_VEI: case CFCTRL_SRV_DBG: + if (CFCTRL_ERR_BIT & cmdrsp) + break; /* Link ID */ cfpkt_extr_head(pkt, &linkid, 1); break; case CFCTRL_SRV_VIDEO: cfpkt_extr_head(pkt, &tmp, 1); linkparam.u.video.connid = tmp; + if (CFCTRL_ERR_BIT & cmdrsp) + break; /* Link ID */ cfpkt_extr_head(pkt, &linkid, 1); break; @@ -465,6 +493,8 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) cfpkt_extr_head(pkt, &tmp32, 4); linkparam.u.datagram.connid = le32_to_cpu(tmp32); + if (CFCTRL_ERR_BIT & cmdrsp) + break; /* Link ID */ cfpkt_extr_head(pkt, &linkid, 1); break; @@ -483,6 +513,8 @@ 
static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) *cp++ = tmp; *cp = '\0'; + if (CFCTRL_ERR_BIT & cmdrsp) + break; /* Link ID */ cfpkt_extr_head(pkt, &linkid, 1); @@ -519,6 +551,8 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) cfpkt_extr_head(pkt, &tmp, 1); *cp++ = tmp; } + if (CFCTRL_ERR_BIT & cmdrsp) + break; /* Link ID */ cfpkt_extr_head(pkt, &linkid, 1); /* Length */ @@ -560,13 +594,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) break; case CFCTRL_CMD_LINK_DESTROY: cfpkt_extr_head(pkt, &linkid, 1); - rsp.cmd = cmd; - rsp.channel_id = linkid; - req = cfctrl_remove_req(cfctrl, &rsp); - cfctrl->res.linkdestroy_rsp(cfctrl->serv.layer.up, linkid, - req ? req->client_layer : NULL); - if (req != NULL) - kfree(req); + cfctrl->res.linkdestroy_rsp(cfctrl->serv.layer.up, linkid); break; case CFCTRL_CMD_LINK_ERR: pr_err("CAIF: %s(): Frame Error Indication received\n", @@ -608,7 +636,7 @@ static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, case CAIF_CTRLCMD_FLOW_OFF_IND: spin_lock(&this->info_list_lock); if (this->first_req != NULL) { - pr_warning("CAIF: %s(): Received flow off in " + pr_debug("CAIF: %s(): Received flow off in " "control layer", __func__); } spin_unlock(&this->info_list_lock); @@ -633,6 +661,7 @@ static int handle_loop(struct cfctrl *ctrl, int cmd, struct cfpkt *pkt) if (!ctrl->loop_linkused[linkid]) goto found; spin_unlock(&ctrl->loop_linkid_lock); + pr_err("CAIF: %s(): Out of link-ids\n", __func__); return -EINVAL; found: if (!ctrl->loop_linkused[linkid]) -- cgit v1.2.2 From bece7b2398d073d11b2e352405a3ecd3a1e39c60 Mon Sep 17 00:00:00 2001 From: Sjur Braendeland Date: Wed, 28 Apr 2010 08:54:38 +0000 Subject: caif: Rewritten socket implementation Changes: This is a complete re-write of the socket layer. Making the socket implementation more aligned with the other socket layers and using more of the support functions available in sock.c. Lots of code is copied from af_unix (and some from af_irda). Non-blocking mode should be working as well. Signed-off-by: Sjur Braendeland Signed-off-by: David S. Miller --- net/caif/caif_socket.c | 1795 ++++++++++++++++++++++-------------------------- 1 file changed, 828 insertions(+), 967 deletions(-) (limited to 'net') diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index d455375789fb..c3a70c5c893a 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -1,7 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 * Author: Sjur Brendeland sjur.brandeland@stericsson.com - * Per Sigmond per.sigmond@stericsson.com * License terms: GNU General Public License (GPL) version 2 */ @@ -16,91 +15,52 @@ #include #include #include -#include - +#include +#include #include +#include +#include +#include #include #include #include MODULE_LICENSE("GPL"); +MODULE_ALIAS_NETPROTO(AF_CAIF); + +#define CAIF_DEF_SNDBUF (CAIF_MAX_PAYLOAD_SIZE*10) +#define CAIF_DEF_RCVBUF (CAIF_MAX_PAYLOAD_SIZE*100) + +/* + * CAIF state is re-using the TCP socket states. + * caif_states stored in sk_state reflect the state as reported by + * the CAIF stack, while sk_socket->state is the state of the socket. 
+ */ +enum caif_states { + CAIF_CONNECTED = TCP_ESTABLISHED, + CAIF_CONNECTING = TCP_SYN_SENT, + CAIF_DISCONNECTED = TCP_CLOSE +}; + +#define TX_FLOW_ON_BIT 1 +#define RX_FLOW_ON_BIT 2 -#define CHNL_SKT_READ_QUEUE_HIGH 200 -#define CHNL_SKT_READ_QUEUE_LOW 100 - -static int caif_sockbuf_size = 40000; -static atomic_t caif_nr_socks = ATOMIC_INIT(0); - -#define CONN_STATE_OPEN_BIT 1 -#define CONN_STATE_PENDING_BIT 2 -#define CONN_STATE_PEND_DESTROY_BIT 3 -#define CONN_REMOTE_SHUTDOWN_BIT 4 - -#define TX_FLOW_ON_BIT 1 -#define RX_FLOW_ON_BIT 2 - -#define STATE_IS_OPEN(cf_sk) test_bit(CONN_STATE_OPEN_BIT,\ - (void *) &(cf_sk)->conn_state) -#define STATE_IS_REMOTE_SHUTDOWN(cf_sk) test_bit(CONN_REMOTE_SHUTDOWN_BIT,\ - (void *) &(cf_sk)->conn_state) -#define STATE_IS_PENDING(cf_sk) test_bit(CONN_STATE_PENDING_BIT,\ - (void *) &(cf_sk)->conn_state) -#define STATE_IS_PENDING_DESTROY(cf_sk) test_bit(CONN_STATE_PEND_DESTROY_BIT,\ - (void *) &(cf_sk)->conn_state) - -#define SET_STATE_PENDING_DESTROY(cf_sk) set_bit(CONN_STATE_PEND_DESTROY_BIT,\ - (void *) &(cf_sk)->conn_state) -#define SET_STATE_OPEN(cf_sk) set_bit(CONN_STATE_OPEN_BIT,\ - (void *) &(cf_sk)->conn_state) -#define SET_STATE_CLOSED(cf_sk) clear_bit(CONN_STATE_OPEN_BIT,\ - (void *) &(cf_sk)->conn_state) -#define SET_PENDING_ON(cf_sk) set_bit(CONN_STATE_PENDING_BIT,\ - (void *) &(cf_sk)->conn_state) -#define SET_PENDING_OFF(cf_sk) clear_bit(CONN_STATE_PENDING_BIT,\ - (void *) &(cf_sk)->conn_state) -#define SET_REMOTE_SHUTDOWN(cf_sk) set_bit(CONN_REMOTE_SHUTDOWN_BIT,\ - (void *) &(cf_sk)->conn_state) - -#define SET_REMOTE_SHUTDOWN_OFF(dev) clear_bit(CONN_REMOTE_SHUTDOWN_BIT,\ - (void *) &(dev)->conn_state) -#define RX_FLOW_IS_ON(cf_sk) test_bit(RX_FLOW_ON_BIT,\ - (void *) &(cf_sk)->flow_state) -#define TX_FLOW_IS_ON(cf_sk) test_bit(TX_FLOW_ON_BIT,\ - (void *) &(cf_sk)->flow_state) - -#define SET_RX_FLOW_OFF(cf_sk) clear_bit(RX_FLOW_ON_BIT,\ - (void *) &(cf_sk)->flow_state) -#define SET_RX_FLOW_ON(cf_sk) set_bit(RX_FLOW_ON_BIT,\ - (void *) &(cf_sk)->flow_state) -#define SET_TX_FLOW_OFF(cf_sk) clear_bit(TX_FLOW_ON_BIT,\ - (void *) &(cf_sk)->flow_state) -#define SET_TX_FLOW_ON(cf_sk) set_bit(TX_FLOW_ON_BIT,\ - (void *) &(cf_sk)->flow_state) - -#define SKT_READ_FLAG 0x01 -#define SKT_WRITE_FLAG 0x02 static struct dentry *debugfsdir; -#include #ifdef CONFIG_DEBUG_FS struct debug_fs_counter { - atomic_t num_open; - atomic_t num_close; - atomic_t num_init; - atomic_t num_init_resp; - atomic_t num_init_fail_resp; - atomic_t num_deinit; - atomic_t num_deinit_resp; + atomic_t caif_nr_socks; + atomic_t num_connect_req; + atomic_t num_connect_resp; + atomic_t num_connect_fail_resp; + atomic_t num_disconnect; atomic_t num_remote_shutdown_ind; atomic_t num_tx_flow_off_ind; atomic_t num_tx_flow_on_ind; atomic_t num_rx_flow_off; atomic_t num_rx_flow_on; - atomic_t skb_in_use; - atomic_t skb_alloc; - atomic_t skb_free; }; -static struct debug_fs_counter cnt; +struct debug_fs_counter cnt; #define dbfs_atomic_inc(v) atomic_inc(v) #define dbfs_atomic_dec(v) atomic_dec(v) #else @@ -108,624 +68,666 @@ static struct debug_fs_counter cnt; #define dbfs_atomic_dec(v) #endif -/* The AF_CAIF socket */ struct caifsock { - /* NOTE: sk has to be the first member */ - struct sock sk; + struct sock sk; /* must be first member */ struct cflayer layer; - char name[CAIF_LAYER_NAME_SZ]; - u32 conn_state; + char name[CAIF_LAYER_NAME_SZ]; /* Used for debugging */ u32 flow_state; - struct cfpktq *pktq; - int file_mode; struct caif_connect_request conn_req; - int read_queue_len; - /* 
protect updates of read_queue_len */ - spinlock_t read_queue_len_lock; + struct mutex readlock; struct dentry *debugfs_socket_dir; }; -static void drain_queue(struct caifsock *cf_sk); +static int rx_flow_is_on(struct caifsock *cf_sk) +{ + return test_bit(RX_FLOW_ON_BIT, + (void *) &cf_sk->flow_state); +} + +static int tx_flow_is_on(struct caifsock *cf_sk) +{ + return test_bit(TX_FLOW_ON_BIT, + (void *) &cf_sk->flow_state); +} -/* Packet Receive Callback function called from CAIF Stack */ -static int caif_sktrecv_cb(struct cflayer *layr, struct cfpkt *pkt) +static void set_rx_flow_off(struct caifsock *cf_sk) { - struct caifsock *cf_sk; - int read_queue_high; - cf_sk = container_of(layr, struct caifsock, layer); + clear_bit(RX_FLOW_ON_BIT, + (void *) &cf_sk->flow_state); +} - if (!STATE_IS_OPEN(cf_sk)) { - /*FIXME: This should be allowed finally!*/ - pr_debug("CAIF: %s(): called after close request\n", __func__); - cfpkt_destroy(pkt); - return 0; - } - /* NOTE: This function may be called in Tasklet context! */ +static void set_rx_flow_on(struct caifsock *cf_sk) +{ + set_bit(RX_FLOW_ON_BIT, + (void *) &cf_sk->flow_state); +} - /* The queue has its own lock */ - cfpkt_queue(cf_sk->pktq, pkt, 0); +static void set_tx_flow_off(struct caifsock *cf_sk) +{ + clear_bit(TX_FLOW_ON_BIT, + (void *) &cf_sk->flow_state); +} - spin_lock(&cf_sk->read_queue_len_lock); - cf_sk->read_queue_len++; +static void set_tx_flow_on(struct caifsock *cf_sk) +{ + set_bit(TX_FLOW_ON_BIT, + (void *) &cf_sk->flow_state); +} - read_queue_high = (cf_sk->read_queue_len > CHNL_SKT_READ_QUEUE_HIGH); - spin_unlock(&cf_sk->read_queue_len_lock); +static void caif_read_lock(struct sock *sk) +{ + struct caifsock *cf_sk; + cf_sk = container_of(sk, struct caifsock, sk); + mutex_lock(&cf_sk->readlock); +} - if (RX_FLOW_IS_ON(cf_sk) && read_queue_high) { - dbfs_atomic_inc(&cnt.num_rx_flow_off); - SET_RX_FLOW_OFF(cf_sk); +static void caif_read_unlock(struct sock *sk) +{ + struct caifsock *cf_sk; + cf_sk = container_of(sk, struct caifsock, sk); + mutex_unlock(&cf_sk->readlock); +} - /* Send flow off (NOTE: must not sleep) */ - pr_debug("CAIF: %s():" - " sending flow OFF (queue len = %d)\n", - __func__, - cf_sk->read_queue_len); - caif_assert(cf_sk->layer.dn); - caif_assert(cf_sk->layer.dn->ctrlcmd); +int sk_rcvbuf_lowwater(struct caifsock *cf_sk) +{ + /* A quarter of full buffer is used a low water mark */ + return cf_sk->sk.sk_rcvbuf / 4; +} - (void) cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, - CAIF_MODEMCMD_FLOW_OFF_REQ); - } +void caif_flow_ctrl(struct sock *sk, int mode) +{ + struct caifsock *cf_sk; + cf_sk = container_of(sk, struct caifsock, sk); + if (cf_sk->layer.dn) + cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, mode); +} - /* Signal reader that data is available. */ +/* + * Copied from sock.c:sock_queue_rcv_skb(), but changed so packets are + * not dropped, but CAIF is sending flow off instead. 
+ */ +int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + int err; + int skb_len; + unsigned long flags; + struct sk_buff_head *list = &sk->sk_receive_queue; + struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); - wake_up_interruptible(sk_sleep(&cf_sk->sk)); + if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= + (unsigned)sk->sk_rcvbuf && rx_flow_is_on(cf_sk)) { + trace_printk("CAIF: %s():" + " sending flow OFF (queue len = %d %d)\n", + __func__, + atomic_read(&cf_sk->sk.sk_rmem_alloc), + sk_rcvbuf_lowwater(cf_sk)); + set_rx_flow_off(cf_sk); + if (cf_sk->layer.dn) + cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, + CAIF_MODEMCMD_FLOW_OFF_REQ); + } + err = sk_filter(sk, skb); + if (err) + return err; + if (!sk_rmem_schedule(sk, skb->truesize) && rx_flow_is_on(cf_sk)) { + set_rx_flow_off(cf_sk); + trace_printk("CAIF: %s():" + " sending flow OFF due to rmem_schedule\n", + __func__); + if (cf_sk->layer.dn) + cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, + CAIF_MODEMCMD_FLOW_OFF_REQ); + } + skb->dev = NULL; + skb_set_owner_r(skb, sk); + /* Cache the SKB length before we tack it onto the receive + * queue. Once it is added it no longer belongs to us and + * may be freed by other threads of control pulling packets + * from the queue. + */ + skb_len = skb->len; + spin_lock_irqsave(&list->lock, flags); + if (!sock_flag(sk, SOCK_DEAD)) + __skb_queue_tail(list, skb); + spin_unlock_irqrestore(&list->lock, flags); + + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk, skb_len); + else + kfree_skb(skb); return 0; } -/* Packet Flow Control Callback function called from CAIF */ -static void caif_sktflowctrl_cb(struct cflayer *layr, - enum caif_ctrlcmd flow, - int phyid) +/* Packet Receive Callback function called from CAIF Stack */ +static int caif_sktrecv_cb(struct cflayer *layr, struct cfpkt *pkt) { struct caifsock *cf_sk; - - /* NOTE: This function may be called in Tasklet context! */ - pr_debug("CAIF: %s(): flowctrl func called: %s.\n", - __func__, - flow == CAIF_CTRLCMD_FLOW_ON_IND ? "ON" : - flow == CAIF_CTRLCMD_FLOW_OFF_IND ? "OFF" : - flow == CAIF_CTRLCMD_INIT_RSP ? "INIT_RSP" : - flow == CAIF_CTRLCMD_DEINIT_RSP ? "DEINIT_RSP" : - flow == CAIF_CTRLCMD_INIT_FAIL_RSP ? "INIT_FAIL_RSP" : - flow == - CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND ? "REMOTE_SHUTDOWN" : - "UKNOWN CTRL COMMAND"); - - if (layr == NULL) - return; + struct sk_buff *skb; cf_sk = container_of(layr, struct caifsock, layer); + skb = cfpkt_tonative(pkt); + + if (unlikely(cf_sk->sk.sk_state != CAIF_CONNECTED)) { + cfpkt_destroy(pkt); + return 0; + } + caif_queue_rcv_skb(&cf_sk->sk, skb); + return 0; +} +/* Packet Control Callback function called from CAIF */ +static void caif_ctrl_cb(struct cflayer *layr, + enum caif_ctrlcmd flow, + int phyid) +{ + struct caifsock *cf_sk = container_of(layr, struct caifsock, layer); switch (flow) { case CAIF_CTRLCMD_FLOW_ON_IND: + /* OK from modem to start sending again */ dbfs_atomic_inc(&cnt.num_tx_flow_on_ind); - /* Signal reader that data is available. */ - SET_TX_FLOW_ON(cf_sk); - wake_up_interruptible(sk_sleep(&cf_sk->sk)); + set_tx_flow_on(cf_sk); + cf_sk->sk.sk_state_change(&cf_sk->sk); break; case CAIF_CTRLCMD_FLOW_OFF_IND: + /* Modem asks us to shut up */ dbfs_atomic_inc(&cnt.num_tx_flow_off_ind); - SET_TX_FLOW_OFF(cf_sk); + set_tx_flow_off(cf_sk); + cf_sk->sk.sk_state_change(&cf_sk->sk); break; case CAIF_CTRLCMD_INIT_RSP: - dbfs_atomic_inc(&cnt.num_init_resp); - /* Signal reader that data is available. 
*/ - caif_assert(STATE_IS_OPEN(cf_sk)); - SET_PENDING_OFF(cf_sk); - SET_TX_FLOW_ON(cf_sk); - wake_up_interruptible(sk_sleep(&cf_sk->sk)); + /* We're now connected */ + dbfs_atomic_inc(&cnt.num_connect_resp); + cf_sk->sk.sk_state = CAIF_CONNECTED; + set_tx_flow_on(cf_sk); + cf_sk->sk.sk_state_change(&cf_sk->sk); break; case CAIF_CTRLCMD_DEINIT_RSP: - dbfs_atomic_inc(&cnt.num_deinit_resp); - caif_assert(!STATE_IS_OPEN(cf_sk)); - SET_PENDING_OFF(cf_sk); - if (!STATE_IS_PENDING_DESTROY(cf_sk)) { - if (sk_sleep(&cf_sk->sk) != NULL) - wake_up_interruptible(sk_sleep(&cf_sk->sk)); - } - dbfs_atomic_inc(&cnt.num_deinit); - sock_put(&cf_sk->sk); + /* We're now disconnected */ + cf_sk->sk.sk_state = CAIF_DISCONNECTED; + cf_sk->sk.sk_state_change(&cf_sk->sk); + cfcnfg_release_adap_layer(&cf_sk->layer); break; case CAIF_CTRLCMD_INIT_FAIL_RSP: - dbfs_atomic_inc(&cnt.num_init_fail_resp); - caif_assert(STATE_IS_OPEN(cf_sk)); - SET_STATE_CLOSED(cf_sk); - SET_PENDING_OFF(cf_sk); - SET_TX_FLOW_OFF(cf_sk); - wake_up_interruptible(sk_sleep(&cf_sk->sk)); + /* Connect request failed */ + dbfs_atomic_inc(&cnt.num_connect_fail_resp); + cf_sk->sk.sk_err = ECONNREFUSED; + cf_sk->sk.sk_state = CAIF_DISCONNECTED; + cf_sk->sk.sk_shutdown = SHUTDOWN_MASK; + /* + * Socket "standards" seems to require POLLOUT to + * be set at connect failure. + */ + set_tx_flow_on(cf_sk); + cf_sk->sk.sk_state_change(&cf_sk->sk); break; case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND: + /* Modem has closed this connection, or device is down. */ dbfs_atomic_inc(&cnt.num_remote_shutdown_ind); - SET_REMOTE_SHUTDOWN(cf_sk); - /* Use sk_shutdown to indicate remote shutdown indication */ - cf_sk->sk.sk_shutdown |= RCV_SHUTDOWN; - cf_sk->file_mode = 0; - wake_up_interruptible(sk_sleep(&cf_sk->sk)); + cf_sk->sk.sk_shutdown = SHUTDOWN_MASK; + cf_sk->sk.sk_err = ECONNRESET; + set_rx_flow_on(cf_sk); + cf_sk->sk.sk_error_report(&cf_sk->sk); break; default: pr_debug("CAIF: %s(): Unexpected flow command %d\n", - __func__, flow); + __func__, flow); } } -static void skb_destructor(struct sk_buff *skb) +static void caif_check_flow_release(struct sock *sk) { - dbfs_atomic_inc(&cnt.skb_free); - dbfs_atomic_dec(&cnt.skb_in_use); -} + struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); + if (cf_sk->layer.dn == NULL || cf_sk->layer.dn->modemcmd == NULL) + return; + if (rx_flow_is_on(cf_sk)) + return; -static int caif_recvmsg(struct kiocb *iocb, struct socket *sock, + if (atomic_read(&sk->sk_rmem_alloc) <= sk_rcvbuf_lowwater(cf_sk)) { + dbfs_atomic_inc(&cnt.num_rx_flow_on); + set_rx_flow_on(cf_sk); + cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, + CAIF_MODEMCMD_FLOW_ON_REQ); + } +} +/* + * Copied from sock.c:sock_queue_rcv_skb(), and added check that user buffer + * has sufficient size. + */ + +static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t buf_len, int flags) { struct sock *sk = sock->sk; - struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); - struct cfpkt *pkt = NULL; - size_t len; - int result; struct sk_buff *skb; - ssize_t ret = -EIO; - int read_queue_low; - - if (cf_sk == NULL) { - pr_debug("CAIF: %s(): private_data not set!\n", - __func__); - ret = -EBADFD; - goto read_error; - } - - /* Don't do multiple iovec entries yet */ - if (m->msg_iovlen != 1) - return -EOPNOTSUPP; + int ret = 0; + int len; if (unlikely(!buf_len)) return -EINVAL; - lock_sock(&(cf_sk->sk)); - - caif_assert(cf_sk->pktq); - - if (!STATE_IS_OPEN(cf_sk)) { - /* Socket is closed or closing. 
*/ - if (!STATE_IS_PENDING(cf_sk)) { - pr_debug("CAIF: %s(): socket is closed (by remote)\n", - __func__); - ret = -EPIPE; - } else { - pr_debug("CAIF: %s(): socket is closing..\n", __func__); - ret = -EBADF; - } + skb = skb_recv_datagram(sk, flags, 0 , &ret); + if (!skb) goto read_error; - } - /* Socket is open or opening. */ - if (STATE_IS_PENDING(cf_sk)) { - pr_debug("CAIF: %s(): socket is opening...\n", __func__); - - if (flags & MSG_DONTWAIT) { - /* We can't block. */ - pr_debug("CAIF: %s():state pending and MSG_DONTWAIT\n", - __func__); - ret = -EAGAIN; - goto read_error; - } + len = skb->len; + + if (skb && skb->len > buf_len && !(flags & MSG_PEEK)) { + len = buf_len; /* - * Blocking mode; state is pending and we need to wait - * for its conclusion. + * Push skb back on receive queue if buffer too small. + * This has a built-in race where multi-threaded receive + * may get packet in wrong order, but multiple read does + * not really guarantee ordered delivery anyway. + * Let's optimize for speed without taking locks. */ - release_sock(&cf_sk->sk); - - result = - wait_event_interruptible(*sk_sleep(&cf_sk->sk), - !STATE_IS_PENDING(cf_sk)); - lock_sock(&(cf_sk->sk)); - - if (result == -ERESTARTSYS) { - pr_debug("CAIF: %s(): wait_event_interruptible" - " woken by a signal (1)", __func__); - ret = -ERESTARTSYS; - goto read_error; - } + skb_queue_head(&sk->sk_receive_queue, skb); + ret = -EMSGSIZE; + goto read_error; } - if (STATE_IS_REMOTE_SHUTDOWN(cf_sk) || - !STATE_IS_OPEN(cf_sk) || - STATE_IS_PENDING(cf_sk)) { - - pr_debug("CAIF: %s(): socket closed\n", - __func__); - ret = -ESHUTDOWN; + ret = skb_copy_datagram_iovec(skb, 0, m->msg_iov, len); + if (ret) goto read_error; - } - /* - * Block if we don't have any received buffers. - * The queue has its own lock. - */ - while ((pkt = cfpkt_qpeek(cf_sk->pktq)) == NULL) { + skb_free_datagram(sk, skb); - if (flags & MSG_DONTWAIT) { - pr_debug("CAIF: %s(): MSG_DONTWAIT\n", __func__); - ret = -EAGAIN; - goto read_error; - } - trace_printk("CAIF: %s() wait_event\n", __func__); + caif_check_flow_release(sk); - /* Let writers in. */ - release_sock(&cf_sk->sk); + return len; - /* Block reader until data arrives or socket is closed. */ - if (wait_event_interruptible(*sk_sleep(&cf_sk->sk), - cfpkt_qpeek(cf_sk->pktq) - || STATE_IS_REMOTE_SHUTDOWN(cf_sk) - || !STATE_IS_OPEN(cf_sk)) == - -ERESTARTSYS) { - pr_debug("CAIF: %s():" - " wait_event_interruptible woken by " - "a signal, signal_pending(current) = %d\n", - __func__, - signal_pending(current)); - return -ERESTARTSYS; - } +read_error: + return ret; +} - trace_printk("CAIF: %s() awake\n", __func__); - if (STATE_IS_REMOTE_SHUTDOWN(cf_sk)) { - pr_debug("CAIF: %s(): " - "received remote_shutdown indication\n", - __func__); - ret = -ESHUTDOWN; - goto read_error_no_unlock; - } - /* I want to be alone on cf_sk (except status and queue). */ - lock_sock(&(cf_sk->sk)); +/* Copied from unix_stream_wait_data, identical except for lock call. */ +static long caif_stream_data_wait(struct sock *sk, long timeo) +{ + DEFINE_WAIT(wait); + lock_sock(sk); + + for (;;) { + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + + if (!skb_queue_empty(&sk->sk_receive_queue) || + sk->sk_err || + sk->sk_state != CAIF_CONNECTED || + sock_flag(sk, SOCK_DEAD) || + (sk->sk_shutdown & RCV_SHUTDOWN) || + signal_pending(current) || + !timeo) + break; - if (!STATE_IS_OPEN(cf_sk)) { - /* Someone closed the link, report error. 
*/ - pr_debug("CAIF: %s(): remote end shutdown!\n", - __func__); - ret = -EPIPE; - goto read_error; - } + set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); + clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); } - /* The queue has its own lock. */ - len = cfpkt_getlen(pkt); - - /* Check max length that can be copied. */ - if (len <= buf_len) - pkt = cfpkt_dequeue(cf_sk->pktq); - else { - pr_debug("CAIF: %s(): user buffer too small (%ld,%ld)\n", - __func__, (long) len, (long) buf_len); - if (sock->type == SOCK_SEQPACKET) { - ret = -EMSGSIZE; - goto read_error; - } - len = buf_len; - } + finish_wait(sk_sleep(sk), &wait); + release_sock(sk); + return timeo; +} - spin_lock(&cf_sk->read_queue_len_lock); - cf_sk->read_queue_len--; - read_queue_low = (cf_sk->read_queue_len < CHNL_SKT_READ_QUEUE_LOW); - spin_unlock(&cf_sk->read_queue_len_lock); +/* + * Copied from unix_stream_recvmsg, but removed credit checks, + * changed locking calls, changed address handling. + */ +static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t size, + int flags) +{ + struct sock *sk = sock->sk; + int copied = 0; + int target; + int err = 0; + long timeo; - if (!RX_FLOW_IS_ON(cf_sk) && read_queue_low) { - dbfs_atomic_inc(&cnt.num_rx_flow_on); - SET_RX_FLOW_ON(cf_sk); + err = -EOPNOTSUPP; + if (flags&MSG_OOB) + goto out; - /* Send flow on. */ - pr_debug("CAIF: %s(): sending flow ON (queue len = %d)\n", - __func__, cf_sk->read_queue_len); - caif_assert(cf_sk->layer.dn); - caif_assert(cf_sk->layer.dn->ctrlcmd); - (void) cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, - CAIF_MODEMCMD_FLOW_ON_REQ); + msg->msg_namelen = 0; - caif_assert(cf_sk->read_queue_len >= 0); - } + /* + * Lock the socket to prevent queue disordering + * while sleeps in memcpy_tomsg + */ + err = -EAGAIN; + if (sk->sk_state == CAIF_CONNECTING) + goto out; - skb = cfpkt_tonative(pkt); - result = skb_copy_datagram_iovec(skb, 0, m->msg_iov, len); - skb_pull(skb, len); + caif_read_lock(sk); + target = sock_rcvlowat(sk, flags&MSG_WAITALL, size); + timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT); - if (result) { - pr_debug("CAIF: %s(): copy to_iovec failed\n", __func__); - cfpkt_destroy(pkt); - ret = -EFAULT; - goto read_error; - } + do { + int chunk; + struct sk_buff *skb; - /* Free packet and remove from queue */ - if (skb->len == 0) - skb_free_datagram(sk, skb); + lock_sock(sk); + skb = skb_dequeue(&sk->sk_receive_queue); + caif_check_flow_release(sk); - /* Let the others in. */ - release_sock(&cf_sk->sk); - return len; + if (skb == NULL) { + if (copied >= target) + goto unlock; + /* + * POSIX 1003.1g mandates this order. + */ + err = sock_error(sk); + if (err) + goto unlock; + err = -ECONNRESET; + if (sk->sk_shutdown & RCV_SHUTDOWN) + goto unlock; -read_error: - release_sock(&cf_sk->sk); -read_error_no_unlock: - return ret; -} + err = -EPIPE; + if (sk->sk_state != CAIF_CONNECTED) + goto unlock; + if (sock_flag(sk, SOCK_DEAD)) + goto unlock; -/* Send a signal as a consequence of sendmsg, sendto or caif_sendmsg. 
*/ -static int caif_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) -{ + release_sock(sk); - struct sock *sk = sock->sk; - struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); - size_t payload_size = msg->msg_iov->iov_len; - struct cfpkt *pkt = NULL; - struct caif_payload_info info; - unsigned char *txbuf; - ssize_t ret = -EIO; - int result; - struct sk_buff *skb; - caif_assert(msg->msg_iovlen == 1); + err = -EAGAIN; + if (!timeo) + break; - if (cf_sk == NULL) { - pr_debug("CAIF: %s(): private_data not set!\n", - __func__); - ret = -EBADFD; - goto write_error_no_unlock; - } + caif_read_unlock(sk); - if (unlikely(msg->msg_iov->iov_base == NULL)) { - pr_warning("CAIF: %s(): Buffer is NULL.\n", __func__); - ret = -EINVAL; - goto write_error_no_unlock; - } + timeo = caif_stream_data_wait(sk, timeo); - if (payload_size > CAIF_MAX_PAYLOAD_SIZE) { - pr_debug("CAIF: %s(): buffer too long\n", __func__); - if (sock->type == SOCK_SEQPACKET) { - ret = -EINVAL; - goto write_error_no_unlock; + if (signal_pending(current)) { + err = sock_intr_errno(timeo); + goto out; + } + caif_read_lock(sk); + continue; +unlock: + release_sock(sk); + break; } - payload_size = CAIF_MAX_PAYLOAD_SIZE; - } + release_sock(sk); + chunk = min_t(unsigned int, skb->len, size); + if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) { + skb_queue_head(&sk->sk_receive_queue, skb); + if (copied == 0) + copied = -EFAULT; + break; + } + copied += chunk; + size -= chunk; - /* I want to be alone on cf_sk (except status and queue) */ - lock_sock(&(cf_sk->sk)); + /* Mark read part of skb as used */ + if (!(flags & MSG_PEEK)) { + skb_pull(skb, chunk); - caif_assert(cf_sk->pktq); + /* put the skb back if we didn't use it up. */ + if (skb->len) { + skb_queue_head(&sk->sk_receive_queue, skb); + break; + } + kfree_skb(skb); - if (!STATE_IS_OPEN(cf_sk)) { - /* Socket is closed or closing */ - if (!STATE_IS_PENDING(cf_sk)) { - pr_debug("CAIF: %s(): socket is closed (by remote)\n", - __func__); - ret = -EPIPE; } else { - pr_debug("CAIF: %s(): socket is closing...\n", - __func__); - ret = -EBADF; - } - goto write_error; - } - - /* Socket is open or opening */ - if (STATE_IS_PENDING(cf_sk)) { - pr_debug("CAIF: %s(): socket is opening...\n", __func__); - - if (msg->msg_flags & MSG_DONTWAIT) { - /* We can't block */ - trace_printk("CAIF: %s():state pending:" - "state=MSG_DONTWAIT\n", __func__); - ret = -EAGAIN; - goto write_error; + /* + * It is questionable, see note in unix_dgram_recvmsg. + */ + /* put message back and return */ + skb_queue_head(&sk->sk_receive_queue, skb); + break; } - /* Let readers in */ - release_sock(&cf_sk->sk); - - /* - * Blocking mode; state is pending and we need to wait - * for its conclusion. - */ - result = - wait_event_interruptible(*sk_sleep(&cf_sk->sk), - !STATE_IS_PENDING(cf_sk)); - /* I want to be alone on cf_sk (except status and queue) */ - lock_sock(&(cf_sk->sk)); + } while (size); + caif_read_unlock(sk); - if (result == -ERESTARTSYS) { - pr_debug("CAIF: %s(): wait_event_interruptible" - " woken by a signal (1)", __func__); - ret = -ERESTARTSYS; - goto write_error; - } - } - if (STATE_IS_REMOTE_SHUTDOWN(cf_sk) || - !STATE_IS_OPEN(cf_sk) || - STATE_IS_PENDING(cf_sk)) { +out: + return copied ? : err; +} - pr_debug("CAIF: %s(): socket closed\n", - __func__); - ret = -ESHUTDOWN; - goto write_error; +/* + * Copied from sock.c:sock_wait_for_wmem, but change to wait for + * CAIF flow-on and sock_writable. 
+ */ +static long caif_wait_for_flow_on(struct caifsock *cf_sk, + int wait_writeable, long timeo, int *err) +{ + struct sock *sk = &cf_sk->sk; + DEFINE_WAIT(wait); + for (;;) { + *err = 0; + if (tx_flow_is_on(cf_sk) && + (!wait_writeable || sock_writeable(&cf_sk->sk))) + break; + *err = -ETIMEDOUT; + if (!timeo) + break; + *err = -ERESTARTSYS; + if (signal_pending(current)) + break; + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + *err = -ECONNRESET; + if (sk->sk_shutdown & SHUTDOWN_MASK) + break; + *err = -sk->sk_err; + if (sk->sk_err) + break; + *err = -EPIPE; + if (cf_sk->sk.sk_state != CAIF_CONNECTED) + break; + timeo = schedule_timeout(timeo); } + finish_wait(sk_sleep(sk), &wait); + return timeo; +} - if (!TX_FLOW_IS_ON(cf_sk)) { +/* + * Transmit a SKB. The device may temporarily request re-transmission + * by returning EAGAIN. + */ +static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk, + int noblock, long timeo) +{ + struct cfpkt *pkt; + int ret, loopcnt = 0; - /* Flow is off. Check non-block flag */ - if (msg->msg_flags & MSG_DONTWAIT) { - trace_printk("CAIF: %s(): MSG_DONTWAIT and tx flow off", - __func__); - ret = -EAGAIN; - goto write_error; - } + pkt = cfpkt_fromnative(CAIF_DIR_OUT, skb); + memset(cfpkt_info(pkt), 0, sizeof(struct caif_payload_info)); + do { - /* release lock before waiting */ - release_sock(&cf_sk->sk); + ret = -ETIMEDOUT; - /* Wait until flow is on or socket is closed */ - if (wait_event_interruptible(*sk_sleep(&cf_sk->sk), - TX_FLOW_IS_ON(cf_sk) - || !STATE_IS_OPEN(cf_sk) - || STATE_IS_REMOTE_SHUTDOWN(cf_sk) - ) == -ERESTARTSYS) { - pr_debug("CAIF: %s():" - " wait_event_interruptible woken by a signal", - __func__); - ret = -ERESTARTSYS; - goto write_error_no_unlock; + /* Slight paranoia, probably not needed. 
*/ + if (unlikely(loopcnt++ > 1000)) { + pr_warning("CAIF: %s(): transmit retries failed," + " error = %d\n", __func__, ret); + break; } - /* I want to be alone on cf_sk (except status and queue) */ - lock_sock(&(cf_sk->sk)); - - if (!STATE_IS_OPEN(cf_sk)) { - /* someone closed the link, report error */ - pr_debug("CAIF: %s(): remote end shutdown!\n", - __func__); - ret = -EPIPE; - goto write_error; + if (cf_sk->layer.dn != NULL) + ret = cf_sk->layer.dn->transmit(cf_sk->layer.dn, pkt); + if (likely(ret >= 0)) + break; + /* if transmit return -EAGAIN, then retry */ + if (noblock && ret == -EAGAIN) + break; + timeo = caif_wait_for_flow_on(cf_sk, 0, timeo, &ret); + if (signal_pending(current)) { + ret = sock_intr_errno(timeo); + break; } - - if (STATE_IS_REMOTE_SHUTDOWN(cf_sk)) { - pr_debug("CAIF: %s(): " - "received remote_shutdown indication\n", - __func__); - ret = -ESHUTDOWN; - goto write_error; + if (ret) + break; + if (cf_sk->sk.sk_state != CAIF_CONNECTED || + sock_flag(&cf_sk->sk, SOCK_DEAD) || + (cf_sk->sk.sk_shutdown & RCV_SHUTDOWN)) { + ret = -EPIPE; + cf_sk->sk.sk_err = EPIPE; + break; } - } + } while (ret == -EAGAIN); + return ret; +} - pkt = cfpkt_create(payload_size); - skb = (struct sk_buff *)pkt; - skb->destructor = skb_destructor; - skb->sk = sk; - dbfs_atomic_inc(&cnt.skb_alloc); - dbfs_atomic_inc(&cnt.skb_in_use); - if (cfpkt_raw_append(pkt, (void **) &txbuf, payload_size) < 0) { - pr_debug("CAIF: %s(): cfpkt_raw_append failed\n", __func__); - cfpkt_destroy(pkt); - ret = -EINVAL; - goto write_error; - } +/* Copied from af_unix:unix_dgram_sendmsg, and adapted to CAIF */ +static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock, + struct msghdr *msg, size_t len) +{ + struct sock *sk = sock->sk; + struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); + int buffer_size; + int ret = 0; + struct sk_buff *skb = NULL; + int noblock; + long timeo; + caif_assert(cf_sk); + ret = sock_error(sk); + if (ret) + goto err; + + ret = -EOPNOTSUPP; + if (msg->msg_flags&MSG_OOB) + goto err; + + ret = -EOPNOTSUPP; + if (msg->msg_namelen) + goto err; + + ret = -EINVAL; + if (unlikely(msg->msg_iov->iov_base == NULL)) + goto err; + noblock = msg->msg_flags & MSG_DONTWAIT; + + buffer_size = len + CAIF_NEEDED_HEADROOM + CAIF_NEEDED_TAILROOM; + + ret = -EMSGSIZE; + if (buffer_size > CAIF_MAX_PAYLOAD_SIZE) + goto err; + + timeo = sock_sndtimeo(sk, noblock); + timeo = caif_wait_for_flow_on(container_of(sk, struct caifsock, sk), + 1, timeo, &ret); + + ret = -EPIPE; + if (cf_sk->sk.sk_state != CAIF_CONNECTED || + sock_flag(sk, SOCK_DEAD) || + (sk->sk_shutdown & RCV_SHUTDOWN)) + goto err; + + ret = -ENOMEM; + skb = sock_alloc_send_skb(sk, buffer_size, noblock, &ret); + if (!skb) + goto err; + skb_reserve(skb, CAIF_NEEDED_HEADROOM); + + ret = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); + + if (ret) + goto err; + ret = transmit_skb(skb, cf_sk, noblock, timeo); + if (ret < 0) + goto err; + return len; +err: + kfree_skb(skb); + return ret; +} - /* Copy data into buffer. */ - if (copy_from_user(txbuf, msg->msg_iov->iov_base, payload_size)) { - pr_debug("CAIF: %s(): copy_from_user returned non zero.\n", - __func__); - cfpkt_destroy(pkt); - ret = -EINVAL; - goto write_error; - } - memset(&info, 0, sizeof(info)); +/* + * Copied from unix_stream_sendmsg and adapted to CAIF: + * Changed removed permission handling and added waiting for flow on + * and other minor adaptations. 
+ */ +static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, + struct msghdr *msg, size_t len) +{ + struct sock *sk = sock->sk; + struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); + int err, size; + struct sk_buff *skb; + int sent = 0; + long timeo; - /* Send the packet down the stack. */ - caif_assert(cf_sk->layer.dn); - caif_assert(cf_sk->layer.dn->transmit); + err = -EOPNOTSUPP; - do { - ret = cf_sk->layer.dn->transmit(cf_sk->layer.dn, pkt); + if (unlikely(msg->msg_flags&MSG_OOB)) + goto out_err; - if (likely((ret >= 0) || (ret != -EAGAIN))) - break; + if (unlikely(msg->msg_namelen)) + goto out_err; - /* EAGAIN - retry */ - if (msg->msg_flags & MSG_DONTWAIT) { - pr_debug("CAIF: %s(): NONBLOCK and transmit failed," - " error = %ld\n", __func__, (long) ret); - ret = -EAGAIN; - goto write_error; - } + timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + timeo = caif_wait_for_flow_on(cf_sk, 1, timeo, &err); - /* Let readers in */ - release_sock(&cf_sk->sk); + if (unlikely(sk->sk_shutdown & SEND_SHUTDOWN)) + goto pipe_err; - /* Wait until flow is on or socket is closed */ - if (wait_event_interruptible(*sk_sleep(&cf_sk->sk), - TX_FLOW_IS_ON(cf_sk) - || !STATE_IS_OPEN(cf_sk) - || STATE_IS_REMOTE_SHUTDOWN(cf_sk) - ) == -ERESTARTSYS) { - pr_debug("CAIF: %s(): wait_event_interruptible" - " woken by a signal", __func__); - ret = -ERESTARTSYS; - goto write_error_no_unlock; - } + while (sent < len) { - /* I want to be alone on cf_sk (except status and queue) */ - lock_sock(&(cf_sk->sk)); + size = len-sent; - } while (ret == -EAGAIN); + if (size > CAIF_MAX_PAYLOAD_SIZE) + size = CAIF_MAX_PAYLOAD_SIZE; - if (ret < 0) { - cfpkt_destroy(pkt); - pr_debug("CAIF: %s(): transmit failed, error = %ld\n", - __func__, (long) ret); + /* If size is more than half of sndbuf, chop up message */ + if (size > ((sk->sk_sndbuf >> 1) - 64)) + size = (sk->sk_sndbuf >> 1) - 64; - goto write_error; - } + if (size > SKB_MAX_ALLOC) + size = SKB_MAX_ALLOC; - release_sock(&cf_sk->sk); - return payload_size; + skb = sock_alloc_send_skb(sk, + size + CAIF_NEEDED_HEADROOM + + CAIF_NEEDED_TAILROOM, + msg->msg_flags&MSG_DONTWAIT, + &err); + if (skb == NULL) + goto out_err; -write_error: - release_sock(&cf_sk->sk); -write_error_no_unlock: - return ret; -} + skb_reserve(skb, CAIF_NEEDED_HEADROOM); + /* + * If you pass two values to the sock_alloc_send_skb + * it tries to grab the large buffer with GFP_NOFS + * (which can fail easily), and if it fails grab the + * fallback size buffer which is under a page and will + * succeed. 
[Alan] + */ + size = min_t(int, size, skb_tailroom(skb)); -static unsigned int caif_poll(struct file *file, struct socket *sock, - poll_table *wait) -{ - struct sock *sk = sock->sk; - struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); - u32 mask = 0; - poll_wait(file, sk_sleep(sk), wait); - lock_sock(&(cf_sk->sk)); - if (!STATE_IS_OPEN(cf_sk)) { - if (!STATE_IS_PENDING(cf_sk)) - mask |= POLLHUP; - } else { - if (cfpkt_qpeek(cf_sk->pktq) != NULL) - mask |= (POLLIN | POLLRDNORM); - if (TX_FLOW_IS_ON(cf_sk)) - mask |= (POLLOUT | POLLWRNORM); + err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); + if (err) { + kfree_skb(skb); + goto out_err; + } + err = transmit_skb(skb, cf_sk, + msg->msg_flags&MSG_DONTWAIT, timeo); + if (err < 0) { + kfree_skb(skb); + goto pipe_err; + } + sent += size; } - release_sock(&cf_sk->sk); - trace_printk("CAIF: %s(): poll mask=0x%04x\n", - __func__, mask); - return mask; -} - -static void drain_queue(struct caifsock *cf_sk) -{ - struct cfpkt *pkt = NULL; - - /* Empty the queue */ - do { - /* The queue has its own lock */ - if (!cf_sk->pktq) - break; - - pkt = cfpkt_dequeue(cf_sk->pktq); - if (!pkt) - break; - pr_debug("CAIF: %s(): freeing packet from read queue\n", - __func__); - cfpkt_destroy(pkt); - } while (1); + return sent; - cf_sk->read_queue_len = 0; +pipe_err: + if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL)) + send_sig(SIGPIPE, current, 0); + err = -EPIPE; +out_err: + return sent ? : err; } static int setsockopt(struct socket *sock, @@ -736,19 +738,13 @@ static int setsockopt(struct socket *sock, int prio, linksel; struct ifreq ifreq; - if (STATE_IS_OPEN(cf_sk)) { - pr_debug("CAIF: %s(): setsockopt " - "cannot be done on a connected socket\n", - __func__); + if (cf_sk->sk.sk_socket->state != SS_UNCONNECTED) return -ENOPROTOOPT; - } + switch (opt) { case CAIFSO_LINK_SELECT: - if (ol < sizeof(int)) { - pr_debug("CAIF: %s(): setsockopt" - " CAIFSO_CHANNEL_CONFIG bad size\n", __func__); + if (ol < sizeof(int)) return -EINVAL; - } if (lvl != SOL_CAIF) goto bad_sol; if (copy_from_user(&linksel, ov, sizeof(int))) @@ -761,28 +757,20 @@ static int setsockopt(struct socket *sock, case SO_PRIORITY: if (lvl != SOL_SOCKET) goto bad_sol; - if (ol < sizeof(int)) { - pr_debug("CAIF: %s(): setsockopt" - " SO_PRIORITY bad size\n", __func__); + if (ol < sizeof(int)) return -EINVAL; - } if (copy_from_user(&prio, ov, sizeof(int))) return -EINVAL; lock_sock(&(cf_sk->sk)); cf_sk->conn_req.priority = prio; - pr_debug("CAIF: %s(): Setting sockopt priority=%d\n", __func__, - cf_sk->conn_req.priority); release_sock(&cf_sk->sk); return 0; case SO_BINDTODEVICE: if (lvl != SOL_SOCKET) goto bad_sol; - if (ol < sizeof(struct ifreq)) { - pr_debug("CAIF: %s(): setsockopt" - " SO_PRIORITY bad size\n", __func__); + if (ol < sizeof(struct ifreq)) return -EINVAL; - } if (copy_from_user(&ifreq, ov, sizeof(ifreq))) return -EFAULT; lock_sock(&(cf_sk->sk)); @@ -798,359 +786,275 @@ static int setsockopt(struct socket *sock, goto bad_sol; if (cf_sk->sk.sk_protocol != CAIFPROTO_UTIL) return -ENOPROTOOPT; - if (ol > sizeof(cf_sk->conn_req.param.data)) - goto req_param_bad_size; - lock_sock(&(cf_sk->sk)); cf_sk->conn_req.param.size = ol; - if (copy_from_user(&cf_sk->conn_req.param.data, ov, ol)) { + if (ol > sizeof(cf_sk->conn_req.param.data) || + copy_from_user(&cf_sk->conn_req.param.data, ov, ol)) { release_sock(&cf_sk->sk); -req_param_bad_size: - pr_debug("CAIF: %s(): setsockopt" - " CAIFSO_CHANNEL_CONFIG bad size\n", __func__); return -EINVAL; } - release_sock(&cf_sk->sk); 
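/*
 * Editorial sketch, not part of the patch above: the rewritten
 * CAIFSO_REQ_PARAM case folds the length check into the same condition
 * as the copy ("ol > sizeof(...param.data) || copy_from_user(...)"),
 * so an oversized option can never be copied into conn_req.param.data.
 * Below is a minimal, self-contained illustration of that
 * validate-then-copy pattern; the names are hypothetical and memcpy()
 * stands in for copy_from_user().
 */
#include <string.h>

struct req_param {
	size_t size;
	unsigned char data[256];	/* assumed capacity, for illustration only */
};

static int set_req_param(struct req_param *p, const void *opt, size_t optlen)
{
	/* Reject before touching the destination buffer. */
	if (optlen > sizeof(p->data))
		return -1;		/* the kernel path returns -EINVAL here */

	memcpy(p->data, opt, optlen);	/* copy_from_user() in the kernel path */
	p->size = optlen;
	return 0;
}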
return 0; default: - pr_debug("CAIF: %s(): unhandled option %d\n", __func__, opt); - return -EINVAL; + return -ENOPROTOOPT; } return 0; bad_sol: - pr_debug("CAIF: %s(): setsockopt bad level\n", __func__); return -ENOPROTOOPT; } -static int caif_connect(struct socket *sock, struct sockaddr *uservaddr, - int sockaddr_len, int flags) +/* + * caif_connect() - Connect a CAIF Socket + * Copied and modified af_irda.c:irda_connect(). + * + * Note : by consulting "errno", the user space caller may learn the cause + * of the failure. Most of them are visible in the function, others may come + * from subroutines called and are listed here : + * o -EAFNOSUPPORT: bad socket family or type. + * o -ESOCKTNOSUPPORT: bad socket type or protocol + * o -EINVAL: bad socket address, or CAIF link type + * o -ECONNREFUSED: remote end refused the connection. + * o -EINPROGRESS: connect request sent but timed out (or non-blocking) + * o -EISCONN: already connected. + * o -ETIMEDOUT: Connection timed out (send timeout) + * o -ENODEV: No link layer to send request + * o -ECONNRESET: Received Shutdown indication or lost link layer + * o -ENOMEM: Out of memory + * + * State Strategy: + * o sk_state: holds the CAIF_* protocol state, it's updated by + * caif_ctrl_cb. + * o sock->state: holds the SS_* socket state and is updated by connect and + * disconnect. + */ +static int caif_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags) { - struct caifsock *cf_sk = NULL; - int result = -1; - int mode = 0; - int ret = -EIO; struct sock *sk = sock->sk; - BUG_ON(sk == NULL); - - cf_sk = container_of(sk, struct caifsock, sk); - - trace_printk("CAIF: %s(): cf_sk=%p OPEN=%d, TX_FLOW=%d, RX_FLOW=%d\n", - __func__, cf_sk, - STATE_IS_OPEN(cf_sk), - TX_FLOW_IS_ON(cf_sk), RX_FLOW_IS_ON(cf_sk)); - + struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); + long timeo; + int err; + lock_sock(sk); - if (sock->type == SOCK_SEQPACKET || sock->type == SOCK_STREAM) - sock->state = SS_CONNECTING; - else + err = -EAFNOSUPPORT; + if (uaddr->sa_family != AF_CAIF) goto out; - /* I want to be alone on cf_sk (except status and queue) */ - lock_sock(&(cf_sk->sk)); - - if (sockaddr_len != sizeof(struct sockaddr_caif)) { - pr_debug("CAIF: %s(): Bad address len (%ld,%lu)\n", - __func__, (long) sockaddr_len, - (long unsigned) sizeof(struct sockaddr_caif)); - ret = -EINVAL; - goto open_error; + err = -ESOCKTNOSUPPORT; + if (unlikely(!(sk->sk_type == SOCK_STREAM && + cf_sk->sk.sk_protocol == CAIFPROTO_AT) && + sk->sk_type != SOCK_SEQPACKET)) + goto out; + switch (sock->state) { + case SS_UNCONNECTED: + /* Normal case, a fresh connect */ + caif_assert(sk->sk_state == CAIF_DISCONNECTED); + break; + case SS_CONNECTING: + switch (sk->sk_state) { + case CAIF_CONNECTED: + sock->state = SS_CONNECTED; + err = -EISCONN; + goto out; + case CAIF_DISCONNECTED: + /* Reconnect allowed */ + break; + case CAIF_CONNECTING: + err = -EALREADY; + if (flags & O_NONBLOCK) + goto out; + goto wait_connect; + } + break; + case SS_CONNECTED: + caif_assert(sk->sk_state == CAIF_CONNECTED || + sk->sk_state == CAIF_DISCONNECTED); + if (sk->sk_shutdown & SHUTDOWN_MASK) { + /* Allow re-connect after SHUTDOWN_IND */ + caif_disconnect_client(&cf_sk->layer); + break; + } + /* No reconnect on a seqpacket socket */ + err = -EISCONN; + goto out; + case SS_DISCONNECTING: + case SS_FREE: + caif_assert(1); /*Should never happen */ + break; } + sk->sk_state = CAIF_DISCONNECTED; + sock->state = SS_UNCONNECTED; + sk_stream_kill_queues(&cf_sk->sk); - if 
(uservaddr->sa_family != AF_CAIF) { - pr_debug("CAIF: %s(): Bad address family (%d)\n", - __func__, uservaddr->sa_family); - ret = -EAFNOSUPPORT; - goto open_error; - } + err = -EINVAL; + if (addr_len != sizeof(struct sockaddr_caif) || + !uaddr) + goto out; - memcpy(&cf_sk->conn_req.sockaddr, uservaddr, + memcpy(&cf_sk->conn_req.sockaddr, uaddr, sizeof(struct sockaddr_caif)); - dbfs_atomic_inc(&cnt.num_open); - mode = SKT_READ_FLAG | SKT_WRITE_FLAG; - - /* If socket is not open, make sure socket is in fully closed state */ - if (!STATE_IS_OPEN(cf_sk)) { - /* Has link close response been received (if we ever sent it)?*/ - if (STATE_IS_PENDING(cf_sk)) { - /* - * Still waiting for close response from remote. - * If opened non-blocking, report "would block" - */ - if (flags & O_NONBLOCK) { - pr_debug("CAIF: %s(): O_NONBLOCK" - " && close pending\n", __func__); - ret = -EAGAIN; - goto open_error; - } - - pr_debug("CAIF: %s(): Wait for close response" - " from remote...\n", __func__); - - release_sock(&cf_sk->sk); - - /* - * Blocking mode; close is pending and we need to wait - * for its conclusion. - */ - result = - wait_event_interruptible(*sk_sleep(&cf_sk->sk), - !STATE_IS_PENDING(cf_sk)); - - lock_sock(&(cf_sk->sk)); - if (result == -ERESTARTSYS) { - pr_debug("CAIF: %s(): wait_event_interruptible" - "woken by a signal (1)", __func__); - ret = -ERESTARTSYS; - goto open_error; - } - } + /* Move to connecting socket, start sending Connect Requests */ + sock->state = SS_CONNECTING; + sk->sk_state = CAIF_CONNECTING; + + dbfs_atomic_inc(&cnt.num_connect_req); + cf_sk->layer.receive = caif_sktrecv_cb; + err = caif_connect_client(&cf_sk->conn_req, + &cf_sk->layer); + if (err < 0) { + cf_sk->sk.sk_socket->state = SS_UNCONNECTED; + cf_sk->sk.sk_state = CAIF_DISCONNECTED; + goto out; } - /* socket is now either closed, pending open or open */ - if (STATE_IS_OPEN(cf_sk) && !STATE_IS_PENDING(cf_sk)) { - /* Open */ - pr_debug("CAIF: %s(): Socket is already opened (cf_sk=%p)" - " check access f_flags = 0x%x file_mode = 0x%x\n", - __func__, cf_sk, mode, cf_sk->file_mode); - - } else { - /* We are closed or pending open. - * If closed: send link setup - * If pending open: link setup already sent (we could have been - * interrupted by a signal last time) - */ - if (!STATE_IS_OPEN(cf_sk)) { - /* First opening of file; connect lower layers: */ - /* Drain queue (very unlikely) */ - drain_queue(cf_sk); - - cf_sk->layer.receive = caif_sktrecv_cb; - SET_STATE_OPEN(cf_sk); - SET_PENDING_ON(cf_sk); - - /* Register this channel. */ - result = - caif_connect_client(&cf_sk->conn_req, - &cf_sk->layer); - if (result < 0) { - pr_debug("CAIF: %s(): can't register channel\n", - __func__); - ret = -EIO; - SET_STATE_CLOSED(cf_sk); - SET_PENDING_OFF(cf_sk); - goto open_error; - } - dbfs_atomic_inc(&cnt.num_init); - } - - /* If opened non-blocking, report "success". 
- */ - if (flags & O_NONBLOCK) { - pr_debug("CAIF: %s(): O_NONBLOCK success\n", - __func__); - ret = -EINPROGRESS; - cf_sk->sk.sk_err = -EINPROGRESS; - goto open_error; - } - - trace_printk("CAIF: %s(): Wait for connect response\n", - __func__); + err = -EINPROGRESS; +wait_connect: - /* release lock before waiting */ - release_sock(&cf_sk->sk); - - result = - wait_event_interruptible(*sk_sleep(&cf_sk->sk), - !STATE_IS_PENDING(cf_sk)); - - lock_sock(&(cf_sk->sk)); - - if (result == -ERESTARTSYS) { - pr_debug("CAIF: %s(): wait_event_interruptible" - "woken by a signal (2)", __func__); - ret = -ERESTARTSYS; - goto open_error; - } - - if (!STATE_IS_OPEN(cf_sk)) { - /* Lower layers said "no" */ - pr_debug("CAIF: %s(): Closed received\n", __func__); - ret = -EPIPE; - goto open_error; - } + if (sk->sk_state != CAIF_CONNECTED && (flags & O_NONBLOCK)) + goto out; - trace_printk("CAIF: %s(): Connect received\n", __func__); + timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); + + release_sock(sk); + err = wait_event_interruptible_timeout(*sk_sleep(sk), + sk->sk_state != CAIF_CONNECTING, + timeo); + lock_sock(sk); + if (err < 0) + goto out; /* -ERESTARTSYS */ + if (err == 0 && sk->sk_state != CAIF_CONNECTED) { + err = -ETIMEDOUT; + goto out; } - /* Open is ok */ - cf_sk->file_mode |= mode; - trace_printk("CAIF: %s(): Connected - file mode = %x\n", - __func__, cf_sk->file_mode); - - release_sock(&cf_sk->sk); - return 0; -open_error: - sock->state = SS_UNCONNECTED; - release_sock(&cf_sk->sk); + if (sk->sk_state != CAIF_CONNECTED) { + sock->state = SS_UNCONNECTED; + err = sock_error(sk); + if (!err) + err = -ECONNREFUSED; + goto out; + } + sock->state = SS_CONNECTED; + err = 0; out: - return ret; + release_sock(sk); + return err; } -static int caif_shutdown(struct socket *sock, int how) + +/* + * caif_release() - Disconnect a CAIF Socket + * Copied and modified af_irda.c:irda_release(). + */ +static int caif_release(struct socket *sock) { - struct caifsock *cf_sk = NULL; - int result = 0; - int tx_flow_state_was_on; struct sock *sk = sock->sk; + struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); + int res = 0; - trace_printk("CAIF: %s(): enter\n", __func__); - pr_debug("f_flags=%x\n", sock->file->f_flags); - - if (how != SHUT_RDWR) - return -EOPNOTSUPP; - - cf_sk = container_of(sk, struct caifsock, sk); - if (cf_sk == NULL) { - pr_debug("CAIF: %s(): COULD NOT FIND SOCKET\n", __func__); - return -EBADF; - } - - /* I want to be alone on cf_sk (except status queue) */ - lock_sock(&(cf_sk->sk)); - sock_hold(&cf_sk->sk); - - /* IS_CLOSED have double meaning: - * 1) Spontanous Remote Shutdown Request. - * 2) Ack on a channel teardown(disconnect) - * Must clear bit in case we previously received - * remote shudown request. 
- */ - if (STATE_IS_OPEN(cf_sk) && !STATE_IS_PENDING(cf_sk)) { - SET_STATE_CLOSED(cf_sk); - SET_PENDING_ON(cf_sk); - tx_flow_state_was_on = TX_FLOW_IS_ON(cf_sk); - SET_TX_FLOW_OFF(cf_sk); - - /* Hold the socket until DEINIT_RSP is received */ - sock_hold(&cf_sk->sk); - result = caif_disconnect_client(&cf_sk->layer); - - if (result < 0) { - pr_debug("CAIF: %s(): " - "caif_disconnect_client() failed\n", - __func__); - SET_STATE_CLOSED(cf_sk); - SET_PENDING_OFF(cf_sk); - SET_TX_FLOW_OFF(cf_sk); - release_sock(&cf_sk->sk); - sock_put(&cf_sk->sk); - return -EIO; - } + if (!sk) + return 0; - } - if (STATE_IS_REMOTE_SHUTDOWN(cf_sk)) { - SET_PENDING_OFF(cf_sk); - SET_REMOTE_SHUTDOWN_OFF(cf_sk); - } + set_tx_flow_off(cf_sk); /* - * Socket is no longer in state pending close, - * and we can release the reference. + * Ensure that packets are not queued after this point in time. + * caif_queue_rcv_skb checks SOCK_DEAD holding the queue lock, + * this ensures no packets when sock is dead. */ + spin_lock(&sk->sk_receive_queue.lock); + sock_set_flag(sk, SOCK_DEAD); + spin_unlock(&sk->sk_receive_queue.lock); + sock->sk = NULL; - dbfs_atomic_inc(&cnt.num_close); - drain_queue(cf_sk); - SET_RX_FLOW_ON(cf_sk); - cf_sk->file_mode = 0; - sock_put(&cf_sk->sk); - release_sock(&cf_sk->sk); - if (!result && (sock->file->f_flags & O_NONBLOCK)) { - pr_debug("nonblocking shutdown returing -EAGAIN\n"); - return -EAGAIN; - } else - return result; -} - -static ssize_t caif_sock_no_sendpage(struct socket *sock, - struct page *page, - int offset, size_t size, int flags) -{ - return -EOPNOTSUPP; -} - -/* This function is called as part of close. */ -static int caif_release(struct socket *sock) -{ - struct sock *sk = sock->sk; - struct caifsock *cf_sk = NULL; - int res; - caif_assert(sk != NULL); - cf_sk = container_of(sk, struct caifsock, sk); + dbfs_atomic_inc(&cnt.num_disconnect); if (cf_sk->debugfs_socket_dir != NULL) debugfs_remove_recursive(cf_sk->debugfs_socket_dir); - res = caif_shutdown(sock, SHUT_RDWR); - if (res && res != -EINPROGRESS) - return res; - - /* - * FIXME: Shutdown should probably be possible to do async - * without flushing queues, allowing reception of frames while - * waiting for DEINIT_IND. - * Release should always block, to allow secure decoupling of - * CAIF stack. - */ - if (!(sock->file->f_flags & O_NONBLOCK)) { - res = wait_event_interruptible(*sk_sleep(&cf_sk->sk), - !STATE_IS_PENDING(cf_sk)); - - if (res == -ERESTARTSYS) { - pr_debug("CAIF: %s(): wait_event_interruptible" - "woken by a signal (1)", __func__); - } - } lock_sock(&(cf_sk->sk)); + sk->sk_state = CAIF_DISCONNECTED; + sk->sk_shutdown = SHUTDOWN_MASK; - sock->sk = NULL; + if (cf_sk->sk.sk_socket->state == SS_CONNECTED || + cf_sk->sk.sk_socket->state == SS_CONNECTING) + res = caif_disconnect_client(&cf_sk->layer); - /* Detach the socket from its process context by making it orphan. */ - sock_orphan(sk); + cf_sk->sk.sk_socket->state = SS_DISCONNECTING; + wake_up_interruptible_poll(sk_sleep(sk), POLLERR|POLLHUP); - /* - * Setting SHUTDOWN_MASK means that both send and receive are shutdown - * for the socket. - */ - sk->sk_shutdown = SHUTDOWN_MASK; + sock_orphan(sk); + cf_sk->layer.dn = NULL; + sk_stream_kill_queues(&cf_sk->sk); + release_sock(sk); + sock_put(sk); + return res; +} - /* - * Set the socket state to closed, the TCP_CLOSE macro is used when - * closing any socket. 
- */ +/* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */ +static unsigned int caif_poll(struct file *file, + struct socket *sock, poll_table *wait) +{ + struct sock *sk = sock->sk; + unsigned int mask; + struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); - /* Flush out this sockets receive queue. */ - drain_queue(cf_sk); + sock_poll_wait(file, sk_sleep(sk), wait); + mask = 0; - /* Finally release the socket. */ - SET_STATE_PENDING_DESTROY(cf_sk); + /* exceptional events? */ + if (sk->sk_err) + mask |= POLLERR; + if (sk->sk_shutdown == SHUTDOWN_MASK) + mask |= POLLHUP; + if (sk->sk_shutdown & RCV_SHUTDOWN) + mask |= POLLRDHUP; - release_sock(&cf_sk->sk); + /* readable? */ + if (!skb_queue_empty(&sk->sk_receive_queue) || + (sk->sk_shutdown & RCV_SHUTDOWN)) + mask |= POLLIN | POLLRDNORM; - sock_put(sk); + /* Connection-based need to check for termination and startup */ + if (sk->sk_state == CAIF_DISCONNECTED) + mask |= POLLHUP; /* - * The rest of the cleanup will be handled from the - * caif_sock_destructor + * we set writable also when the other side has shut down the + * connection. This prevents stuck sockets. */ - return res; + if (sock_writeable(sk) && tx_flow_is_on(cf_sk)) + mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + + return mask; } -static const struct proto_ops caif_ops = { +static const struct proto_ops caif_seqpacket_ops = { + .family = PF_CAIF, + .owner = THIS_MODULE, + .release = caif_release, + .bind = sock_no_bind, + .connect = caif_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = sock_no_getname, + .poll = caif_poll, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = setsockopt, + .getsockopt = sock_no_getsockopt, + .sendmsg = caif_seqpkt_sendmsg, + .recvmsg = caif_seqpkt_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static const struct proto_ops caif_stream_ops = { .family = PF_CAIF, .owner = THIS_MODULE, .release = caif_release, @@ -1162,73 +1066,62 @@ static const struct proto_ops caif_ops = { .poll = caif_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, - .shutdown = caif_shutdown, + .shutdown = sock_no_shutdown, .setsockopt = setsockopt, .getsockopt = sock_no_getsockopt, - .sendmsg = caif_sendmsg, - .recvmsg = caif_recvmsg, + .sendmsg = caif_stream_sendmsg, + .recvmsg = caif_stream_recvmsg, .mmap = sock_no_mmap, - .sendpage = caif_sock_no_sendpage, + .sendpage = sock_no_sendpage, }; /* This function is called when a socket is finally destroyed. */ static void caif_sock_destructor(struct sock *sk) { - struct caifsock *cf_sk = NULL; - cf_sk = container_of(sk, struct caifsock, sk); - /* Error checks. 
*/ + struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); caif_assert(!atomic_read(&sk->sk_wmem_alloc)); caif_assert(sk_unhashed(sk)); caif_assert(!sk->sk_socket); if (!sock_flag(sk, SOCK_DEAD)) { - pr_debug("CAIF: %s(): 0x%p", __func__, sk); + pr_info("Attempt to release alive CAIF socket: %p\n", sk); return; } - - if (STATE_IS_OPEN(cf_sk)) { - pr_debug("CAIF: %s(): socket is opened (cf_sk=%p)" - " file_mode = 0x%x\n", __func__, - cf_sk, cf_sk->file_mode); - return; - } - drain_queue(cf_sk); - kfree(cf_sk->pktq); - - trace_printk("CAIF: %s(): caif_sock_destructor: Removing socket %s\n", - __func__, cf_sk->name); - atomic_dec(&caif_nr_socks); + sk_stream_kill_queues(&cf_sk->sk); + dbfs_atomic_dec(&cnt.caif_nr_socks); } static int caif_create(struct net *net, struct socket *sock, int protocol, - int kern) + int kern) { struct sock *sk = NULL; struct caifsock *cf_sk = NULL; - int result = 0; static struct proto prot = {.name = "PF_CAIF", .owner = THIS_MODULE, .obj_size = sizeof(struct caifsock), }; + if (!capable(CAP_SYS_ADMIN) && !capable(CAP_NET_ADMIN)) + return -EPERM; /* * The sock->type specifies the socket type to use. - * in SEQPACKET mode packet boundaries are enforced. + * The CAIF socket is a packet stream in the sense + * that it is packet based. CAIF trusts the reliability + * of the link, no resending is implemented. */ - if (sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM) + if (sock->type == SOCK_SEQPACKET) + sock->ops = &caif_seqpacket_ops; + else if (sock->type == SOCK_STREAM) + sock->ops = &caif_stream_ops; + else return -ESOCKTNOSUPPORT; - if (net != &init_net) - return -EAFNOSUPPORT; - if (protocol < 0 || protocol >= CAIFPROTO_MAX) return -EPROTONOSUPPORT; /* - * Set the socket state to unconnected. The socket state is really - * not used at all in the net/core or socket.c but the + * Set the socket state to unconnected. The socket state + * is really not used at all in the net/core or socket.c but the * initialization makes sure that sock->state is not uninitialized. */ - sock->state = SS_UNCONNECTED; - sk = sk_alloc(net, PF_CAIF, GFP_KERNEL, &prot); if (!sk) return -ENOMEM; @@ -1238,11 +1131,9 @@ static int caif_create(struct net *net, struct socket *sock, int protocol, /* Store the protocol */ sk->sk_protocol = (unsigned char) protocol; - spin_lock_init(&cf_sk->read_queue_len_lock); - - /* Fill in some information concerning the misc socket. */ - snprintf(cf_sk->name, sizeof(cf_sk->name), "cf_sk%d", - atomic_read(&caif_nr_socks)); + /* Sendbuf dictates the amount of outbound packets not yet sent */ + sk->sk_sndbuf = CAIF_DEF_SNDBUF; + sk->sk_rcvbuf = CAIF_DEF_RCVBUF; /* * Lock in order to try to stop someone from opening the socket @@ -1252,108 +1143,85 @@ static int caif_create(struct net *net, struct socket *sock, int protocol, /* Initialize the nozero default sock structure data. 
*/ sock_init_data(sock, sk); - sock->ops = &caif_ops; sk->sk_destruct = caif_sock_destructor; - sk->sk_sndbuf = caif_sockbuf_size; - sk->sk_rcvbuf = caif_sockbuf_size; - cf_sk->pktq = cfpktq_create(); + mutex_init(&cf_sk->readlock); /* single task reading lock */ + cf_sk->layer.ctrlcmd = caif_ctrl_cb; + cf_sk->sk.sk_socket->state = SS_UNCONNECTED; + cf_sk->sk.sk_state = CAIF_DISCONNECTED; - if (!cf_sk->pktq) { - pr_err("CAIF: %s(): queue create failed.\n", __func__); - result = -ENOMEM; - release_sock(&cf_sk->sk); - goto err_failed; - } - cf_sk->layer.ctrlcmd = caif_sktflowctrl_cb; - SET_STATE_CLOSED(cf_sk); - SET_PENDING_OFF(cf_sk); - SET_TX_FLOW_OFF(cf_sk); - SET_RX_FLOW_ON(cf_sk); + set_tx_flow_off(cf_sk); + set_rx_flow_on(cf_sk); /* Set default options on configuration */ cf_sk->conn_req.priority = CAIF_PRIO_NORMAL; - cf_sk->conn_req.link_selector = CAIF_LINK_HIGH_BANDW; + cf_sk->conn_req.link_selector = CAIF_LINK_LOW_LATENCY; cf_sk->conn_req.protocol = protocol; /* Increase the number of sockets created. */ - atomic_inc(&caif_nr_socks); + dbfs_atomic_inc(&cnt.caif_nr_socks); +#ifdef CONFIG_DEBUG_FS if (!IS_ERR(debugfsdir)) { + /* Fill in some information concerning the misc socket. */ + snprintf(cf_sk->name, sizeof(cf_sk->name), "cfsk%d", + atomic_read(&cnt.caif_nr_socks)); + cf_sk->debugfs_socket_dir = debugfs_create_dir(cf_sk->name, debugfsdir); - debugfs_create_u32("conn_state", S_IRUSR | S_IWUSR, - cf_sk->debugfs_socket_dir, &cf_sk->conn_state); + debugfs_create_u32("sk_state", S_IRUSR | S_IWUSR, + cf_sk->debugfs_socket_dir, + (u32 *) &cf_sk->sk.sk_state); debugfs_create_u32("flow_state", S_IRUSR | S_IWUSR, cf_sk->debugfs_socket_dir, &cf_sk->flow_state); - debugfs_create_u32("read_queue_len", S_IRUSR | S_IWUSR, + debugfs_create_u32("sk_rmem_alloc", S_IRUSR | S_IWUSR, + cf_sk->debugfs_socket_dir, + (u32 *) &cf_sk->sk.sk_rmem_alloc); + debugfs_create_u32("sk_wmem_alloc", S_IRUSR | S_IWUSR, cf_sk->debugfs_socket_dir, - (u32 *) &cf_sk->read_queue_len); + (u32 *) &cf_sk->sk.sk_wmem_alloc); debugfs_create_u32("identity", S_IRUSR | S_IWUSR, cf_sk->debugfs_socket_dir, (u32 *) &cf_sk->layer.id); } +#endif release_sock(&cf_sk->sk); return 0; -err_failed: - sk_free(sk); - return result; } + static struct net_proto_family caif_family_ops = { .family = PF_CAIF, .create = caif_create, .owner = THIS_MODULE, }; -static int af_caif_init(void) +int af_caif_init(void) { - int err; - err = sock_register(&caif_family_ops); - + int err = sock_register(&caif_family_ops); if (!err) return err; - return 0; } static int __init caif_sktinit_module(void) { - int stat; #ifdef CONFIG_DEBUG_FS - debugfsdir = debugfs_create_dir("chnl_skt", NULL); + debugfsdir = debugfs_create_dir("caif_sk", NULL); if (!IS_ERR(debugfsdir)) { - debugfs_create_u32("skb_inuse", S_IRUSR | S_IWUSR, - debugfsdir, - (u32 *) &cnt.skb_in_use); - debugfs_create_u32("skb_alloc", S_IRUSR | S_IWUSR, - debugfsdir, - (u32 *) &cnt.skb_alloc); - debugfs_create_u32("skb_free", S_IRUSR | S_IWUSR, - debugfsdir, - (u32 *) &cnt.skb_free); debugfs_create_u32("num_sockets", S_IRUSR | S_IWUSR, debugfsdir, - (u32 *) &caif_nr_socks); - debugfs_create_u32("num_open", S_IRUSR | S_IWUSR, + (u32 *) &cnt.caif_nr_socks); + debugfs_create_u32("num_connect_req", S_IRUSR | S_IWUSR, debugfsdir, - (u32 *) &cnt.num_open); - debugfs_create_u32("num_close", S_IRUSR | S_IWUSR, + (u32 *) &cnt.num_connect_req); + debugfs_create_u32("num_connect_resp", S_IRUSR | S_IWUSR, debugfsdir, - (u32 *) &cnt.num_close); - debugfs_create_u32("num_init", S_IRUSR | S_IWUSR, + (u32 *) 
&cnt.num_connect_resp); + debugfs_create_u32("num_connect_fail_resp", S_IRUSR | S_IWUSR, debugfsdir, - (u32 *) &cnt.num_init); - debugfs_create_u32("num_init_resp", S_IRUSR | S_IWUSR, + (u32 *) &cnt.num_connect_fail_resp); + debugfs_create_u32("num_disconnect", S_IRUSR | S_IWUSR, debugfsdir, - (u32 *) &cnt.num_init_resp); - debugfs_create_u32("num_init_fail_resp", S_IRUSR | S_IWUSR, - debugfsdir, - (u32 *) &cnt.num_init_fail_resp); - debugfs_create_u32("num_deinit", S_IRUSR | S_IWUSR, - debugfsdir, - (u32 *) &cnt.num_deinit); - debugfs_create_u32("num_deinit_resp", S_IRUSR | S_IWUSR, - debugfsdir, - (u32 *) &cnt.num_deinit_resp); + (u32 *) &cnt.num_disconnect); debugfs_create_u32("num_remote_shutdown_ind", S_IRUSR | S_IWUSR, debugfsdir, (u32 *) &cnt.num_remote_shutdown_ind); @@ -1371,13 +1239,7 @@ static int __init caif_sktinit_module(void) (u32 *) &cnt.num_rx_flow_on); } #endif - stat = af_caif_init(); - if (stat) { - pr_err("CAIF: %s(): Failed to initialize CAIF socket layer.", - __func__); - return stat; - } - return 0; + return af_caif_init(); } static void __exit caif_sktexit_module(void) @@ -1386,6 +1248,5 @@ static void __exit caif_sktexit_module(void) if (debugfsdir != NULL) debugfs_remove_recursive(debugfsdir); } - module_init(caif_sktinit_module); module_exit(caif_sktexit_module); -- cgit v1.2.2 From 8391c4aab1aa4f47a9dab2c1ec3ebd2cbf09df1b Mon Sep 17 00:00:00 2001 From: Sjur Braendeland Date: Wed, 28 Apr 2010 08:54:39 +0000 Subject: caif: Bugfixes in CAIF netdevice for close and flow control Changes: o Bugfix: Flow control was causing the device to be destroyed. o Bugfix: Handle CAIF channel connect failures. o If the underlying link layer is gone the net-device is no longer removed, but closed. Signed-off-by: Sjur Braendeland Signed-off-by: David S. Miller --- net/caif/chnl_net.c | 130 +++++++++++++++++++++++++++++----------------------- 1 file changed, 73 insertions(+), 57 deletions(-) (limited to 'net') diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index f622ff1d39ba..610966abe2dc 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -22,10 +22,10 @@ #include #include -#define CAIF_CONNECT_TIMEOUT 30 +/* GPRS PDP connection has MTU to 1500 */ #define SIZE_MTU 1500 -#define SIZE_MTU_MAX 4080 -#define SIZE_MTU_MIN 68 +/* 5 sec. connect timeout */ +#define CONNECT_TIMEOUT (5 * HZ) #define CAIF_NET_DEFAULT_QUEUE_LEN 500 #undef pr_debug @@ -37,6 +37,13 @@ static LIST_HEAD(chnl_net_list); MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("caif"); +enum caif_states { + CAIF_CONNECTED = 1, + CAIF_CONNECTING, + CAIF_DISCONNECTED, + CAIF_SHUTDOWN +}; + struct chnl_net { struct cflayer chnl; struct net_device_stats stats; @@ -47,7 +54,7 @@ struct chnl_net { wait_queue_head_t netmgmt_wq; /* Flow status to remember and control the transmission. */ bool flowenabled; - bool pending_close; + enum caif_states state; }; static void robust_list_del(struct list_head *delete_node) @@ -58,15 +65,16 @@ static void robust_list_del(struct list_head *delete_node) list_for_each_safe(list_node, n, &chnl_net_list) { if (list_node == delete_node) { list_del(list_node); - break; + return; } } + WARN_ON(1); } static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt) { struct sk_buff *skb; - struct chnl_net *priv = NULL; + struct chnl_net *priv = container_of(layr, struct chnl_net, chnl); int pktlen; int err = 0; @@ -91,7 +99,6 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt) else skb->ip_summed = CHECKSUM_NONE; - /* FIXME: Drivers should call this in tasklet context. 
*/ if (in_interrupt()) netif_rx(skb); else @@ -117,23 +124,25 @@ static void close_work(struct work_struct *work) struct chnl_net *dev = NULL; struct list_head *list_node; struct list_head *_tmp; - rtnl_lock(); + /* May be called with or without RTNL lock held */ + int islocked = rtnl_is_locked(); + if (!islocked) + rtnl_lock(); list_for_each_safe(list_node, _tmp, &chnl_net_list) { dev = list_entry(list_node, struct chnl_net, list_field); - if (!dev->pending_close) - continue; - list_del(list_node); - delete_device(dev); + if (dev->state == CAIF_SHUTDOWN) + dev_close(dev->netdev); } - rtnl_unlock(); + if (!islocked) + rtnl_unlock(); } static DECLARE_WORK(close_worker, close_work); static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow, int phyid) { - struct chnl_net *priv; - pr_debug("CAIF: %s(): NET flowctrl func called flow: %s.\n", + struct chnl_net *priv = container_of(layr, struct chnl_net, chnl); + pr_debug("CAIF: %s(): NET flowctrl func called flow: %s\n", __func__, flow == CAIF_CTRLCMD_FLOW_ON_IND ? "ON" : flow == CAIF_CTRLCMD_INIT_RSP ? "INIT" : @@ -143,21 +152,31 @@ static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow, flow == CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND ? "REMOTE_SHUTDOWN" : "UKNOWN CTRL COMMAND"); - priv = container_of(layr, struct chnl_net, chnl); + switch (flow) { case CAIF_CTRLCMD_FLOW_OFF_IND: + priv->flowenabled = false; + netif_stop_queue(priv->netdev); + break; case CAIF_CTRLCMD_DEINIT_RSP: + priv->state = CAIF_DISCONNECTED; + break; case CAIF_CTRLCMD_INIT_FAIL_RSP: + priv->state = CAIF_DISCONNECTED; + wake_up_interruptible(&priv->netmgmt_wq); + break; case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND: - priv->flowenabled = false; + priv->state = CAIF_SHUTDOWN; netif_tx_disable(priv->netdev); - pr_warning("CAIF: %s(): done\n", __func__); - priv->pending_close = 1; schedule_work(&close_worker); break; case CAIF_CTRLCMD_FLOW_ON_IND: + priv->flowenabled = true; + netif_wake_queue(priv->netdev); + break; case CAIF_CTRLCMD_INIT_RSP: + priv->state = CAIF_CONNECTED; priv->flowenabled = true; netif_wake_queue(priv->netdev); wake_up_interruptible(&priv->netmgmt_wq); @@ -194,9 +213,6 @@ static int chnl_net_start_xmit(struct sk_buff *skb, struct net_device *dev) pkt = cfpkt_fromnative(CAIF_DIR_OUT, (void *) skb); - pr_debug("CAIF: %s(): transmit inst %s %d,%p\n", - __func__, dev->name, priv->chnl.dn->id, &priv->chnl.dn); - /* Send the packet down the stack. 
*/ result = priv->chnl.dn->transmit(priv->chnl.dn, pkt); if (result) { @@ -217,61 +233,59 @@ static int chnl_net_open(struct net_device *dev) struct chnl_net *priv = NULL; int result = -1; ASSERT_RTNL(); - priv = netdev_priv(dev); - pr_debug("CAIF: %s(): dev name: %s\n", __func__, priv->name); - if (!priv) { pr_debug("CAIF: %s(): chnl_net_open: no priv\n", __func__); return -ENODEV; } - result = caif_connect_client(&priv->conn_req, &priv->chnl); - if (result != 0) { - pr_debug("CAIF: %s(): err: " - "Unable to register and open device, Err:%d\n", - __func__, - result); - return -ENODEV; + + if (priv->state != CAIF_CONNECTING) { + priv->state = CAIF_CONNECTING; + result = caif_connect_client(&priv->conn_req, &priv->chnl); + if (result != 0) { + priv->state = CAIF_DISCONNECTED; + pr_debug("CAIF: %s(): err: " + "Unable to register and open device," + " Err:%d\n", + __func__, + result); + return result; + } } - result = wait_event_interruptible(priv->netmgmt_wq, priv->flowenabled); + + result = wait_event_interruptible_timeout(priv->netmgmt_wq, + priv->state != CAIF_CONNECTING, + CONNECT_TIMEOUT); if (result == -ERESTARTSYS) { pr_debug("CAIF: %s(): wait_event_interruptible" " woken by a signal\n", __func__); return -ERESTARTSYS; - } else - pr_debug("CAIF: %s(): Flow on recieved\n", __func__); + } + if (result == 0) { + pr_debug("CAIF: %s(): connect timeout\n", __func__); + caif_disconnect_client(&priv->chnl); + priv->state = CAIF_DISCONNECTED; + pr_debug("CAIF: %s(): state disconnected\n", __func__); + return -ETIMEDOUT; + } + if (priv->state != CAIF_CONNECTED) { + pr_debug("CAIF: %s(): connect failed\n", __func__); + return -ECONNREFUSED; + } + pr_debug("CAIF: %s(): CAIF Netdevice connected\n", __func__); return 0; } static int chnl_net_stop(struct net_device *dev) { struct chnl_net *priv; - int result = -1; + ASSERT_RTNL(); priv = netdev_priv(dev); - - result = caif_disconnect_client(&priv->chnl); - if (result != 0) { - pr_debug("CAIF: %s(): chnl_net_stop: err: " - "Unable to STOP device, Err:%d\n", - __func__, result); - return -EBUSY; - } - result = wait_event_interruptible(priv->netmgmt_wq, - !priv->flowenabled); - - if (result == -ERESTARTSYS) { - pr_debug("CAIF: %s(): wait_event_interruptible woken by" - " signal, signal_pending(current) = %d\n", - __func__, - signal_pending(current)); - } else { - pr_debug("CAIF: %s(): disconnect received\n", __func__); - - } - + priv->state = CAIF_DISCONNECTED; + caif_disconnect_client(&priv->chnl); return 0; } @@ -377,6 +391,8 @@ static int ipcaif_newlink(struct net *src_net, struct net_device *dev, ASSERT_RTNL(); caifdev = netdev_priv(dev); caif_netlink_parms(data, &caifdev->conn_req); + dev_net_set(caifdev->netdev, src_net); + ret = register_netdevice(dev); if (ret) pr_warning("CAIF: %s(): device rtml registration failed\n", -- cgit v1.2.2 From 2c485209a5c73fe94f4074e381cdc1ee24ca94a8 Mon Sep 17 00:00:00 2001 From: Sjur Braendeland Date: Wed, 28 Apr 2010 08:54:40 +0000 Subject: Bugfix: Link selection was swapped in switch. Signed-off-by: Sjur Braendeland Signed-off-by: David S. 
Miller --- net/caif/caif_dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 0145bae0274f..024fd5bb2d39 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -247,10 +247,10 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what, switch (caifdev->link_select) { case CAIF_LINK_HIGH_BANDW: - pref = CFPHYPREF_LOW_LAT; + pref = CFPHYPREF_HIGH_BW; break; case CAIF_LINK_LOW_LATENCY: - pref = CFPHYPREF_HIGH_BW; + pref = CFPHYPREF_LOW_LAT; break; default: pref = CFPHYPREF_HIGH_BW; -- cgit v1.2.2 From f0b058b61711ebf5be94d6865ca7b2c259b71d37 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 28 Apr 2010 15:17:03 +0200 Subject: mac80211: do not wip out old supported rates Use old supported rates, if AP do not provide supported rates information element in a new managment frame. Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville --- net/mac80211/scan.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 2b1f1f3d6a58..a9d40584e383 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -84,7 +84,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local, { struct cfg80211_bss *cbss; struct ieee80211_bss *bss; - int clen; + int clen, srlen; s32 signal = 0; if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) @@ -113,23 +113,24 @@ ieee80211_bss_info_update(struct ieee80211_local *local, bss->dtim_period = tim_ie->dtim_period; } - bss->supp_rates_len = 0; + /* replace old supported rates if we get new values */ + srlen = 0; if (elems->supp_rates) { - clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len; + clen = IEEE80211_MAX_SUPP_RATES; if (clen > elems->supp_rates_len) clen = elems->supp_rates_len; - memcpy(&bss->supp_rates[bss->supp_rates_len], elems->supp_rates, - clen); - bss->supp_rates_len += clen; + memcpy(bss->supp_rates, elems->supp_rates, clen); + srlen += clen; } if (elems->ext_supp_rates) { - clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len; + clen = IEEE80211_MAX_SUPP_RATES - srlen; if (clen > elems->ext_supp_rates_len) clen = elems->ext_supp_rates_len; - memcpy(&bss->supp_rates[bss->supp_rates_len], - elems->ext_supp_rates, clen); - bss->supp_rates_len += clen; + memcpy(bss->supp_rates + srlen, elems->ext_supp_rates, clen); + srlen += clen; } + if (srlen) + bss->supp_rates_len = srlen; bss->wmm_used = elems->wmm_param || elems->wmm_info; bss->uapsd_supported = is_uapsd_supported(elems); -- cgit v1.2.2 From 76f273640134f3eb8257179cd5b3bc6ba5fe4a96 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 28 Apr 2010 17:03:15 +0200 Subject: mac80211: fix supported rates IE if AP doesn't give us it's rates If AP do not provide us supported rates before assiociation, send all rates we are supporting instead of empty information element. v1 -> v2: Add comment. Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. 
Linville --- net/mac80211/work.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/mac80211/work.c b/net/mac80211/work.c index 7bd8670379de..4c7de72c27e7 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -212,15 +212,25 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, sband = local->hw.wiphy->bands[wk->chan->band]; - /* - * Get all rates supported by the device and the AP as - * some APs don't like getting a superset of their rates - * in the association request (e.g. D-Link DAP 1353 in - * b-only mode)... - */ - rates_len = ieee80211_compatible_rates(wk->assoc.supp_rates, - wk->assoc.supp_rates_len, - sband, &rates); + if (wk->assoc.supp_rates_len) { + /* + * Get all rates supported by the device and the AP as + * some APs don't like getting a superset of their rates + * in the association request (e.g. D-Link DAP 1353 in + * b-only mode)... + */ + rates_len = ieee80211_compatible_rates(wk->assoc.supp_rates, + wk->assoc.supp_rates_len, + sband, &rates); + } else { + /* + * In case AP not provide any supported rates information + * before association, we send information element(s) with + * all rates that we support. + */ + rates = ~0; + rates_len = sband->n_bitrates; + } skb = alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + /* bit too much but doesn't matter */ -- cgit v1.2.2 From 8fc214ba958648ab111a173f2db7b0e1dfed5b11 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Apr 2010 17:40:43 +0200 Subject: mac80211: notify driver about IBSS status Some drivers (e.g. iwlwifi) need to know and try to figure it out based on other things, but making it explicit is definitely better. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ibss.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 01974c2510a8..a87e309e3b99 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -170,6 +170,8 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, bss_change |= BSS_CHANGED_BSSID; bss_change |= BSS_CHANGED_BEACON; bss_change |= BSS_CHANGED_BEACON_ENABLED; + bss_change |= BSS_CHANGED_IBSS; + sdata->vif.bss_conf.ibss_joined = true; ieee80211_bss_info_change_notify(sdata, bss_change); ieee80211_sta_def_wmm_params(sdata, sband->n_bitrates, supp_rates); @@ -950,7 +952,9 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) kfree(sdata->u.ibss.ie); skb = sdata->u.ibss.presp; rcu_assign_pointer(sdata->u.ibss.presp, NULL); - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); + sdata->vif.bss_conf.ibss_joined = false; + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED | + BSS_CHANGED_IBSS); synchronize_rcu(); kfree_skb(skb); -- cgit v1.2.2 From 5fa782c2f5ef6c2e4f04d3e228412c9b4a4c8809 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 28 Apr 2010 10:30:59 +0000 Subject: sctp: Fix skb_over_panic resulting from multiple invalid parameter errors (CVE-2010-1173) (v4) Ok, version 4 Change Notes: 1) Minor cleanups, from Vlads notes Summary: Hey- Recently, it was reported to me that the kernel could oops in the following way: <5> kernel BUG at net/core/skbuff.c:91! 
<5> invalid operand: 0000 [#1] <5> Modules linked in: sctp netconsole nls_utf8 autofs4 sunrpc iptable_filter ip_tables cpufreq_powersave parport_pc lp parport vmblock(U) vsock(U) vmci(U) vmxnet(U) vmmemctl(U) vmhgfs(U) acpiphp dm_mirror dm_mod button battery ac md5 ipv6 uhci_hcd ehci_hcd snd_ens1371 snd_rawmidi snd_seq_device snd_pcm_oss snd_mixer_oss snd_pcm snd_timer snd_page_alloc snd_ac97_codec snd soundcore pcnet32 mii floppy ext3 jbd ata_piix libata mptscsih mptsas mptspi mptscsi mptbase sd_mod scsi_mod <5> CPU: 0 <5> EIP: 0060:[] Not tainted VLI <5> EFLAGS: 00010216 (2.6.9-89.0.25.EL) <5> EIP is at skb_over_panic+0x1f/0x2d <5> eax: 0000002c ebx: c033f461 ecx: c0357d96 edx: c040fd44 <5> esi: c033f461 edi: df653280 ebp: 00000000 esp: c040fd40 <5> ds: 007b es: 007b ss: 0068 <5> Process swapper (pid: 0, threadinfo=c040f000 task=c0370be0) <5> Stack: c0357d96 e0c29478 00000084 00000004 c033f461 df653280 d7883180 e0c2947d <5> 00000000 00000080 df653490 00000004 de4f1ac0 de4f1ac0 00000004 df653490 <5> 00000001 e0c2877a 08000800 de4f1ac0 df653490 00000000 e0c29d2e 00000004 <5> Call Trace: <5> [] sctp_addto_chunk+0xb0/0x128 [sctp] <5> [] sctp_addto_chunk+0xb5/0x128 [sctp] <5> [] sctp_init_cause+0x3f/0x47 [sctp] <5> [] sctp_process_unk_param+0xac/0xb8 [sctp] <5> [] sctp_verify_init+0xcc/0x134 [sctp] <5> [] sctp_sf_do_5_1B_init+0x83/0x28e [sctp] <5> [] sctp_do_sm+0x41/0x77 [sctp] <5> [] cache_grow+0x140/0x233 <5> [] sctp_endpoint_bh_rcv+0xc5/0x108 [sctp] <5> [] sctp_inq_push+0xe/0x10 [sctp] <5> [] sctp_rcv+0x454/0x509 [sctp] <5> [] ipt_hook+0x17/0x1c [iptable_filter] <5> [] nf_iterate+0x40/0x81 <5> [] ip_local_deliver_finish+0x0/0x151 <5> [] ip_local_deliver_finish+0xc6/0x151 <5> [] nf_hook_slow+0x83/0xb5 <5> [] ip_local_deliver+0x1a2/0x1a9 <5> [] ip_local_deliver_finish+0x0/0x151 <5> [] ip_rcv+0x334/0x3b4 <5> [] netif_receive_skb+0x320/0x35b <5> [] init_stall_timer+0x67/0x6a [uhci_hcd] <5> [] process_backlog+0x6c/0xd9 <5> [] net_rx_action+0xfe/0x1f8 <5> [] __do_softirq+0x35/0x79 <5> [] handle_IRQ_event+0x0/0x4f <5> [] do_softirq+0x46/0x4d Its an skb_over_panic BUG halt that results from processing an init chunk in which too many of its variable length parameters are in some way malformed. The problem is in sctp_process_unk_param: if (NULL == *errp) *errp = sctp_make_op_error_space(asoc, chunk, ntohs(chunk->chunk_hdr->length)); if (*errp) { sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM, WORD_ROUND(ntohs(param.p->length))); sctp_addto_chunk(*errp, WORD_ROUND(ntohs(param.p->length)), param.v); When we allocate an error chunk, we assume that the worst case scenario requires that we have chunk_hdr->length data allocated, which would be correct nominally, given that we call sctp_addto_chunk for the violating parameter. Unfortunately, we also, in sctp_init_cause insert a sctp_errhdr_t structure into the error chunk, so the worst case situation in which all parameters are in violation requires chunk_hdr->length+(sizeof(sctp_errhdr_t)*param_count) bytes of data. The result of this error is that a deliberately malformed packet sent to a listening host can cause a remote DOS, described in CVE-2010-1173: http://cve.mitre.org/cgi-bin/cvename.cgi?name=2010-1173 I've tested the below fix and confirmed that it fixes the issue. We move to a strategy whereby we allocate a fixed size error chunk and ignore errors we don't have space to report. Tested by me successfully Signed-off-by: Neil Horman Acked-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/sctp/sm_make_chunk.c | 62 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 57 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 0fd5b4c88358..30c1767186b8 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -108,7 +108,7 @@ static const struct sctp_paramhdr prsctp_param = { cpu_to_be16(sizeof(struct sctp_paramhdr)), }; -/* A helper to initialize to initialize an op error inside a +/* A helper to initialize an op error inside a * provided chunk, as most cause codes will be embedded inside an * abort chunk. */ @@ -125,6 +125,29 @@ void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code, chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(sctp_errhdr_t), &err); } +/* A helper to initialize an op error inside a + * provided chunk, as most cause codes will be embedded inside an + * abort chunk. Differs from sctp_init_cause in that it won't oops + * if there isn't enough space in the op error chunk + */ +int sctp_init_cause_fixed(struct sctp_chunk *chunk, __be16 cause_code, + size_t paylen) +{ + sctp_errhdr_t err; + __u16 len; + + /* Cause code constants are now defined in network order. */ + err.cause = cause_code; + len = sizeof(sctp_errhdr_t) + paylen; + err.length = htons(len); + + if (skb_tailroom(chunk->skb) > len) + return -ENOSPC; + chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk, + sizeof(sctp_errhdr_t), + &err); + return 0; +} /* 3.3.2 Initiation (INIT) (1) * * This chunk is used to initiate a SCTP association between two @@ -1132,6 +1155,24 @@ nodata: return retval; } +/* Create an Operation Error chunk of a fixed size, + * specifically, max(asoc->pathmtu, SCTP_DEFAULT_MAXSEGMENT) + * This is a helper function to allocate an error chunk for + * for those invalid parameter codes in which we may not want + * to report all the errors, if the incomming chunk is large + */ +static inline struct sctp_chunk *sctp_make_op_error_fixed( + const struct sctp_association *asoc, + const struct sctp_chunk *chunk) +{ + size_t size = asoc ? asoc->pathmtu : 0; + + if (!size) + size = SCTP_DEFAULT_MAXSEGMENT; + + return sctp_make_op_error_space(asoc, chunk, size); +} + /* Create an Operation Error chunk. */ struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc, const struct sctp_chunk *chunk, @@ -1374,6 +1415,18 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data) return target; } +/* Append bytes to the end of a chunk. Returns NULL if there isn't sufficient + * space in the chunk + */ +void *sctp_addto_chunk_fixed(struct sctp_chunk *chunk, + int len, const void *data) +{ + if (skb_tailroom(chunk->skb) > len) + return sctp_addto_chunk(chunk, len, data); + else + return NULL; +} + /* Append bytes from user space to the end of a chunk. Will panic if * chunk is not big enough. * Returns a kernel err value. @@ -1977,13 +2030,12 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc, * returning multiple unknown parameters. 
*/ if (NULL == *errp) - *errp = sctp_make_op_error_space(asoc, chunk, - ntohs(chunk->chunk_hdr->length)); + *errp = sctp_make_op_error_fixed(asoc, chunk); if (*errp) { - sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM, + sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM, WORD_ROUND(ntohs(param.p->length))); - sctp_addto_chunk(*errp, + sctp_addto_chunk_fixed(*errp, WORD_ROUND(ntohs(param.p->length)), param.v); } else { -- cgit v1.2.2 From 4b0b72f7dd617b13abd1b04c947e15873e011a24 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Apr 2010 14:35:48 -0700 Subject: net: speedup udp receive path Since commit 95766fff ([UDP]: Add memory accounting.), each received packet needs one extra sock_lock()/sock_release() pair. This added latency because of possible backlog handling. Then later, ticket spinlocks added yet another latency source in case of DDOS. This patch introduces lock_sock_bh() and unlock_sock_bh() synchronization primitives, avoiding one atomic operation and backlog processing. skb_free_datagram_locked() uses them instead of full blown lock_sock()/release_sock(). skb is orphaned inside locked section for proper socket memory reclaim, and finally freed outside of it. UDP receive path now take the socket spinlock only once. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/datagram.c | 10 +++++++--- net/ipv4/udp.c | 12 ++++++------ net/ipv6/udp.c | 4 ++-- 3 files changed, 15 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/core/datagram.c b/net/core/datagram.c index 5574a5ddf908..95b851f3d713 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -229,9 +229,13 @@ EXPORT_SYMBOL(skb_free_datagram); void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) { - lock_sock(sk); - skb_free_datagram(sk, skb); - release_sock(sk); + lock_sock_bh(sk); + skb_orphan(skb); + sk_mem_reclaim_partial(sk); + unlock_sock_bh(sk); + + /* skb is now orphaned, might be freed outside of locked section */ + consume_skb(skb); } EXPORT_SYMBOL(skb_free_datagram_locked); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 63eb56b2d873..1f86965ba7d7 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1062,10 +1062,10 @@ static unsigned int first_packet_length(struct sock *sk) spin_unlock_bh(&rcvq->lock); if (!skb_queue_empty(&list_kill)) { - lock_sock(sk); + lock_sock_bh(sk); __skb_queue_purge(&list_kill); sk_mem_reclaim_partial(sk); - release_sock(sk); + unlock_sock_bh(sk); } return res; } @@ -1196,10 +1196,10 @@ out: return err; csum_copy_err: - lock_sock(sk); + lock_sock_bh(sk); if (!skb_kill_datagram(sk, skb, flags)) UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); - release_sock(sk); + unlock_sock_bh(sk); if (noblock) return -EAGAIN; @@ -1624,9 +1624,9 @@ int udp_rcv(struct sk_buff *skb) void udp_destroy_sock(struct sock *sk) { - lock_sock(sk); + lock_sock_bh(sk); udp_flush_pending_frames(sk); - release_sock(sk); + unlock_sock_bh(sk); } /* diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3ead20ad9d07..91c60f0090a4 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -424,7 +424,7 @@ out: return err; csum_copy_err: - lock_sock(sk); + lock_sock_bh(sk); if (!skb_kill_datagram(sk, skb, flags)) { if (is_udp4) UDP_INC_STATS_USER(sock_net(sk), @@ -433,7 +433,7 @@ csum_copy_err: UDP6_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); } - release_sock(sk); + unlock_sock_bh(sk); if (flags & MSG_DONTWAIT) return -EAGAIN; -- cgit v1.2.2 From f84af32cbca70a3c6d30463dc08c7984af11c277 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: 
Wed, 28 Apr 2010 15:31:51 -0700 Subject: net: ip_queue_rcv_skb() helper When queueing a skb to socket, we can immediately release its dst if target socket do not use IP_CMSG_PKTINFO. tcp_data_queue() can drop dst too. This to benefit from a hot cache line and avoid the receiver, possibly on another cpu, to dirty this cache line himself. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 16 ++++++++++++++++ net/ipv4/raw.c | 2 +- net/ipv4/tcp_input.c | 1 + net/ipv4/udp.c | 2 +- net/ipv6/raw.c | 2 +- net/ipv6/udp.c | 2 +- 6 files changed, 21 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index b0aa0546a3b3..ce231780a2b1 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -954,6 +954,22 @@ e_inval: return -EINVAL; } +/** + * ip_queue_rcv_skb - Queue an skb into sock receive queue + * @sk: socket + * @skb: buffer + * + * Queues an skb into socket receive queue. If IP_CMSG_PKTINFO option + * is not set, we drop skb dst entry now, while dst cache line is hot. + */ +int ip_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + if (!(inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO)) + skb_dst_drop(skb); + return sock_queue_rcv_skb(sk, skb); +} +EXPORT_SYMBOL(ip_queue_rcv_skb); + int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index cc6f097fbd5f..52ef5af78a45 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -290,7 +290,7 @@ static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb) { /* Charge it to the socket. */ - if (sock_queue_rcv_skb(sk, skb) < 0) { + if (ip_queue_rcv_skb(sk, skb) < 0) { kfree_skb(skb); return NET_RX_DROP; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ae3ec15fb630..e82162c211bf 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4367,6 +4367,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) goto drop; + skb_dst_drop(skb); __skb_pull(skb, th->doff * 4); TCP_ECN_accept_cwr(tp, skb); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1f86965ba7d7..4560b291180b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1264,7 +1264,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (inet_sk(sk)->inet_daddr) sock_rps_save_rxhash(sk, skb->rxhash); - rc = sock_queue_rcv_skb(sk, skb); + rc = ip_queue_rcv_skb(sk, skb); if (rc < 0) { int is_udplite = IS_UDPLITE(sk); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 85627386cb02..0e3d2dd92078 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -381,7 +381,7 @@ static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) } /* Charge it to the socket. 
*/ - if (sock_queue_rcv_skb(sk, skb) < 0) { + if (ip_queue_rcv_skb(sk, skb) < 0) { kfree_skb(skb); return NET_RX_DROP; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 91c60f0090a4..79359c8380bc 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -514,7 +514,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) goto drop; } - if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) { + if ((rc = ip_queue_rcv_skb(sk, skb)) < 0) { /* Note that an ENOMEM error is charged twice */ if (rc == -ENOMEM) UDP6_INC_STATS_BH(sock_net(sk), -- cgit v1.2.2 From 49b5c7f473f1bbcb30275dcaee2c06dfb8ec2279 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 29 Apr 2010 21:34:01 +0200 Subject: mac80211: tell driver about IBSS merge My previous patch "mac80211: notify driver about IBSS status" left a problem -- when we merge with a new BSSID, we never tell the driver that we left the old one. Fix that. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ibss.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index a87e309e3b99..c585fced8584 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -91,6 +91,12 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, if (memcmp(ifibss->bssid, bssid, ETH_ALEN)) sta_info_flush(sdata->local, sdata); + /* if merging, indicate to driver that we leave the old IBSS */ + if (sdata->vif.bss_conf.ibss_joined) { + sdata->vif.bss_conf.ibss_joined = false; + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IBSS); + } + memcpy(ifibss->bssid, bssid, ETH_ALEN); sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0; -- cgit v1.2.2 From f5c044e53a6b319776c7140b22fee9be3bc1f758 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Fri, 30 Apr 2010 15:37:00 -0400 Subject: mac80211: remove deprecated noise field from ieee80211_rx_status Also remove associated IEEE80211_HW_NOISE_DBM from ieee80211_hw_flags. Signed-off-by: John W. Linville --- net/mac80211/rx.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 8ee7db193269..e4f325f68fd3 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -80,8 +80,6 @@ ieee80211_rx_radiotap_len(struct ieee80211_local *local, len += 8; if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) len += 1; - if (local->hw.flags & IEEE80211_HW_NOISE_DBM) - len += 1; if (len & 1) /* padding for RX_FLAGS if necessary */ len++; -- cgit v1.2.2 From 767dd03369ac18af58efdef0383d6eb986eab426 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Apr 2010 19:14:43 +0000 Subject: net: speedup sock_recv_ts_and_drops() sock_recv_ts_and_drops() is fat and slow (~ 4% of cpu time on some profiles) We can test all socket flags at once to make fast path fast again. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 9822081eab38..cb7c1f6c0d6e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -655,13 +655,13 @@ inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff sizeof(__u32), &skb->dropcount); } -void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, +void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { sock_recv_timestamp(msg, sk, skb); sock_recv_drops(msg, sk, skb); } -EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops); +EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) -- cgit v1.2.2 From 4b021628beb26238087812829cc080da47e4b236 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Tue, 27 Apr 2010 21:20:22 +0000 Subject: xfrm: potential uninitialized variable num_xfrms potential uninitialized variable num_xfrms fix compiler warning: 'num_xfrms' may be used uninitialized in this function. Signed-off-by: Changli Gao ---- net/xfrm/xfrm_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7430ac26ec49..31f4ba43b48f 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1732,7 +1732,7 @@ int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, struct dst_entry *dst, *dst_orig = *dst_p, *route; u16 family = dst_orig->ops->family; u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); - int i, err, num_pols, num_xfrms, drop_pols = 0; + int i, err, num_pols, num_xfrms = 0, drop_pols = 0; restart: dst = NULL; -- cgit v1.2.2 From 83d7eb2979cd3390c375470225dd2d8f2009bc70 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 30 Apr 2010 16:42:08 -0700 Subject: ipv6: cleanup: remove unneeded null check We dereference "sk" unconditionally elsewhere in the function. This was left over from: b30bd282 "ip6_xmit: remove unnecessary NULL ptr check". According to that commit message, "the sk argument to ip6_xmit is never NULL nowadays since the skb->priority assigment expects a valid socket." Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7db09c3f5289..e7a5f17d5e95 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -218,8 +218,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, } kfree_skb(skb); skb = skb2; - if (sk) - skb_set_owner_w(skb, sk); + skb_set_owner_w(skb, sk); } if (opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); -- cgit v1.2.2 From a5f4cea74f1397bb29d0bbdabeb05bd05a23a741 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 30 Apr 2010 21:42:42 -0400 Subject: sctp: Use correct address family in sctp_getsockopt_peer_addrs() The function should use the address family of the address when trying to determine the length of the structure. 
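To make the mistake concrete, here is a minimal userspace sketch (not kernel code; the union and helper below are invented for illustration) of why the number of bytes to copy has to come from the family of the stored address itself, for example when an AF_INET6 socket reports an AF_INET peer:

#include <stdio.h>
#include <string.h>
#include <netinet/in.h>
#include <sys/socket.h>

union addr {
	struct sockaddr sa;
	struct sockaddr_in v4;
	struct sockaddr_in6 v6;
};

/* Length of the sockaddr actually stored in 'a'. */
static size_t addr_len(const union addr *a)
{
	return a->sa.sa_family == AF_INET6 ? sizeof(a->v6) : sizeof(a->v4);
}

int main(void)
{
	union addr peer;

	memset(&peer, 0, sizeof(peer));
	peer.v4.sin_family = AF_INET;	/* peer address is IPv4 ... */
	/* ... even though the local socket might be AF_INET6. */

	printf("copy %zu bytes (sizeof sockaddr_in6 would be %zu)\n",
	       addr_len(&peer), sizeof(struct sockaddr_in6));
	return 0;
}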
Signed-off-by: Vlad Yasevich --- net/sctp/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 13d8229f3a9c..1282a0ed855e 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4384,7 +4384,7 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len, transports) { memcpy(&temp, &from->ipaddr, sizeof(temp)); sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); - addrlen = sctp_get_af_specific(sk->sk_family)->sockaddr_len; + addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; if (space_left < addrlen) return -ENOMEM; if (copy_to_user(to, &temp, addrlen)) -- cgit v1.2.2 From c17b02b38aa99ef806c7066ef19a6f51122304f1 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 30 Apr 2010 21:42:43 -0400 Subject: sctp: send SHUTDOWN-ACK chunk back to the source. SHUTDOWN-ACK is always sent to the primary path the first time, but we should rather transmit the SHUTDOWN-ACK chunk to the same destination transport address from which we received the SHUTDOWN chunk. Based on the work from Wei Yongjun . Signed-off-by: Vlad Yasevich --- net/sctp/sm_sideeffect.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 4c5bed9af4e3..49fb9acece63 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -697,11 +697,15 @@ static void sctp_cmd_setup_t2(sctp_cmd_seq_t *cmds, { struct sctp_transport *t; - t = sctp_assoc_choose_alter_transport(asoc, + if (chunk->transport) + t = chunk->transport; + else { + t = sctp_assoc_choose_alter_transport(asoc, asoc->shutdown_last_sent_to); + chunk->transport = t; + } asoc->shutdown_last_sent_to = t; asoc->timeouts[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = t->rto; - chunk->transport = t; } /* Helper function to change the state of an association. */ -- cgit v1.2.2 From bd69b981a354be40cc709f3046f0c56f00da6163 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 30 Apr 2010 21:42:43 -0400 Subject: sctp: assure at least one T3-rtx timer is running if a FORWARD TSN is sent PR-SCTP extension section 3.5 Sender Side Implementation of PR-SCTP: C5) If a FORWARD TSN is sent, the sender MUST assure that at least one T3-rtx timer is running. So this patch fixes the code to assure that at least one T3-rtx timer is running if a FORWARD TSN has been or will be sent. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich --- net/sctp/outqueue.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index abfc0b8dee74..16d451a62b3f 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -854,6 +854,12 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) if (status != SCTP_XMIT_OK) { /* put the chunk back */ list_add(&chunk->list, &q->control_chunk_list); + } else if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN) { + /* PR-SCTP C5) If a FORWARD TSN is sent, the + * sender MUST assure that at least one T3-rtx + * timer is running. + */ + sctp_transport_reset_timers(transport, 0); } break; -- cgit v1.2.2 From 6429d3dc4bd6251b01c11b851e23a4d60f079e06 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 30 Apr 2010 21:42:44 -0400 Subject: sctp: missing set src and dest port while lookup output route While looking up the output route, we do not set the src and dest port. This will cause us to get a wrong route if we have set up the outbound transport to use IPsec with a src or dst port.
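As a rough illustration of why the ports belong in the lookup key, consider the small userspace sketch below; the flow_key structure and the toy lookup() policy are made up for the example and are not the kernel's struct flowi or xfrm code:

#include <stdio.h>
#include <stdint.h>

struct flow_key {
	uint32_t daddr;
	uint32_t saddr;
	uint16_t dport;	/* must be filled in for port-based policies */
	uint16_t sport;
	uint8_t  proto;
};

/* Toy policy: traffic to port 5000 goes through a tunnel. */
static const char *lookup(const struct flow_key *k)
{
	return k->dport == 5000 ? "ipsec-tunnel" : "plain";
}

int main(void)
{
	struct flow_key without = { .daddr = 1, .saddr = 2, .proto = 132 };
	struct flow_key with    = { .daddr = 1, .saddr = 2, .proto = 132,
				    .dport = 5000, .sport = 6000 };

	printf("no ports in key -> %s\n", lookup(&without));	/* plain */
	printf("ports in key    -> %s\n", lookup(&with));	/* ipsec-tunnel */
	return 0;
}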
Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich --- net/sctp/protocol.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 704298f4b284..182749867c72 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -474,13 +474,17 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, memset(&fl, 0x0, sizeof(struct flowi)); fl.fl4_dst = daddr->v4.sin_addr.s_addr; + fl.fl_ip_dport = daddr->v4.sin_port; fl.proto = IPPROTO_SCTP; if (asoc) { fl.fl4_tos = RT_CONN_FLAGS(asoc->base.sk); fl.oif = asoc->base.sk->sk_bound_dev_if; + fl.fl_ip_sport = htons(asoc->base.bind_addr.port); } - if (saddr) + if (saddr) { fl.fl4_src = saddr->v4.sin_addr.s_addr; + fl.fl_ip_sport = saddr->v4.sin_port; + } SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ", __func__, &fl.fl4_dst, &fl.fl4_src); @@ -528,6 +532,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, if ((laddr->state == SCTP_ADDR_SRC) && (AF_INET == laddr->a.sa.sa_family)) { fl.fl4_src = laddr->a.v4.sin_addr.s_addr; + fl.fl_ip_sport = laddr->a.v4.sin_port; if (!ip_route_output_key(&init_net, &rt, &fl)) { dst = &rt->u.dst; goto out_unlock; -- cgit v1.2.2 From bc4f841a05364b2572bcc266e9fd7e9cf5f06d5b Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 30 Apr 2010 22:38:53 -0400 Subject: sctp: fix to retranmit at least one DATA chunk While doing retransmission, if a control chunk exists, such as a FORWARD TSN chunk, and the DATA chunk cannot be bundled with this control chunk because of the PMTU limit, no DATA chunk will be retransmitted in the current implementation. This patch makes sure to retransmit at least one DATA chunk in this case. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich --- net/sctp/outqueue.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'net') diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 16d451a62b3f..e333d5833616 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -598,11 +598,23 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, if (fast_rtx && !chunk->fast_retransmit) continue; +redo: /* Attempt to append this chunk to the packet. */ status = sctp_packet_append_chunk(pkt, chunk); switch (status) { case SCTP_XMIT_PMTU_FULL: + if (!pkt->has_data && !pkt->has_cookie_echo) { + /* If this packet did not contain DATA then + * retransmission did not happen, so do it + * again. We'll ignore the error here since + * control chunks are already freed so there + * is nothing we can do. + */ + sctp_packet_transmit(pkt); + goto redo; + } + /* Send this packet. */ error = sctp_packet_transmit(pkt); -- cgit v1.2.2 From fbdf501c9374966a56829ecca3a7f25d2b49a305 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 30 Apr 2010 22:39:26 -0400 Subject: sctp: Do no select unconfirmed transports for retransmissions An unconfirmed transport is one that we have not been able to reach since the beginning. There is no point in trying to retransmit data on those transports. Also, the specification forbids it due to security issues.
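A simplified userspace sketch of the selection rule (the transport structure and state names below are invented for illustration and are not the kernel's sctp_transport): unconfirmed peers are skipped outright, and if nothing else qualifies the caller keeps the old path:

#include <stdio.h>
#include <stddef.h>

enum t_state { T_ACTIVE, T_INACTIVE, T_UNCONFIRMED };

struct transport {
	const char  *name;
	enum t_state state;
};

static const struct transport *pick_retran_path(const struct transport *t,
						 size_t n)
{
	const struct transport *fallback = NULL;
	size_t i;

	for (i = 0; i < n; i++) {
		if (t[i].state == T_UNCONFIRMED)
			continue;		/* never retransmit here */
		if (t[i].state == T_ACTIVE)
			return &t[i];
		if (!fallback)
			fallback = &t[i];	/* remember an inactive one */
	}
	return fallback;			/* may be NULL: keep old path */
}

int main(void)
{
	const struct transport peers[] = {
		{ "10.0.0.1", T_UNCONFIRMED },
		{ "10.0.0.2", T_INACTIVE },
		{ "10.0.0.3", T_ACTIVE },
	};
	const struct transport *t = pick_retran_path(peers, 3);

	printf("retransmit via %s\n", t ? t->name : "(unchanged)");
	return 0;
}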
Reported-by: Frank Schuster Signed-off-by: Vlad Yasevich --- net/sctp/associola.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index df5abbff63e2..de830c268564 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -762,7 +762,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, asoc->peer.retran_path = peer; } - if (asoc->peer.active_path == asoc->peer.retran_path) { + if (asoc->peer.active_path == asoc->peer.retran_path && + peer->state != SCTP_UNCONFIRMED) { asoc->peer.retran_path = peer; } @@ -1318,7 +1319,7 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) /* Keep track of the next transport in case * we don't find any active transport. */ - if (!next) + if (t->state != SCTP_UNCONFIRMED && !next) next = t; } } -- cgit v1.2.2 From ec7b9519509061bbc09a43284c3570aa492e07f0 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Fri, 30 Apr 2010 22:41:09 -0400 Subject: sctp: use sctp_chunk_is_data macro to decide a chunk is data chunk sctp_chunk_is_data macro is defined to decide that whether a chunk is data chunk or not. Signed-off-by: Shan Wei Signed-off-by: Vlad Yasevich --- net/sctp/outqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index e333d5833616..a4fe7dee76e8 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -308,7 +308,7 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk) /* If it is data, queue it up, otherwise, send it * immediately. */ - if (SCTP_CID_DATA == chunk->chunk_hdr->type) { + if (sctp_chunk_is_data(chunk)) { /* Is it OK to queue data chunks? */ /* From 9. Termination of Association * -- cgit v1.2.2 From 787a51a0878f7bee3a9a83040077301e1556b69a Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 30 Apr 2010 22:41:09 -0400 Subject: sctp: implement sctp association probing module This patch implement sctp association probing module, the module will be called sctp_probe. This module allows for capturing the changes to SCTP association state in response to incoming packets. It is used for debugging SCTP congestion control algorithms. Usage: $ modprobe sctp_probe [full=n] [port=n] [bufsize=n] $ cat /proc/net/sctpprobe The output format is: TIME ASSOC LPORT RPORT MTU RWND UNACK ... The output will be like this: 9.226086 c4064c48 9000 8000 1500 53352 1 *192.168.0.19 1 4380 54784 1252 0 1500 9.287195 c4064c48 9000 8000 1500 45144 5 *192.168.0.19 1 5880 54784 6500 0 1500 9.289130 c4064c48 9000 8000 1500 42724 5 *192.168.0.19 1 7380 54784 6500 0 1500 9.620332 c4064c48 9000 8000 1500 48284 4 *192.168.0.19 1 8880 54784 5200 0 1500 ...... Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich --- net/sctp/Kconfig | 12 +++ net/sctp/Makefile | 3 + net/sctp/probe.c | 213 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 228 insertions(+) create mode 100644 net/sctp/probe.c (limited to 'net') diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index 58b3e882a187..126b014eb79b 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -37,6 +37,18 @@ menuconfig IP_SCTP if IP_SCTP +config NET_SCTPPROBE + tristate "SCTP: Association probing" + depends on PROC_FS && KPROBES + ---help--- + This module allows for capturing the changes to SCTP association + state in response to incoming packets. It is used for debugging + SCTP congestion control algorithms. 
If you don't understand + what was just said, you don't need it: say N. + + To compile this code as a module, choose M here: the + module will be called sctp_probe. + config SCTP_DBG_MSG bool "SCTP: Debug messages" help diff --git a/net/sctp/Makefile b/net/sctp/Makefile index 6b794734380a..5c30b7a873df 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -3,6 +3,7 @@ # obj-$(CONFIG_IP_SCTP) += sctp.o +obj-$(CONFIG_NET_SCTPPROBE) += sctp_probe.o sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ protocol.o endpointola.o associola.o \ @@ -11,6 +12,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ tsnmap.o bind_addr.o socket.o primitive.o \ output.o input.o debug.o ssnmap.o auth.o +sctp_probe-y := probe.o + sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o sctp-$(CONFIG_PROC_FS) += proc.o sctp-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/sctp/probe.c b/net/sctp/probe.c new file mode 100644 index 000000000000..8f025d5831aa --- /dev/null +++ b/net/sctp/probe.c @@ -0,0 +1,213 @@ +/* + * sctp_probe - Observe the SCTP flow with kprobes. + * + * The idea for this came from Werner Almesberger's umlsim + * Copyright (C) 2004, Stephen Hemminger + * + * Modified for SCTP from Stephen Hemminger's code + * Copyright (C) 2010, Wei Yongjun + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +MODULE_AUTHOR("Wei Yongjun "); +MODULE_DESCRIPTION("SCTP snooper"); +MODULE_LICENSE("GPL"); + +static int port __read_mostly = 0; +MODULE_PARM_DESC(port, "Port to match (0=all)"); +module_param(port, int, 0); + +static int bufsize __read_mostly = 64 * 1024; +MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)"); +module_param(bufsize, int, 0); + +static int full __read_mostly = 1; +MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)"); +module_param(full, int, 0); + +static const char procname[] = "sctpprobe"; + +static struct { + struct kfifo fifo; + spinlock_t lock; + wait_queue_head_t wait; + struct timespec tstart; +} sctpw; + +static void printl(const char *fmt, ...) 
+{ + va_list args; + int len; + char tbuf[256]; + + va_start(args, fmt); + len = vscnprintf(tbuf, sizeof(tbuf), fmt, args); + va_end(args); + + kfifo_in_locked(&sctpw.fifo, tbuf, len, &sctpw.lock); + wake_up(&sctpw.wait); +} + +static int sctpprobe_open(struct inode *inode, struct file *file) +{ + kfifo_reset(&sctpw.fifo); + getnstimeofday(&sctpw.tstart); + + return 0; +} + +static ssize_t sctpprobe_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + int error = 0, cnt = 0; + unsigned char *tbuf; + + if (!buf) + return -EINVAL; + + if (len == 0) + return 0; + + tbuf = vmalloc(len); + if (!tbuf) + return -ENOMEM; + + error = wait_event_interruptible(sctpw.wait, + kfifo_len(&sctpw.fifo) != 0); + if (error) + goto out_free; + + cnt = kfifo_out_locked(&sctpw.fifo, tbuf, len, &sctpw.lock); + error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0; + +out_free: + vfree(tbuf); + + return error ? error : cnt; +} + +static const struct file_operations sctpprobe_fops = { + .owner = THIS_MODULE, + .open = sctpprobe_open, + .read = sctpprobe_read, +}; + +sctp_disposition_t jsctp_sf_eat_sack(const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const sctp_subtype_t type, + void *arg, + sctp_cmd_seq_t *commands) +{ + struct sctp_transport *sp; + static __u32 lcwnd = 0; + struct timespec now; + + sp = asoc->peer.primary_path; + + if ((full || sp->cwnd != lcwnd) && + (!port || asoc->peer.port == port || + ep->base.bind_addr.port == port)) { + lcwnd = sp->cwnd; + + getnstimeofday(&now); + now = timespec_sub(now, sctpw.tstart); + + printl("%lu.%06lu ", (unsigned long) now.tv_sec, + (unsigned long) now.tv_nsec / NSEC_PER_USEC); + + printl("%p %5d %5d %5d %8d %5d ", asoc, + ep->base.bind_addr.port, asoc->peer.port, + asoc->pathmtu, asoc->peer.rwnd, asoc->unack_data); + + list_for_each_entry(sp, &asoc->peer.transport_addr_list, + transports) { + if (sp == asoc->peer.primary_path) + printl("*"); + + if (sp->ipaddr.sa.sa_family == AF_INET) + printl("%pI4 ", &sp->ipaddr.v4.sin_addr); + else + printl("%pI6 ", &sp->ipaddr.v6.sin6_addr); + + printl("%2u %8u %8u %8u %8u %8u ", + sp->state, sp->cwnd, sp->ssthresh, + sp->flight_size, sp->partial_bytes_acked, + sp->pathmtu); + } + printl("\n"); + } + + jprobe_return(); + return 0; +} + +static struct jprobe sctp_recv_probe = { + .kp = { + .symbol_name = "sctp_sf_eat_sack_6_2", + }, + .entry = jsctp_sf_eat_sack, +}; + +static __init int sctpprobe_init(void) +{ + int ret = -ENOMEM; + + init_waitqueue_head(&sctpw.wait); + spin_lock_init(&sctpw.lock); + if (kfifo_alloc(&sctpw.fifo, bufsize, GFP_KERNEL)) + return ret; + + if (!proc_net_fops_create(&init_net, procname, S_IRUSR, + &sctpprobe_fops)) + goto free_kfifo; + + ret = register_jprobe(&sctp_recv_probe); + if (ret) + goto remove_proc; + + pr_info("SCTP probe registered (port=%d)\n", port); + + return 0; + +remove_proc: + proc_net_remove(&init_net, procname); +free_kfifo: + kfifo_free(&sctpw.fifo); + return ret; +} + +static __exit void sctpprobe_exit(void) +{ + kfifo_free(&sctpw.fifo); + proc_net_remove(&init_net, procname); + unregister_jprobe(&sctp_recv_probe); +} + +module_init(sctpprobe_init); +module_exit(sctpprobe_exit); -- cgit v1.2.2 From b99a4d53a74ac25eb4b930eef6c745579149c571 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 30 Apr 2010 22:41:09 -0400 Subject: sctp: cleanup: remove duplicate assignment This assignment isn't needed because we did it earlier already. 
Another reason to delete the assignment is that it triggers a Smatch warning about checking for NULL pointers after a dereference. Reported-by: Vlad Yasevich Signed-off-by: Dan Carpenter Signed-off-by: Vlad Yasevich --- net/sctp/sm_make_chunk.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 17cb400ecd6a..33aed1c6cbd6 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -419,10 +419,17 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, if (!retval) goto nomem_chunk; - /* Per the advice in RFC 2960 6.4, send this reply to - * the source of the INIT packet. + /* RFC 2960 6.4 Multi-homed SCTP Endpoints + * + * An endpoint SHOULD transmit reply chunks (e.g., SACK, + * HEARTBEAT ACK, * etc.) to the same destination transport + * address from which it received the DATA or control chunk + * to which it is replying. + * + * [INIT ACK back to where the INIT came from.] */ retval->transport = chunk->transport; + retval->subh.init_hdr = sctp_addto_chunk(retval, sizeof(initack), &initack); retval->param_hdr.v = sctp_addto_chunk(retval, addrs_len, addrs.v); @@ -461,18 +468,6 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, /* We need to remove the const qualifier at this point. */ retval->asoc = (struct sctp_association *) asoc; - /* RFC 2960 6.4 Multi-homed SCTP Endpoints - * - * An endpoint SHOULD transmit reply chunks (e.g., SACK, - * HEARTBEAT ACK, * etc.) to the same destination transport - * address from which it received the DATA or control chunk - * to which it is replying. - * - * [INIT ACK back to where the INIT came from.] - */ - if (chunk) - retval->transport = chunk->transport; - nomem_chunk: kfree(cookie); nomem_cookie: -- cgit v1.2.2 From d598b166ced20d9b9281ea3527c0e18405ddb803 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 30 Apr 2010 22:41:09 -0400 Subject: sctp: Make sure we always return valid retransmit path commit 4951feda0c60d1ef681f1a270afdd617924ab041 sctp: Do no select unconfirmed transports for retransmissions added code to make sure that we do not select unconfirmed paths for data transmission. This caused a problem when there are only 2 paths, 1 unconfirmed and 1 unreachable. In that case, the next retransmit path returned is NULL and that causes a kernel crash. The solution is to only change retransmit paths if we found one to use. Reported-by: Frank Schuster Signed-off-by: Vlad Yasevich --- net/sctp/associola.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index de830c268564..fab9cb2783a7 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1324,7 +1324,8 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) } } - asoc->peer.retran_path = t; + if (t) + asoc->peer.retran_path = t; SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_update_retran_path:association" " %p addr: ", -- cgit v1.2.2 From ae19c54866450f6c6f79223ca7d37965859a54e1 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 30 Apr 2010 22:41:09 -0400 Subject: sctp: remove 'resent' bit from the chunk The 'resent' bit is used to make sure that we don't update the rto estimate based on retransmitted chunks. However, we already have the 'rto_pending' bit that we test when we need to update the rto, so the 'resent' bit is just extra.
Additionally, we currently have a bug in that we always set a 'resent' bit and thus rto estimate is only updated by Heartbeats. Signed-off-by: Vlad Yasevich --- net/sctp/output.c | 25 +++++++++---------------- net/sctp/outqueue.c | 1 - net/sctp/sm_make_chunk.c | 1 - 3 files changed, 9 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/sctp/output.c b/net/sctp/output.c index fad261d41ec2..35e49b9df4e0 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -429,24 +429,17 @@ int sctp_packet_transmit(struct sctp_packet *packet) list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { list_del_init(&chunk->list); if (sctp_chunk_is_data(chunk)) { + /* 6.3.1 C4) When data is in flight and when allowed + * by rule C5, a new RTT measurement MUST be made each + * round trip. Furthermore, new RTT measurements + * SHOULD be made no more than once per round-trip + * for a given destination transport address. + */ - if (!chunk->resent) { - - /* 6.3.1 C4) When data is in flight and when allowed - * by rule C5, a new RTT measurement MUST be made each - * round trip. Furthermore, new RTT measurements - * SHOULD be made no more than once per round-trip - * for a given destination transport address. - */ - - if (!tp->rto_pending) { - chunk->rtt_in_progress = 1; - tp->rto_pending = 1; - } + if (!tp->rto_pending) { + chunk->rtt_in_progress = 1; + tp->rto_pending = 1; } - - chunk->resent = 1; - has_data = 1; } diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index a4fe7dee76e8..4e551ba8998c 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1405,7 +1405,6 @@ static void sctp_check_transmitted(struct sctp_outq *q, * instance). */ if (!tchunk->tsn_gap_acked && - !tchunk->resent && tchunk->rtt_in_progress) { tchunk->rtt_in_progress = 0; rtt = jiffies - tchunk->sent_at; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 33aed1c6cbd6..24effdf471eb 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1205,7 +1205,6 @@ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb, INIT_LIST_HEAD(&retval->list); retval->skb = skb; retval->asoc = (struct sctp_association *)asoc; - retval->resent = 0; retval->has_tsn = 0; retval->has_ssn = 0; retval->rtt_in_progress = 0; -- cgit v1.2.2 From d9efc2231b28bc199f9de4dd594248b7341188e5 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 30 Apr 2010 22:41:09 -0400 Subject: sctp: Do not force T3 timer on fast retransmissions. We don't need to force the T3 timer any more and it's actually wrong to do as it causes too long of a delay. The timer will be started if one is not running, but if one is running, we leave it alone. Signed-off-by: Vlad Yasevich --- net/sctp/outqueue.c | 15 +++------------ net/sctp/transport.c | 4 ++-- 2 files changed, 5 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 4e551ba8998c..786c4ff97ae4 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -659,14 +659,6 @@ redo: if (chunk->fast_retransmit == SCTP_NEED_FRTX) chunk->fast_retransmit = SCTP_DONT_FRTX; - /* Force start T3-rtx timer when fast retransmitting - * the earliest outstanding TSN - */ - if (!timer && fast_rtx && - ntohl(chunk->subh.data_hdr->tsn) == - asoc->ctsn_ack_point + 1) - timer = 2; - q->empty = 0; break; } @@ -871,7 +863,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) * sender MUST assure that at least one T3-rtx * timer is running. 
*/ - sctp_transport_reset_timers(transport, 0); + sctp_transport_reset_timers(transport); } break; @@ -924,8 +916,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) rtx_timeout, &start_timer); if (start_timer) - sctp_transport_reset_timers(transport, - start_timer-1); + sctp_transport_reset_timers(transport); /* This can happen on COOKIE-ECHO resend. Only * one chunk can get bundled with a COOKIE-ECHO. @@ -1058,7 +1049,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) list_add_tail(&chunk->transmitted_list, &transport->transmitted); - sctp_transport_reset_timers(transport, 0); + sctp_transport_reset_timers(transport); q->empty = 0; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index be4d63d5a5cc..0ebb97fc98e0 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -195,7 +195,7 @@ static void sctp_transport_destroy(struct sctp_transport *transport) /* Start T3_rtx timer if it is not already running and update the heartbeat * timer. This routine is called every time a DATA chunk is sent. */ -void sctp_transport_reset_timers(struct sctp_transport *transport, int force) +void sctp_transport_reset_timers(struct sctp_transport *transport) { /* RFC 2960 6.3.2 Retransmission Timer Rules * @@ -205,7 +205,7 @@ void sctp_transport_reset_timers(struct sctp_transport *transport, int force) * address. */ - if (force || !timer_pending(&transport->T3_rtx_timer)) + if (!timer_pending(&transport->T3_rtx_timer)) if (!mod_timer(&transport->T3_rtx_timer, jiffies + transport->rto)) sctp_transport_hold(transport); -- cgit v1.2.2 From b2cf9b6bd93af1cc047d3356f1c6cc9367fe3731 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 30 Apr 2010 22:41:10 -0400 Subject: sctp: update transport initializations Right now, sctp transports are not fully initialized and when adding any new fields, they have to be explicitely initialized. This is prone to mistakes. So we switch to calling kzalloc() which makes things much simpler. Signed-off-by: Vlad Yasevich --- net/sctp/associola.c | 3 --- net/sctp/endpointola.c | 2 -- net/sctp/transport.c | 25 ------------------------- 3 files changed, 30 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index fab9cb2783a7..37753cd48be3 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -87,9 +87,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* Retrieve the SCTP per socket area. */ sp = sctp_sk((struct sock *)sk); - /* Init all variables to a known value. */ - memset(asoc, 0, sizeof(struct sctp_association)); - /* Discarding const is appropriate here. */ asoc->ep = (struct sctp_endpoint *)ep; sctp_endpoint_hold(asoc->ep); diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 905fda582b92..2f8763bae9ed 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -70,8 +70,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, struct sctp_shared_key *null_key; int err; - memset(ep, 0, sizeof(struct sctp_endpoint)); - ep->digest = kzalloc(SCTP_SIGNATURE_SIZE, gfp); if (!ep->digest) return NULL; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 0ebb97fc98e0..854228bf3f34 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -64,9 +64,6 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, /* Copy in the address. 
*/ peer->ipaddr = *addr; peer->af_specific = sctp_get_af_specific(addr->sa.sa_family); - peer->asoc = NULL; - - peer->dst = NULL; memset(&peer->saddr, 0, sizeof(union sctp_addr)); /* From 6.3.1 RTO Calculation: @@ -76,34 +73,21 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, * parameter 'RTO.Initial'. */ peer->rto = msecs_to_jiffies(sctp_rto_initial); - peer->rtt = 0; - peer->rttvar = 0; - peer->srtt = 0; - peer->rto_pending = 0; - peer->hb_sent = 0; - peer->fast_recovery = 0; peer->last_time_heard = jiffies; peer->last_time_ecne_reduced = jiffies; - peer->init_sent_count = 0; - peer->param_flags = SPP_HB_DISABLE | SPP_PMTUD_ENABLE | SPP_SACKDELAY_ENABLE; - peer->hbinterval = 0; /* Initialize the default path max_retrans. */ peer->pathmaxrxt = sctp_max_retrans_path; - peer->error_count = 0; INIT_LIST_HEAD(&peer->transmitted); INIT_LIST_HEAD(&peer->send_ready); INIT_LIST_HEAD(&peer->transports); - peer->T3_rtx_timer.expires = 0; - peer->hb_timer.expires = 0; - setup_timer(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event, (unsigned long)peer); setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event, @@ -113,15 +97,6 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce)); atomic_set(&peer->refcnt, 1); - peer->dead = 0; - - peer->malloced = 0; - - /* Initialize the state information for SFR-CACC */ - peer->cacc.changeover_active = 0; - peer->cacc.cycling_changeover = 0; - peer->cacc.next_tsn_at_change = 0; - peer->cacc.cacc_saw_newack = 0; return peer; } -- cgit v1.2.2 From cf9b4812e18aab6f86ff998bd7425a9e823269c3 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 30 Apr 2010 22:41:10 -0400 Subject: sctp: fast recovery algorithm is per association. SCTP fast recovery algorithm really applies per association and impacts all transports. Signed-off-by: Vlad Yasevich --- net/sctp/transport.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 854228bf3f34..fccf4947aff1 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -378,15 +378,16 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) void sctp_transport_raise_cwnd(struct sctp_transport *transport, __u32 sack_ctsn, __u32 bytes_acked) { + struct sctp_association *asoc = transport->asoc; __u32 cwnd, ssthresh, flight_size, pba, pmtu; cwnd = transport->cwnd; flight_size = transport->flight_size; /* See if we need to exit Fast Recovery first */ - if (transport->fast_recovery && - TSN_lte(transport->fast_recovery_exit, sack_ctsn)) - transport->fast_recovery = 0; + if (asoc->fast_recovery && + TSN_lte(asoc->fast_recovery_exit, sack_ctsn)) + asoc->fast_recovery = 0; /* The appropriate cwnd increase algorithm is performed if, and only * if the cumulative TSN whould advanced and the congestion window is @@ -415,7 +416,7 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, * 2) the destination's path MTU. This upper bound protects * against the ACK-Splitting attack outlined in [SAVAGE99]. 
*/ - if (transport->fast_recovery) + if (asoc->fast_recovery) return; if (bytes_acked > pmtu) @@ -466,6 +467,8 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, void sctp_transport_lower_cwnd(struct sctp_transport *transport, sctp_lower_cwnd_t reason) { + struct sctp_association *asoc = transport->asoc; + switch (reason) { case SCTP_LOWER_CWND_T3_RTX: /* RFC 2960 Section 7.2.3, sctpimpguide @@ -476,11 +479,11 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, * partial_bytes_acked = 0 */ transport->ssthresh = max(transport->cwnd/2, - 4*transport->asoc->pathmtu); - transport->cwnd = transport->asoc->pathmtu; + 4*asoc->pathmtu); + transport->cwnd = asoc->pathmtu; - /* T3-rtx also clears fast recovery on the transport */ - transport->fast_recovery = 0; + /* T3-rtx also clears fast recovery */ + asoc->fast_recovery = 0; break; case SCTP_LOWER_CWND_FAST_RTX: @@ -496,15 +499,15 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, * cwnd = ssthresh * partial_bytes_acked = 0 */ - if (transport->fast_recovery) + if (asoc->fast_recovery) return; /* Mark Fast recovery */ - transport->fast_recovery = 1; - transport->fast_recovery_exit = transport->asoc->next_tsn - 1; + asoc->fast_recovery = 1; + asoc->fast_recovery_exit = asoc->next_tsn - 1; transport->ssthresh = max(transport->cwnd/2, - 4*transport->asoc->pathmtu); + 4*asoc->pathmtu); transport->cwnd = transport->ssthresh; break; @@ -524,7 +527,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, if (time_after(jiffies, transport->last_time_ecne_reduced + transport->rtt)) { transport->ssthresh = max(transport->cwnd/2, - 4*transport->asoc->pathmtu); + 4*asoc->pathmtu); transport->cwnd = transport->ssthresh; transport->last_time_ecne_reduced = jiffies; } @@ -540,7 +543,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, * interval. */ transport->cwnd = max(transport->cwnd/2, - 4*transport->asoc->pathmtu); + 4*asoc->pathmtu); break; } @@ -625,7 +628,6 @@ void sctp_transport_reset(struct sctp_transport *t) t->error_count = 0; t->rto_pending = 0; t->hb_sent = 0; - t->fast_recovery = 0; /* Initialize the state information for SFR-CACC */ t->cacc.changeover_active = 0; -- cgit v1.2.2 From 65883371894be2631603d5d412f90f8c09290fef Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 30 Apr 2010 22:41:10 -0400 Subject: sctp: rwnd_press should be cumulative rwnd_press tracks the pressure on the recieve window. Every timer the receive buffer overlows, we truncate the receive window and then grow it back. However, if we don't track the cumulative presser, it's possible to reach a situation when receive buffer is empty, but rwnd stays truncated. 
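A toy model of the book-keeping (userspace only, arbitrary numbers; the variable names merely echo the association fields) shows why the pressure has to accumulate rather than be overwritten:

#include <stdio.h>

static unsigned int rwnd = 1000, rwnd_press;

static void truncate_window(void)
{
	rwnd_press += rwnd;	/* cumulative; '=' would forget earlier truncations */
	rwnd = 0;
}

static void restore_window(void)
{
	rwnd += rwnd_press;	/* hand back everything that was parked */
	rwnd_press = 0;
}

int main(void)
{
	truncate_window();	/* first overflow parks 1000 */
	rwnd = 200;		/* window regrows a little as data is read ... */
	truncate_window();	/* ... and a second overflow parks those 200 too */
	restore_window();

	/* prints 1200; with 'rwnd_press = rwnd' the first 1000 would be lost */
	printf("rwnd after restore: %u\n", rwnd);
	return 0;
}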
Signed-off-by: Vlad Yasevich --- net/sctp/associola.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 37753cd48be3..65f9a7cdf466 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1482,7 +1482,7 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned len) if (asoc->rwnd >= len) { asoc->rwnd -= len; if (over) { - asoc->rwnd_press = asoc->rwnd; + asoc->rwnd_press += asoc->rwnd; asoc->rwnd = 0; } } else { -- cgit v1.2.2 From ea862c8d1f4a0d193979c7412c3b946f600721ce Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 30 Apr 2010 22:41:10 -0400 Subject: sctp: correctly mark missing chunks in fast recovery According to RFC 4960 Section 7.2.4: If an endpoint is in Fast Recovery and a SACK arrives that advances the Cumulative TSN Ack Point, the miss indications are incremented for all TSNs reported missing in the SACK. Signed-off-by: Vlad Yasevich --- net/sctp/outqueue.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 786c4ff97ae4..b491a1aac3e4 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1154,6 +1154,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) struct sctp_transport *primary = asoc->peer.primary_path; int count_of_newacks = 0; int gap_ack_blocks; + u8 accum_moved = 0; /* Grab the association's destination address list. */ transport_list = &asoc->peer.transport_addr_list; @@ -1232,16 +1233,22 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) count_of_newacks ++; } + /* Move the Cumulative TSN Ack Point if appropriate. */ + if (TSN_lt(asoc->ctsn_ack_point, sack_ctsn)) { + asoc->ctsn_ack_point = sack_ctsn; + accum_moved = 1; + } + if (gap_ack_blocks) { + + if (asoc->fast_recovery && accum_moved) + highest_new_tsn = highest_tsn; + list_for_each_entry(transport, transport_list, transports) sctp_mark_missing(q, &transport->transmitted, transport, highest_new_tsn, count_of_newacks); } - /* Move the Cumulative TSN Ack Point if appropriate. */ - if (TSN_lt(asoc->ctsn_ack_point, sack_ctsn)) - asoc->ctsn_ack_point = sack_ctsn; - /* Update unack_data field in the assoc. */ sctp_sack_update_unack_data(asoc, sack); @@ -1685,7 +1692,8 @@ static void sctp_mark_missing(struct sctp_outq *q, struct sctp_chunk *chunk; __u32 tsn; char do_fast_retransmit = 0; - struct sctp_transport *primary = q->asoc->peer.primary_path; + struct sctp_association *asoc = q->asoc; + struct sctp_transport *primary = asoc->peer.primary_path; list_for_each_entry(chunk, transmitted_queue, transmitted_list) { -- cgit v1.2.2 From bfa0d9843ac5feb9667990706b4524390fee4df9 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 30 Apr 2010 22:41:10 -0400 Subject: sctp: Optimize computation of highest new tsn in SACK. Right now, if the highest tsn in the SACK doesn't change, we'll end up scanning the transmitted lists on the transports twice: once for locating the highest _new_ tsn, and once for actually tagging chunks as acked. This is a waste, since we can record the highest _new_ tsn at the same time as tagging chunks. Long ago this was not possible because we would try to mark chunks as missing at the same time as tagging them acked and this approach didn't work. Now that the two steps are separate, we can re-use the old approach. 
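The single-pass idea, shown as a stand-alone userspace sketch with a toy TSN list and SACK check rather than the kernel's transmitted queues:

#include <stdio.h>

struct chunk {
	unsigned int tsn;
	int acked;
};

static int sacked(unsigned int tsn)	/* toy SACK: everything <= 105 is acked */
{
	return tsn <= 105;
}

static unsigned int check_transmitted(struct chunk *c, int n,
				      unsigned int cum_tsn)
{
	unsigned int highest_new = cum_tsn;
	int i;

	for (i = 0; i < n; i++) {
		if (!c[i].acked && sacked(c[i].tsn)) {
			c[i].acked = 1;			/* tag it ... */
			if (c[i].tsn > highest_new)
				highest_new = c[i].tsn;	/* ... and record it */
		}
	}
	return highest_new;	/* no second scan of the list is needed */
}

int main(void)
{
	struct chunk q[] = { {101, 1}, {103, 0}, {105, 0}, {107, 0} };

	printf("highest new tsn: %u\n", check_transmitted(q, 4, 100));
	return 0;
}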
Signed-off-by: Vlad Yasevich --- net/sctp/outqueue.c | 42 +++++++----------------------------------- 1 file changed, 7 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index b491a1aac3e4..5d057178ce0c 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -62,7 +62,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, struct list_head *transmitted_queue, struct sctp_transport *transport, struct sctp_sackhdr *sack, - __u32 highest_new_tsn); + __u32 *highest_new_tsn); static void sctp_mark_missing(struct sctp_outq *q, struct list_head *transmitted_queue, @@ -1109,32 +1109,6 @@ static void sctp_sack_update_unack_data(struct sctp_association *assoc, assoc->unack_data = unack_data; } -/* Return the highest new tsn that is acknowledged by the given SACK chunk. */ -static __u32 sctp_highest_new_tsn(struct sctp_sackhdr *sack, - struct sctp_association *asoc) -{ - struct sctp_transport *transport; - struct sctp_chunk *chunk; - __u32 highest_new_tsn, tsn; - struct list_head *transport_list = &asoc->peer.transport_addr_list; - - highest_new_tsn = ntohl(sack->cum_tsn_ack); - - list_for_each_entry(transport, transport_list, transports) { - list_for_each_entry(chunk, &transport->transmitted, - transmitted_list) { - tsn = ntohl(chunk->subh.data_hdr->tsn); - - if (!chunk->tsn_gap_acked && - TSN_lt(highest_new_tsn, tsn) && - sctp_acked(sack, tsn)) - highest_new_tsn = tsn; - } - } - - return highest_new_tsn; -} - /* This is where we REALLY process a SACK. * * Process the SACK against the outqueue. Mostly, this just frees @@ -1203,18 +1177,15 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) if (gap_ack_blocks) highest_tsn += ntohs(frags[gap_ack_blocks - 1].gab.end); - if (TSN_lt(asoc->highest_sacked, highest_tsn)) { - highest_new_tsn = highest_tsn; + if (TSN_lt(asoc->highest_sacked, highest_tsn)) asoc->highest_sacked = highest_tsn; - } else { - highest_new_tsn = sctp_highest_new_tsn(sack, asoc); - } + highest_new_tsn = sack_ctsn; /* Run through the retransmit queue. Credit bytes received * and free those chunks that we can. */ - sctp_check_transmitted(q, &q->retransmit, NULL, sack, highest_new_tsn); + sctp_check_transmitted(q, &q->retransmit, NULL, sack, &highest_new_tsn); /* Run through the transmitted queue. * Credit bytes received and free those chunks which we can. @@ -1223,7 +1194,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) */ list_for_each_entry(transport, transport_list, transports) { sctp_check_transmitted(q, &transport->transmitted, - transport, sack, highest_new_tsn); + transport, sack, &highest_new_tsn); /* * SFR-CACC algorithm: * C) Let count_of_newacks be the number of @@ -1331,7 +1302,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, struct list_head *transmitted_queue, struct sctp_transport *transport, struct sctp_sackhdr *sack, - __u32 highest_new_tsn_in_sack) + __u32 *highest_new_tsn_in_sack) { struct list_head *lchunk; struct sctp_chunk *tchunk; @@ -1419,6 +1390,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, */ if (!tchunk->tsn_gap_acked) { tchunk->tsn_gap_acked = 1; + *highest_new_tsn_in_sack = tsn; bytes_acked += sctp_data_size(tchunk); if (!tchunk->transport) migrate_bytes += sctp_data_size(tchunk); -- cgit v1.2.2 From 0e3aef8d09a8c11e3fb83cdcb24b5bc7421b3726 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 30 Apr 2010 22:41:10 -0400 Subject: sctp: Tag messages that can be Nagle delayed at creation. 
When we create the sctp_datamsg and fragment the user data, we know exactly if we are sending full segments or not and how they might be bundled. During this time, we can mark messages a Nagle capable or not. This makes the check at transmit time much simpler. Signed-off-by: Vlad Yasevich --- net/sctp/chunk.c | 4 ++-- net/sctp/output.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 3eab6db59a37..476caaf100ed 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -58,9 +58,9 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg) msg->send_failed = 0; msg->send_error = 0; msg->can_abandon = 0; + msg->can_delay = 1; msg->expires_at = 0; INIT_LIST_HEAD(&msg->chunks); - msg->msg_size = 0; } /* Allocate and initialize datamsg. */ @@ -157,7 +157,6 @@ static void sctp_datamsg_assign(struct sctp_datamsg *msg, struct sctp_chunk *chu { sctp_datamsg_hold(msg); chunk->msg = msg; - msg->msg_size += chunk->skb->len; } @@ -247,6 +246,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, if (msg_len >= first_len) { msg_len -= first_len; whole = 1; + msg->can_delay = 0; } /* How many full sized? How many bytes leftover? */ diff --git a/net/sctp/output.c b/net/sctp/output.c index 35e49b9df4e0..a646681f5acd 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -674,7 +674,7 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, * Don't delay large message writes that may have been * fragmeneted into small peices. */ - if ((len < max) && (chunk->msg->msg_size < max)) { + if ((len < max) && chunk->msg->can_delay) { retval = SCTP_XMIT_NAGLE_DELAY; goto finish; } -- cgit v1.2.2 From e772c349a11de448f194d0c9f2e7eb23800e1a13 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 1 May 2010 18:29:43 +0200 Subject: netfilter: nf_ct_h323: switch "incomplete TPKT" message to pr_debug() The message might be falsely triggered by non-H.323 traffic on port 1720. Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_h323_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index a487c8038044..48bf15073a85 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -194,8 +194,7 @@ static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff, return 0; } - if (net_ratelimit()) - printk("nf_ct_h323: incomplete TPKT (fragmented?)\n"); + pr_debug("nf_ct_h323: incomplete TPKT (fragmented?)\n"); goto clear_out; } -- cgit v1.2.2 From 43815482370c510c569fd18edb57afcb0fa8cab6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Apr 2010 11:01:49 +0000 Subject: net: sock_def_readable() and friends RCU conversion sk_callback_lock rwlock actually protects sk->sk_sleep pointer, so we need two atomic operations (and associated dirtying) per incoming packet. RCU conversion is pretty much needed : 1) Add a new structure, called "struct socket_wq" to hold all fields that will need rcu_read_lock() protection (currently: a wait_queue_head_t and a struct fasync_struct pointer). [Future patch will add a list anchor for wakeup coalescing] 2) Attach one of such structure to each "struct socket" created in sock_alloc_inode(). 
3) Respect RCU grace period when freeing a "struct socket_wq" 4) Change sk_sleep pointer in "struct sock" by sk_wq, pointer to "struct socket_wq" 5) Change sk_sleep() function to use new sk->sk_wq instead of sk->sk_sleep 6) Change sk_has_sleeper() to wq_has_sleeper() that must be used inside a rcu_read_lock() section. 7) Change all sk_has_sleeper() callers to : - Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock) - Use wq_has_sleeper() to eventually wakeup tasks. - Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock) 8) sock_wake_async() is modified to use rcu protection as well. 9) Exceptions : macvtap, drivers/net/tun.c, af_unix use integrated "struct socket_wq" instead of dynamically allocated ones. They dont need rcu freeing. Some cleanups or followups are probably needed, (possible sk_callback_lock conversion to a spinlock for example...). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/atm/common.c | 22 ++++++++++++++-------- net/core/sock.c | 50 +++++++++++++++++++++++++++++++------------------- net/core/stream.c | 10 +++++++--- net/dccp/output.c | 10 ++++++---- net/iucv/af_iucv.c | 11 +++++++---- net/phonet/pep.c | 8 ++++---- net/phonet/socket.c | 2 +- net/rxrpc/af_rxrpc.c | 10 ++++++---- net/sctp/socket.c | 2 +- net/socket.c | 47 ++++++++++++++++++++++++++++++++++++----------- net/unix/af_unix.c | 17 ++++++++--------- 11 files changed, 121 insertions(+), 68 deletions(-) (limited to 'net') diff --git a/net/atm/common.c b/net/atm/common.c index e3e10e6f8628..b43feb1a3995 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -90,10 +90,13 @@ static void vcc_sock_destruct(struct sock *sk) static void vcc_def_wakeup(struct sock *sk) { - read_lock(&sk->sk_callback_lock); - if (sk_has_sleeper(sk)) - wake_up(sk_sleep(sk)); - read_unlock(&sk->sk_callback_lock); + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up(&wq->wait); + rcu_read_unlock(); } static inline int vcc_writable(struct sock *sk) @@ -106,16 +109,19 @@ static inline int vcc_writable(struct sock *sk) static void vcc_write_space(struct sock *sk) { - read_lock(&sk->sk_callback_lock); + struct socket_wq *wq; + + rcu_read_lock(); if (vcc_writable(sk)) { - if (sk_has_sleeper(sk)) - wake_up_interruptible(sk_sleep(sk)); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible(&wq->wait); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } - read_unlock(&sk->sk_callback_lock); + rcu_read_unlock(); } static struct proto vcc_proto = { diff --git a/net/core/sock.c b/net/core/sock.c index 51041759517e..94c4affdda9b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1211,7 +1211,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) */ sk_refcnt_debug_inc(newsk); sk_set_socket(newsk, NULL); - newsk->sk_sleep = NULL; + newsk->sk_wq = NULL; if (newsk->sk_prot->sockets_allocated) percpu_counter_inc(newsk->sk_prot->sockets_allocated); @@ -1800,41 +1800,53 @@ EXPORT_SYMBOL(sock_no_sendpage); static void sock_def_wakeup(struct sock *sk) { - read_lock(&sk->sk_callback_lock); - if (sk_has_sleeper(sk)) - wake_up_interruptible_all(sk_sleep(sk)); - read_unlock(&sk->sk_callback_lock); + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_all(&wq->wait); + rcu_read_unlock(); } static void sock_def_error_report(struct sock *sk) { - read_lock(&sk->sk_callback_lock); - if (sk_has_sleeper(sk)) - 
wake_up_interruptible_poll(sk_sleep(sk), POLLERR); + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_poll(&wq->wait, POLLERR); sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); - read_unlock(&sk->sk_callback_lock); + rcu_read_unlock(); } static void sock_def_readable(struct sock *sk, int len) { - read_lock(&sk->sk_callback_lock); - if (sk_has_sleeper(sk)) - wake_up_interruptible_sync_poll(sk_sleep(sk), POLLIN | + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLRDNORM | POLLRDBAND); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); - read_unlock(&sk->sk_callback_lock); + rcu_read_unlock(); } static void sock_def_write_space(struct sock *sk) { - read_lock(&sk->sk_callback_lock); + struct socket_wq *wq; + + rcu_read_lock(); /* Do not wake up a writer until he can make "significant" * progress. --DaveM */ if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { - if (sk_has_sleeper(sk)) - wake_up_interruptible_sync_poll(sk_sleep(sk), POLLOUT | + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); /* Should agree with poll, otherwise some programs break */ @@ -1842,7 +1854,7 @@ static void sock_def_write_space(struct sock *sk) sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } - read_unlock(&sk->sk_callback_lock); + rcu_read_unlock(); } static void sock_def_destruct(struct sock *sk) @@ -1896,10 +1908,10 @@ void sock_init_data(struct socket *sock, struct sock *sk) if (sock) { sk->sk_type = sock->type; - sk->sk_sleep = &sock->wait; + sk->sk_wq = sock->wq; sock->sk = sk; } else - sk->sk_sleep = NULL; + sk->sk_wq = NULL; spin_lock_init(&sk->sk_dst_lock); rwlock_init(&sk->sk_callback_lock); diff --git a/net/core/stream.c b/net/core/stream.c index 7b3c3f30b107..cc196f42b8d8 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -28,15 +28,19 @@ void sk_stream_write_space(struct sock *sk) { struct socket *sock = sk->sk_socket; + struct socket_wq *wq; if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) { clear_bit(SOCK_NOSPACE, &sock->flags); - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - wake_up_interruptible_poll(sk_sleep(sk), POLLOUT | + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); - if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) + if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); + rcu_read_unlock(); } } diff --git a/net/dccp/output.c b/net/dccp/output.c index 2d3dcb39851f..aadbdb58758b 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -195,15 +195,17 @@ EXPORT_SYMBOL_GPL(dccp_sync_mss); void dccp_write_space(struct sock *sk) { - read_lock(&sk->sk_callback_lock); + struct socket_wq *wq; - if (sk_has_sleeper(sk)) - wake_up_interruptible(sk_sleep(sk)); + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible(&wq->wait); /* Should agree with poll, otherwise some programs break */ if (sock_writeable(sk)) sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); - read_unlock(&sk->sk_callback_lock); + rcu_read_unlock(); } /** diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 9636b7d27b48..8be324fe08b9 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -305,11 +305,14 @@ 
static inline int iucv_below_msglim(struct sock *sk) */ static void iucv_sock_wake_msglim(struct sock *sk) { - read_lock(&sk->sk_callback_lock); - if (sk_has_sleeper(sk)) - wake_up_interruptible_all(sk_sleep(sk)); + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_all(&wq->wait); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); - read_unlock(&sk->sk_callback_lock); + rcu_read_unlock(); } /* Timers */ diff --git a/net/phonet/pep.c b/net/phonet/pep.c index e2a95762abd3..af4d38bc3b22 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -664,12 +664,12 @@ static int pep_wait_connreq(struct sock *sk, int noblock) if (signal_pending(tsk)) return sock_intr_errno(timeo); - prepare_to_wait_exclusive(&sk->sk_socket->wait, &wait, + prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); - finish_wait(&sk->sk_socket->wait, &wait); + finish_wait(sk_sleep(sk), &wait); } return 0; @@ -910,10 +910,10 @@ disabled: goto out; } - prepare_to_wait(&sk->sk_socket->wait, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); done = sk_wait_event(sk, &timeo, atomic_read(&pn->tx_credits)); - finish_wait(&sk->sk_socket->wait, &wait); + finish_wait(sk_sleep(sk), &wait); if (sk->sk_state != TCP_ESTABLISHED) goto disabled; diff --git a/net/phonet/socket.c b/net/phonet/socket.c index c785bfd0744f..6e9848bf0370 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -265,7 +265,7 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock, struct pep_sock *pn = pep_sk(sk); unsigned int mask = 0; - poll_wait(file, &sock->wait, wait); + poll_wait(file, sk_sleep(sk), wait); switch (sk->sk_state) { case TCP_LISTEN: diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index c432d76f415e..0b9bb2085ce4 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -62,13 +62,15 @@ static inline int rxrpc_writable(struct sock *sk) static void rxrpc_write_space(struct sock *sk) { _enter("%p", sk); - read_lock(&sk->sk_callback_lock); + rcu_read_lock(); if (rxrpc_writable(sk)) { - if (sk_has_sleeper(sk)) - wake_up_interruptible(sk_sleep(sk)); + struct socket_wq *wq = rcu_dereference(sk->sk_wq); + + if (wq_has_sleeper(wq)) + wake_up_interruptible(&wq->wait); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } - read_unlock(&sk->sk_callback_lock); + rcu_read_unlock(); } /* diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 13d8229f3a9c..d54700af927a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6065,7 +6065,7 @@ static void __sctp_write_space(struct sctp_association *asoc) * here by modeling from the current TCP/UDP code. * We have not tested with it yet. 
*/ - if (sock->fasync_list && + if (sock->wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); diff --git a/net/socket.c b/net/socket.c index cb7c1f6c0d6e..dae8c6b84a09 100644 --- a/net/socket.c +++ b/net/socket.c @@ -252,9 +252,14 @@ static struct inode *sock_alloc_inode(struct super_block *sb) ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); if (!ei) return NULL; - init_waitqueue_head(&ei->socket.wait); + ei->socket.wq = kmalloc(sizeof(struct socket_wq), GFP_KERNEL); + if (!ei->socket.wq) { + kmem_cache_free(sock_inode_cachep, ei); + return NULL; + } + init_waitqueue_head(&ei->socket.wq->wait); + ei->socket.wq->fasync_list = NULL; - ei->socket.fasync_list = NULL; ei->socket.state = SS_UNCONNECTED; ei->socket.flags = 0; ei->socket.ops = NULL; @@ -264,10 +269,21 @@ static struct inode *sock_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } + +static void wq_free_rcu(struct rcu_head *head) +{ + struct socket_wq *wq = container_of(head, struct socket_wq, rcu); + + kfree(wq); +} + static void sock_destroy_inode(struct inode *inode) { - kmem_cache_free(sock_inode_cachep, - container_of(inode, struct socket_alloc, vfs_inode)); + struct socket_alloc *ei; + + ei = container_of(inode, struct socket_alloc, vfs_inode); + call_rcu(&ei->socket.wq->rcu, wq_free_rcu); + kmem_cache_free(sock_inode_cachep, ei); } static void init_once(void *foo) @@ -513,7 +529,7 @@ void sock_release(struct socket *sock) module_put(owner); } - if (sock->fasync_list) + if (sock->wq->fasync_list) printk(KERN_ERR "sock_release: fasync list not empty!\n"); percpu_sub(sockets_in_use, 1); @@ -1080,9 +1096,9 @@ static int sock_fasync(int fd, struct file *filp, int on) lock_sock(sk); - fasync_helper(fd, filp, on, &sock->fasync_list); + fasync_helper(fd, filp, on, &sock->wq->fasync_list); - if (!sock->fasync_list) + if (!sock->wq->fasync_list) sock_reset_flag(sk, SOCK_FASYNC); else sock_set_flag(sk, SOCK_FASYNC); @@ -1091,12 +1107,20 @@ static int sock_fasync(int fd, struct file *filp, int on) return 0; } -/* This function may be called only under socket lock or callback_lock */ +/* This function may be called only under socket lock or callback_lock or rcu_lock */ int sock_wake_async(struct socket *sock, int how, int band) { - if (!sock || !sock->fasync_list) + struct socket_wq *wq; + + if (!sock) return -1; + rcu_read_lock(); + wq = rcu_dereference(sock->wq); + if (!wq || !wq->fasync_list) { + rcu_read_unlock(); + return -1; + } switch (how) { case SOCK_WAKE_WAITD: if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) @@ -1108,11 +1132,12 @@ int sock_wake_async(struct socket *sock, int how, int band) /* fall through */ case SOCK_WAKE_IO: call_kill: - kill_fasync(&sock->fasync_list, SIGIO, band); + kill_fasync(&wq->fasync_list, SIGIO, band); break; case SOCK_WAKE_URG: - kill_fasync(&sock->fasync_list, SIGURG, band); + kill_fasync(&wq->fasync_list, SIGURG, band); } + rcu_read_unlock(); return 0; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 87c0360eaa25..fef2cc5e9d2b 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -313,13 +313,16 @@ static inline int unix_writable(struct sock *sk) static void unix_write_space(struct sock *sk) { - read_lock(&sk->sk_callback_lock); + struct socket_wq *wq; + + rcu_read_lock(); if (unix_writable(sk)) { - if (sk_has_sleeper(sk)) - wake_up_interruptible_sync(sk_sleep(sk)); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_sync(&wq->wait); sk_wake_async(sk, SOCK_WAKE_SPACE, 
POLL_OUT); } - read_unlock(&sk->sk_callback_lock); + rcu_read_unlock(); } /* When dgram socket disconnects (or changes its peer), we clear its receive @@ -406,9 +409,7 @@ static int unix_release_sock(struct sock *sk, int embrion) skpair->sk_err = ECONNRESET; unix_state_unlock(skpair); skpair->sk_state_change(skpair); - read_lock(&skpair->sk_callback_lock); sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); - read_unlock(&skpair->sk_callback_lock); } sock_put(skpair); /* It may now die */ unix_peer(sk) = NULL; @@ -1142,7 +1143,7 @@ restart: newsk->sk_peercred.pid = task_tgid_vnr(current); current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid); newu = unix_sk(newsk); - newsk->sk_sleep = &newu->peer_wait; + newsk->sk_wq = &newu->peer_wq; otheru = unix_sk(other); /* copy address information from listening to new sock*/ @@ -1931,12 +1932,10 @@ static int unix_shutdown(struct socket *sock, int mode) other->sk_shutdown |= peer_mode; unix_state_unlock(other); other->sk_state_change(other); - read_lock(&other->sk_callback_lock); if (peer_mode == SHUTDOWN_MASK) sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); else if (peer_mode & RCV_SHUTDOWN) sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); - read_unlock(&other->sk_callback_lock); } if (other) sock_put(other); -- cgit v1.2.2 From 47d29646a2c1c147d8a7598aeac2c87dd71ed638 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 2 May 2010 02:21:44 -0700 Subject: net: Inline skb_pull() in eth_type_trans(). In commit 6be8ac2f ("[NET]: uninline skb_pull, de-bloats a lot") we uninlined skb_pull. But in some critical paths it makes sense to inline this thing and it helps performance significantly. Create an skb_pull_inline() so that we can do this in a way that serves also as annotation. Based upon a patch by Eric Dumazet. Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- net/ethernet/eth.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4218ff49bf13..8b9c109166a7 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1051,7 +1051,7 @@ EXPORT_SYMBOL(skb_push); */ unsigned char *skb_pull(struct sk_buff *skb, unsigned int len) { - return unlikely(len > skb->len) ? 
NULL : __skb_pull(skb, len); + return skb_pull_inline(skb, len); } EXPORT_SYMBOL(skb_pull); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 0c0d272a9888..61ec0329316c 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -162,7 +162,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) skb->dev = dev; skb_reset_mac_header(skb); - skb_pull(skb, ETH_HLEN); + skb_pull_inline(skb, ETH_HLEN); eth = eth_hdr(skb); if (unlikely(is_multicast_ether_addr(eth->h_dest))) { -- cgit v1.2.2 From b5cad0dfd3c80501330215b9a9ae31bcffbd7306 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sun, 2 May 2010 13:55:21 +0200 Subject: netfilter: ip_tables: fix compilation when debug is enabled Signed-off-by: Jan Engelhardt --- net/ipv4/netfilter/ip_tables.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 3e6af1036fbc..f92818f76671 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -591,7 +591,7 @@ check_entry(const struct ipt_entry *e, const char *name) const struct ipt_entry_target *t; if (!ip_checkentry(&e->ip)) { - duprintf("ip check failed %p %s.\n", e, name); + duprintf("ip check failed %p %s.\n", e, par->match->name); return -EINVAL; } @@ -618,7 +618,7 @@ check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par) ret = xt_check_match(par, m->u.match_size - sizeof(*m), ip->proto, ip->invflags & IPT_INV_PROTO); if (ret < 0) { - duprintf("check failed for `%s'.\n", par.match->name); + duprintf("check failed for `%s'.\n", par->match->name); return ret; } return 0; -- cgit v1.2.2 From c29c949288d343be0d5c4f3091bc87ac047e1a09 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sun, 2 May 2010 14:04:54 +0200 Subject: netfilter: xtables: fix incorrect return code Signed-off-by: Jan Engelhardt --- net/netfilter/xt_multiport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c index 83b77ceb264f..b21f90432247 100644 --- a/net/netfilter/xt_multiport.c +++ b/net/netfilter/xt_multiport.c @@ -117,7 +117,7 @@ static int multiport_mt_check(const struct xt_mtchk_param *par) const struct xt_multiport_v1 *multiinfo = par->matchinfo; return check(ip->proto, ip->invflags, multiinfo->flags, - multiinfo->count); + multiinfo->count) ? 0 : -EINVAL; } static int multiport_mt6_check(const struct xt_mtchk_param *par) @@ -126,7 +126,7 @@ static int multiport_mt6_check(const struct xt_mtchk_param *par) const struct xt_multiport_v1 *multiinfo = par->matchinfo; return check(ip->proto, ip->invflags, multiinfo->flags, - multiinfo->count); + multiinfo->count) ? 
0 : -EINVAL; } static struct xt_match multiport_mt_reg[] __read_mostly = { -- cgit v1.2.2 From ef53d702c3614fb919e8a8291033e3dbccfd1aea Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 9 Jul 2009 19:14:18 +0200 Subject: netfilter: xtables: dissolve do_match function Signed-off-by: Jan Engelhardt --- net/ipv4/netfilter/ip_tables.c | 22 +++++----------------- net/ipv6/netfilter/ip6_tables.c | 22 +++++----------------- 2 files changed, 10 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index f92818f76671..265cedf88660 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -173,21 +173,6 @@ ipt_error(struct sk_buff *skb, const struct xt_target_param *par) return NF_DROP; } -/* Performance critical - called for every packet */ -static inline bool -do_match(const struct ipt_entry_match *m, const struct sk_buff *skb, - struct xt_match_param *par) -{ - par->match = m->u.kernel.match; - par->matchinfo = m->data; - - /* Stop iteration if it doesn't match */ - if (!m->u.kernel.match->match(skb, par)) - return true; - else - return false; -} - /* Performance critical */ static inline struct ipt_entry * get_entry(const void *base, unsigned int offset) @@ -379,9 +364,12 @@ ipt_do_table(struct sk_buff *skb, continue; } - xt_ematch_foreach(ematch, e) - if (do_match(ematch, skb, &mtpar) != 0) + xt_ematch_foreach(ematch, e) { + mtpar.match = ematch->u.kernel.match; + mtpar.matchinfo = ematch->data; + if (!mtpar.match->match(skb, &mtpar)) goto no_match; + } ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 7afa11773164..f8ac4a0b5899 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -205,21 +205,6 @@ ip6t_error(struct sk_buff *skb, const struct xt_target_param *par) return NF_DROP; } -/* Performance critical - called for every packet */ -static inline bool -do_match(const struct ip6t_entry_match *m, const struct sk_buff *skb, - struct xt_match_param *par) -{ - par->match = m->u.kernel.match; - par->matchinfo = m->data; - - /* Stop iteration if it doesn't match */ - if (!m->u.kernel.match->match(skb, par)) - return true; - else - return false; -} - static inline struct ip6t_entry * get_entry(const void *base, unsigned int offset) { @@ -402,9 +387,12 @@ ip6t_do_table(struct sk_buff *skb, continue; } - xt_ematch_foreach(ematch, e) - if (do_match(ematch, skb, &mtpar) != 0) + xt_ematch_foreach(ematch, e) { + mtpar.match = ematch->u.kernel.match; + mtpar.matchinfo = ematch->data; + if (!mtpar.match->match(skb, &mtpar)) goto no_match; + } ADD_COUNTER(e->counters, ntohs(ipv6_hdr(skb)->payload_len) + -- cgit v1.2.2 From dee42870a423ad485129f43cddfe7275479f11d8 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sun, 2 May 2010 05:42:16 +0000 Subject: net: fix softnet_stat Per cpu variable softnet_data.total was shared between IRQ and SoftIRQ context without any protection. And enqueue_to_backlog should update the netdev_rx_stat of the target CPU. This patch renames softnet_data.total to softnet_data.processed: the number of packets processed in uppper levels(IP stacks). softnet_stat data is moved into softnet_data. Signed-off-by: Changli Gao ---- include/linux/netdevice.h | 17 +++++++---------- net/core/dev.c | 26 ++++++++++++-------------- net/sched/sch_generic.c | 2 +- 3 files changed, 20 insertions(+), 25 deletions(-) Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/dev.c | 26 ++++++++++++-------------- net/sched/sch_generic.c | 2 +- 2 files changed, 13 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 100dcbd29739..36d53be4fca6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2205,8 +2205,6 @@ int netdev_max_backlog __read_mostly = 1000; int netdev_budget __read_mostly = 300; int weight_p __read_mostly = 64; /* old backlog weight */ -DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; - #ifdef CONFIG_RPS /* One global table that all flow-based protocols share. */ @@ -2366,7 +2364,7 @@ static void rps_trigger_softirq(void *data) struct softnet_data *sd = data; __napi_schedule(&sd->backlog); - __get_cpu_var(netdev_rx_stat).received_rps++; + sd->received_rps++; } #endif /* CONFIG_RPS */ @@ -2405,7 +2403,6 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, sd = &per_cpu(softnet_data, cpu); local_irq_save(flags); - __get_cpu_var(netdev_rx_stat).total++; rps_lock(sd); if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) { @@ -2429,9 +2426,9 @@ enqueue: goto enqueue; } + sd->dropped++; rps_unlock(sd); - __get_cpu_var(netdev_rx_stat).dropped++; local_irq_restore(flags); kfree_skb(skb); @@ -2806,7 +2803,7 @@ static int __netif_receive_skb(struct sk_buff *skb) skb->dev = master; } - __get_cpu_var(netdev_rx_stat).total++; + __get_cpu_var(softnet_data).processed++; skb_reset_network_header(skb); skb_reset_transport_header(skb); @@ -3490,7 +3487,7 @@ out: return; softnet_break: - __get_cpu_var(netdev_rx_stat).time_squeeze++; + sd->time_squeeze++; __raise_softirq_irqoff(NET_RX_SOFTIRQ); goto out; } @@ -3691,17 +3688,17 @@ static int dev_seq_show(struct seq_file *seq, void *v) return 0; } -static struct netif_rx_stats *softnet_get_online(loff_t *pos) +static struct softnet_data *softnet_get_online(loff_t *pos) { - struct netif_rx_stats *rc = NULL; + struct softnet_data *sd = NULL; while (*pos < nr_cpu_ids) if (cpu_online(*pos)) { - rc = &per_cpu(netdev_rx_stat, *pos); + sd = &per_cpu(softnet_data, *pos); break; } else ++*pos; - return rc; + return sd; } static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) @@ -3721,12 +3718,12 @@ static void softnet_seq_stop(struct seq_file *seq, void *v) static int softnet_seq_show(struct seq_file *seq, void *v) { - struct netif_rx_stats *s = v; + struct softnet_data *sd = v; seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", - s->total, s->dropped, s->time_squeeze, 0, + sd->processed, sd->dropped, sd->time_squeeze, 0, 0, 0, 0, 0, /* was fastroute */ - s->cpu_collision, s->received_rps); + sd->cpu_collision, sd->received_rps); return 0; } @@ -5869,6 +5866,7 @@ static int __init net_dev_init(void) for_each_possible_cpu(i) { struct softnet_data *sd = &per_cpu(softnet_data, i); + memset(sd, 0, sizeof(*sd)); skb_queue_head_init(&sd->input_pkt_queue); skb_queue_head_init(&sd->process_queue); sd->completion_queue = NULL; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index aeddabfb8e4e..a969b111bd76 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -94,7 +94,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, * Another cpu is holding lock, requeue & delay xmits for * some time. 
*/ - __get_cpu_var(netdev_rx_stat).cpu_collision++; + __get_cpu_var(softnet_data).cpu_collision++; ret = dev_requeue_skb(skb, q); } -- cgit v1.2.2 From b48fa6b99100dc7772af3cd276035fcec9719ceb Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Mon, 1 Mar 2010 16:51:14 +1100 Subject: sunrpc: centralise most calls to svc_xprt_received svc_xprt_received must be called when ->xpo_recvfrom has finished receiving a message, so that the XPT_BUSY flag will be cleared and if necessary, requeued for further work. This call is currently made in each ->xpo_recvfrom function, often from multiple different points. In each case it is the earliest point on a particular path where it is known that the protection provided by XPT_BUSY is no longer needed. However there are (still) some error paths which do not call svc_xprt_received, and requiring each ->xpo_recvfrom to make the call does not encourage robustness. So: move the svc_xprt_received call to be made just after the call to ->xpo_recvfrom(), and move it of the various ->xpo_recvfrom methods. This means that it may not be called at the earliest possible instant, but this is unlikely to be a measurable performance issue. Note that there are still other calls to svc_xprt_received as it is also needed when an xprt is newly created. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 4 +++- net/sunrpc/svcsock.c | 15 +-------------- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 3 --- 3 files changed, 4 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index c334f5413c60..75f9aa2eb089 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -743,8 +743,10 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) if (rqstp->rq_deferred) { svc_xprt_received(xprt); len = svc_deferred_recv(rqstp); - } else + } else { len = xprt->xpt_ops->xpo_recvfrom(rqstp); + svc_xprt_received(xprt); + } dprintk("svc: got len=%d\n", len); } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index a29f259204e6..a33892733643 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -547,7 +547,6 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) dprintk("svc: recvfrom returned error %d\n", -err); set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); } - svc_xprt_received(&svsk->sk_xprt); return -EAGAIN; } len = svc_addr_len(svc_addr(rqstp)); @@ -562,11 +561,6 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) svsk->sk_sk->sk_stamp = skb->tstamp; set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */ - /* - * Maybe more packets - kick another thread ASAP. 
- */ - svc_xprt_received(&svsk->sk_xprt); - len = skb->len - sizeof(struct udphdr); rqstp->rq_arg.len = len; @@ -917,7 +911,6 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) if (len < want) { dprintk("svc: short recvfrom while reading record " "length (%d of %d)\n", len, want); - svc_xprt_received(&svsk->sk_xprt); goto err_again; /* record header not complete */ } @@ -953,7 +946,6 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) if (len < svsk->sk_reclen) { dprintk("svc: incomplete TCP record (%d of %d)\n", len, svsk->sk_reclen); - svc_xprt_received(&svsk->sk_xprt); goto err_again; /* record not complete */ } len = svsk->sk_reclen; @@ -961,14 +953,11 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) return len; error: - if (len == -EAGAIN) { + if (len == -EAGAIN) dprintk("RPC: TCP recv_record got EAGAIN\n"); - svc_xprt_received(&svsk->sk_xprt); - } return len; err_delete: set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); - svc_xprt_received(&svsk->sk_xprt); err_again: return -EAGAIN; } @@ -1110,7 +1099,6 @@ out: svsk->sk_tcplen = 0; svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt); - svc_xprt_received(&svsk->sk_xprt); if (serv->sv_stats) serv->sv_stats->nettcpcnt++; @@ -1119,7 +1107,6 @@ out: err_again: if (len == -EAGAIN) { dprintk("RPC: TCP recvfrom got EAGAIN\n"); - svc_xprt_received(&svsk->sk_xprt); return len; } error: diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index f92e37eb413c..0194de814933 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -566,7 +566,6 @@ static int rdma_read_complete(struct svc_rqst *rqstp, ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base, rqstp->rq_arg.head[0].iov_len); - svc_xprt_received(rqstp->rq_xprt); return ret; } @@ -665,7 +664,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) rqstp->rq_arg.head[0].iov_len); rqstp->rq_prot = IPPROTO_MAX; svc_xprt_copy_addrs(rqstp, xprt); - svc_xprt_received(xprt); return ret; close_out: @@ -678,6 +676,5 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) */ set_bit(XPT_CLOSE, &xprt->xpt_flags); defer: - svc_xprt_received(xprt); return 0; } -- cgit v1.2.2 From f7c65594f7148b778f41d591a701e94bb22428e4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 30 Apr 2010 13:48:36 +0200 Subject: mac80211: fix ieee80211_find_sta[_by_hw] Both of these functions can currently return a station pointer that, to the driver, is invalid (in IBSS mode only) because adding the station failed. Check for that, and also make ieee80211_find_sta() properly use the per interface station search. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/sta_info.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 3de7a2260d65..730197591ab5 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -855,8 +855,12 @@ struct ieee80211_sta *ieee80211_find_sta_by_hw(struct ieee80211_hw *hw, struct sta_info *sta, *nxt; /* Just return a random station ... first in list ... 
*/ - for_each_sta_info(hw_to_local(hw), addr, sta, nxt) + for_each_sta_info(hw_to_local(hw), addr, sta, nxt) { + if (!sta->uploaded) + return NULL; return &sta->sta; + } + return NULL; } EXPORT_SYMBOL_GPL(ieee80211_find_sta_by_hw); @@ -864,14 +868,19 @@ EXPORT_SYMBOL_GPL(ieee80211_find_sta_by_hw); struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif, const u8 *addr) { - struct ieee80211_sub_if_data *sdata; + struct sta_info *sta; if (!vif) return NULL; - sdata = vif_to_sdata(vif); + sta = sta_info_get_bss(vif_to_sdata(vif), addr); + if (!sta) + return NULL; + + if (!sta->uploaded) + return NULL; - return ieee80211_find_sta_by_hw(&sdata->local->hw, addr); + return &sta->sta; } EXPORT_SYMBOL(ieee80211_find_sta); -- cgit v1.2.2 From a75b4363eaafa99d909da4f1192322a78b074c73 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 1 May 2010 18:53:51 +0200 Subject: mac80211: allow controlling aggregation manually This allows enabling TX and disabling both TX and RX aggregation sessions manually in debugfs. It is very useful for debugging session initiation and teardown problems since with this you don't have to force a lot of traffic to get aggregation and thus have less data to analyse. Also, to debug mac80211 code itself, make hwsim "support" aggregation sessions. It will still just transfer the frame, but go through the setup and teardown handshakes. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/debugfs_sta.c | 65 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 6bc9b07c3eda..e763f1529ddb 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -39,6 +39,13 @@ static const struct file_operations sta_ ##name## _ops = { \ .open = mac80211_open_file_generic, \ } +#define STA_OPS_RW(name) \ +static const struct file_operations sta_ ##name## _ops = { \ + .read = sta_##name##_read, \ + .write = sta_##name##_write, \ + .open = mac80211_open_file_generic, \ +} + #define STA_FILE(name, field, format) \ STA_READ_##format(name, field) \ STA_OPS(name) @@ -156,7 +163,63 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); } -STA_OPS(agg_status); + +static ssize_t sta_agg_status_write(struct file *file, const char __user *userbuf, + size_t count, loff_t *ppos) +{ + char _buf[12], *buf = _buf; + struct sta_info *sta = file->private_data; + bool start, tx; + unsigned long tid; + int ret; + + if (count > sizeof(_buf)) + return -EINVAL; + + if (copy_from_user(buf, userbuf, count)) + return -EFAULT; + + buf[sizeof(_buf) - 1] = '\0'; + + if (strncmp(buf, "tx ", 3) == 0) { + buf += 3; + tx = true; + } else if (strncmp(buf, "rx ", 3) == 0) { + buf += 3; + tx = false; + } else + return -EINVAL; + + if (strncmp(buf, "start ", 6) == 0) { + buf += 6; + start = true; + if (!tx) + return -EINVAL; + } else if (strncmp(buf, "stop ", 5) == 0) { + buf += 5; + start = false; + } else + return -EINVAL; + + tid = simple_strtoul(buf, NULL, 0); + + if (tid >= STA_TID_NUM) + return -EINVAL; + + if (tx) { + if (start) + ret = ieee80211_start_tx_ba_session(&sta->sta, tid); + else + ret = ieee80211_stop_tx_ba_session(&sta->sta, tid, + WLAN_BACK_RECIPIENT); + } else { + __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT, 3); + ret = 0; + } + + return ret ?: count; +} +STA_OPS_RW(agg_status); static ssize_t sta_ht_capa_read(struct 
file *file, char __user *userbuf, size_t count, loff_t *ppos) -- cgit v1.2.2 From be4a4b6a5d2f76393f545a2545fbaa1b65577e13 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 3 May 2010 08:49:48 +0200 Subject: mac80211: improve IBSS scanning When IBSS is fixed to a frequency, it can still scan to try to find the right BSSID. This makes sense if the BSSID isn't also fixed, but it need not scan all channels -- just one is sufficient. Make it do that by moving the scan setup code to ieee80211_request_internal_scan() and include a channel variable setting. Note that this can be further improved to start the IBSS right away if both frequency and BSSID are fixed. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ibss.c | 9 ++++++--- net/mac80211/ieee80211_i.h | 3 ++- net/mac80211/main.c | 17 +---------------- net/mac80211/scan.c | 28 +++++++++++++++++++++++++++- 4 files changed, 36 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index c585fced8584..ba752362b2b2 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -488,7 +488,9 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata) printk(KERN_DEBUG "%s: No active IBSS STAs - trying to scan for other " "IBSS networks with same SSID (merge)\n", sdata->name); - ieee80211_request_internal_scan(sdata, ifibss->ssid, ifibss->ssid_len); + ieee80211_request_internal_scan(sdata, + ifibss->ssid, ifibss->ssid_len, + ifibss->fixed_channel ? ifibss->channel : NULL); } static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) @@ -595,8 +597,9 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) printk(KERN_DEBUG "%s: Trigger new scan to find an IBSS to " "join\n", sdata->name); - ieee80211_request_internal_scan(sdata, ifibss->ssid, - ifibss->ssid_len); + ieee80211_request_internal_scan(sdata, + ifibss->ssid, ifibss->ssid_len, + ifibss->fixed_channel ? 
ifibss->channel : NULL); } else { int interval = IEEE80211_SCAN_INTERVAL; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 4e73660ebe99..c8077a3647c6 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1020,7 +1020,8 @@ void ieee80211_ibss_restart(struct ieee80211_sub_if_data *sdata); /* scan/BSS handling */ void ieee80211_scan_work(struct work_struct *work); int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata, - const u8 *ssid, u8 ssid_len); + const u8 *ssid, u8 ssid_len, + struct ieee80211_channel *chan); int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, struct cfg80211_scan_request *req); void ieee80211_scan_cancel(struct ieee80211_local *local); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index ebcca0eaf1dc..353b6b42d9c5 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -439,7 +439,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) struct ieee80211_local *local = hw_to_local(hw); int result; enum ieee80211_band band; - int channels, i, j, max_bitrates; + int channels, max_bitrates; bool supp_ht; static const u32 cipher_suites[] = { WLAN_CIPHER_SUITE_WEP40, @@ -605,21 +605,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) ieee80211_led_init(local); - /* alloc internal scan request */ - i = 0; - local->int_scan_req->ssids = &local->scan_ssid; - local->int_scan_req->n_ssids = 1; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { - if (!hw->wiphy->bands[band]) - continue; - for (j = 0; j < hw->wiphy->bands[band]->n_channels; j++) { - local->int_scan_req->channels[i] = - &hw->wiphy->bands[band]->channels[j]; - i++; - } - } - local->int_scan_req->n_channels = i; - local->network_latency_notifier.notifier_call = ieee80211_max_network_latency; result = pm_qos_add_notifier(PM_QOS_NETWORK_LATENCY, diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index a9d40584e383..414651217b49 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -728,10 +728,12 @@ int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, } int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata, - const u8 *ssid, u8 ssid_len) + const u8 *ssid, u8 ssid_len, + struct ieee80211_channel *chan) { struct ieee80211_local *local = sdata->local; int ret = -EBUSY; + enum nl80211_band band; mutex_lock(&local->scan_mtx); @@ -739,6 +741,30 @@ int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata, if (local->scan_req) goto unlock; + /* fill internal scan request */ + if (!chan) { + int i, nchan = 0; + + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + if (!local->hw.wiphy->bands[band]) + continue; + for (i = 0; + i < local->hw.wiphy->bands[band]->n_channels; + i++) { + local->int_scan_req->channels[nchan] = + &local->hw.wiphy->bands[band]->channels[i]; + nchan++; + } + } + + local->int_scan_req->n_channels = nchan; + } else { + local->int_scan_req->channels[0] = chan; + local->int_scan_req->n_channels = 1; + } + + local->int_scan_req->ssids = &local->scan_ssid; + local->int_scan_req->n_ssids = 1; memcpy(local->int_scan_req->ssids[0].ssid, ssid, IEEE80211_MAX_SSID_LEN); local->int_scan_req->ssids[0].ssid_len = ssid_len; -- cgit v1.2.2 From a2f3be17c07ad9bd45ab300f79642ecb39cfb553 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 3 May 2010 03:22:18 +0000 Subject: unix/garbage: kill copy of the skb queue walker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Worse yet, it seems that its arguments 
were in reverse order. Also remove one related helper which seems hardly worth keeping. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/unix/garbage.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 14c22c3768da..c8df6fda0b1f 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -153,15 +153,6 @@ void unix_notinflight(struct file *fp) } } -static inline struct sk_buff *sock_queue_head(struct sock *sk) -{ - return (struct sk_buff *)&sk->sk_receive_queue; -} - -#define receive_queue_for_each_skb(sk, next, skb) \ - for (skb = sock_queue_head(sk)->next, next = skb->next; \ - skb != sock_queue_head(sk); skb = next, next = skb->next) - static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), struct sk_buff_head *hitlist) { @@ -169,7 +160,7 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), struct sk_buff *next; spin_lock(&x->sk_receive_queue.lock); - receive_queue_for_each_skb(x, next, skb) { + skb_queue_walk_safe(&x->sk_receive_queue, skb, next) { /* * Do we have file descriptors ? */ @@ -225,7 +216,7 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *), * and perform a scan on them as well. */ spin_lock(&x->sk_receive_queue.lock); - receive_queue_for_each_skb(x, next, skb) { + skb_queue_walk_safe(&x->sk_receive_queue, skb, next) { u = unix_sk(skb->sk); /* -- cgit v1.2.2 From 4f70ecca9c57731b4acbe5043eb22e4416bd2368 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 3 May 2010 10:50:14 +0000 Subject: net: rcu fixes Add hlist_for_each_entry_rcu_bh() and hlist_for_each_entry_continue_rcu_bh() macros, and use them in ipv6_get_ifaddr(), if6_get_first() and if6_get_next() to fix lockdeps warnings. Signed-off-by: Eric Dumazet Reviewed-by: "Paul E. McKenney" Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 34d2d649e396..3984f52181f4 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1346,7 +1346,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add struct hlist_node *node; rcu_read_lock_bh(); - hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) { + hlist_for_each_entry_rcu_bh(ifp, node, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr)) { @@ -2959,7 +2959,7 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq) for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { struct hlist_node *n; - hlist_for_each_entry_rcu(ifa, n, &inet6_addr_lst[state->bucket], + hlist_for_each_entry_rcu_bh(ifa, n, &inet6_addr_lst[state->bucket], addr_lst) if (net_eq(dev_net(ifa->idev->dev), net)) return ifa; @@ -2974,12 +2974,12 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct net *net = seq_file_net(seq); struct hlist_node *n = &ifa->addr_lst; - hlist_for_each_entry_continue_rcu(ifa, n, addr_lst) + hlist_for_each_entry_continue_rcu_bh(ifa, n, addr_lst) if (net_eq(dev_net(ifa->idev->dev), net)) return ifa; while (++state->bucket < IN6_ADDR_HSIZE) { - hlist_for_each_entry(ifa, n, + hlist_for_each_entry_rcu_bh(ifa, n, &inet6_addr_lst[state->bucket], addr_lst) { if (net_eq(dev_net(ifa->idev->dev), net)) return ifa; @@ -3000,7 +3000,7 @@ static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos) } static void *if6_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(rcu) + __acquires(rcu_bh) { rcu_read_lock_bh(); return if6_get_idx(seq, *pos); @@ -3016,7 +3016,7 @@ static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void if6_seq_stop(struct seq_file *seq, void *v) - __releases(rcu) + __releases(rcu_bh) { rcu_read_unlock_bh(); } @@ -3093,7 +3093,7 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr) unsigned int hash = ipv6_addr_hash(addr); rcu_read_lock_bh(); - hlist_for_each_entry_rcu(ifp, n, &inet6_addr_lst[hash], addr_lst) { + hlist_for_each_entry_rcu_bh(ifp, n, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr) && @@ -3127,7 +3127,7 @@ static void addrconf_verify(unsigned long foo) for (i = 0; i < IN6_ADDR_HSIZE; i++) { restart: - hlist_for_each_entry_rcu(ifp, node, + hlist_for_each_entry_rcu_bh(ifp, node, &inet6_addr_lst[i], addr_lst) { unsigned long age; -- cgit v1.2.2 From 93bb64eac10aad3dae6178d7da94765f207d121f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 3 May 2010 23:18:14 -0700 Subject: net: skb_free_datagram_locked() fix Commit 4b0b72f7dd617b ( net: speedup udp receive path ) introduced a bug in skb_free_datagram_locked(). We should not skb_orphan() skb if we dont have the guarantee we are the last skb user, this might happen with MSG_PEEK concurrent users. To keep socket locked for the smallest period of time, we split consume_skb() logic, inlined in skb_free_datagram_locked() Reported-by: Stephen Hemminger Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/datagram.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/datagram.c b/net/core/datagram.c index 95b851f3d713..e0097531417a 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -229,13 +229,18 @@ EXPORT_SYMBOL(skb_free_datagram); void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) { + if (likely(atomic_read(&skb->users) == 1)) + smp_rmb(); + else if (likely(!atomic_dec_and_test(&skb->users))) + return; + lock_sock_bh(sk); skb_orphan(skb); sk_mem_reclaim_partial(sk); unlock_sock_bh(sk); - /* skb is now orphaned, might be freed outside of locked section */ - consume_skb(skb); + /* skb is now orphaned, can be freed outside of locked section */ + __kfree_skb(skb); } EXPORT_SYMBOL(skb_free_datagram_locked); -- cgit v1.2.2 From f935aa9e99d6ec74a50871c120e6b21de7256efb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 3 May 2010 23:42:27 -0700 Subject: ipv6: Fix default multicast hops setting. As per RFC 3493 the default multicast hops setting for a socket should be "1" just like ipv4. Ironically we have a IPV6_DEFAULT_MCASTHOPS macro it just wasn't being used. Reported-by: Elliot Hughes Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3192aa02ba5d..3f9e86b15e0d 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -200,7 +200,7 @@ lookup_protocol: inet_sk(sk)->pinet6 = np = inet6_sk_generic(sk); np->hop_limit = -1; - np->mcast_hops = -1; + np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; np->mc_loop = 1; np->pmtudisc = IPV6_PMTUDISC_WANT; np->ipv6only = net->ipv6.sysctl.bindv6only; -- cgit v1.2.2 From ec7d2f2cf3a1b76202986519ec4f8ec75b2de232 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 May 2010 01:07:37 -0700 Subject: net: __alloc_skb() speedup With following patch I can reach maximum rate of my pktgen+udpsink simulator : - 'old' machine : dual quad core E5450 @3.00GHz - 64 UDP rx flows (only differ by destination port) - RPS enabled, NIC interrupts serviced on cpu0 - rps dispatched on 7 other cores. (~130.000 IPI per second) - SLAB allocator (faster than SLUB in this workload) - tg3 NIC - 1.080.000 pps without a single drop at NIC level. Idea is to add two prefetchw() calls in __alloc_skb(), one to prefetch first sk_buff cache line, the second to prefetch the shinfo part. Also using one memset() to initialize all skb_shared_info fields instead of one by one to reduce number of instructions, using long word moves. All skb_shared_info fields before 'dataref' are cleared in __alloc_skb(). Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/skbuff.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8b9c109166a7..a9b0e1f77806 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -181,12 +181,14 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node); if (!skb) goto out; + prefetchw(skb); size = SKB_DATA_ALIGN(size); data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info), gfp_mask, node); if (!data) goto nodata; + prefetchw(data + size); /* * Only clear those fields we need to clear, not those that we will @@ -208,15 +210,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, /* make sure we initialize shinfo sequentially */ shinfo = skb_shinfo(skb); + memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); atomic_set(&shinfo->dataref, 1); - shinfo->nr_frags = 0; - shinfo->gso_size = 0; - shinfo->gso_segs = 0; - shinfo->gso_type = 0; - shinfo->ip6_frag_id = 0; - shinfo->tx_flags.flags = 0; - skb_frag_list_init(skb); - memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps)); if (fclone) { struct sk_buff *child = skb + 1; @@ -505,16 +500,10 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size) return 0; skb_release_head_state(skb); + shinfo = skb_shinfo(skb); + memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); atomic_set(&shinfo->dataref, 1); - shinfo->nr_frags = 0; - shinfo->gso_size = 0; - shinfo->gso_segs = 0; - shinfo->gso_type = 0; - shinfo->ip6_frag_id = 0; - shinfo->tx_flags.flags = 0; - skb_frag_list_init(skb); - memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps)); memset(skb, 0, offsetof(struct sk_buff, tail)); skb->data = skb->head + NET_SKB_PAD; -- cgit v1.2.2 From adfba3c7c026a6a5560d2a43fefc9b198cb74462 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 5 May 2010 15:33:55 +0200 Subject: mac80211: use fixed channel in ibss join when appropriate "mac80211: improve IBSS scanning" was missing a hunk. This adds that hunk as originally intended. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/ibss.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index ba752362b2b2..d5855ae387e8 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -907,6 +907,12 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, sdata->u.ibss.channel = params->channel; sdata->u.ibss.fixed_channel = params->channel_fixed; + /* fix ourselves to that channel now already */ + if (params->channel_fixed) { + sdata->local->oper_channel = params->channel; + sdata->local->oper_channel_type = NL80211_CHAN_NO_HT; + } + if (params->ie) { sdata->u.ibss.ie = kmemdup(params->ie, params->ie_len, GFP_KERNEL); -- cgit v1.2.2 From d40a4de0be08f005814a4fddac748fe5353208ec Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Mon, 3 May 2010 15:44:27 +0000 Subject: IPv6: fix IPV6_RECVERR handling of locally-generated errors I noticed when I added support for IPV6_DONTFRAG that if you set IPV6_RECVERR and tried to send a UDP packet larger than 64K to an IPv6 destination, you'd correctly get an EMSGSIZE, but reading from MSG_ERRQUEUE returned the incorrect address in the cmsg: struct msghdr: msg_name 0x7fff8f3c96d0 msg_namelen 28 struct sockaddr_in6: sin6_family 10 sin6_port 7639 sin6_flowinfo 0 sin6_addr ::ffff:38.32.0.0 sin6_scope_id 0 ((null)) It should have returned this in my case: struct msghdr: msg_name 0x7fffd866b510 msg_namelen 28 struct sockaddr_in6: sin6_family 10 sin6_port 7639 sin6_flowinfo 0 sin6_addr 2620:0:a09:e000:21f:29ff:fe57:f88b sin6_scope_id 0 ((null)) The problem is that ipv6_recv_error() assumes that if the error wasn't generated by ICMPv6, it's an IPv4 address sitting there, and proceeds to create a v4-mapped address from it. Change ipv6_icmp_error() and ipv6_local_error() to set skb->protocol to htons(ETH_P_IPV6) so that ipv6_recv_error() knows the address sitting right after the extended error is IPv6, else it will incorrectly map the first octet into an IPv4-mapped IPv6 address in the cmsg structure returned in a recvmsg() call to obtain the error. Signed-off-by: Brian Haley -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Signed-off-by: David S. 
Miller --- net/ipv6/datagram.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 622dc7939a1b..61573885e451 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -222,6 +222,8 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, if (!skb) return; + skb->protocol = htons(ETH_P_IPV6); + serr = SKB_EXT_ERR(skb); serr->ee.ee_errno = err; serr->ee.ee_origin = SO_EE_ORIGIN_ICMP6; @@ -255,6 +257,8 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info) if (!skb) return; + skb->protocol = htons(ETH_P_IPV6); + skb_put(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); iph = ipv6_hdr(skb); @@ -319,7 +323,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) sin->sin6_flowinfo = 0; sin->sin6_port = serr->port; sin->sin6_scope_id = 0; - if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) { + if (skb->protocol == htons(ETH_P_IPV6)) { ipv6_addr_copy(&sin->sin6_addr, (struct in6_addr *)(nh + serr->addr_offset)); if (np->sndflow) @@ -341,7 +345,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; sin->sin6_scope_id = 0; - if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) { + if (skb->protocol == htons(ETH_P_IPV6)) { ipv6_addr_copy(&sin->sin6_addr, &ipv6_hdr(skb)->saddr); if (np->rxopt.all) datagram_recv_ctl(sk, msg, skb); -- cgit v1.2.2 From 0e34e93177fb1f642cab080e0bde664c06c7183a Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 6 May 2010 00:47:21 -0700 Subject: netpoll: add generic support for bridge and bonding devices This whole patchset is for adding netpoll support to bridge and bonding devices. I already tested it for bridge, bonding, bridge over bonding, and bonding over bridge. It looks fine now. To make bridge and bonding support netpoll, we need to adjust some netpoll generic code. This patch does the following things: 1) introduce two new priv_flags for struct net_device: IFF_IN_NETPOLL which identifies we are processing a netpoll; IFF_DISABLE_NETPOLL is used to disable netpoll support for a device at run-time; 2) introduce one new method for netdev_ops: ->ndo_netpoll_cleanup() is used to clean up netpoll when a device is removed. 3) introduce netpoll_poll_dev() which takes a struct net_device * parameter; export netpoll_send_skb() and netpoll_poll_dev() which will be used later; 4) hide a pointer to struct netpoll in struct netpoll_info, ditto. 5) introduce ->real_dev for struct netpoll. 6) introduce a new status NETDEV_BONDING_DESLAE, which is used to disable netconsole before releasing a slave, to avoid deadlocks. Cc: David Miller Cc: Neil Horman Signed-off-by: WANG Cong Signed-off-by: David S. 
Miller --- net/core/netpoll.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index a58f59b97597..94825b109551 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -179,9 +179,8 @@ static void service_arp_queue(struct netpoll_info *npi) } } -void netpoll_poll(struct netpoll *np) +void netpoll_poll_dev(struct net_device *dev) { - struct net_device *dev = np->dev; const struct net_device_ops *ops; if (!dev || !netif_running(dev)) @@ -201,6 +200,11 @@ void netpoll_poll(struct netpoll *np) zap_completion_queue(); } +void netpoll_poll(struct netpoll *np) +{ + netpoll_poll_dev(np->dev); +} + static void refill_skbs(void) { struct sk_buff *skb; @@ -282,7 +286,7 @@ static int netpoll_owner_active(struct net_device *dev) return 0; } -static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) +void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { int status = NETDEV_TX_BUSY; unsigned long tries; @@ -308,7 +312,9 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) tries > 0; --tries) { if (__netif_tx_trylock(txq)) { if (!netif_tx_queue_stopped(txq)) { + dev->priv_flags |= IFF_IN_NETPOLL; status = ops->ndo_start_xmit(skb, dev); + dev->priv_flags &= ~IFF_IN_NETPOLL; if (status == NETDEV_TX_OK) txq_trans_update(txq); } @@ -756,7 +762,10 @@ int netpoll_setup(struct netpoll *np) atomic_inc(&npinfo->refcnt); } - if (!ndev->netdev_ops->ndo_poll_controller) { + npinfo->netpoll = np; + + if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) || + !ndev->netdev_ops->ndo_poll_controller) { printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", np->name, np->dev_name); err = -ENOTSUPP; @@ -878,6 +887,7 @@ void netpoll_cleanup(struct netpoll *np) } if (atomic_dec_and_test(&npinfo->refcnt)) { + const struct net_device_ops *ops; skb_queue_purge(&npinfo->arp_tx); skb_queue_purge(&npinfo->txq); cancel_rearming_delayed_work(&npinfo->tx_work); @@ -885,7 +895,11 @@ void netpoll_cleanup(struct netpoll *np) /* clean after last, unfinished work */ __skb_queue_purge(&npinfo->txq); kfree(npinfo); - np->dev->npinfo = NULL; + ops = np->dev->netdev_ops; + if (ops->ndo_netpoll_cleanup) + ops->ndo_netpoll_cleanup(np->dev); + else + np->dev->npinfo = NULL; } } @@ -908,6 +922,7 @@ void netpoll_set_trap(int trap) atomic_dec(&trapped); } +EXPORT_SYMBOL(netpoll_send_skb); EXPORT_SYMBOL(netpoll_set_trap); EXPORT_SYMBOL(netpoll_trap); EXPORT_SYMBOL(netpoll_print_options); @@ -915,4 +930,5 @@ EXPORT_SYMBOL(netpoll_parse_options); EXPORT_SYMBOL(netpoll_setup); EXPORT_SYMBOL(netpoll_cleanup); EXPORT_SYMBOL(netpoll_send_udp); +EXPORT_SYMBOL(netpoll_poll_dev); EXPORT_SYMBOL(netpoll_poll); -- cgit v1.2.2 From c06ee961d3c0e51009cbd0e123b61fbb97f37d0b Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 6 May 2010 00:48:24 -0700 Subject: bridge: make bridge support netpoll Based on the previous patch, make bridge support netpoll by: 1) implement the 2 methods to support netpoll for bridge; 2) modify netpoll during forwarding packets via bridge; 3) disable netpoll support of bridge when a netpoll-unabled device is added to bridge; 4) enable netpoll support when all underlying devices support netpoll. Cc: David Miller Cc: Neil Horman Cc: Stephen Hemminger Cc: Matt Mackall Signed-off-by: WANG Cong Signed-off-by: David S. 
Miller --- net/bridge/br_device.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++ net/bridge/br_forward.c | 23 ++++++++++++++++++- net/bridge/br_if.c | 25 +++++++++++++++++++++ net/bridge/br_private.h | 2 ++ 4 files changed, 108 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 82599405dc15..074c59690fc5 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -13,8 +13,10 @@ #include #include +#include #include #include +#include #include #include "br_private.h" @@ -188,6 +190,59 @@ static int br_set_tx_csum(struct net_device *dev, u32 data) return 0; } +#ifdef CONFIG_NET_POLL_CONTROLLER +bool br_devices_support_netpoll(struct net_bridge *br) +{ + struct net_bridge_port *p; + bool ret = true; + int count = 0; + unsigned long flags; + + spin_lock_irqsave(&br->lock, flags); + list_for_each_entry(p, &br->port_list, list) { + count++; + if ((p->dev->priv_flags & IFF_DISABLE_NETPOLL) || + !p->dev->netdev_ops->ndo_poll_controller) + ret = false; + } + spin_unlock_irqrestore(&br->lock, flags); + return count != 0 && ret; +} + +static void br_poll_controller(struct net_device *br_dev) +{ + struct netpoll *np = br_dev->npinfo->netpoll; + + if (np->real_dev != br_dev) + netpoll_poll_dev(np->real_dev); +} + +void br_netpoll_cleanup(struct net_device *br_dev) +{ + struct net_bridge *br = netdev_priv(br_dev); + struct net_bridge_port *p, *n; + const struct net_device_ops *ops; + + br->dev->npinfo = NULL; + list_for_each_entry_safe(p, n, &br->port_list, list) { + if (p->dev) { + ops = p->dev->netdev_ops; + if (ops->ndo_netpoll_cleanup) + ops->ndo_netpoll_cleanup(p->dev); + else + p->dev->npinfo = NULL; + } + } +} + +#else + +void br_netpoll_cleanup(struct net_device *br_dev) +{ +} + +#endif + static const struct ethtool_ops br_ethtool_ops = { .get_drvinfo = br_getinfo, .get_link = ethtool_op_get_link, @@ -211,6 +266,10 @@ static const struct net_device_ops br_netdev_ops = { .ndo_set_multicast_list = br_dev_set_multicast_list, .ndo_change_mtu = br_change_mtu, .ndo_do_ioctl = br_dev_ioctl, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_netpoll_cleanup = br_netpoll_cleanup, + .ndo_poll_controller = br_poll_controller, +#endif }; static void br_dev_free(struct net_device *dev) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 396f077216a3..92ad9feb199d 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -50,7 +51,13 @@ int br_dev_queue_push_xmit(struct sk_buff *skb) else { skb_push(skb, ETH_HLEN); - dev_queue_xmit(skb); +#ifdef CONFIG_NET_POLL_CONTROLLER + if (unlikely(skb->dev->priv_flags & IFF_IN_NETPOLL)) { + netpoll_send_skb(skb->dev->npinfo->netpoll, skb); + skb->dev->priv_flags &= ~IFF_IN_NETPOLL; + } else +#endif + dev_queue_xmit(skb); } } @@ -66,9 +73,23 @@ int br_forward_finish(struct sk_buff *skb) static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) { +#ifdef CONFIG_NET_POLL_CONTROLLER + struct net_bridge *br = to->br; + if (unlikely(br->dev->priv_flags & IFF_IN_NETPOLL)) { + struct netpoll *np; + to->dev->npinfo = skb->dev->npinfo; + np = skb->dev->npinfo->netpoll; + np->real_dev = np->dev = to->dev; + to->dev->priv_flags |= IFF_IN_NETPOLL; + } +#endif skb->dev = to->dev; NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, br_forward_finish); +#ifdef CONFIG_NET_POLL_CONTROLLER + if (skb->dev->npinfo) + skb->dev->npinfo->netpoll->dev = br->dev; +#endif } static void 
__br_forward(const struct net_bridge_port *to, struct sk_buff *skb) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 521439333316..537bdd60d9b9 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -153,6 +154,14 @@ static void del_nbp(struct net_bridge_port *p) kobject_uevent(&p->kobj, KOBJ_REMOVE); kobject_del(&p->kobj); +#ifdef CONFIG_NET_POLL_CONTROLLER + if (br_devices_support_netpoll(br)) + br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL; + if (dev->netdev_ops->ndo_netpoll_cleanup) + dev->netdev_ops->ndo_netpoll_cleanup(dev); + else + dev->npinfo = NULL; +#endif call_rcu(&p->rcu, destroy_nbp_rcu); } @@ -165,6 +174,8 @@ static void del_br(struct net_bridge *br, struct list_head *head) del_nbp(p); } + br_netpoll_cleanup(br->dev); + del_timer_sync(&br->gc_timer); br_sysfs_delbr(br->dev); @@ -444,6 +455,20 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) kobject_uevent(&p->kobj, KOBJ_ADD); +#ifdef CONFIG_NET_POLL_CONTROLLER + if (br_devices_support_netpoll(br)) { + br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL; + if (br->dev->npinfo) + dev->npinfo = br->dev->npinfo; + } else if (!(br->dev->priv_flags & IFF_DISABLE_NETPOLL)) { + br->dev->priv_flags |= IFF_DISABLE_NETPOLL; + printk(KERN_INFO "New device %s does not support netpoll\n", + dev->name); + printk(KERN_INFO "Disabling netpoll for %s\n", + br->dev->name); + } +#endif + return 0; err2: br_fdb_delete_by_port(br, p, 1); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 018499ebe19d..3d2d3fe0a97e 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -253,6 +253,8 @@ static inline int br_is_root_bridge(const struct net_bridge *br) extern void br_dev_setup(struct net_device *dev); extern netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev); +extern bool br_devices_support_netpoll(struct net_bridge *br); +extern void br_netpoll_cleanup(struct net_device *br_dev); /* br_fdb.c */ extern int br_fdb_init(void); -- cgit v1.2.2 From 6ec82562ffc6f297d0de36d65776cff8e5704867 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 May 2010 00:53:53 -0700 Subject: veth: Dont kfree_skb() after dev_forward_skb() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case of congestion, netif_rx() frees the skb, so we must assume dev_forward_skb() also consume skb. Bug introduced by commit 445409602c092 (veth: move loopback logic to common location) We must change dev_forward_skb() to always consume skb, and veth to not double free it. Bug report : http://marc.info/?l=linux-netdev&m=127310770900442&w=3 Reported-by: Martín Ferrari Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/dev.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index f769098774b7..264137fce3a2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1451,7 +1451,7 @@ static inline void net_timestamp(struct sk_buff *skb) * * return values: * NET_RX_SUCCESS (no congestion) - * NET_RX_DROP (packet was dropped) + * NET_RX_DROP (packet was dropped, but freed) * * dev_forward_skb can be used for injecting an skb from the * start_xmit function of one device into the receive queue @@ -1465,12 +1465,11 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) { skb_orphan(skb); - if (!(dev->flags & IFF_UP)) - return NET_RX_DROP; - - if (skb->len > (dev->mtu + dev->hard_header_len)) + if (!(dev->flags & IFF_UP) || + (skb->len > (dev->mtu + dev->hard_header_len))) { + kfree_skb(skb); return NET_RX_DROP; - + } skb_set_dev(skb, dev); skb->tstamp.tv64 = 0; skb->pkt_type = PACKET_HOST; -- cgit v1.2.2 From 50b5d6ad63821cea324a5a7a19854d4de1a0a819 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 6 May 2010 00:56:07 -0700 Subject: sctp: Fix a race between ICMP protocol unreachable and connect() ICMP protocol unreachable handling completely disregarded the fact that the user may have locked the socket. It proceeded to destroy the association, even though the user may have held the lock and had a ref on the association. This resulted in the following: Attempt to release alive inet socket f6afcc00 ========================= [ BUG: held lock freed! ] ------------------------- somenu/2672 is freeing memory f6afcc00-f6afcfff, with a lock still held there! (sk_lock-AF_INET){+.+.+.}, at: [] sctp_connect+0x13/0x4c 1 lock held by somenu/2672: #0: (sk_lock-AF_INET){+.+.+.}, at: [] sctp_connect+0x13/0x4c stack backtrace: Pid: 2672, comm: somenu Not tainted 2.6.32-telco #55 Call Trace: [] ? printk+0xf/0x11 [] debug_check_no_locks_freed+0xce/0xff [] kmem_cache_free+0x21/0x66 [] __sk_free+0x9d/0xab [] sk_free+0x1c/0x1e [] sctp_association_put+0x32/0x89 [] __sctp_connect+0x36d/0x3f4 [] ? sctp_connect+0x13/0x4c [] ? autoremove_wake_function+0x0/0x33 [] sctp_connect+0x31/0x4c [] inet_dgram_connect+0x4b/0x55 [] sys_connect+0x54/0x71 [] ? lock_release_non_nested+0x88/0x239 [] ? might_fault+0x42/0x7c [] ? might_fault+0x42/0x7c [] sys_socketcall+0x6d/0x178 [] ? trace_hardirqs_on_thunk+0xc/0x10 [] syscall_call+0x7/0xb This was because the sctp_wait_for_connect() would aqcure the socket lock and then proceed to release the last reference count on the association, thus cause the fully destruction path to finish freeing the socket. The simplest solution is to start a very short timer in case the socket is owned by user. When the timer expires, we can do some verification and be able to do the release properly. Signed-off-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/sctp/input.c | 22 ++++++++++++++++++---- net/sctp/sm_sideeffect.c | 35 +++++++++++++++++++++++++++++++++++ net/sctp/transport.c | 2 ++ 3 files changed, 55 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sctp/input.c b/net/sctp/input.c index 2a570184e5a9..ea2192444ce6 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -440,11 +440,25 @@ void sctp_icmp_proto_unreachable(struct sock *sk, { SCTP_DEBUG_PRINTK("%s\n", __func__); - sctp_do_sm(SCTP_EVENT_T_OTHER, - SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH), - asoc->state, asoc->ep, asoc, t, - GFP_ATOMIC); + if (sock_owned_by_user(sk)) { + if (timer_pending(&t->proto_unreach_timer)) + return; + else { + if (!mod_timer(&t->proto_unreach_timer, + jiffies + (HZ/20))) + sctp_association_hold(asoc); + } + + } else { + if (timer_pending(&t->proto_unreach_timer) && + del_timer(&t->proto_unreach_timer)) + sctp_association_put(asoc); + sctp_do_sm(SCTP_EVENT_T_OTHER, + SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH), + asoc->state, asoc->ep, asoc, t, + GFP_ATOMIC); + } } /* Common lookup code for icmp/icmpv6 error handler. */ diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index d5ae450b6f02..eb1f42f45fdd 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -397,6 +397,41 @@ out_unlock: sctp_transport_put(transport); } +/* Handle the timeout of the ICMP protocol unreachable timer. Trigger + * the correct state machine transition that will close the association. + */ +void sctp_generate_proto_unreach_event(unsigned long data) +{ + struct sctp_transport *transport = (struct sctp_transport *) data; + struct sctp_association *asoc = transport->asoc; + + sctp_bh_lock_sock(asoc->base.sk); + if (sock_owned_by_user(asoc->base.sk)) { + SCTP_DEBUG_PRINTK("%s:Sock is busy.\n", __func__); + + /* Try again later. */ + if (!mod_timer(&transport->proto_unreach_timer, + jiffies + (HZ/20))) + sctp_association_hold(asoc); + goto out_unlock; + } + + /* Is this structure just waiting around for us to actually + * get destroyed? + */ + if (asoc->base.dead) + goto out_unlock; + + sctp_do_sm(SCTP_EVENT_T_OTHER, + SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH), + asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC); + +out_unlock: + sctp_bh_unlock_sock(asoc->base.sk); + sctp_association_put(asoc); +} + + /* Inject a SACK Timeout event into the state machine. */ static void sctp_generate_sack_event(unsigned long data) { diff --git a/net/sctp/transport.c b/net/sctp/transport.c index be4d63d5a5cc..4a368038d46f 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -108,6 +108,8 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, (unsigned long)peer); setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event, (unsigned long)peer); + setup_timer(&peer->proto_unreach_timer, + sctp_generate_proto_unreach_event, (unsigned long)peer); /* Initialize the 64-bit random nonce sent with heartbeat. */ get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce)); -- cgit v1.2.2 From ccc2d97cb7c798e785c9f198de243e2b59f7073b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Thu, 6 May 2010 03:44:34 +0000 Subject: ipv4: udp: fix short packet and bad checksum logging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2783ef23 moved the initialisation of saddr and daddr after pskb_may_pull() to avoid a potential data corruption. 
Unfortunately also placing it after the short packet and bad checksum error paths, where these variables are used for logging. The result is bogus output like [92238.389505] UDP: short packet: From 2.0.0.0:65535 23715/178 to 0.0.0.0:65535 Moving the saddr and daddr initialisation above the error paths, while still keeping it after the pskb_may_pull() to keep the fix from commit 2783ef23. Signed-off-by: Bjørn Mork Cc: stable@kernel.org Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8fef859db35d..c36522a0f113 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1527,6 +1527,9 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, uh = udp_hdr(skb); ulen = ntohs(uh->len); + saddr = ip_hdr(skb)->saddr; + daddr = ip_hdr(skb)->daddr; + if (ulen > skb->len) goto short_packet; @@ -1540,9 +1543,6 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (udp4_csum_init(skb, uh, proto)) goto csum_error; - saddr = ip_hdr(skb)->saddr; - daddr = ip_hdr(skb)->daddr; - if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) return __udp4_lib_mcast_deliver(net, skb, uh, saddr, daddr, udptable); -- cgit v1.2.2 From d6bc0149d8f2300bffa03ea6fea3ca39744277a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Thu, 6 May 2010 03:44:35 +0000 Subject: ipv6: udp: make short packet logging consistent with ipv4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding addresses and ports to the short packet log message, like ipv4/udp.c does it, makes these messages a lot more useful: [ 822.182450] UDPv6: short packet: From [2001:db8:ffb4:3::1]:47839 23715/178 to [2001:db8:ffb4:3:5054:ff:feff:200]:1234 This requires us to drop logging in case pskb_may_pull() fails, which also is consistent with ipv4/udp.c Signed-off-by: Bjørn Mork Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/udp.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 79359c8380bc..3d7a2c0b836a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -699,7 +699,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, u32 ulen = 0; if (!pskb_may_pull(skb, sizeof(struct udphdr))) - goto short_packet; + goto discard; saddr = &ipv6_hdr(skb)->saddr; daddr = &ipv6_hdr(skb)->daddr; @@ -781,9 +781,14 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, return 0; short_packet: - LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: %d/%u\n", + LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n", proto == IPPROTO_UDPLITE ? "-Lite" : "", - ulen, skb->len); + saddr, + ntohs(uh->source), + ulen, + skb->len, + daddr, + ntohs(uh->dest)); discard: UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); -- cgit v1.2.2 From eecfd7c4e36ff532d895885971d01d049bd3e014 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 May 2010 22:07:48 -0700 Subject: rps: Various optimizations Introduce ____napi_schedule() helper for callers in irq disabled contexts. rps_trigger_softirq() becomes a leaf function. Use container_of() in process_backlog() instead of accessing per_cpu address. 
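Illustrative aside, not part of the patch series: container_of() recovers the structure that embeds a given member from a pointer to that member, which is how process_backlog() can get back to its per-CPU softnet_data without an extra per_cpu address lookup. A minimal userspace sketch of the idiom follows; the struct names are hypothetical stand-ins, and only the container_of() macro itself mirrors the kernel's definition.

/* Sketch of the container_of() idiom -- hypothetical types, userspace only. */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct fake_napi {
	int weight;
};

struct fake_softnet_data {
	unsigned long processed;
	struct fake_napi backlog;	/* embedded member */
};

static void poll_backlog(struct fake_napi *napi)
{
	/* Recover the containing structure from the embedded member. */
	struct fake_softnet_data *sd =
		container_of(napi, struct fake_softnet_data, backlog);

	sd->processed++;
}

int main(void)
{
	struct fake_softnet_data sd = { .processed = 0 };

	poll_backlog(&sd.backlog);
	printf("processed=%lu\n", sd.processed);
	return 0;
}

The pointer arithmetic is the whole trick: the member's offset inside the container is known at compile time, so no per-CPU lookup or extra bookkeeping is needed to find the owning structure.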
Use a custom inlined version of __napi_complete() in process_backlog() to avoid one locked instruction : only current cpu owns and manipulates this napi, and NAPI_STATE_SCHED is the only possible flag set on backlog. we can use a plain write instead of clear_bit(), and we dont need an smp_mb() memory barrier, since RPS is on, backlog is protected by a spinlock. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 36d53be4fca6..32611c8f1219 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2205,6 +2205,14 @@ int netdev_max_backlog __read_mostly = 1000; int netdev_budget __read_mostly = 300; int weight_p __read_mostly = 64; /* old backlog weight */ +/* Called with irq disabled */ +static inline void ____napi_schedule(struct softnet_data *sd, + struct napi_struct *napi) +{ + list_add_tail(&napi->poll_list, &sd->poll_list); + __raise_softirq_irqoff(NET_RX_SOFTIRQ); +} + #ifdef CONFIG_RPS /* One global table that all flow-based protocols share. */ @@ -2363,7 +2371,7 @@ static void rps_trigger_softirq(void *data) { struct softnet_data *sd = data; - __napi_schedule(&sd->backlog); + ____napi_schedule(sd, &sd->backlog); sd->received_rps++; } @@ -2421,7 +2429,7 @@ enqueue: /* Schedule NAPI for backlog device */ if (napi_schedule_prep(&sd->backlog)) { if (!rps_ipi_queued(sd)) - __napi_schedule(&sd->backlog); + ____napi_schedule(sd, &sd->backlog); } goto enqueue; } @@ -3280,7 +3288,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) static int process_backlog(struct napi_struct *napi, int quota) { int work = 0; - struct softnet_data *sd = &__get_cpu_var(softnet_data); + struct softnet_data *sd = container_of(napi, struct softnet_data, backlog); #ifdef CONFIG_RPS /* Check if we have pending ipi, its better to send them now, @@ -3313,7 +3321,16 @@ static int process_backlog(struct napi_struct *napi, int quota) &sd->process_queue); } if (qlen < quota - work) { - __napi_complete(napi); + /* + * Inline a custom version of __napi_complete(). + * only current cpu owns and manipulates this napi, + * and NAPI_STATE_SCHED is the only possible flag set on backlog. + * we can use a plain write instead of clear_bit(), + * and we dont need an smp_mb() memory barrier. 
+ */ + list_del(&napi->poll_list); + napi->state = 0; + quota = work + qlen; } rps_unlock(sd); @@ -3334,8 +3351,7 @@ void __napi_schedule(struct napi_struct *n) unsigned long flags; local_irq_save(flags); - list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list); - __raise_softirq_irqoff(NET_RX_SOFTIRQ); + ____napi_schedule(&__get_cpu_var(softnet_data), n); local_irq_restore(flags); } EXPORT_SYMBOL(__napi_schedule); -- cgit v1.2.2 From 79733a865c7fd778ce45e3503962b3a875b0a153 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Tue, 4 May 2010 16:04:49 -0700 Subject: mac80211: remove association work when processing deauth request In https://bugzilla.kernel.org/show_bug.cgi?id=15794 a user encountered the following: [18967.469098] wlan0: authenticated [18967.472527] wlan0: associate with 00:1c:10:b8:e3:ea (try 1) [18967.472585] wlan0: deauthenticating from 00:1c:10:b8:e3:ea by local choice (reason=3) [18967.672057] wlan0: associate with 00:1c:10:b8:e3:ea (try 2) [18967.872357] wlan0: associate with 00:1c:10:b8:e3:ea (try 3) [18968.072960] wlan0: association with 00:1c:10:b8:e3:ea timed out [18968.076890] ------------[ cut here ]------------ [18968.076898] WARNING: at net/wireless/mlme.c:341 cfg80211_send_assoc_timeout+0xa8/0x140() [18968.076900] Hardware name: GX628 [18968.076924] Pid: 1408, comm: phy0 Not tainted 2.6.34-rc4-00082-g250541f-dirty #3 [18968.076926] Call Trace: [18968.076931] [] ? warn_slowpath_common+0x6e/0xb0 [18968.076934] [] ? cfg80211_send_assoc_timeout+0xa8/0x140 [18968.076937] [] ? mod_timer+0x10b/0x180 [18968.076940] [] ? ieee80211_assoc_done+0xbc/0xc0 [18968.076943] [] ? ieee80211_work_work+0x553/0x11c0 [18968.076945] [] ? finish_task_switch+0x41/0xb0 [18968.076948] [] ? ieee80211_work_work+0x0/0x11c0 [18968.076951] [] ? worker_thread+0x13b/0x210 [18968.076954] [] ? autoremove_wake_function+0x0/0x30 [18968.076956] [] ? worker_thread+0x0/0x210 [18968.076959] [] ? kthread+0x8e/0xa0 [18968.076962] [] ? kernel_thread_helper+0x4/0x10 [18968.076964] [] ? kthread+0x0/0xa0 [18968.076966] [] ? kernel_thread_helper+0x0/0x10 [18968.076968] ---[ end trace 8aa6265f4b1adfe0 ]--- As explained by Johannes Berg : We authenticate successfully, and then userspace requests association. Then we start that process, but the AP doesn't respond. While we're still waiting for an AP response, userspace asks for a deauth. We do the deauth, but don't abort the association work. Then once the association work times out we tell cfg80211, but it no longer wants to know since for all it is concerned we accepted the deauth that also kills the association attempt. Fix this by, upon receipt of deauth request, removing the association work and continuing to send the deauth. Unfortunately the user reporting the issue is not able to reproduce this problem anymore and cannot verify this fix. This seems like a well understood issue though and I thus present the patch. Bug-identified-by: Johannes Berg Signed-off-by: Reinette Chatre Signed-off-by: John W. 
Linville --- net/mac80211/mlme.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 8a9650343f26..6ccd48e180ee 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2029,7 +2029,8 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, continue; if (wk->type != IEEE80211_WORK_DIRECT_PROBE && - wk->type != IEEE80211_WORK_AUTH) + wk->type != IEEE80211_WORK_AUTH && + wk->type != IEEE80211_WORK_ASSOC) continue; if (memcmp(req->bss->bssid, wk->filter_ta, ETH_ALEN)) -- cgit v1.2.2 From ac8dd506e40ee2c7fcc61654a44c32555a0a8d6c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 5 May 2010 09:44:02 +0200 Subject: mac80211: fix BSS info reconfiguration When reconfiguring an interface due to a previous hardware restart, mac80211 will currently include the new IBSS flag on non-IBSS interfaces which may confuse drivers. Instead of doing the ~0 trick, simply spell out which things are going to be reconfigured. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/util.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 2b75b4fb68f4..5b79d552780a 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1160,18 +1160,33 @@ int ieee80211_reconfig(struct ieee80211_local *local) /* Finally also reconfigure all the BSS information */ list_for_each_entry(sdata, &local->interfaces, list) { - u32 changed = ~0; + u32 changed; + if (!ieee80211_sdata_running(sdata)) continue; + + /* common change flags for all interface types */ + changed = BSS_CHANGED_ERP_CTS_PROT | + BSS_CHANGED_ERP_PREAMBLE | + BSS_CHANGED_ERP_SLOT | + BSS_CHANGED_HT | + BSS_CHANGED_BASIC_RATES | + BSS_CHANGED_BEACON_INT | + BSS_CHANGED_BSSID | + BSS_CHANGED_CQM; + switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: - /* disable beacon change bits */ - changed &= ~(BSS_CHANGED_BEACON | - BSS_CHANGED_BEACON_ENABLED); - /* fall through */ + changed |= BSS_CHANGED_ASSOC; + ieee80211_bss_info_change_notify(sdata, changed); + break; case NL80211_IFTYPE_ADHOC: + changed |= BSS_CHANGED_IBSS; + /* fall through */ case NL80211_IFTYPE_AP: case NL80211_IFTYPE_MESH_POINT: + changed |= BSS_CHANGED_BEACON | + BSS_CHANGED_BEACON_ENABLED; ieee80211_bss_info_change_notify(sdata, changed); break; case NL80211_IFTYPE_WDS: -- cgit v1.2.2 From f444de05d20e27cdd960c13fcbcfca3099f03143 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 5 May 2010 15:25:02 +0200 Subject: cfg80211/mac80211: better channel handling Currently (all tested with hwsim) you can do stupid things like setting up an AP on a certain channel, then adding another virtual interface and making that associate on another channel -- this will make the beaconing to move channel but obviously without the necessary IEs data update. In order to improve this situation, first make the configuration APIs (cfg80211 and nl80211) aware of multi-channel operation -- we'll eventually need that in the future anyway. There's one userland API change and one API addition. The API change is that now SET_WIPHY must be called with virtual interface index rather than only wiphy index in order to take effect for that interface -- luckily all current users (hostapd) do that. For monitor interfaces, the old setting is preserved, but monitors are always slaved to other devices anyway so no guarantees. 
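Illustrative aside, not part of the patch: with this change a userspace tool scopes a channel change to one virtual interface by including that interface's index in the request. The sketch below shows roughly how such a request could be built with libnl-3 (genl_connect(), genlmsg_put(), nla_put_u32() and friends are assumed standard libnl-3 calls); error handling is trimmed, and which attributes a given kernel accepts should be checked against its nl80211 headers.

/* Rough libnl-3 sketch: set a frequency for a specific netdev via
 * NL80211_CMD_SET_WIPHY, relying on the ifindex to scope the change.
 * Error handling is deliberately minimal.
 */
#include <net/if.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/nl80211.h>

static int set_freq_on_netdev(const char *ifname, int freq_mhz)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg;
	int family, err;

	if (!sk)
		return -1;
	if (genl_connect(sk) ||
	    (family = genl_ctrl_resolve(sk, "nl80211")) < 0) {
		nl_socket_free(sk);
		return -1;
	}

	msg = nlmsg_alloc();
	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
		    NL80211_CMD_SET_WIPHY, 0);

	/* The ifindex is what scopes the channel change to this interface. */
	nla_put_u32(msg, NL80211_ATTR_IFINDEX, if_nametoindex(ifname));
	nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, freq_mhz);
	nla_put_u32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE, NL80211_CHAN_NO_HT);

	err = nl_send_auto_complete(sk, msg);
	nlmsg_free(msg);
	nl_socket_free(sk);
	return err < 0 ? -1 : 0;
}

Existing callers such as hostapd already pass the interface index to SET_WIPHY, which is the backward compatibility the paragraph above relies on.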
The second userland API change is the introduction of a per virtual interface SET_CHANNEL command, that hostapd should use going forward to make it easier to understand what's going on (it can automatically detect a kernel with this command). Other than mac80211, no existing cfg80211 drivers are affected by this change because they only allow a single virtual interface. mac80211, however, now needs to be aware that the channel settings are per interface now, and needs to disallow (for now) real multi-channel operation, which is another important part of this patch. One of the immediate benefits is that you can now start hostapd to operate on a hardware that already has a connection on another virtual interface, as long as you specify the same channel. Note that two things are left unhandled (this is an improvement -- not a complete fix): * different HT/no-HT modes currently you could start an HT AP and then connect to a non-HT network on the same channel which would configure the hardware for no HT; that can be fixed fairly easily * CSA An AP we're connected to on a virtual interface might indicate switching channels, and in that case we would follow it, regardless of how many other interfaces are operating; this requires more effort to fix but is pretty rare after all Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/Makefile | 3 +- net/mac80211/cfg.c | 41 +++++++++++ net/mac80211/chan.c | 57 +++++++++++++++ net/mac80211/ieee80211_i.h | 11 +++ net/wireless/chan.c | 56 +++++---------- net/wireless/core.h | 12 +--- net/wireless/ibss.c | 5 -- net/wireless/nl80211.c | 171 ++++++++++++++++++++++++++++++++++----------- net/wireless/sme.c | 5 -- net/wireless/wext-compat.c | 15 ++-- net/wireless/wext-sme.c | 2 +- 11 files changed, 274 insertions(+), 104 deletions(-) create mode 100644 net/mac80211/chan.c (limited to 'net') diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 04420291e7ad..84b48ba8a77e 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -23,7 +23,8 @@ mac80211-y := \ key.o \ util.o \ wme.o \ - event.o + event.o \ + chan.o mac80211-$(CONFIG_MAC80211_LEDS) += led.o mac80211-$(CONFIG_MAC80211_DEBUGFS) += \ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index b575a5066219..414b7dd7d7fd 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1161,11 +1161,24 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy, } static int ieee80211_set_channel(struct wiphy *wiphy, + struct net_device *netdev, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type) { struct ieee80211_local *local = wiphy_priv(wiphy); + switch (ieee80211_get_channel_mode(local, NULL)) { + case CHAN_MODE_HOPPING: + return -EBUSY; + case CHAN_MODE_FIXED: + if (local->oper_channel == chan && + local->oper_channel_type == channel_type) + return 0; + return -EBUSY; + case CHAN_MODE_UNDEFINED: + break; + } + local->oper_channel = chan; local->oper_channel_type = channel_type; @@ -1213,6 +1226,20 @@ static int ieee80211_auth(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_assoc(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_assoc_request *req) { + struct ieee80211_local *local = wiphy_priv(wiphy); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + switch (ieee80211_get_channel_mode(local, sdata)) { + case CHAN_MODE_HOPPING: + return -EBUSY; + case CHAN_MODE_FIXED: + if (local->oper_channel == req->bss->channel) + break; + return -EBUSY; + case CHAN_MODE_UNDEFINED: + break; + } + return 
ieee80211_mgd_assoc(IEEE80211_DEV_TO_SUB_IF(dev), req); } @@ -1235,8 +1262,22 @@ static int ieee80211_disassoc(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_join_ibss(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ibss_params *params) { + struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + switch (ieee80211_get_channel_mode(local, sdata)) { + case CHAN_MODE_HOPPING: + return -EBUSY; + case CHAN_MODE_FIXED: + if (!params->channel_fixed) + return -EBUSY; + if (local->oper_channel == params->channel) + break; + return -EBUSY; + case CHAN_MODE_UNDEFINED: + break; + } + return ieee80211_ibss_join(sdata, params); } diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c new file mode 100644 index 000000000000..08f3832661a5 --- /dev/null +++ b/net/mac80211/chan.c @@ -0,0 +1,57 @@ +/* + * mac80211 - channel management + */ + +#include "ieee80211_i.h" + +enum ieee80211_chan_mode +__ieee80211_get_channel_mode(struct ieee80211_local *local, + struct ieee80211_sub_if_data *ignore) +{ + struct ieee80211_sub_if_data *sdata; + + WARN_ON(!mutex_is_locked(&local->iflist_mtx)); + + list_for_each_entry(sdata, &local->interfaces, list) { + if (sdata == ignore) + continue; + + if (!ieee80211_sdata_running(sdata)) + continue; + + if (sdata->vif.type == NL80211_IFTYPE_MONITOR) + continue; + + if (sdata->vif.type == NL80211_IFTYPE_STATION && + !sdata->u.mgd.associated) + continue; + + if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { + if (!sdata->u.ibss.ssid_len) + continue; + if (!sdata->u.ibss.fixed_channel) + return CHAN_MODE_HOPPING; + } + + if (sdata->vif.type == NL80211_IFTYPE_AP && + !sdata->u.ap.beacon) + continue; + + return CHAN_MODE_FIXED; + } + + return CHAN_MODE_UNDEFINED; +} + +enum ieee80211_chan_mode +ieee80211_get_channel_mode(struct ieee80211_local *local, + struct ieee80211_sub_if_data *ignore) +{ + enum ieee80211_chan_mode mode; + + mutex_lock(&local->iflist_mtx); + mode = __ieee80211_get_channel_mode(local, ignore); + mutex_unlock(&local->iflist_mtx); + + return mode; +} diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index c8077a3647c6..359edff31471 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1229,6 +1229,17 @@ int ieee80211_wk_remain_on_channel(struct ieee80211_sub_if_data *sdata, int ieee80211_wk_cancel_remain_on_channel( struct ieee80211_sub_if_data *sdata, u64 cookie); +/* channel management */ +enum ieee80211_chan_mode { + CHAN_MODE_UNDEFINED, + CHAN_MODE_HOPPING, + CHAN_MODE_FIXED, +}; + +enum ieee80211_chan_mode +ieee80211_get_channel_mode(struct ieee80211_local *local, + struct ieee80211_sub_if_data *ignore); + #ifdef CONFIG_MAC80211_NOINLINE #define debug_noinline noinline #else diff --git a/net/wireless/chan.c b/net/wireless/chan.c index bf1737fc9a7e..d92d088026bf 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -9,38 +9,6 @@ #include #include "core.h" -struct ieee80211_channel * -rdev_fixed_channel(struct cfg80211_registered_device *rdev, - struct wireless_dev *for_wdev) -{ - struct wireless_dev *wdev; - struct ieee80211_channel *result = NULL; - - WARN_ON(!mutex_is_locked(&rdev->devlist_mtx)); - - list_for_each_entry(wdev, &rdev->netdev_list, list) { - if (wdev == for_wdev) - continue; - - /* - * Lock manually to tell lockdep about allowed - * nesting here if for_wdev->mtx is held already. - * This is ok as it's all under the rdev devlist - * mutex and as such can only be done once at any - * given time. 
- */ - mutex_lock_nested(&wdev->mtx, SINGLE_DEPTH_NESTING); - if (wdev->current_bss) - result = wdev->current_bss->pub.channel; - wdev_unlock(wdev); - - if (result) - break; - } - - return result; -} - struct ieee80211_channel * rdev_freq_to_chan(struct cfg80211_registered_device *rdev, int freq, enum nl80211_channel_type channel_type) @@ -75,15 +43,22 @@ rdev_freq_to_chan(struct cfg80211_registered_device *rdev, return chan; } -int rdev_set_freq(struct cfg80211_registered_device *rdev, - struct wireless_dev *for_wdev, - int freq, enum nl80211_channel_type channel_type) +int cfg80211_set_freq(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, int freq, + enum nl80211_channel_type channel_type) { struct ieee80211_channel *chan; int result; - if (rdev_fixed_channel(rdev, for_wdev)) - return -EBUSY; + if (wdev->iftype == NL80211_IFTYPE_MONITOR) + wdev = NULL; + + if (wdev) { + ASSERT_WDEV_LOCK(wdev); + + if (!netif_running(wdev->netdev)) + return -ENETDOWN; + } if (!rdev->ops->set_channel) return -EOPNOTSUPP; @@ -92,11 +67,14 @@ int rdev_set_freq(struct cfg80211_registered_device *rdev, if (!chan) return -EINVAL; - result = rdev->ops->set_channel(&rdev->wiphy, chan, channel_type); + result = rdev->ops->set_channel(&rdev->wiphy, + wdev ? wdev->netdev : NULL, + chan, channel_type); if (result) return result; - rdev->channel = chan; + if (wdev) + wdev->channel = chan; return 0; } diff --git a/net/wireless/core.h b/net/wireless/core.h index b2234b436ead..ae930acf75e9 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -70,9 +70,6 @@ struct cfg80211_registered_device { struct work_struct conn_work; struct work_struct event_work; - /* current channel */ - struct ieee80211_channel *channel; - /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ struct wiphy wiphy __attribute__((__aligned__(NETDEV_ALIGN))); @@ -387,15 +384,12 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, u32 *flags, struct vif_params *params); void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev); -struct ieee80211_channel * -rdev_fixed_channel(struct cfg80211_registered_device *rdev, - struct wireless_dev *for_wdev); struct ieee80211_channel * rdev_freq_to_chan(struct cfg80211_registered_device *rdev, int freq, enum nl80211_channel_type channel_type); -int rdev_set_freq(struct cfg80211_registered_device *rdev, - struct wireless_dev *for_wdev, - int freq, enum nl80211_channel_type channel_type); +int cfg80211_set_freq(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, int freq, + enum nl80211_channel_type channel_type); u16 cfg80211_calculate_bitrate(struct rate_info *rate); diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 6ef5a491fb4b..9825317e653a 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -80,15 +80,10 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, struct cfg80211_cached_keys *connkeys) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct ieee80211_channel *chan; int err; ASSERT_WDEV_LOCK(wdev); - chan = rdev_fixed_channel(rdev, wdev); - if (chan && chan != params->channel) - return -EBUSY; - if (wdev->ssid_len) return -EALREADY; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c27bef8e0c11..ec1b4a896c6e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -588,6 +588,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, i++; NLA_PUT_U32(msg, i, 
NL80211_CMD_SET_WIPHY_NETNS); } + CMD(set_channel, SET_CHANNEL); #undef CMD @@ -688,10 +689,90 @@ static int parse_txq_params(struct nlattr *tb[], return 0; } +static bool nl80211_can_set_dev_channel(struct wireless_dev *wdev) +{ + /* + * You can only set the channel explicitly for AP, mesh + * and WDS type interfaces; all others have their channel + * managed via their respective "establish a connection" + * command (connect, join, ...) + * + * Monitors are special as they are normally slaved to + * whatever else is going on, so they behave as though + * you tried setting the wiphy channel itself. + */ + return !wdev || + wdev->iftype == NL80211_IFTYPE_AP || + wdev->iftype == NL80211_IFTYPE_WDS || + wdev->iftype == NL80211_IFTYPE_MESH_POINT || + wdev->iftype == NL80211_IFTYPE_MONITOR; +} + +static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + struct genl_info *info) +{ + enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; + u32 freq; + int result; + + if (!info->attrs[NL80211_ATTR_WIPHY_FREQ]) + return -EINVAL; + + if (!nl80211_can_set_dev_channel(wdev)) + return -EOPNOTSUPP; + + if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) { + channel_type = nla_get_u32(info->attrs[ + NL80211_ATTR_WIPHY_CHANNEL_TYPE]); + if (channel_type != NL80211_CHAN_NO_HT && + channel_type != NL80211_CHAN_HT20 && + channel_type != NL80211_CHAN_HT40PLUS && + channel_type != NL80211_CHAN_HT40MINUS) + return -EINVAL; + } + + freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); + + mutex_lock(&rdev->devlist_mtx); + if (wdev) { + wdev_lock(wdev); + result = cfg80211_set_freq(rdev, wdev, freq, channel_type); + wdev_unlock(wdev); + } else { + result = cfg80211_set_freq(rdev, NULL, freq, channel_type); + } + mutex_unlock(&rdev->devlist_mtx); + + return result; +} + +static int nl80211_set_channel(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev; + struct net_device *netdev; + int result; + + rtnl_lock(); + + result = get_rdev_dev_by_info_ifindex(info, &rdev, &netdev); + if (result) + goto unlock; + + result = __nl80211_set_channel(rdev, netdev->ieee80211_ptr, info); + + unlock: + rtnl_unlock(); + + return result; +} + static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev; - int result = 0, rem_txq_params = 0; + struct net_device *netdev = NULL; + struct wireless_dev *wdev; + int result, rem_txq_params = 0; struct nlattr *nl_txq_params; u32 changed; u8 retry_short = 0, retry_long = 0; @@ -700,16 +781,50 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); + /* + * Try to find the wiphy and netdev. Normally this + * function shouldn't need the netdev, but this is + * done for backward compatibility -- previously + * setting the channel was done per wiphy, but now + * it is per netdev. Previous userland like hostapd + * also passed a netdev to set_wiphy, so that it is + * possible to let that go to the right netdev! 
+ */ mutex_lock(&cfg80211_mutex); - rdev = __cfg80211_rdev_from_info(info); - if (IS_ERR(rdev)) { - mutex_unlock(&cfg80211_mutex); - result = PTR_ERR(rdev); - goto unlock; + if (info->attrs[NL80211_ATTR_IFINDEX]) { + int ifindex = nla_get_u32(info->attrs[NL80211_ATTR_IFINDEX]); + + netdev = dev_get_by_index(genl_info_net(info), ifindex); + if (netdev && netdev->ieee80211_ptr) { + rdev = wiphy_to_dev(netdev->ieee80211_ptr->wiphy); + mutex_lock(&rdev->mtx); + } else + netdev = NULL; } - mutex_lock(&rdev->mtx); + if (!netdev) { + rdev = __cfg80211_rdev_from_info(info); + if (IS_ERR(rdev)) { + mutex_unlock(&cfg80211_mutex); + result = PTR_ERR(rdev); + goto unlock; + } + wdev = NULL; + netdev = NULL; + result = 0; + + mutex_lock(&rdev->mtx); + } else if (netif_running(netdev) && + nl80211_can_set_dev_channel(netdev->ieee80211_ptr)) + wdev = netdev->ieee80211_ptr; + else + wdev = NULL; + + /* + * end workaround code, by now the rdev is available + * and locked, and wdev may or may not be NULL. + */ if (info->attrs[NL80211_ATTR_WIPHY_NAME]) result = cfg80211_dev_rename( @@ -748,26 +863,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) } if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { - enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; - u32 freq; - - result = -EINVAL; - - if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) { - channel_type = nla_get_u32(info->attrs[ - NL80211_ATTR_WIPHY_CHANNEL_TYPE]); - if (channel_type != NL80211_CHAN_NO_HT && - channel_type != NL80211_CHAN_HT20 && - channel_type != NL80211_CHAN_HT40PLUS && - channel_type != NL80211_CHAN_HT40MINUS) - goto bad_res; - } - - freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); - - mutex_lock(&rdev->devlist_mtx); - result = rdev_set_freq(rdev, NULL, freq, channel_type); - mutex_unlock(&rdev->devlist_mtx); + result = __nl80211_set_channel(rdev, wdev, info); if (result) goto bad_res; } @@ -864,6 +960,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) bad_res: mutex_unlock(&rdev->mtx); + if (netdev) + dev_put(netdev); unlock: rtnl_unlock(); return result; @@ -3561,9 +3659,8 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev; struct net_device *dev; - struct wireless_dev *wdev; struct cfg80211_crypto_settings crypto; - struct ieee80211_channel *chan, *fixedchan; + struct ieee80211_channel *chan; const u8 *bssid, *ssid, *ie = NULL, *prev_bssid = NULL; int err, ssid_len, ie_len = 0; bool use_mfp = false; @@ -3606,16 +3703,6 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) goto out; } - mutex_lock(&rdev->devlist_mtx); - wdev = dev->ieee80211_ptr; - fixedchan = rdev_fixed_channel(rdev, wdev); - if (fixedchan && chan != fixedchan) { - err = -EBUSY; - mutex_unlock(&rdev->devlist_mtx); - goto out; - } - mutex_unlock(&rdev->devlist_mtx); - ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); @@ -5185,6 +5272,12 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, + { + .cmd = NL80211_CMD_SET_CHANNEL, + .doit = nl80211_set_channel, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { diff --git a/net/wireless/sme.c b/net/wireless/sme.c index dcd7685242f7..14cf8163912a 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -740,7 +740,6 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev, const u8 
*prev_bssid) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct ieee80211_channel *chan; struct cfg80211_bss *bss = NULL; int err; @@ -749,10 +748,6 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev, if (wdev->sme_state != CFG80211_SME_IDLE) return -EALREADY; - chan = rdev_fixed_channel(rdev, wdev); - if (chan && chan != connect->channel) - return -EBUSY; - if (WARN_ON(wdev->connect_keys)) { kfree(wdev->connect_keys); wdev->connect_keys = NULL; diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 9ab51838849e..75848c6cb22a 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -781,16 +781,22 @@ int cfg80211_wext_siwfreq(struct net_device *dev, return cfg80211_mgd_wext_siwfreq(dev, info, wextfreq, extra); case NL80211_IFTYPE_ADHOC: return cfg80211_ibss_wext_siwfreq(dev, info, wextfreq, extra); - default: + case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_MESH_POINT: freq = cfg80211_wext_freq(wdev->wiphy, wextfreq); if (freq < 0) return freq; if (freq == 0) return -EINVAL; + wdev_lock(wdev); mutex_lock(&rdev->devlist_mtx); - err = rdev_set_freq(rdev, NULL, freq, NL80211_CHAN_NO_HT); + err = cfg80211_set_freq(rdev, wdev, freq, NL80211_CHAN_NO_HT); mutex_unlock(&rdev->devlist_mtx); + wdev_unlock(wdev); return err; + default: + return -EOPNOTSUPP; } } EXPORT_SYMBOL_GPL(cfg80211_wext_siwfreq); @@ -800,7 +806,6 @@ int cfg80211_wext_giwfreq(struct net_device *dev, struct iw_freq *freq, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_STATION: @@ -808,9 +813,9 @@ int cfg80211_wext_giwfreq(struct net_device *dev, case NL80211_IFTYPE_ADHOC: return cfg80211_ibss_wext_giwfreq(dev, info, freq, extra); default: - if (!rdev->channel) + if (!wdev->channel) return -EINVAL; - freq->m = rdev->channel->center_freq; + freq->m = wdev->channel->center_freq; freq->e = 6; return 0; } diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index 5615a8802536..8e5ab4f4e9c4 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -107,7 +107,7 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, /* SSID is not set, we just want to switch channel */ if (chan && !wdev->wext.connect.ssid_len) { - err = rdev_set_freq(rdev, wdev, freq, NL80211_CHAN_NO_HT); + err = cfg80211_set_freq(rdev, wdev, freq, NL80211_CHAN_NO_HT); goto out; } -- cgit v1.2.2 From 0aaffa9b9699894aab3266195a529baf9f96ac29 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 5 May 2010 15:28:27 +0200 Subject: mac80211: improve HT channel handling Currently, when one interface switches HT mode, all others will follow along. This is clearly undesirable, since the new one might switch to no-HT while another one is operating in HT. Address this issue by keeping track of the HT mode per interface, and allowing only changes that are compatible, i.e. switching into HT40+ is not possible when another interface is in HT40-, in that case the second one needs to fall back to HT20. Also, to allow drivers to know what's going on, store the per-interface HT mode (channel type) in the virtual interface's bss_conf. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/cfg.c | 23 +++++++++++---- net/mac80211/chan.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++ net/mac80211/ibss.c | 5 ++-- net/mac80211/ieee80211_i.h | 5 +++- net/mac80211/main.c | 2 +- net/mac80211/mlme.c | 44 +++++++++++++++-------------- 6 files changed, 118 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 414b7dd7d7fd..ab166c6d9399 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1166,23 +1166,34 @@ static int ieee80211_set_channel(struct wiphy *wiphy, enum nl80211_channel_type channel_type) { struct ieee80211_local *local = wiphy_priv(wiphy); + struct ieee80211_sub_if_data *sdata = NULL; + + if (netdev) + sdata = IEEE80211_DEV_TO_SUB_IF(netdev); switch (ieee80211_get_channel_mode(local, NULL)) { case CHAN_MODE_HOPPING: return -EBUSY; case CHAN_MODE_FIXED: - if (local->oper_channel == chan && - local->oper_channel_type == channel_type) + if (local->oper_channel != chan) + return -EBUSY; + if (!sdata && local->_oper_channel_type == channel_type) return 0; - return -EBUSY; + break; case CHAN_MODE_UNDEFINED: break; } local->oper_channel = chan; - local->oper_channel_type = channel_type; - return ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); + if (!ieee80211_set_channel_type(local, sdata, channel_type)) + return -EBUSY; + + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); + if (sdata && sdata->vif.type != NL80211_IFTYPE_MONITOR) + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_HT); + + return 0; } #ifdef CONFIG_PM @@ -1406,7 +1417,7 @@ int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata, * association, there's no need to send an action frame. */ if (!sdata->u.mgd.associated || - sdata->local->oper_channel_type == NL80211_CHAN_NO_HT) { + sdata->vif.bss_conf.channel_type == NL80211_CHAN_NO_HT) { mutex_lock(&sdata->local->iflist_mtx); ieee80211_recalc_smps(sdata->local, sdata); mutex_unlock(&sdata->local->iflist_mtx); diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 08f3832661a5..5d218c530a4e 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -2,6 +2,7 @@ * mac80211 - channel management */ +#include #include "ieee80211_i.h" enum ieee80211_chan_mode @@ -55,3 +56,72 @@ ieee80211_get_channel_mode(struct ieee80211_local *local, return mode; } + +bool ieee80211_set_channel_type(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + enum nl80211_channel_type chantype) +{ + struct ieee80211_sub_if_data *tmp; + enum nl80211_channel_type superchan = NL80211_CHAN_NO_HT; + bool result; + + mutex_lock(&local->iflist_mtx); + + list_for_each_entry(tmp, &local->interfaces, list) { + if (tmp == sdata) + continue; + + if (!ieee80211_sdata_running(tmp)) + continue; + + switch (tmp->vif.bss_conf.channel_type) { + case NL80211_CHAN_NO_HT: + case NL80211_CHAN_HT20: + superchan = tmp->vif.bss_conf.channel_type; + break; + case NL80211_CHAN_HT40PLUS: + WARN_ON(superchan == NL80211_CHAN_HT40MINUS); + superchan = NL80211_CHAN_HT40PLUS; + break; + case NL80211_CHAN_HT40MINUS: + WARN_ON(superchan == NL80211_CHAN_HT40PLUS); + superchan = NL80211_CHAN_HT40MINUS; + break; + } + } + + switch (superchan) { + case NL80211_CHAN_NO_HT: + case NL80211_CHAN_HT20: + /* + * allow any change that doesn't go to no-HT + * (if it already is no-HT no change is needed) + */ + if (chantype == NL80211_CHAN_NO_HT) + break; + superchan = chantype; + break; + case NL80211_CHAN_HT40PLUS: + case NL80211_CHAN_HT40MINUS: + /* allow smaller bandwidth and same */ + 
if (chantype == NL80211_CHAN_NO_HT) + break; + if (chantype == NL80211_CHAN_HT20) + break; + if (superchan == chantype) + break; + result = false; + goto out; + } + + local->_oper_channel_type = superchan; + + if (sdata) + sdata->vif.bss_conf.channel_type = chantype; + + result = true; + out: + mutex_unlock(&local->iflist_mtx); + + return result; +} diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index d5855ae387e8..36745f494f63 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -102,7 +102,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0; local->oper_channel = chan; - local->oper_channel_type = NL80211_CHAN_NO_HT; + WARN_ON(!ieee80211_set_channel_type(local, sdata, NL80211_CHAN_NO_HT)); ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); sband = local->hw.wiphy->bands[chan->band]; @@ -910,7 +910,8 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, /* fix ourselves to that channel now already */ if (params->channel_fixed) { sdata->local->oper_channel = params->channel; - sdata->local->oper_channel_type = NL80211_CHAN_NO_HT; + WARN_ON(!ieee80211_set_channel_type(sdata->local, sdata, + NL80211_CHAN_NO_HT)); } if (params->ie) { diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 359edff31471..69e7f4131f46 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -768,7 +768,7 @@ struct ieee80211_local { enum mac80211_scan_state next_scan_state; struct delayed_work scan_work; struct ieee80211_sub_if_data *scan_sdata; - enum nl80211_channel_type oper_channel_type; + enum nl80211_channel_type _oper_channel_type; struct ieee80211_channel *oper_channel, *csa_channel; /* Temporary remain-on-channel for off-channel operations */ @@ -1239,6 +1239,9 @@ enum ieee80211_chan_mode { enum ieee80211_chan_mode ieee80211_get_channel_mode(struct ieee80211_local *local, struct ieee80211_sub_if_data *ignore); +bool ieee80211_set_channel_type(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + enum nl80211_channel_type chantype); #ifdef CONFIG_MAC80211_NOINLINE #define debug_noinline noinline diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 353b6b42d9c5..d763d76e809f 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -111,7 +111,7 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) channel_type = local->tmp_channel_type; } else { chan = local->oper_channel; - channel_type = local->oper_channel_type; + channel_type = local->_oper_channel_type; } if (chan != local->hw.conf.channel || diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 17cb8ae912bc..6e149b49d4f0 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -136,11 +136,14 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, struct sta_info *sta; u32 changed = 0; u16 ht_opmode; - bool enable_ht = true, ht_changed; + bool enable_ht = true; + enum nl80211_channel_type prev_chantype; enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + prev_chantype = sdata->vif.bss_conf.channel_type; + /* HT is not supported */ if (!sband->ht_cap.ht_supported) enable_ht = false; @@ -171,38 +174,37 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, } } - ht_changed = conf_is_ht(&local->hw.conf) != enable_ht || - channel_type != local->hw.conf.channel_type; - if (local->tmp_channel) local->tmp_channel_type = 
channel_type; - local->oper_channel_type = channel_type; - if (ht_changed) { - /* channel_type change automatically detected */ - ieee80211_hw_config(local, 0); + if (!ieee80211_set_channel_type(local, sdata, channel_type)) { + /* can only fail due to HT40+/- mismatch */ + channel_type = NL80211_CHAN_HT20; + WARN_ON(!ieee80211_set_channel_type(local, sdata, channel_type)); + } + /* channel_type change automatically detected */ + ieee80211_hw_config(local, 0); + + if (prev_chantype != channel_type) { rcu_read_lock(); sta = sta_info_get(sdata, bssid); if (sta) rate_control_rate_update(local, sband, sta, IEEE80211_RC_HT_CHANGED, - local->oper_channel_type); + channel_type); rcu_read_unlock(); - } - - /* disable HT */ - if (!enable_ht) - return 0; + } ht_opmode = le16_to_cpu(hti->operation_mode); /* if bss configuration changed store the new one */ - if (!sdata->ht_opmode_valid || - sdata->vif.bss_conf.ht_operation_mode != ht_opmode) { + if (sdata->ht_opmode_valid != enable_ht || + sdata->vif.bss_conf.ht_operation_mode != ht_opmode || + prev_chantype != channel_type) { changed |= BSS_CHANGED_HT; sdata->vif.bss_conf.ht_operation_mode = ht_opmode; - sdata->ht_opmode_valid = true; + sdata->ht_opmode_valid = enable_ht; } return changed; @@ -865,7 +867,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_set_wmm_default(sdata); /* channel(_type) changes are handled by ieee80211_hw_config */ - local->oper_channel_type = NL80211_CHAN_NO_HT; + WARN_ON(!ieee80211_set_channel_type(local, sdata, NL80211_CHAN_NO_HT)); /* on the next assoc, re-program HT parameters */ sdata->ht_opmode_valid = false; @@ -882,8 +884,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_hw_config(local, config_changed); - /* And the BSSID changed -- not very interesting here */ - changed |= BSS_CHANGED_BSSID; + /* The BSSID (not really interesting) and HT changed */ + changed |= BSS_CHANGED_BSSID | BSS_CHANGED_HT; ieee80211_bss_info_change_notify(sdata, changed); if (remove_sta) @@ -2265,7 +2267,7 @@ int ieee80211_mgd_action(struct ieee80211_sub_if_data *sdata, if ((chan != local->tmp_channel || channel_type != local->tmp_channel_type) && (chan != local->oper_channel || - channel_type != local->oper_channel_type)) + channel_type != local->_oper_channel_type)) return -EBUSY; skb = dev_alloc_skb(local->hw.extra_tx_headroom + len); -- cgit v1.2.2 From a472e71b3c71619087d2485282955c3b62ebfde9 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Thu, 6 May 2010 14:45:17 -0400 Subject: mac80211: set IEEE80211_TX_CTL_FIRST_FRAGMENT for beacons Also simplify the flags assignment into a single statement at the end of ieee80211_beacon_get_tim. Signed-off-by: John W. 
Linville --- net/mac80211/tx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index f3841f43249e..680bcb7093db 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2251,8 +2251,9 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, info->control.vif = vif; - info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT; - info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; + info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT | + IEEE80211_TX_CTL_ASSIGN_SEQ | + IEEE80211_TX_CTL_FIRST_FRAGMENT; out: rcu_read_unlock(); return skb; -- cgit v1.2.2 From 3ee943728fff536edaf8f59faa58aaa1aa7366e3 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Sat, 8 May 2010 01:57:52 -0700 Subject: ipv4: remove ip_rt_secret timer (v4) A while back there was a discussion regarding the rt_secret_interval timer. Given that we've had the ability to do emergency route cache rebuilds for awhile now, based on a statistical analysis of the various hash chain lengths in the cache, the use of the flush timer is somewhat redundant. This patch removes the rt_secret_interval sysctl, allowing us to rely solely on the statistical analysis mechanism to determine the need for route cache flushes. Signed-off-by: Neil Horman Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 108 +++++-------------------------------------------------- 1 file changed, 8 insertions(+), 100 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a947428ef0ae..dea3f9264250 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -129,7 +129,6 @@ static int ip_rt_gc_elasticity __read_mostly = 8; static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; -static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ; static int rt_chain_length_max __read_mostly = 20; static struct delayed_work expires_work; @@ -918,32 +917,11 @@ void rt_cache_flush_batch(void) rt_do_flush(!in_softirq()); } -/* - * We change rt_genid and let gc do the cleanup - */ -static void rt_secret_rebuild(unsigned long __net) -{ - struct net *net = (struct net *)__net; - rt_cache_invalidate(net); - mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval); -} - -static void rt_secret_rebuild_oneshot(struct net *net) -{ - del_timer_sync(&net->ipv4.rt_secret_timer); - rt_cache_invalidate(net); - if (ip_rt_secret_interval) - mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval); -} - static void rt_emergency_hash_rebuild(struct net *net) { - if (net_ratelimit()) { + if (net_ratelimit()) printk(KERN_WARNING "Route hash chain too long!\n"); - printk(KERN_WARNING "Adjust your secret_interval!\n"); - } - - rt_secret_rebuild_oneshot(net); + rt_cache_invalidate(net); } /* @@ -3101,48 +3079,6 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, return -EINVAL; } -static void rt_secret_reschedule(int old) -{ - struct net *net; - int new = ip_rt_secret_interval; - int diff = new - old; - - if (!diff) - return; - - rtnl_lock(); - for_each_net(net) { - int deleted = del_timer_sync(&net->ipv4.rt_secret_timer); - long time; - - if (!new) - continue; - - if (deleted) { - time = net->ipv4.rt_secret_timer.expires - jiffies; - - if (time <= 0 || (time += diff) <= 0) - time = 0; - } else - time = new; - - mod_timer(&net->ipv4.rt_secret_timer, jiffies + time); - } - rtnl_unlock(); -} - -static int 
ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) -{ - int old = ip_rt_secret_interval; - int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); - - rt_secret_reschedule(old); - - return ret; -} - static ctl_table ipv4_route_table[] = { { .procname = "gc_thresh", @@ -3251,13 +3187,6 @@ static ctl_table ipv4_route_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { - .procname = "secret_interval", - .data = &ip_rt_secret_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = ipv4_sysctl_rt_secret_interval, - }, { } }; @@ -3336,34 +3265,15 @@ static __net_initdata struct pernet_operations sysctl_route_ops = { }; #endif - -static __net_init int rt_secret_timer_init(struct net *net) +static __net_init int rt_genid_init(struct net *net) { - atomic_set(&net->ipv4.rt_genid, - (int) ((num_physpages ^ (num_physpages>>8)) ^ - (jiffies ^ (jiffies >> 7)))); - - net->ipv4.rt_secret_timer.function = rt_secret_rebuild; - net->ipv4.rt_secret_timer.data = (unsigned long)net; - init_timer_deferrable(&net->ipv4.rt_secret_timer); - - if (ip_rt_secret_interval) { - net->ipv4.rt_secret_timer.expires = - jiffies + net_random() % ip_rt_secret_interval + - ip_rt_secret_interval; - add_timer(&net->ipv4.rt_secret_timer); - } + get_random_bytes(&net->ipv4.rt_genid, + sizeof(net->ipv4.rt_genid)); return 0; } -static __net_exit void rt_secret_timer_exit(struct net *net) -{ - del_timer_sync(&net->ipv4.rt_secret_timer); -} - -static __net_initdata struct pernet_operations rt_secret_timer_ops = { - .init = rt_secret_timer_init, - .exit = rt_secret_timer_exit, +static __net_initdata struct pernet_operations rt_genid_ops = { + .init = rt_genid_init, }; @@ -3424,9 +3334,6 @@ int __init ip_rt_init(void) schedule_delayed_work(&expires_work, net_random() % ip_rt_gc_interval + ip_rt_gc_interval); - if (register_pernet_subsys(&rt_secret_timer_ops)) - printk(KERN_ERR "Unable to setup rt_secret_timer\n"); - if (ip_rt_proc_init()) printk(KERN_ERR "Unable to create route proc files\n"); #ifdef CONFIG_XFRM @@ -3438,6 +3345,7 @@ int __init ip_rt_init(void) #ifdef CONFIG_SYSCTL register_pernet_subsys(&sysctl_route_ops); #endif + register_pernet_subsys(&rt_genid_ops); return rc; } -- cgit v1.2.2 From 0d861d8b8edd139a9b291cb262d08dec8dc3922d Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:35 -0300 Subject: Bluetooth: Make hci_send_sco() void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It also removes an unneeded check for the MTU. The check is done before on sco_send_frame() Signed-off-by: Gustavo F. 
Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 9 +-------- net/bluetooth/sco.c | 3 +-- 2 files changed, 2 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 4ad23192c7a5..1c9aef97f519 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1308,18 +1308,13 @@ int hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags) EXPORT_SYMBOL(hci_send_acl); /* Send SCO data */ -int hci_send_sco(struct hci_conn *conn, struct sk_buff *skb) +void hci_send_sco(struct hci_conn *conn, struct sk_buff *skb) { struct hci_dev *hdev = conn->hdev; struct hci_sco_hdr hdr; BT_DBG("%s len %d", hdev->name, skb->len); - if (skb->len > hdev->sco_mtu) { - kfree_skb(skb); - return -EINVAL; - } - hdr.handle = cpu_to_le16(conn->handle); hdr.dlen = skb->len; @@ -1332,8 +1327,6 @@ int hci_send_sco(struct hci_conn *conn, struct sk_buff *skb) skb_queue_tail(&conn->data_q, skb); tasklet_schedule(&hdev->tx_task); - - return 0; } EXPORT_SYMBOL(hci_send_sco); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index b406d3eff53a..541b26efb724 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -249,8 +249,7 @@ static inline int sco_send_frame(struct sock *sk, struct msghdr *msg, int len) goto fail; } - if ((err = hci_send_sco(conn->hcon, skb)) < 0) - return err; + hci_send_sco(conn->hcon, skb); return count; -- cgit v1.2.2 From b9dbdbc1f4404cba2e64939c30c87d59c9796e4e Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:35 -0300 Subject: Bluetooth: Trivial clean ups to SCO Remove extra braces and labels, break over column 80 lines, etc Signed-off-by: Gustavo F. Padovan Signed-off-by: Marcel Holtmann --- net/bluetooth/sco.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 541b26efb724..4767928a93d3 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -165,11 +165,11 @@ static inline int sco_chan_add(struct sco_conn *conn, struct sock *sk, struct so int err = 0; sco_conn_lock(conn); - if (conn->sk) { + if (conn->sk) err = -EBUSY; - } else { + else __sco_chan_add(conn, sk, parent); - } + sco_conn_unlock(conn); return err; } @@ -241,21 +241,19 @@ static inline int sco_send_frame(struct sock *sk, struct msghdr *msg, int len) BT_DBG("sk %p len %d", sk, len); count = min_t(unsigned int, conn->mtu, len); - if (!(skb = bt_skb_send_alloc(sk, count, msg->msg_flags & MSG_DONTWAIT, &err))) + skb = bt_skb_send_alloc(sk, count, + msg->msg_flags & MSG_DONTWAIT, &err); + if (!skb) return err; if (memcpy_fromiovec(skb_put(skb, count), msg->msg_iov, count)) { - err = -EFAULT; - goto fail; + kfree_skb(skb); + return -EFAULT; } hci_send_sco(conn->hcon, skb); return count; - -fail: - kfree_skb(skb); - return err; } static inline void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb) @@ -625,7 +623,7 @@ static int sco_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; - int err = 0; + int err; BT_DBG("sock %p, sk %p", sock, sk); @@ -850,7 +848,8 @@ static void sco_conn_ready(struct sco_conn *conn) bh_lock_sock(parent); - sk = sco_sock_alloc(sock_net(parent), NULL, BTPROTO_SCO, GFP_ATOMIC); + sk = sco_sock_alloc(sock_net(parent), NULL, + BTPROTO_SCO, GFP_ATOMIC); if (!sk) { bh_unlock_sock(parent); goto done; -- cgit v1.2.2 From c69163e9ed5048407cc84f439cbfecc53f6f7131 
Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:35 -0300 Subject: Bluetooth: Move specific Basic Mode code to the right place MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Inside "case L2CAP_MODE_BASIC:" we don't need to check for sk_type and L2CAP mode. So only the length check is needed. Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 864c76f4a678..c9a848d3ef94 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1611,11 +1611,6 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; - /* Check outgoing MTU */ - if (sk->sk_type == SOCK_SEQPACKET && pi->mode == L2CAP_MODE_BASIC && - len > pi->omtu) - return -EINVAL; - lock_sock(sk); if (sk->sk_state != BT_CONNECTED) { @@ -1635,6 +1630,12 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms switch (pi->mode) { case L2CAP_MODE_BASIC: + /* Check outgoing MTU */ + if (len > pi->omtu) { + err = -EINVAL; + goto done; + } + /* Create a basic PDU */ skb = l2cap_create_basic_pdu(sk, msg, len); if (IS_ERR(skb)) { -- cgit v1.2.2 From faaebd192ec9c3febcab98149d1309199a5b886c Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:35 -0300 Subject: Bluetooth: Fix memory leak of S-frames into L2CAP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit l2cap_data_channel does not free the S-frame, so we free it here. Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index c9a848d3ef94..46f22640a337 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3522,6 +3522,7 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str break; } + kfree_skb(skb); return 0; } -- cgit v1.2.2 From 7dffe4210233a2860c3f41477c40b3252edf2b7d Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:36 -0300 Subject: Bluetooth: Fix expected_tx_seq calculation on L2CAP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All operations related to the txWindow should be modulo 64. Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 46f22640a337..401011a53c73 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3611,7 +3611,7 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk if (pi->expected_tx_seq == tx_seq) pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64; else - pi->expected_tx_seq = tx_seq + 1; + pi->expected_tx_seq = (tx_seq + 1) % 64; l2cap_sar_reassembly_sdu(sk, skb, control); -- cgit v1.2.2 From d1daa091e8612f3aab14d28b5836375fafe155e1 Mon Sep 17 00:00:00 2001 From: "Gustavo F. 
Padovan" Date: Sat, 1 May 2010 16:15:36 -0300 Subject: Bluetooth: Fix ACL MTU issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ERTM and Streaming Modes was having problems when the ACL MTU is lower than MPS. The 'minus 10' is to take in account the header and fcs lenghts. Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 401011a53c73..99cf1772b481 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -2267,6 +2267,8 @@ done: rfc.retrans_timeout = 0; rfc.monitor_timeout = 0; rfc.max_pdu_size = cpu_to_le16(L2CAP_DEFAULT_MAX_PDU_SIZE); + if (L2CAP_DEFAULT_MAX_PDU_SIZE > pi->conn->mtu - 10) + rfc.max_pdu_size = pi->conn->mtu - 10; l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc); @@ -2288,6 +2290,8 @@ done: rfc.retrans_timeout = 0; rfc.monitor_timeout = 0; rfc.max_pdu_size = cpu_to_le16(L2CAP_DEFAULT_MAX_PDU_SIZE); + if (L2CAP_DEFAULT_MAX_PDU_SIZE > pi->conn->mtu - 10) + rfc.max_pdu_size = pi->conn->mtu - 10; l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc); -- cgit v1.2.2 From e8235c6bdd1c7ffbaa7eb8dcdbb46c51f1e5d72e Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:36 -0300 Subject: Bluetooth: Use a l2cap_pinfo struct instead l2cap_pi() macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trivial clean up. Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 99cf1772b481..a9c152a09f0b 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1291,7 +1291,7 @@ static int l2cap_streaming_send(struct sock *sk) control |= pi->next_tx_seq << L2CAP_CTRL_TXSEQ_SHIFT; put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE); - if (l2cap_pi(sk)->fcs == L2CAP_FCS_CRC16) { + if (pi->fcs == L2CAP_FCS_CRC16) { fcs = crc16(0, (u8 *)tx_skb->data, tx_skb->len - 2); put_unaligned_le16(fcs, tx_skb->data + tx_skb->len - 2); } @@ -1344,7 +1344,7 @@ static int l2cap_retransmit_frame(struct sock *sk, u8 tx_seq) | (tx_seq << L2CAP_CTRL_TXSEQ_SHIFT); put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE); - if (l2cap_pi(sk)->fcs == L2CAP_FCS_CRC16) { + if (pi->fcs == L2CAP_FCS_CRC16) { fcs = crc16(0, (u8 *)tx_skb->data, tx_skb->len - 2); put_unaligned_le16(fcs, tx_skb->data + tx_skb->len - 2); } @@ -1388,7 +1388,7 @@ static int l2cap_ertm_send(struct sock *sk) put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE); - if (l2cap_pi(sk)->fcs == L2CAP_FCS_CRC16) { + if (pi->fcs == L2CAP_FCS_CRC16) { fcs = crc16(0, (u8 *)skb->data, tx_skb->len - 2); put_unaligned_le16(fcs, skb->data + tx_skb->len - 2); } @@ -3518,10 +3518,10 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str pi->expected_ack_seq = tx_seq; l2cap_drop_acked_frames(sk); - del_timer(&l2cap_pi(sk)->retrans_timer); + del_timer(&pi->retrans_timer); if (rx_control & L2CAP_CTRL_POLL) { u16 control = L2CAP_CTRL_FINAL; - l2cap_send_rr_or_rnr(l2cap_pi(sk), control); + l2cap_send_rr_or_rnr(pi, control); } break; } @@ -3622,7 +3622,7 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk 
goto done; default: - BT_DBG("sk %p: bad mode 0x%2.2x", sk, l2cap_pi(sk)->mode); + BT_DBG("sk %p: bad mode 0x%2.2x", sk, pi->mode); break; } -- cgit v1.2.2 From d5392c8f1e9faef089bb7cb66c3314da8bddd1fe Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:36 -0300 Subject: Bluetooth: Implement 'Send IorRRorRNR' event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After receiving an RR with the P bit set, ERTM shall use this function to choose what type of frame to reply with F bit = 1. Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 43 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index a9c152a09f0b..06687e264703 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1383,6 +1383,10 @@ static int l2cap_ertm_send(struct sock *sk) bt_cb(skb)->retries++; control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE); + if (pi->conn_state & L2CAP_CONN_SEND_FBIT) { + control |= L2CAP_CTRL_FINAL; + pi->conn_state &= ~L2CAP_CONN_SEND_FBIT; + } control |= (pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT) | (pi->next_tx_seq << L2CAP_CTRL_TXSEQ_SHIFT); put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE); @@ -1404,6 +1408,7 @@ static int l2cap_ertm_send(struct sock *sk) pi->next_tx_seq = (pi->next_tx_seq + 1) % 64; pi->unacked_frames++; + pi->frames_sent++; if (skb_queue_is_last(TX_QUEUE(sk), skb)) sk->sk_send_head = NULL; @@ -2191,6 +2196,7 @@ static inline void l2cap_ertm_init(struct sock *sk) l2cap_pi(sk)->unacked_frames = 0; l2cap_pi(sk)->buffer_seq = 0; l2cap_pi(sk)->num_to_ack = 0; + l2cap_pi(sk)->frames_sent = 0; setup_timer(&l2cap_pi(sk)->retrans_timer, l2cap_retrans_timeout, (unsigned long) sk); @@ -3148,6 +3154,38 @@ static int l2cap_check_fcs(struct l2cap_pinfo *pi, struct sk_buff *skb) return 0; } +static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + u16 control = 0; + + pi->frames_sent = 0; + pi->conn_state |= L2CAP_CONN_SEND_FBIT; + + control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; + + if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) { + control |= L2CAP_SUPER_RCV_NOT_READY | L2CAP_CTRL_FINAL; + l2cap_send_sframe(pi, control); + pi->conn_state &= ~L2CAP_CONN_SEND_FBIT; + } + + if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY && pi->unacked_frames > 0) + __mod_retrans_timer(); + + l2cap_ertm_send(sk); + + if (!(pi->conn_state & L2CAP_CONN_LOCAL_BUSY) && + pi->frames_sent == 0) { + control |= L2CAP_SUPER_RCV_READY; + if (pi->conn_state & L2CAP_CONN_SEND_FBIT) { + control |= L2CAP_CTRL_FINAL; + pi->conn_state &= ~L2CAP_CONN_SEND_FBIT; + } + l2cap_send_sframe(pi, control); + } +} + static void l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_seq, u8 sar) { struct sk_buff *next_skb; @@ -3418,10 +3456,7 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str switch (rx_control & L2CAP_CTRL_SUPERVISE) { case L2CAP_SUPER_RCV_READY: if (rx_control & L2CAP_CTRL_POLL) { - u16 control = L2CAP_CTRL_FINAL; - control |= L2CAP_SUPER_RCV_READY | - (pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT); - l2cap_send_sframe(l2cap_pi(sk), control); + l2cap_send_i_or_rr_or_rnr(sk); pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; } else if (rx_control & L2CAP_CTRL_FINAL) { -- cgit v1.2.2 From 1d8f5d16913d74e428950ee02fe9ff7e6391c120 Mon Sep 17 00:00:00 2001 
From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:37 -0300 Subject: Bluetooth: Support case with F bit set under WAIT_F state. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On receipt of a F=1 under WAIT_F state ERTM shall stop monitor timer and start retransmission timer (if there are unacked frames). Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 06687e264703..36cd4e4e6ad1 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3364,6 +3364,13 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len); + if (L2CAP_CTRL_FINAL & rx_control) { + del_timer(&pi->monitor_timer); + if (pi->unacked_frames > 0) + __mod_retrans_timer(); + pi->conn_state &= ~L2CAP_CONN_WAIT_F; + } + pi->expected_ack_seq = req_seq; l2cap_drop_acked_frames(sk); @@ -3453,6 +3460,13 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len); + if (L2CAP_CTRL_FINAL & rx_control) { + del_timer(&pi->monitor_timer); + if (pi->unacked_frames > 0) + __mod_retrans_timer(); + pi->conn_state &= ~L2CAP_CONN_WAIT_F; + } + switch (rx_control & L2CAP_CTRL_SUPERVISE) { case L2CAP_SUPER_RCV_READY: if (rx_control & L2CAP_CTRL_POLL) { @@ -3472,14 +3486,6 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str l2cap_ertm_send(sk); } - if (!(pi->conn_state & L2CAP_CONN_WAIT_F)) - break; - - pi->conn_state &= ~L2CAP_CONN_WAIT_F; - del_timer(&pi->monitor_timer); - - if (pi->unacked_frames > 0) - __mod_retrans_timer(); } else { pi->expected_ack_seq = tx_seq; l2cap_drop_acked_frames(sk); -- cgit v1.2.2 From 277ffbe362823d18a17792fbd8e507010e666299 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:37 -0300 Subject: Bluetooth: Check the minimum {I,S}-frame size into L2CAP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All packets with size fewer than the minimum specified is dropped. Note that the size of the l2cap basic header, FCS and SAR fields are already subtracted of len at the moment of the size check. Signed-off-by: Gustavo F. 
Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 36cd4e4e6ad1..ac00f5fac2d2 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3627,10 +3627,17 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk if (l2cap_check_fcs(pi, skb)) goto drop; - if (__is_iframe(control)) + if (__is_iframe(control)) { + if (len < 4) + goto drop; + l2cap_data_channel_iframe(sk, control, skb); - else + } else { + if (len != 0) + goto drop; + l2cap_data_channel_sframe(sk, control, skb); + } goto done; @@ -3645,7 +3652,8 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk if (pi->fcs == L2CAP_FCS_CRC16) len -= 2; - if (len > L2CAP_DEFAULT_MAX_PDU_SIZE || __is_sframe(control)) + if (len > L2CAP_DEFAULT_MAX_PDU_SIZE || len < 4 + || __is_sframe(control)) goto drop; if (l2cap_check_fcs(pi, skb)) -- cgit v1.2.2 From 36f2fd585f43199f006a3b5ff84e95815102cd31 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:37 -0300 Subject: Bluetooth: Check if SDU size is greater than MTU on L2CAP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After reassembling the SDU we need to check its size. It can't overflow the MTU size. Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index ac00f5fac2d2..2e354d29f102 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3277,15 +3277,19 @@ static int l2cap_sar_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 co pi->conn_state &= ~L2CAP_CONN_SAR_SDU; pi->partial_sdu_len += skb->len; + if (pi->partial_sdu_len > pi->imtu) + goto drop; + if (pi->partial_sdu_len == pi->sdu_len) { _skb = skb_clone(pi->sdu, GFP_ATOMIC); err = sock_queue_rcv_skb(sk, _skb); if (err < 0) kfree_skb(_skb); } - kfree_skb(pi->sdu); err = 0; +drop: + kfree_skb(pi->sdu); break; } -- cgit v1.2.2 From 9e917af13d59182f95bbb5483dc0c4254dfb7944 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:37 -0300 Subject: Bluetooth: Implement SendAck() Action on ERTM. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It shall be used to ack received frames. It must decide the type of acknowledgment between an RR frame, an RNR frame or transmission of pending I-frames. It also modifies l2cap_ertm_send() to report the number of frames sent. Signed-off-by: Gustavo F. 
Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 2e354d29f102..0a739ef167c2 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -352,6 +352,11 @@ static inline int l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control) count = min_t(unsigned int, conn->mtu, hlen); control |= L2CAP_CTRL_FRAME_TYPE; + if (pi->conn_state & L2CAP_CONN_SEND_FBIT) { + control |= L2CAP_CTRL_FINAL; + pi->conn_state &= ~L2CAP_CONN_SEND_FBIT; + } + skb = bt_skb_alloc(count, GFP_ATOMIC); if (!skb) return -ENOMEM; @@ -1364,7 +1369,7 @@ static int l2cap_ertm_send(struct sock *sk) struct sk_buff *skb, *tx_skb; struct l2cap_pinfo *pi = l2cap_pi(sk); u16 control, fcs; - int err; + int err, nsent = 0; if (pi->conn_state & L2CAP_CONN_WAIT_F) return 0; @@ -1414,8 +1419,27 @@ static int l2cap_ertm_send(struct sock *sk) sk->sk_send_head = NULL; else sk->sk_send_head = skb_queue_next(TX_QUEUE(sk), skb); + + nsent++; } + return nsent; +} + +static int l2cap_send_ack(struct l2cap_pinfo *pi) +{ + struct sock *sk = (struct sock *)pi; + u16 control = 0; + + control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; + + if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) { + control |= L2CAP_SUPER_RCV_NOT_READY; + return l2cap_send_sframe(pi, control); + } else if (l2cap_ertm_send(sk) == 0) { + control |= L2CAP_SUPER_RCV_READY; + return l2cap_send_sframe(pi, control); + } return 0; } @@ -1678,7 +1702,7 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms else err = l2cap_ertm_send(sk); - if (!err) + if (err >= 0) err = len; break; @@ -3178,10 +3202,6 @@ static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk) if (!(pi->conn_state & L2CAP_CONN_LOCAL_BUSY) && pi->frames_sent == 0) { control |= L2CAP_SUPER_RCV_READY; - if (pi->conn_state & L2CAP_CONN_SEND_FBIT) { - control |= L2CAP_CTRL_FINAL; - pi->conn_state &= ~L2CAP_CONN_SEND_FBIT; - } l2cap_send_sframe(pi, control); } } @@ -3362,7 +3382,6 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str struct l2cap_pinfo *pi = l2cap_pi(sk); u8 tx_seq = __get_txseq(rx_control); u8 req_seq = __get_reqseq(rx_control); - u16 tx_control = 0; u8 sar = rx_control >> L2CAP_CTRL_SAR_SHIFT; int err = 0; @@ -3449,11 +3468,9 @@ expected: return err; pi->num_to_ack = (pi->num_to_ack + 1) % L2CAP_DEFAULT_NUM_TO_ACK; - if (pi->num_to_ack == L2CAP_DEFAULT_NUM_TO_ACK - 1) { - tx_control |= L2CAP_SUPER_RCV_READY; - tx_control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; - l2cap_send_sframe(pi, tx_control); - } + if (pi->num_to_ack == L2CAP_DEFAULT_NUM_TO_ACK - 1) + l2cap_send_ack(pi); + return 0; } -- cgit v1.2.2 From f0946ccfc7da403a46b7ff7cb2e3deffac108742 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:37 -0300 Subject: Bluetooth: Move set of P-bit to l2cap_send_sframe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Abstract the send of of P-bit and avoids code duplication like we did with the setting of F-bit. Signed-off-by: Gustavo F. 
Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 0a739ef167c2..852c1400d069 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -357,6 +357,11 @@ static inline int l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control) pi->conn_state &= ~L2CAP_CONN_SEND_FBIT; } + if (pi->conn_state & L2CAP_CONN_SEND_PBIT) { + control |= L2CAP_CTRL_POLL; + pi->conn_state &= ~L2CAP_CONN_SEND_PBIT; + } + skb = bt_skb_alloc(count, GFP_ATOMIC); if (!skb) return -ENOMEM; @@ -3364,10 +3369,6 @@ static void l2cap_send_srejframe(struct sock *sk, u8 tx_seq) while (tx_seq != pi->expected_tx_seq) { control = L2CAP_SUPER_SELECT_REJECT; control |= pi->expected_tx_seq << L2CAP_CTRL_REQSEQ_SHIFT; - if (pi->conn_state & L2CAP_CONN_SEND_PBIT) { - control |= L2CAP_CTRL_POLL; - pi->conn_state &= ~L2CAP_CONN_SEND_PBIT; - } l2cap_send_sframe(pi, control); new = kzalloc(sizeof(struct srej_list), GFP_ATOMIC); -- cgit v1.2.2 From 73edaa9933219e25d6733b78d1e2c881025705e2 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:38 -0300 Subject: Bluetooth: Add Recv RR (P=0)(F=0) for SREJ_SENT state on ERTM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This finishes the implementation of Recv RR (P=0)(F=0) for the Enhanced Retransmission Mode on L2CAP. Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 852c1400d069..e5cd64ac6fb2 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3517,7 +3517,10 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str __mod_retrans_timer(); pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; - l2cap_ertm_send(sk); + if (pi->conn_state & L2CAP_CONN_SREJ_SENT) + l2cap_send_ack(pi); + else + l2cap_ertm_send(sk); } break; -- cgit v1.2.2 From e072745f4adb01b909bd08a0cfc8f79348f4d2c6 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:38 -0300 Subject: Bluetooth: Split l2cap_data_channel_sframe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create a function for each type of S-frame and avoid a lot of nested code. Signed-off-by: Gustavo F. 
Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 204 ++++++++++++++++++++++++++++---------------------- 1 file changed, 115 insertions(+), 89 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index e5cd64ac6fb2..068edf7704bf 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3475,120 +3475,146 @@ expected: return 0; } -static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, struct sk_buff *skb) +static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control) { struct l2cap_pinfo *pi = l2cap_pi(sk); u8 tx_seq = __get_reqseq(rx_control); - BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len); - - if (L2CAP_CTRL_FINAL & rx_control) { - del_timer(&pi->monitor_timer); - if (pi->unacked_frames > 0) - __mod_retrans_timer(); - pi->conn_state &= ~L2CAP_CONN_WAIT_F; - } + if (rx_control & L2CAP_CTRL_POLL) { + l2cap_send_i_or_rr_or_rnr(sk); + pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; - switch (rx_control & L2CAP_CTRL_SUPERVISE) { - case L2CAP_SUPER_RCV_READY: - if (rx_control & L2CAP_CTRL_POLL) { - l2cap_send_i_or_rr_or_rnr(sk); - pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; - - } else if (rx_control & L2CAP_CTRL_FINAL) { - pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; - pi->expected_ack_seq = tx_seq; - l2cap_drop_acked_frames(sk); - - if (pi->conn_state & L2CAP_CONN_REJ_ACT) - pi->conn_state &= ~L2CAP_CONN_REJ_ACT; - else { - sk->sk_send_head = TX_QUEUE(sk)->next; - pi->next_tx_seq = pi->expected_ack_seq; - l2cap_ertm_send(sk); - } + } else if (rx_control & L2CAP_CTRL_FINAL) { + pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; + pi->expected_ack_seq = tx_seq; + l2cap_drop_acked_frames(sk); - } else { - pi->expected_ack_seq = tx_seq; - l2cap_drop_acked_frames(sk); + if (pi->conn_state & L2CAP_CONN_REJ_ACT) + pi->conn_state &= ~L2CAP_CONN_REJ_ACT; + else { + sk->sk_send_head = TX_QUEUE(sk)->next; + pi->next_tx_seq = pi->expected_ack_seq; + l2cap_ertm_send(sk); + } - if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) && - (pi->unacked_frames > 0)) - __mod_retrans_timer(); + } else { + pi->expected_ack_seq = tx_seq; + l2cap_drop_acked_frames(sk); - pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; - if (pi->conn_state & L2CAP_CONN_SREJ_SENT) - l2cap_send_ack(pi); - else - l2cap_ertm_send(sk); - } - break; + if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) && + (pi->unacked_frames > 0)) + __mod_retrans_timer(); - case L2CAP_SUPER_REJECT: pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; + if (pi->conn_state & L2CAP_CONN_SREJ_SENT) + l2cap_send_ack(pi); + else + l2cap_ertm_send(sk); + } +} - pi->expected_ack_seq = __get_reqseq(rx_control); - l2cap_drop_acked_frames(sk); +static inline void l2cap_data_channel_rejframe(struct sock *sk, u16 rx_control) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + u8 tx_seq = __get_reqseq(rx_control); - if (rx_control & L2CAP_CTRL_FINAL) { - if (pi->conn_state & L2CAP_CONN_REJ_ACT) - pi->conn_state &= ~L2CAP_CONN_REJ_ACT; - else { - sk->sk_send_head = TX_QUEUE(sk)->next; - pi->next_tx_seq = pi->expected_ack_seq; - l2cap_ertm_send(sk); - } - } else { + pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; + + pi->expected_ack_seq = __get_reqseq(rx_control); + l2cap_drop_acked_frames(sk); + + if (rx_control & L2CAP_CTRL_FINAL) { + if (pi->conn_state & L2CAP_CONN_REJ_ACT) + pi->conn_state &= ~L2CAP_CONN_REJ_ACT; + else { sk->sk_send_head = TX_QUEUE(sk)->next; pi->next_tx_seq = pi->expected_ack_seq; l2cap_ertm_send(sk); - - if (pi->conn_state & L2CAP_CONN_WAIT_F) 
{ - pi->srej_save_reqseq = tx_seq; - pi->conn_state |= L2CAP_CONN_REJ_ACT; - } } + } else { + sk->sk_send_head = TX_QUEUE(sk)->next; + pi->next_tx_seq = pi->expected_ack_seq; + l2cap_ertm_send(sk); - break; + if (pi->conn_state & L2CAP_CONN_WAIT_F) { + pi->srej_save_reqseq = tx_seq; + pi->conn_state |= L2CAP_CONN_REJ_ACT; + } + } +} +static inline void l2cap_data_channel_srejframe(struct sock *sk, u16 rx_control) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + u8 tx_seq = __get_reqseq(rx_control); - case L2CAP_SUPER_SELECT_REJECT: - pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; + pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; - if (rx_control & L2CAP_CTRL_POLL) { - pi->expected_ack_seq = tx_seq; - l2cap_drop_acked_frames(sk); - l2cap_retransmit_frame(sk, tx_seq); - l2cap_ertm_send(sk); - if (pi->conn_state & L2CAP_CONN_WAIT_F) { - pi->srej_save_reqseq = tx_seq; - pi->conn_state |= L2CAP_CONN_SREJ_ACT; - } - } else if (rx_control & L2CAP_CTRL_FINAL) { - if ((pi->conn_state & L2CAP_CONN_SREJ_ACT) && - pi->srej_save_reqseq == tx_seq) - pi->conn_state &= ~L2CAP_CONN_SREJ_ACT; - else - l2cap_retransmit_frame(sk, tx_seq); + if (rx_control & L2CAP_CTRL_POLL) { + pi->expected_ack_seq = tx_seq; + l2cap_drop_acked_frames(sk); + l2cap_retransmit_frame(sk, tx_seq); + l2cap_ertm_send(sk); + if (pi->conn_state & L2CAP_CONN_WAIT_F) { + pi->srej_save_reqseq = tx_seq; + pi->conn_state |= L2CAP_CONN_SREJ_ACT; } - else { + } else if (rx_control & L2CAP_CTRL_FINAL) { + if ((pi->conn_state & L2CAP_CONN_SREJ_ACT) && + pi->srej_save_reqseq == tx_seq) + pi->conn_state &= ~L2CAP_CONN_SREJ_ACT; + else l2cap_retransmit_frame(sk, tx_seq); - if (pi->conn_state & L2CAP_CONN_WAIT_F) { - pi->srej_save_reqseq = tx_seq; - pi->conn_state |= L2CAP_CONN_SREJ_ACT; - } + } else { + l2cap_retransmit_frame(sk, tx_seq); + if (pi->conn_state & L2CAP_CONN_WAIT_F) { + pi->srej_save_reqseq = tx_seq; + pi->conn_state |= L2CAP_CONN_SREJ_ACT; } + } +} + +static inline void l2cap_data_channel_rnrframe(struct sock *sk, u16 rx_control) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + u8 tx_seq = __get_reqseq(rx_control); + + pi->conn_state |= L2CAP_CONN_REMOTE_BUSY; + pi->expected_ack_seq = tx_seq; + l2cap_drop_acked_frames(sk); + + del_timer(&pi->retrans_timer); + if (rx_control & L2CAP_CTRL_POLL) { + u16 control = L2CAP_CTRL_FINAL; + l2cap_send_rr_or_rnr(pi, control); + } +} + +static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, struct sk_buff *skb) +{ + BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len); + + if (L2CAP_CTRL_FINAL & rx_control) { + del_timer(&l2cap_pi(sk)->monitor_timer); + if (l2cap_pi(sk)->unacked_frames > 0) + __mod_retrans_timer(); + l2cap_pi(sk)->conn_state &= ~L2CAP_CONN_WAIT_F; + } + + switch (rx_control & L2CAP_CTRL_SUPERVISE) { + case L2CAP_SUPER_RCV_READY: + l2cap_data_channel_rrframe(sk, rx_control); break; - case L2CAP_SUPER_RCV_NOT_READY: - pi->conn_state |= L2CAP_CONN_REMOTE_BUSY; - pi->expected_ack_seq = tx_seq; - l2cap_drop_acked_frames(sk); + case L2CAP_SUPER_REJECT: + l2cap_data_channel_rejframe(sk, rx_control); + break; - del_timer(&pi->retrans_timer); - if (rx_control & L2CAP_CTRL_POLL) { - u16 control = L2CAP_CTRL_FINAL; - l2cap_send_rr_or_rnr(pi, control); - } + case L2CAP_SUPER_SELECT_REJECT: + l2cap_data_channel_srejframe(sk, rx_control); + break; + + case L2CAP_SUPER_RCV_NOT_READY: + l2cap_data_channel_rnrframe(sk, rx_control); break; } -- cgit v1.2.2 From 99b0d4b7b09edeacf4542bced5c01239375b51a9 Mon Sep 17 00:00:00 2001 From: "Gustavo F. 
Padovan" Date: Sat, 1 May 2010 16:15:38 -0300 Subject: Bluetooth: Handle all cases of receipt of RNR-frames into L2CAP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We weren't handling the receipt under SREJ_SENT state table. It also introduce l2cap_send_srejtail(). It will be used in the nexts commits too. Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 068edf7704bf..8937a842347a 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1448,6 +1448,22 @@ static int l2cap_send_ack(struct l2cap_pinfo *pi) return 0; } +static int l2cap_send_srejtail(struct sock *sk) +{ + struct srej_list *tail; + u16 control; + + control = L2CAP_SUPER_SELECT_REJECT; + control |= L2CAP_CTRL_FINAL; + + tail = list_entry(SREJ_LIST(sk)->prev, struct srej_list, list); + control |= tail->tx_seq << L2CAP_CTRL_REQSEQ_SHIFT; + + l2cap_send_sframe(l2cap_pi(sk), control); + + return 0; +} + static inline int l2cap_skbuff_fromiovec(struct sock *sk, struct msghdr *msg, int len, int count, struct sk_buff *skb) { struct l2cap_conn *conn = l2cap_pi(sk)->conn; @@ -3582,11 +3598,19 @@ static inline void l2cap_data_channel_rnrframe(struct sock *sk, u16 rx_control) pi->expected_ack_seq = tx_seq; l2cap_drop_acked_frames(sk); - del_timer(&pi->retrans_timer); - if (rx_control & L2CAP_CTRL_POLL) { - u16 control = L2CAP_CTRL_FINAL; - l2cap_send_rr_or_rnr(pi, control); + if (!(pi->conn_state & L2CAP_CONN_SREJ_SENT)) { + del_timer(&pi->retrans_timer); + if (rx_control & L2CAP_CTRL_POLL) { + u16 control = L2CAP_CTRL_FINAL; + l2cap_send_rr_or_rnr(pi, control); + } + return; } + + if (rx_control & L2CAP_CTRL_POLL) + l2cap_send_srejtail(sk); + else + l2cap_send_sframe(pi, L2CAP_SUPER_RCV_READY); } static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, struct sk_buff *skb) -- cgit v1.2.2 From 6e3a59819fac19006fe4255b87928e5a12c54532 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:38 -0300 Subject: Bluetooth: Group the ack of I-frames into l2cap_data_channel_rrframe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It also fix a bug: we weren't acknowledging I-frames when P=1. Note that when F=1 we are acknowledging packets before setting RemoteBusy to False. The spec says we should do that in the opposite order, but acknowledment of packets doesn't care about RemoteBusy flag so we can do that in the order we want. Signed-off-by: Gustavo F. 
Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 8937a842347a..d096c7c11ab5 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3494,7 +3494,9 @@ expected: static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control) { struct l2cap_pinfo *pi = l2cap_pi(sk); - u8 tx_seq = __get_reqseq(rx_control); + + pi->expected_ack_seq = __get_reqseq(rx_control); + l2cap_drop_acked_frames(sk); if (rx_control & L2CAP_CTRL_POLL) { l2cap_send_i_or_rr_or_rnr(sk); @@ -3502,8 +3504,6 @@ static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control) } else if (rx_control & L2CAP_CTRL_FINAL) { pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; - pi->expected_ack_seq = tx_seq; - l2cap_drop_acked_frames(sk); if (pi->conn_state & L2CAP_CONN_REJ_ACT) pi->conn_state &= ~L2CAP_CONN_REJ_ACT; @@ -3514,9 +3514,6 @@ static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control) } } else { - pi->expected_ack_seq = tx_seq; - l2cap_drop_acked_frames(sk); - if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) && (pi->unacked_frames > 0)) __mod_retrans_timer(); -- cgit v1.2.2 From 8abb52ee00c4b3f857269eb6b7145991bab869bf Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:38 -0300 Subject: Bluetooth: Remove duplicate use of __get_reqseq() macro on L2CAP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tx_seq var already has the value of __get_reqseq(). Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index d096c7c11ab5..e9ac9fb11917 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3533,7 +3533,7 @@ static inline void l2cap_data_channel_rejframe(struct sock *sk, u16 rx_control) pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; - pi->expected_ack_seq = __get_reqseq(rx_control); + pi->expected_ack_seq = tx_seq; l2cap_drop_acked_frames(sk); if (rx_control & L2CAP_CTRL_FINAL) { -- cgit v1.2.2 From 05fbd89dd4153341717b33d9e8ae8bd29db6c1c8 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:39 -0300 Subject: Bluetooth: Finish implementation for Rec RR (P=1) on ERTM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now the code handles the case under SREJ_SENT state. Signed-off-by: Gustavo F. 
Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index e9ac9fb11917..f3869857ee9f 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3499,8 +3499,17 @@ static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control) l2cap_drop_acked_frames(sk); if (rx_control & L2CAP_CTRL_POLL) { - l2cap_send_i_or_rr_or_rnr(sk); - pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; + if (pi->conn_state & L2CAP_CONN_SREJ_SENT) { + if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) && + (pi->unacked_frames > 0)) + __mod_retrans_timer(); + + pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; + l2cap_send_srejtail(sk); + } else { + l2cap_send_i_or_rr_or_rnr(sk); + pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; + } } else if (rx_control & L2CAP_CTRL_FINAL) { pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; -- cgit v1.2.2 From c1b4f43be01c2a363be021485dd18cca33cfab8a Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:39 -0300 Subject: Bluetooth: Add timer to Acknowledge I-frames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We ack I-frames on each txWindow/5 I-frames received, but if the sender stop to send I-frames and it's not a txWindow multiple we can leave some frames unacked. So I added a timer to ack I-frames on this case. The timer expires in 200ms. Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index f3869857ee9f..03006568f8a1 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -2235,6 +2235,15 @@ static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val) *ptr += L2CAP_CONF_OPT_SIZE + len; } +static void l2cap_ack_timeout(unsigned long arg) +{ + struct sock *sk = (void *) arg; + + bh_lock_sock(sk); + l2cap_send_ack(l2cap_pi(sk)); + bh_unlock_sock(sk); +} + static inline void l2cap_ertm_init(struct sock *sk) { l2cap_pi(sk)->expected_ack_seq = 0; @@ -2247,6 +2256,8 @@ static inline void l2cap_ertm_init(struct sock *sk) l2cap_retrans_timeout, (unsigned long) sk); setup_timer(&l2cap_pi(sk)->monitor_timer, l2cap_monitor_timeout, (unsigned long) sk); + setup_timer(&l2cap_pi(sk)->ack_timer, + l2cap_ack_timeout, (unsigned long) sk); __skb_queue_head_init(SREJ_QUEUE(sk)); } @@ -2975,6 +2986,7 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd skb_queue_purge(SREJ_QUEUE(sk)); del_timer(&l2cap_pi(sk)->retrans_timer); del_timer(&l2cap_pi(sk)->monitor_timer); + del_timer(&l2cap_pi(sk)->ack_timer); } l2cap_chan_del(sk, ECONNRESET); @@ -3005,6 +3017,7 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, struct l2cap_cmd skb_queue_purge(SREJ_QUEUE(sk)); del_timer(&l2cap_pi(sk)->retrans_timer); del_timer(&l2cap_pi(sk)->monitor_timer); + del_timer(&l2cap_pi(sk)->ack_timer); } l2cap_chan_del(sk, 0); @@ -3484,6 +3497,8 @@ expected: if (err < 0) return err; + __mod_ack_timer(); + pi->num_to_ack = (pi->num_to_ack + 1) % L2CAP_DEFAULT_NUM_TO_ACK; if (pi->num_to_ack == L2CAP_DEFAULT_NUM_TO_ACK - 1) l2cap_send_ack(pi); -- cgit v1.2.2 From 2fb862e215e53630066c677e06d7551fa38bf235 Mon Sep 17 00:00:00 2001 From: "Gustavo F. 
Padovan" Date: Sat, 1 May 2010 16:15:39 -0300 Subject: Bluetooth: Ignore Tx Window value with Streaming mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tx Window value shall not be used with Streaming Mode and the receiver of the config Request shall ignore its value. Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 03006568f8a1..f604405fe667 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -2495,7 +2495,6 @@ done: break; case L2CAP_MODE_STREAMING: - pi->remote_tx_win = rfc.txwin_size; pi->max_pdu_size = rfc.max_pdu_size; pi->conf_state |= L2CAP_CONF_MODE_DONE; -- cgit v1.2.2 From 7b1c0049be3aabc18831ada339dbcf41ba8c81fd Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:39 -0300 Subject: Bluetooth: Read RFC conf option on a successful Conf RSP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Enhanced Retransmission Mode and Streaming Mode a entity can send, on a successful Conf RSP, new values for the RFC fields. For example, the entity can send txWindow and MPS values less than the value received on a Conf REQ. Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index f604405fe667..c50c05738fb6 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -2602,6 +2602,42 @@ static int l2cap_build_conf_rsp(struct sock *sk, void *data, u16 result, u16 fla return ptr - data; } +static void l2cap_conf_rfc_get(struct sock *sk, void *rsp, int len) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + int type, olen; + unsigned long val; + struct l2cap_conf_rfc rfc; + + BT_DBG("sk %p, rsp %p, len %d", sk, rsp, len); + + if ((pi->mode != L2CAP_MODE_ERTM) && (pi->mode != L2CAP_MODE_STREAMING)) + return; + + while (len >= L2CAP_CONF_OPT_SIZE) { + len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val); + + switch (type) { + case L2CAP_CONF_RFC: + if (olen == sizeof(rfc)) + memcpy(&rfc, (void *)val, olen); + goto done; + } + } + +done: + switch (rfc.mode) { + case L2CAP_MODE_ERTM: + pi->remote_tx_win = rfc.txwin_size; + pi->retrans_timeout = rfc.retrans_timeout; + pi->monitor_timeout = rfc.monitor_timeout; + pi->mps = le16_to_cpu(rfc.max_pdu_size); + break; + case L2CAP_MODE_STREAMING: + pi->mps = le16_to_cpu(rfc.max_pdu_size); + } +} + static inline int l2cap_command_rej(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data) { struct l2cap_cmd_rej *rej = (struct l2cap_cmd_rej *) data; @@ -2881,6 +2917,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr struct l2cap_conf_rsp *rsp = (struct l2cap_conf_rsp *)data; u16 scid, flags, result; struct sock *sk; + int len = cmd->len - sizeof(*rsp); scid = __le16_to_cpu(rsp->scid); flags = __le16_to_cpu(rsp->flags); @@ -2895,11 +2932,11 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr switch (result) { case L2CAP_CONF_SUCCESS: + l2cap_conf_rfc_get(sk, rsp->data, len); break; case L2CAP_CONF_UNACCEPT: if (l2cap_pi(sk)->num_conf_rsp <= L2CAP_CONF_MAX_CONF_RSP) { - int len = cmd->len - sizeof(*rsp); char req[64]; if (len > 
sizeof(req) - sizeof(struct l2cap_conf_req)) { -- cgit v1.2.2 From 1c7621596d11b9c3e19eb88a818758dee4901c95 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:40 -0300 Subject: Bluetooth: Fix configuration of the MPS value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We were accepting values bigger than we can accept. This was leading ERTM to drop packets because of wrong FCS checks. Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index c50c05738fb6..94be5dbb2569 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1606,21 +1606,21 @@ static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, siz __skb_queue_head_init(&sar_queue); control = L2CAP_SDU_START; - skb = l2cap_create_iframe_pdu(sk, msg, pi->max_pdu_size, control, len); + skb = l2cap_create_iframe_pdu(sk, msg, pi->remote_mps, control, len); if (IS_ERR(skb)) return PTR_ERR(skb); __skb_queue_tail(&sar_queue, skb); - len -= pi->max_pdu_size; - size +=pi->max_pdu_size; + len -= pi->remote_mps; + size += pi->remote_mps; control = 0; while (len > 0) { size_t buflen; - if (len > pi->max_pdu_size) { + if (len > pi->remote_mps) { control |= L2CAP_SDU_CONTINUE; - buflen = pi->max_pdu_size; + buflen = pi->remote_mps; } else { control |= L2CAP_SDU_END; buflen = len; @@ -1701,7 +1701,7 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms case L2CAP_MODE_ERTM: case L2CAP_MODE_STREAMING: /* Entire SDU fits into one PDU */ - if (len <= pi->max_pdu_size) { + if (len <= pi->remote_mps) { control = L2CAP_SDU_UNSEGMENTED; skb = l2cap_create_iframe_pdu(sk, msg, len, control, 0); if (IS_ERR(skb)) { @@ -2330,7 +2330,7 @@ done: rfc.monitor_timeout = 0; rfc.max_pdu_size = cpu_to_le16(L2CAP_DEFAULT_MAX_PDU_SIZE); if (L2CAP_DEFAULT_MAX_PDU_SIZE > pi->conn->mtu - 10) - rfc.max_pdu_size = pi->conn->mtu - 10; + rfc.max_pdu_size = cpu_to_le16(pi->conn->mtu - 10); l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc); @@ -2353,7 +2353,7 @@ done: rfc.monitor_timeout = 0; rfc.max_pdu_size = cpu_to_le16(L2CAP_DEFAULT_MAX_PDU_SIZE); if (L2CAP_DEFAULT_MAX_PDU_SIZE > pi->conn->mtu - 10) - rfc.max_pdu_size = pi->conn->mtu - 10; + rfc.max_pdu_size = cpu_to_le16(pi->conn->mtu - 10); l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc); @@ -2482,7 +2482,10 @@ done: case L2CAP_MODE_ERTM: pi->remote_tx_win = rfc.txwin_size; pi->remote_max_tx = rfc.max_transmit; - pi->max_pdu_size = rfc.max_pdu_size; + if (rfc.max_pdu_size > pi->conn->mtu - 10) + rfc.max_pdu_size = le16_to_cpu(pi->conn->mtu - 10); + + pi->remote_mps = le16_to_cpu(rfc.max_pdu_size); rfc.retrans_timeout = L2CAP_DEFAULT_RETRANS_TO; rfc.monitor_timeout = L2CAP_DEFAULT_MONITOR_TO; @@ -2495,7 +2498,10 @@ done: break; case L2CAP_MODE_STREAMING: - pi->max_pdu_size = rfc.max_pdu_size; + if (rfc.max_pdu_size > pi->conn->mtu - 10) + rfc.max_pdu_size = le16_to_cpu(pi->conn->mtu - 10); + + pi->remote_mps = le16_to_cpu(rfc.max_pdu_size); pi->conf_state |= L2CAP_CONF_MODE_DONE; @@ -2574,11 +2580,10 @@ static int l2cap_parse_conf_rsp(struct sock *sk, void *rsp, int len, void *data, pi->remote_tx_win = rfc.txwin_size; pi->retrans_timeout = rfc.retrans_timeout; pi->monitor_timeout = rfc.monitor_timeout; 
- pi->max_pdu_size = le16_to_cpu(rfc.max_pdu_size); + pi->mps = le16_to_cpu(rfc.max_pdu_size); break; case L2CAP_MODE_STREAMING: - pi->max_pdu_size = le16_to_cpu(rfc.max_pdu_size); - break; + pi->mps = le16_to_cpu(rfc.max_pdu_size); } } @@ -3753,7 +3758,7 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk * Receiver will miss it and start proper recovery * procedures and ask retransmission. */ - if (len > L2CAP_DEFAULT_MAX_PDU_SIZE) + if (len > pi->mps) goto drop; if (l2cap_check_fcs(pi, skb)) @@ -3784,8 +3789,7 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk if (pi->fcs == L2CAP_FCS_CRC16) len -= 2; - if (len > L2CAP_DEFAULT_MAX_PDU_SIZE || len < 4 - || __is_sframe(control)) + if (len > pi->mps || len < 4 || __is_sframe(control)) goto drop; if (l2cap_check_fcs(pi, skb)) -- cgit v1.2.2 From 10467e9e9b89272b25b56688bb276d0830e9ab9a Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:40 -0300 Subject: Bluetooth: Add le16 macro to Retransmission and Monitor Timeouts values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a possible problem with Big Endian machines. Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 94be5dbb2569..0889949b6896 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -2487,8 +2487,10 @@ done: pi->remote_mps = le16_to_cpu(rfc.max_pdu_size); - rfc.retrans_timeout = L2CAP_DEFAULT_RETRANS_TO; - rfc.monitor_timeout = L2CAP_DEFAULT_MONITOR_TO; + rfc.retrans_timeout = + le16_to_cpu(L2CAP_DEFAULT_RETRANS_TO); + rfc.monitor_timeout = + le16_to_cpu(L2CAP_DEFAULT_MONITOR_TO); pi->conf_state |= L2CAP_CONF_MODE_DONE; @@ -2578,8 +2580,8 @@ static int l2cap_parse_conf_rsp(struct sock *sk, void *rsp, int len, void *data, switch (rfc.mode) { case L2CAP_MODE_ERTM: pi->remote_tx_win = rfc.txwin_size; - pi->retrans_timeout = rfc.retrans_timeout; - pi->monitor_timeout = rfc.monitor_timeout; + pi->retrans_timeout = le16_to_cpu(rfc.retrans_timeout); + pi->monitor_timeout = le16_to_cpu(rfc.monitor_timeout); pi->mps = le16_to_cpu(rfc.max_pdu_size); break; case L2CAP_MODE_STREAMING: @@ -2634,8 +2636,8 @@ done: switch (rfc.mode) { case L2CAP_MODE_ERTM: pi->remote_tx_win = rfc.txwin_size; - pi->retrans_timeout = rfc.retrans_timeout; - pi->monitor_timeout = rfc.monitor_timeout; + pi->retrans_timeout = le16_to_cpu(rfc.retrans_timeout); + pi->monitor_timeout = le16_to_cpu(rfc.monitor_timeout); pi->mps = le16_to_cpu(rfc.max_pdu_size); break; case L2CAP_MODE_STREAMING: -- cgit v1.2.2 From 052897ca5019d9157ae09e5e84eee2a9ef5dccc6 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:40 -0300 Subject: Bluetooth: Check the SDU size against the MTU value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the SDU size is greater than the MTU something is wrong, so report an error. Signed-off-by: Gustavo F. 
Padovan [jprvita@profusion.mobi: set err to appropriate errno value] Signed-off-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 0889949b6896..e936913c921e 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3338,6 +3338,11 @@ static int l2cap_sar_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 co pi->sdu_len = get_unaligned_le16(skb->data); skb_pull(skb, 2); + if (pi->sdu_len > pi->imtu) { + err = -EMSGSIZE; + break; + } + pi->sdu = bt_skb_alloc(pi->sdu_len, GFP_ATOMIC); if (!pi->sdu) { err = -ENOMEM; -- cgit v1.2.2 From 855666cccc939d392316de17512e17a08b2fa05a Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:40 -0300 Subject: Bluetooth: Send Ack after clear the SREJ list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As specified by Bluetooth 3.0 spec we shall send an acknowledgment using the Send-Ack() after clear the SREJ list. Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index e936913c921e..c6bc1b9ed657 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3493,6 +3493,7 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str if (list_empty(SREJ_LIST(sk))) { pi->buffer_seq = pi->buffer_seq_srej; pi->conn_state &= ~L2CAP_CONN_SREJ_SENT; + l2cap_send_ack(pi); } } else { struct srej_list *l; -- cgit v1.2.2 From 14b5aa71ec506f4e38ca6a1dc02ecd668ecfd902 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:40 -0300 Subject: Bluetooth: Add sockopt configuration for txWindow on L2CAP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now we can set/get Transmission Window size via sockopt. Signed-off-by: Gustavo F. 
Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index c6bc1b9ed657..530079649b43 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -782,6 +782,7 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent) pi->omtu = l2cap_pi(parent)->omtu; pi->mode = l2cap_pi(parent)->mode; pi->fcs = l2cap_pi(parent)->fcs; + pi->tx_win = l2cap_pi(parent)->tx_win; pi->sec_level = l2cap_pi(parent)->sec_level; pi->role_switch = l2cap_pi(parent)->role_switch; pi->force_reliable = l2cap_pi(parent)->force_reliable; @@ -790,6 +791,7 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent) pi->omtu = 0; pi->mode = L2CAP_MODE_BASIC; pi->fcs = L2CAP_FCS_CRC16; + pi->tx_win = L2CAP_DEFAULT_TX_WINDOW; pi->sec_level = BT_SECURITY_LOW; pi->role_switch = 0; pi->force_reliable = 0; @@ -1782,6 +1784,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us opts.flush_to = l2cap_pi(sk)->flush_to; opts.mode = l2cap_pi(sk)->mode; opts.fcs = l2cap_pi(sk)->fcs; + opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win; len = min_t(unsigned int, sizeof(opts), optlen); if (copy_from_user((char *) &opts, optval, len)) { @@ -1793,6 +1796,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us l2cap_pi(sk)->omtu = opts.omtu; l2cap_pi(sk)->mode = opts.mode; l2cap_pi(sk)->fcs = opts.fcs; + l2cap_pi(sk)->tx_win = (__u8)opts.txwin_size; break; case L2CAP_LM: @@ -1907,6 +1911,7 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __us opts.flush_to = l2cap_pi(sk)->flush_to; opts.mode = l2cap_pi(sk)->mode; opts.fcs = l2cap_pi(sk)->fcs; + opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win; len = min_t(unsigned int, len, sizeof(opts)); if (copy_to_user(optval, (char *) &opts, len)) @@ -2324,7 +2329,7 @@ done: case L2CAP_MODE_ERTM: rfc.mode = L2CAP_MODE_ERTM; - rfc.txwin_size = L2CAP_DEFAULT_TX_WINDOW; + rfc.txwin_size = pi->tx_win; rfc.max_transmit = max_transmit; rfc.retrans_timeout = 0; rfc.monitor_timeout = 0; -- cgit v1.2.2 From 803020c6fa63aa738cfda3329c9675b42023e9d2 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:41 -0300 Subject: Bluetooth: Change acknowledgement to use the value of txWindow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we can set the txWindow we need to change the acknowledgement procedure to ack after each (pi->txWindow/6 + 1). The plus 1 is to avoid the zero value. It also renames pi->num_to_ack to a better name: pi->num_acked. Signed-off-by: Gustavo F. 
Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 530079649b43..0b0b237bb786 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -2254,7 +2254,7 @@ static inline void l2cap_ertm_init(struct sock *sk) l2cap_pi(sk)->expected_ack_seq = 0; l2cap_pi(sk)->unacked_frames = 0; l2cap_pi(sk)->buffer_seq = 0; - l2cap_pi(sk)->num_to_ack = 0; + l2cap_pi(sk)->num_acked = 0; l2cap_pi(sk)->frames_sent = 0; setup_timer(&l2cap_pi(sk)->retrans_timer, @@ -3466,6 +3466,7 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str u8 tx_seq = __get_txseq(rx_control); u8 req_seq = __get_reqseq(rx_control); u8 sar = rx_control >> L2CAP_CTRL_SAR_SHIFT; + int num_to_ack = (pi->tx_win/6) + 1; int err = 0; BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len); @@ -3553,8 +3554,8 @@ expected: __mod_ack_timer(); - pi->num_to_ack = (pi->num_to_ack + 1) % L2CAP_DEFAULT_NUM_TO_ACK; - if (pi->num_to_ack == L2CAP_DEFAULT_NUM_TO_ACK - 1) + pi->num_acked = (pi->num_acked + 1) % num_to_ack; + if (pi->num_acked == num_to_ack - 1) l2cap_send_ack(pi); return 0; -- cgit v1.2.2 From 369ba30264826f38eefc61b93688100be8adbd4d Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:41 -0300 Subject: Bluetooth: Add module parameter for txWindow size on L2CAP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Very useful for testing purposes. Signed-off-by: Gustavo F. Padovan [jprvita@profusion.mobi: improved parameter description] Signed-off-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 0b0b237bb786..ff1466b21580 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -57,6 +57,7 @@ static int enable_ertm = 0; static int max_transmit = L2CAP_DEFAULT_MAX_TX; +static int tx_window = L2CAP_DEFAULT_TX_WINDOW; static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN; static u8 l2cap_fixed_chan[8] = { 0x02, }; @@ -791,7 +792,7 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent) pi->omtu = 0; pi->mode = L2CAP_MODE_BASIC; pi->fcs = L2CAP_FCS_CRC16; - pi->tx_win = L2CAP_DEFAULT_TX_WINDOW; + pi->tx_win = tx_window; pi->sec_level = BT_SECURITY_LOW; pi->role_switch = 0; pi->force_reliable = 0; @@ -4296,6 +4297,9 @@ MODULE_PARM_DESC(enable_ertm, "Enable enhanced retransmission mode"); module_param(max_transmit, uint, 0644); MODULE_PARM_DESC(max_transmit, "Max transmit value (default = 3)"); +module_param(tx_window, uint, 0644); +MODULE_PARM_DESC(tx_window, "Transmission window size value (default = 63)"); + MODULE_AUTHOR("Marcel Holtmann "); MODULE_DESCRIPTION("Bluetooth L2CAP ver " VERSION); MODULE_VERSION(VERSION); -- cgit v1.2.2 From 68d7f0ce911e41e463c45911be031cdf6a096fe8 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:41 -0300 Subject: Bluetooth: Enable option to configure Max Transmission value via sockopt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the sockopt extension we can set a per-channel MaxTx value. Signed-off-by: Gustavo F. 
Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index ff1466b21580..f9e4da2677af 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -783,6 +783,7 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent) pi->omtu = l2cap_pi(parent)->omtu; pi->mode = l2cap_pi(parent)->mode; pi->fcs = l2cap_pi(parent)->fcs; + pi->max_tx = l2cap_pi(parent)->max_tx; pi->tx_win = l2cap_pi(parent)->tx_win; pi->sec_level = l2cap_pi(parent)->sec_level; pi->role_switch = l2cap_pi(parent)->role_switch; @@ -791,6 +792,7 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent) pi->imtu = L2CAP_DEFAULT_MTU; pi->omtu = 0; pi->mode = L2CAP_MODE_BASIC; + pi->max_tx = max_transmit; pi->fcs = L2CAP_FCS_CRC16; pi->tx_win = tx_window; pi->sec_level = BT_SECURITY_LOW; @@ -1785,6 +1787,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us opts.flush_to = l2cap_pi(sk)->flush_to; opts.mode = l2cap_pi(sk)->mode; opts.fcs = l2cap_pi(sk)->fcs; + opts.max_tx = l2cap_pi(sk)->max_tx; opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win; len = min_t(unsigned int, sizeof(opts), optlen); @@ -1797,6 +1800,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us l2cap_pi(sk)->omtu = opts.omtu; l2cap_pi(sk)->mode = opts.mode; l2cap_pi(sk)->fcs = opts.fcs; + l2cap_pi(sk)->max_tx = opts.max_tx; l2cap_pi(sk)->tx_win = (__u8)opts.txwin_size; break; @@ -1912,6 +1916,7 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __us opts.flush_to = l2cap_pi(sk)->flush_to; opts.mode = l2cap_pi(sk)->mode; opts.fcs = l2cap_pi(sk)->fcs; + opts.max_tx = l2cap_pi(sk)->max_tx; opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win; len = min_t(unsigned int, len, sizeof(opts)); @@ -2331,7 +2336,7 @@ done: case L2CAP_MODE_ERTM: rfc.mode = L2CAP_MODE_ERTM; rfc.txwin_size = pi->tx_win; - rfc.max_transmit = max_transmit; + rfc.max_transmit = pi->max_tx; rfc.retrans_timeout = 0; rfc.monitor_timeout = 0; rfc.max_pdu_size = cpu_to_le16(L2CAP_DEFAULT_MAX_PDU_SIZE); -- cgit v1.2.2 From f6e6b16823de0aff31cb8ee8c098383e3aceec58 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:41 -0300 Subject: Bluetooth: Fix bug when retransmitting I-frames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If there is no frames to retransmit l2cap was crashing the kernel, now we check if the queue is empty first. Signed-off-by: Gustavo F. 
Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index f9e4da2677af..4c7b2d22faa5 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3546,7 +3546,8 @@ expected: if (pi->conn_state & L2CAP_CONN_REJ_ACT) pi->conn_state &= ~L2CAP_CONN_REJ_ACT; else { - sk->sk_send_head = TX_QUEUE(sk)->next; + if (!skb_queue_empty(TX_QUEUE(sk))) + sk->sk_send_head = TX_QUEUE(sk)->next; pi->next_tx_seq = pi->expected_ack_seq; l2cap_ertm_send(sk); } @@ -3593,7 +3594,8 @@ static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control) if (pi->conn_state & L2CAP_CONN_REJ_ACT) pi->conn_state &= ~L2CAP_CONN_REJ_ACT; else { - sk->sk_send_head = TX_QUEUE(sk)->next; + if (!skb_queue_empty(TX_QUEUE(sk))) + sk->sk_send_head = TX_QUEUE(sk)->next; pi->next_tx_seq = pi->expected_ack_seq; l2cap_ertm_send(sk); } @@ -3625,12 +3627,14 @@ static inline void l2cap_data_channel_rejframe(struct sock *sk, u16 rx_control) if (pi->conn_state & L2CAP_CONN_REJ_ACT) pi->conn_state &= ~L2CAP_CONN_REJ_ACT; else { - sk->sk_send_head = TX_QUEUE(sk)->next; + if (!skb_queue_empty(TX_QUEUE(sk))) + sk->sk_send_head = TX_QUEUE(sk)->next; pi->next_tx_seq = pi->expected_ack_seq; l2cap_ertm_send(sk); } } else { - sk->sk_send_head = TX_QUEUE(sk)->next; + if (!skb_queue_empty(TX_QUEUE(sk))) + sk->sk_send_head = TX_QUEUE(sk)->next; pi->next_tx_seq = pi->expected_ack_seq; l2cap_ertm_send(sk); -- cgit v1.2.2 From 0ee0d20855ae9271de3f6695f4cafc08ab2533bb Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:41 -0300 Subject: Bluetooth: Fix crash when monitor timeout expires MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code was crashing due to a invalid access to hci_conn after the channel disconnect. Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 4c7b2d22faa5..2f9bbad42887 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1568,6 +1568,9 @@ static struct sk_buff *l2cap_create_iframe_pdu(struct sock *sk, struct msghdr *m BT_DBG("sk %p len %d", sk, (int)len); + if (!conn) + return ERR_PTR(-ENOTCONN); + if (sdulen) hlen += 2; -- cgit v1.2.2 From 812e737e29a1d559e7bfbea675fdcfcbad9f5e1f Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:42 -0300 Subject: Bluetooth: Fix drop of acked packets on ERTM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit l2cap_drop_acked_frames() was droping not sent packets, causing them to be not transmitted. Signed-off-by: Gustavo F. 
Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 2f9bbad42887..042fd967e79c 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1262,7 +1262,8 @@ static void l2cap_drop_acked_frames(struct sock *sk) { struct sk_buff *skb; - while ((skb = skb_peek(TX_QUEUE(sk)))) { + while ((skb = skb_peek(TX_QUEUE(sk))) && + l2cap_pi(sk)->unacked_frames) { if (bt_cb(skb)->tx_seq == l2cap_pi(sk)->expected_ack_seq) break; -- cgit v1.2.2 From 3b1a9f3fa6ad842991538da2c3b2e29e047b131f Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:42 -0300 Subject: Bluetooth: Optimize SREJ_QUEUE append MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the I-frame received is the expected, i.e., its tx_seq is equal to expected_tx_seq and we are under a SREJ, we can just add it to the tail of the list. Doing that we change the complexity from O(n) to O(1). Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 042fd967e79c..187f46dd8309 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3542,7 +3542,9 @@ expected: pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64; if (pi->conn_state & L2CAP_CONN_SREJ_SENT) { - l2cap_add_to_srej_queue(sk, skb, tx_seq, sar); + bt_cb(skb)->tx_seq = tx_seq; + bt_cb(skb)->sar = sar; + __skb_queue_tail(SREJ_QUEUE(sk), skb); return 0; } -- cgit v1.2.2 From 84fb0a6334af0ccad3544f6972c055d90fbb9fbe Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:42 -0300 Subject: Bluetooth: Add Kconfig option for L2CAP Extended Features MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The L2CAP Extended Features are still unstable and under development, so we are adding them under the EXPERIMENTAL flag to get more feedback on them. L2CAP Extended Features includes the Enhanced Retransmission and Streaming Modes, Frame Check Sequence (FCS), and Segmentation and Reassemby (SAR). Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/Kconfig | 13 +++++++++++++ net/bluetooth/l2cap.c | 4 ++++ 2 files changed, 17 insertions(+) (limited to 'net') diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index ed371684c133..ee3b3049d385 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -43,6 +43,19 @@ config BT_L2CAP Say Y here to compile L2CAP support into the kernel or say M to compile it as module (l2cap). +config BT_L2CAP_EXT_FEATURES + bool "L2CAP Extended Features support (EXPERIMENTAL)" + depends on BT_L2CAP && EXPERIMENTAL + help + This option enables the L2CAP Extended Features support. These + new features include the Enhanced Retransmission and Streaming + Modes, the Frame Check Sequence (FCS), and Segmentation and + Reassembly (SAR) for L2CAP packets. They are a required for the + new Alternate MAC/PHY and the Bluetooth Medical Profile. + + You should say N unless you know what you are doing. Note that + this is in an experimental state yet. 
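In effect the new option only selects the compiled-in default for the existing enable_ertm module parameter; loading the module can still override it either way. The l2cap.c hunk below does exactly that, along these lines (the module_param() call is assumed from the MODULE_PARM_DESC already present in the file):

	#ifdef CONFIG_BT_L2CAP_EXT_FEATURES
	static int enable_ertm = 1;	/* offer ERTM/Streaming by default */
	#else
	static int enable_ertm = 0;	/* keep the old Basic Mode default */
	#endif
	/* still tunable at load time, e.g. "modprobe l2cap enable_ertm=0" */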
+ config BT_SCO tristate "SCO links support" depends on BT diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 187f46dd8309..4c007203d66b 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -55,7 +55,11 @@ #define VERSION "2.14" +#ifdef CONFIG_BT_L2CAP_EXT_FEATURES +static int enable_ertm = 1; +#else static int enable_ertm = 0; +#endif static int max_transmit = L2CAP_DEFAULT_MAX_TX; static int tx_window = L2CAP_DEFAULT_TX_WINDOW; -- cgit v1.2.2 From bd3c9e255e76ae232d417e3914ca5d80ca3e9485 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:42 -0300 Subject: Bluetooth: Add SOCK_STREAM support to L2CAP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit if enable_ertm is true and we have SOCK_STREAM the default mode will be ERTM, otherwise Basic Mode. Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 4c007203d66b..1a32562adf46 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -224,7 +224,7 @@ static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct so l2cap_pi(sk)->conn = conn; - if (sk->sk_type == SOCK_SEQPACKET) { + if (sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM) { /* Alloc CID for connection-oriented socket */ l2cap_pi(sk)->scid = l2cap_alloc_cid(l); } else if (sk->sk_type == SOCK_DGRAM) { @@ -452,7 +452,8 @@ static void l2cap_conn_start(struct l2cap_conn *conn) for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { bh_lock_sock(sk); - if (sk->sk_type != SOCK_SEQPACKET) { + if (sk->sk_type != SOCK_SEQPACKET && + sk->sk_type != SOCK_STREAM) { bh_unlock_sock(sk); continue; } @@ -512,7 +513,8 @@ static void l2cap_conn_ready(struct l2cap_conn *conn) for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { bh_lock_sock(sk); - if (sk->sk_type != SOCK_SEQPACKET) { + if (sk->sk_type != SOCK_SEQPACKET && + sk->sk_type != SOCK_STREAM) { l2cap_sock_clear_timer(sk); sk->sk_state = BT_CONNECTED; sk->sk_state_change(sk); @@ -721,7 +723,8 @@ static void __l2cap_sock_close(struct sock *sk, int reason) case BT_CONNECTED: case BT_CONFIG: - if (sk->sk_type == SOCK_SEQPACKET) { + if (sk->sk_type == SOCK_SEQPACKET || + sk->sk_type == SOCK_STREAM) { struct l2cap_conn *conn = l2cap_pi(sk)->conn; sk->sk_state = BT_DISCONN; @@ -732,7 +735,8 @@ static void __l2cap_sock_close(struct sock *sk, int reason) break; case BT_CONNECT2: - if (sk->sk_type == SOCK_SEQPACKET) { + if (sk->sk_type == SOCK_SEQPACKET || + sk->sk_type == SOCK_STREAM) { struct l2cap_conn *conn = l2cap_pi(sk)->conn; struct l2cap_conn_rsp rsp; __u16 result; @@ -795,7 +799,10 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent) } else { pi->imtu = L2CAP_DEFAULT_MTU; pi->omtu = 0; - pi->mode = L2CAP_MODE_BASIC; + if (enable_ertm && sk->sk_type == SOCK_STREAM) + pi->mode = L2CAP_MODE_ERTM; + else + pi->mode = L2CAP_MODE_BASIC; pi->max_tx = max_transmit; pi->fcs = L2CAP_FCS_CRC16; pi->tx_win = tx_window; @@ -852,7 +859,7 @@ static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol, sock->state = SS_UNCONNECTED; - if (sock->type != SOCK_SEQPACKET && + if (sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM && sock->type != SOCK_DGRAM && sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; @@ -1000,7 +1007,8 @@ static int 
l2cap_do_connect(struct sock *sk) l2cap_sock_set_timer(sk, sk->sk_sndtimeo); if (hcon->state == BT_CONNECTED) { - if (sk->sk_type != SOCK_SEQPACKET) { + if (sk->sk_type != SOCK_SEQPACKET && + sk->sk_type != SOCK_STREAM) { l2cap_sock_clear_timer(sk); sk->sk_state = BT_CONNECTED; } else @@ -1034,7 +1042,8 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int al lock_sock(sk); - if (sk->sk_type == SOCK_SEQPACKET && !la.l2_psm) { + if ((sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM) + && !la.l2_psm) { err = -EINVAL; goto done; } @@ -1098,7 +1107,8 @@ static int l2cap_sock_listen(struct socket *sock, int backlog) lock_sock(sk); - if (sk->sk_state != BT_BOUND || sock->type != SOCK_SEQPACKET) { + if ((sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM) + || sk->sk_state != BT_BOUND) { err = -EBADFD; goto done; } @@ -1857,7 +1867,8 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch switch (optname) { case BT_SECURITY: - if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_RAW) { + if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM + && sk->sk_type != SOCK_RAW) { err = -EINVAL; break; } @@ -2007,7 +2018,8 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, ch switch (optname) { case BT_SECURITY: - if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_RAW) { + if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM + && sk->sk_type != SOCK_RAW) { err = -EINVAL; break; } @@ -2314,7 +2326,7 @@ static int l2cap_build_conf_req(struct sock *sk, void *data) { struct l2cap_pinfo *pi = l2cap_pi(sk); struct l2cap_conf_req *req = data; - struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC }; + struct l2cap_conf_rfc rfc = { .mode = pi->mode }; void *ptr = req->data; BT_DBG("sk %p", sk); @@ -3997,7 +4009,7 @@ static int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) static inline void l2cap_check_encryption(struct sock *sk, u8 encrypt) { - if (sk->sk_type != SOCK_SEQPACKET) + if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM) return; if (encrypt == 0x00) { -- cgit v1.2.2 From 0041ecfa3025d7612fdaab12b2f07c9c3c09f42f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Paulo=20Rechi=20Vita?= Date: Sat, 1 May 2010 16:15:42 -0300 Subject: Bluetooth: Check if mode is supported on getsockopt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add this check to getsockopt makes possible to fail early instead of waiting until listen / connect. Signed-off-by: João Paulo Rechi Vita Acked-by: Gustavo F. 
Padovan Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 1a32562adf46..bf5bb7dc6abf 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1814,9 +1814,22 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us break; } + l2cap_pi(sk)->mode = opts.mode; + switch (l2cap_pi(sk)->mode) { + case L2CAP_MODE_BASIC: + break; + case L2CAP_MODE_ERTM: + case L2CAP_MODE_STREAMING: + if (enable_ertm) + break; + /* fall through */ + default: + err = -EINVAL; + break; + } + l2cap_pi(sk)->imtu = opts.imtu; l2cap_pi(sk)->omtu = opts.omtu; - l2cap_pi(sk)->mode = opts.mode; l2cap_pi(sk)->fcs = opts.fcs; l2cap_pi(sk)->max_tx = opts.max_tx; l2cap_pi(sk)->tx_win = (__u8)opts.txwin_size; -- cgit v1.2.2 From afefdbc4cf3b9d409d07e1e5264e7ff88bc48711 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:43 -0300 Subject: Bluetooth: Fix SDU reassembly under SREJ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code was reusing the control var without its reinitialization. Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index bf5bb7dc6abf..478def700c7c 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3446,14 +3446,14 @@ drop: static void l2cap_check_srej_gap(struct sock *sk, u8 tx_seq) { struct sk_buff *skb; - u16 control = 0; + u16 control; while((skb = skb_peek(SREJ_QUEUE(sk)))) { if (bt_cb(skb)->tx_seq != tx_seq) break; skb = skb_dequeue(SREJ_QUEUE(sk)); - control |= bt_cb(skb)->sar << L2CAP_CTRL_SAR_SHIFT; + control = bt_cb(skb)->sar << L2CAP_CTRL_SAR_SHIFT; l2cap_sar_reassembly_sdu(sk, skb, control); l2cap_pi(sk)->buffer_seq_srej = (l2cap_pi(sk)->buffer_seq_srej + 1) % 64; -- cgit v1.2.2 From 01760bdde9a92413b7fff928d08e19352bf09d82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Paulo=20Rechi=20Vita?= Date: Sat, 1 May 2010 16:15:43 -0300 Subject: Bluetooth: Close L2CAP channel on invalid ReqSeq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: João Paulo Rechi Vita Acked-by: Gustavo F. 
Padovan Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 478def700c7c..31514d8faa66 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3772,7 +3772,7 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk struct sock *sk; struct l2cap_pinfo *pi; u16 control, len; - u8 tx_seq; + u8 tx_seq, req_seq, next_tx_seq_offset, req_seq_offset; sk = l2cap_get_chan_by_scid(&conn->chan_list, cid); if (!sk) { @@ -3823,6 +3823,22 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk if (l2cap_check_fcs(pi, skb)) goto drop; + req_seq = __get_reqseq(control); + req_seq_offset = (req_seq - pi->expected_ack_seq) % 64; + if (req_seq_offset < 0) + req_seq_offset += 64; + + next_tx_seq_offset = + (pi->next_tx_seq - pi->expected_ack_seq) % 64; + if (next_tx_seq_offset < 0) + next_tx_seq_offset += 64; + + /* check for invalid req-seq */ + if (req_seq_offset > next_tx_seq_offset) { + l2cap_send_disconn_req(pi->conn, sk); + goto drop; + } + if (__is_iframe(control)) { if (len < 4) goto drop; -- cgit v1.2.2 From 44651b85cc3a076147af5d181fc4833ef8debc59 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:43 -0300 Subject: Bluetooth: Don't set control bits to zero first MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can set the SAR bits in the control field directly. Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 31514d8faa66..cfb18cd97564 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1636,16 +1636,15 @@ static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, siz __skb_queue_tail(&sar_queue, skb); len -= pi->remote_mps; size += pi->remote_mps; - control = 0; while (len > 0) { size_t buflen; if (len > pi->remote_mps) { - control |= L2CAP_SDU_CONTINUE; + control = L2CAP_SDU_CONTINUE; buflen = pi->remote_mps; } else { - control |= L2CAP_SDU_END; + control = L2CAP_SDU_END; buflen = len; } @@ -1658,7 +1657,6 @@ static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, siz __skb_queue_tail(&sar_queue, skb); len -= buflen; size += buflen; - control = 0; } skb_queue_splice_tail(&sar_queue, TX_QUEUE(sk)); if (sk->sk_send_head == NULL) -- cgit v1.2.2 From 59203a21a56c53afeb6f45e059299e6f1437f30f Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:43 -0300 Subject: Bluetooth: Fix errors reported by checkpatch.pl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Gustavo F. 
Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index cfb18cd97564..2a981de071df 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1400,7 +1400,7 @@ static int l2cap_ertm_send(struct sock *sk) return 0; while ((skb = sk->sk_send_head) && (!l2cap_tx_window_full(sk)) && - !(pi->conn_state & L2CAP_CONN_REMOTE_BUSY)) { + !(pi->conn_state & L2CAP_CONN_REMOTE_BUSY)) { if (pi->remote_max_tx && bt_cb(skb)->retries == pi->remote_max_tx) { @@ -1490,9 +1490,8 @@ static inline int l2cap_skbuff_fromiovec(struct sock *sk, struct msghdr *msg, in struct sk_buff **frag; int err, sent = 0; - if (memcpy_fromiovec(skb_put(skb, count), msg->msg_iov, count)) { + if (memcpy_fromiovec(skb_put(skb, count), msg->msg_iov, count)) return -EFAULT; - } sent += count; len -= count; @@ -3347,7 +3346,7 @@ static void l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_ if (skb_queue_is_last(SREJ_QUEUE(sk), next_skb)) break; - } while((next_skb = skb_queue_next(SREJ_QUEUE(sk), next_skb))); + } while ((next_skb = skb_queue_next(SREJ_QUEUE(sk), next_skb))); __skb_queue_tail(SREJ_QUEUE(sk), skb); } @@ -3446,7 +3445,7 @@ static void l2cap_check_srej_gap(struct sock *sk, u8 tx_seq) struct sk_buff *skb; u16 control; - while((skb = skb_peek(SREJ_QUEUE(sk)))) { + while ((skb = skb_peek(SREJ_QUEUE(sk)))) { if (bt_cb(skb)->tx_seq != tx_seq) break; @@ -3465,7 +3464,7 @@ static void l2cap_resend_srejframe(struct sock *sk, u8 tx_seq) struct srej_list *l, *tmp; u16 control; - list_for_each_entry_safe(l,tmp, SREJ_LIST(sk), list) { + list_for_each_entry_safe(l, tmp, SREJ_LIST(sk), list) { if (l->tx_seq == tx_seq) { list_del(&l->list); kfree(l); -- cgit v1.2.2 From 0301ef04b5f49a95681694fc0d75af9441faa919 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Wed, 5 May 2010 20:56:43 -0300 Subject: Bluetooth: Remove set of SrejSaveReqSeq under receipt of REJ frame MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That action is not specified by the ERTM spec, so removing it. Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 2a981de071df..d0d03302e14d 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3670,10 +3670,8 @@ static inline void l2cap_data_channel_rejframe(struct sock *sk, u16 rx_control) pi->next_tx_seq = pi->expected_ack_seq; l2cap_ertm_send(sk); - if (pi->conn_state & L2CAP_CONN_WAIT_F) { - pi->srej_save_reqseq = tx_seq; + if (pi->conn_state & L2CAP_CONN_WAIT_F) pi->conn_state |= L2CAP_CONN_REJ_ACT; - } } } static inline void l2cap_data_channel_srejframe(struct sock *sk, u16 rx_control) -- cgit v1.2.2 From a2e12a2a312f816d5970b0c809d43b399fbfe90c Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Wed, 5 May 2010 19:58:27 -0300 Subject: Bluetooth: Remove unneeded control vars MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trivial clean up. Signed-off-by: Gustavo F. 
Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index d0d03302e14d..5a5203f03642 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1239,7 +1239,6 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *l static void l2cap_monitor_timeout(unsigned long arg) { struct sock *sk = (void *) arg; - u16 control; bh_lock_sock(sk); if (l2cap_pi(sk)->retry_count >= l2cap_pi(sk)->remote_max_tx) { @@ -1251,15 +1250,13 @@ static void l2cap_monitor_timeout(unsigned long arg) l2cap_pi(sk)->retry_count++; __mod_monitor_timer(); - control = L2CAP_CTRL_POLL; - l2cap_send_rr_or_rnr(l2cap_pi(sk), control); + l2cap_send_rr_or_rnr(l2cap_pi(sk), L2CAP_CTRL_POLL); bh_unlock_sock(sk); } static void l2cap_retrans_timeout(unsigned long arg) { struct sock *sk = (void *) arg; - u16 control; bh_lock_sock(sk); l2cap_pi(sk)->retry_count = 1; @@ -1267,8 +1264,7 @@ static void l2cap_retrans_timeout(unsigned long arg) l2cap_pi(sk)->conn_state |= L2CAP_CONN_WAIT_F; - control = L2CAP_CTRL_POLL; - l2cap_send_rr_or_rnr(l2cap_pi(sk), control); + l2cap_send_rr_or_rnr(l2cap_pi(sk), L2CAP_CTRL_POLL); bh_unlock_sock(sk); } @@ -3716,10 +3712,8 @@ static inline void l2cap_data_channel_rnrframe(struct sock *sk, u16 rx_control) if (!(pi->conn_state & L2CAP_CONN_SREJ_SENT)) { del_timer(&pi->retrans_timer); - if (rx_control & L2CAP_CTRL_POLL) { - u16 control = L2CAP_CTRL_FINAL; - l2cap_send_rr_or_rnr(pi, control); - } + if (rx_control & L2CAP_CTRL_POLL) + l2cap_send_rr_or_rnr(pi, L2CAP_CTRL_FINAL); return; } -- cgit v1.2.2 From 9b16dc6551cbde65d0ac525af3c46efab53a2c46 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Wed, 5 May 2010 20:05:57 -0300 Subject: Bluetooth: Check if we really are in WAIT_F when F bit comes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit F-bit set should be processed only if we are in the WAIT_F state. Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 5a5203f03642..eb5cb29115a7 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3503,7 +3503,8 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len); - if (L2CAP_CTRL_FINAL & rx_control) { + if (L2CAP_CTRL_FINAL & rx_control && + l2cap_pi(sk)->conn_state & L2CAP_CONN_WAIT_F) { del_timer(&pi->monitor_timer); if (pi->unacked_frames > 0) __mod_retrans_timer(); @@ -3727,7 +3728,8 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str { BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len); - if (L2CAP_CTRL_FINAL & rx_control) { + if (L2CAP_CTRL_FINAL & rx_control && + l2cap_pi(sk)->conn_state & L2CAP_CONN_WAIT_F) { del_timer(&l2cap_pi(sk)->monitor_timer); if (l2cap_pi(sk)->unacked_frames > 0) __mod_retrans_timer(); -- cgit v1.2.2 From ff12fd643334071084b6145cad3793bb6c956638 Mon Sep 17 00:00:00 2001 From: "Gustavo F. 
Padovan" Date: Wed, 5 May 2010 22:09:15 -0300 Subject: Bluetooth: Fix lockdep annotation on ERTM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A spin_lock_init() call was missing. :) Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index eb5cb29115a7..6b08f4d7c873 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1622,7 +1622,7 @@ static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, siz u16 control; size_t size = 0; - __skb_queue_head_init(&sar_queue); + skb_queue_head_init(&sar_queue); control = L2CAP_SDU_START; skb = l2cap_create_iframe_pdu(sk, msg, pi->remote_mps, control, len); if (IS_ERR(skb)) -- cgit v1.2.2 From 9a9c6a34416b3743c09c00f3d6708d9df3c21629 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:43 -0300 Subject: Bluetooth: Make hci_send_acl() void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit hci_send_acl can't fail, so we can make it void. This patch changes that and all the funcions that use hci_send_acl(). That change exposed a bug on sending connectionless data. We were not reporting the lenght send back to the user space. Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 4 +-- net/bluetooth/l2cap.c | 73 ++++++++++++++++++------------------------------ 2 files changed, 29 insertions(+), 48 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 1c9aef97f519..904f1e8a7a3b 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1260,7 +1260,7 @@ static void hci_add_acl_hdr(struct sk_buff *skb, __u16 handle, __u16 flags) hdr->dlen = cpu_to_le16(len); } -int hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags) +void hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags) { struct hci_dev *hdev = conn->hdev; struct sk_buff *list; @@ -1303,7 +1303,7 @@ int hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags) tasklet_schedule(&hdev->tx_task); - return 0; + return; } EXPORT_SYMBOL(hci_send_acl); diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 6b08f4d7c873..7e74d5be16e3 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -330,19 +330,19 @@ static inline u8 l2cap_get_ident(struct l2cap_conn *conn) return id; } -static inline int l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data) +static inline void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data) { struct sk_buff *skb = l2cap_build_cmd(conn, code, ident, len, data); BT_DBG("code 0x%2.2x", code); if (!skb) - return -ENOMEM; + return; - return hci_send_acl(conn->hcon, skb, 0); + hci_send_acl(conn->hcon, skb, 0); } -static inline int l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control) +static inline void l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control) { struct sk_buff *skb; struct l2cap_hdr *lh; @@ -369,7 +369,7 @@ static inline int l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control) skb = bt_skb_alloc(count, GFP_ATOMIC); if (!skb) - return -ENOMEM; + return; lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); lh->len = cpu_to_le16(hlen - L2CAP_HDR_SIZE); @@ -381,10 +381,10 @@ 
static inline int l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control) put_unaligned_le16(fcs, skb_put(skb, 2)); } - return hci_send_acl(pi->conn->hcon, skb, 0); + hci_send_acl(pi->conn->hcon, skb, 0); } -static inline int l2cap_send_rr_or_rnr(struct l2cap_pinfo *pi, u16 control) +static inline void l2cap_send_rr_or_rnr(struct l2cap_pinfo *pi, u16 control) { if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) control |= L2CAP_SUPER_RCV_NOT_READY; @@ -393,7 +393,7 @@ static inline int l2cap_send_rr_or_rnr(struct l2cap_pinfo *pi, u16 control) control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; - return l2cap_send_sframe(pi, control); + l2cap_send_sframe(pi, control); } static void l2cap_do_start(struct sock *sk) @@ -1289,18 +1289,13 @@ static void l2cap_drop_acked_frames(struct sock *sk) return; } -static inline int l2cap_do_send(struct sock *sk, struct sk_buff *skb) +static inline void l2cap_do_send(struct sock *sk, struct sk_buff *skb) { struct l2cap_pinfo *pi = l2cap_pi(sk); - int err; BT_DBG("sk %p, skb %p len %d", sk, skb, skb->len); - err = hci_send_acl(pi->conn->hcon, skb, 0); - if (err < 0) - kfree_skb(skb); - - return err; + hci_send_acl(pi->conn->hcon, skb, 0); } static int l2cap_streaming_send(struct sock *sk) @@ -1308,7 +1303,6 @@ static int l2cap_streaming_send(struct sock *sk) struct sk_buff *skb, *tx_skb; struct l2cap_pinfo *pi = l2cap_pi(sk); u16 control, fcs; - int err; while ((skb = sk->sk_send_head)) { tx_skb = skb_clone(skb, GFP_ATOMIC); @@ -1322,11 +1316,7 @@ static int l2cap_streaming_send(struct sock *sk) put_unaligned_le16(fcs, tx_skb->data + tx_skb->len - 2); } - err = l2cap_do_send(sk, tx_skb); - if (err < 0) { - l2cap_send_disconn_req(pi->conn, sk); - return err; - } + l2cap_do_send(sk, tx_skb); pi->next_tx_seq = (pi->next_tx_seq + 1) % 64; @@ -1346,7 +1336,6 @@ static int l2cap_retransmit_frame(struct sock *sk, u8 tx_seq) struct l2cap_pinfo *pi = l2cap_pi(sk); struct sk_buff *skb, *tx_skb; u16 control, fcs; - int err; skb = skb_peek(TX_QUEUE(sk)); do { @@ -1375,11 +1364,7 @@ static int l2cap_retransmit_frame(struct sock *sk, u8 tx_seq) put_unaligned_le16(fcs, tx_skb->data + tx_skb->len - 2); } - err = l2cap_do_send(sk, tx_skb); - if (err < 0) { - l2cap_send_disconn_req(pi->conn, sk); - return err; - } + l2cap_do_send(sk, tx_skb); break; } while(1); return 0; @@ -1390,7 +1375,7 @@ static int l2cap_ertm_send(struct sock *sk) struct sk_buff *skb, *tx_skb; struct l2cap_pinfo *pi = l2cap_pi(sk); u16 control, fcs; - int err, nsent = 0; + int nsent = 0; if (pi->conn_state & L2CAP_CONN_WAIT_F) return 0; @@ -1423,11 +1408,8 @@ static int l2cap_ertm_send(struct sock *sk) put_unaligned_le16(fcs, skb->data + tx_skb->len - 2); } - err = l2cap_do_send(sk, tx_skb); - if (err < 0) { - l2cap_send_disconn_req(pi->conn, sk); - return err; - } + l2cap_do_send(sk, tx_skb); + __mod_retrans_timer(); bt_cb(skb)->tx_seq = pi->next_tx_seq; @@ -1447,7 +1429,7 @@ static int l2cap_ertm_send(struct sock *sk) return nsent; } -static int l2cap_send_ack(struct l2cap_pinfo *pi) +static void l2cap_send_ack(struct l2cap_pinfo *pi) { struct sock *sk = (struct sock *)pi; u16 control = 0; @@ -1456,15 +1438,15 @@ static int l2cap_send_ack(struct l2cap_pinfo *pi) if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) { control |= L2CAP_SUPER_RCV_NOT_READY; - return l2cap_send_sframe(pi, control); + l2cap_send_sframe(pi, control); + return; } else if (l2cap_ertm_send(sk) == 0) { control |= L2CAP_SUPER_RCV_READY; - return l2cap_send_sframe(pi, control); + l2cap_send_sframe(pi, control); } - return 0; } -static int 
l2cap_send_srejtail(struct sock *sk) +static void l2cap_send_srejtail(struct sock *sk) { struct srej_list *tail; u16 control; @@ -1476,8 +1458,6 @@ static int l2cap_send_srejtail(struct sock *sk) control |= tail->tx_seq << L2CAP_CTRL_REQSEQ_SHIFT; l2cap_send_sframe(l2cap_pi(sk), control); - - return 0; } static inline int l2cap_skbuff_fromiovec(struct sock *sk, struct msghdr *msg, int len, int count, struct sk_buff *skb) @@ -1687,10 +1667,12 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms /* Connectionless channel */ if (sk->sk_type == SOCK_DGRAM) { skb = l2cap_create_connless_pdu(sk, msg, len); - if (IS_ERR(skb)) + if (IS_ERR(skb)) { err = PTR_ERR(skb); - else - err = l2cap_do_send(sk, skb); + } else { + l2cap_do_send(sk, skb); + err = len; + } goto done; } @@ -1709,9 +1691,8 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms goto done; } - err = l2cap_do_send(sk, skb); - if (!err) - err = len; + l2cap_do_send(sk, skb); + err = len; break; case L2CAP_MODE_ERTM: -- cgit v1.2.2 From f11d676da4059c7888efca810ab300b931736a26 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:44 -0300 Subject: Bluetooth: Refactor l2cap_retransmit_frame() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the code flow cleaner and changes the function to void. It also fixes a potential NULL dereference with skb. Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 53 ++++++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 7e74d5be16e3..1c35c328181d 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1331,43 +1331,44 @@ static int l2cap_streaming_send(struct sock *sk) return 0; } -static int l2cap_retransmit_frame(struct sock *sk, u8 tx_seq) +static void l2cap_retransmit_frame(struct sock *sk, u8 tx_seq) { struct l2cap_pinfo *pi = l2cap_pi(sk); struct sk_buff *skb, *tx_skb; u16 control, fcs; skb = skb_peek(TX_QUEUE(sk)); - do { - if (bt_cb(skb)->tx_seq != tx_seq) { - if (skb_queue_is_last(TX_QUEUE(sk), skb)) - break; - skb = skb_queue_next(TX_QUEUE(sk), skb); - continue; - } + if (!skb) + return; - if (pi->remote_max_tx && - bt_cb(skb)->retries == pi->remote_max_tx) { - l2cap_send_disconn_req(pi->conn, sk); + do { + if (bt_cb(skb)->tx_seq == tx_seq) break; - } - tx_skb = skb_clone(skb, GFP_ATOMIC); - bt_cb(skb)->retries++; - control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE); - control |= (pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT) - | (tx_seq << L2CAP_CTRL_TXSEQ_SHIFT); - put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE); + if (skb_queue_is_last(TX_QUEUE(sk), skb)) + return; - if (pi->fcs == L2CAP_FCS_CRC16) { - fcs = crc16(0, (u8 *)tx_skb->data, tx_skb->len - 2); - put_unaligned_le16(fcs, tx_skb->data + tx_skb->len - 2); - } + } while ((skb = skb_queue_next(TX_QUEUE(sk), skb))); - l2cap_do_send(sk, tx_skb); - break; - } while(1); - return 0; + if (pi->remote_max_tx && + bt_cb(skb)->retries == pi->remote_max_tx) { + l2cap_send_disconn_req(pi->conn, sk); + return; + } + + tx_skb = skb_clone(skb, GFP_ATOMIC); + bt_cb(skb)->retries++; + control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE); + control |= (pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT) + | (tx_seq << L2CAP_CTRL_TXSEQ_SHIFT); + put_unaligned_le16(control, tx_skb->data + 
L2CAP_HDR_SIZE); + + if (pi->fcs == L2CAP_FCS_CRC16) { + fcs = crc16(0, (u8 *)tx_skb->data, tx_skb->len - 2); + put_unaligned_le16(fcs, tx_skb->data + tx_skb->len - 2); + } + + l2cap_do_send(sk, tx_skb); } static int l2cap_ertm_send(struct sock *sk) -- cgit v1.2.2 From 18778a63ddc83bc89bda3b119fb02eb121512a66 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:44 -0300 Subject: Bluetooth: Implement missing parts of the Invalid Frame Detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a plenty of situation where ERTM shall close the channel, this commit treats the cases regarding Invalid Frame Detection. It create one reassembly SDU function for ERTM and other for Streaming Mode to make the Invalid Frame Detection handling less complex. Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 119 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 112 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 1c35c328181d..cfd672419315 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3329,12 +3329,111 @@ static void l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_ __skb_queue_tail(SREJ_QUEUE(sk), skb); } -static int l2cap_sar_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 control) +static int l2cap_ertm_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 control) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + struct sk_buff *_skb; + int err = 0; + + switch (control & L2CAP_CTRL_SAR) { + case L2CAP_SDU_UNSEGMENTED: + if (pi->conn_state & L2CAP_CONN_SAR_SDU) + goto drop; + + err = sock_queue_rcv_skb(sk, skb); + if (!err) + return err; + + break; + + case L2CAP_SDU_START: + if (pi->conn_state & L2CAP_CONN_SAR_SDU) + goto drop; + + pi->sdu_len = get_unaligned_le16(skb->data); + skb_pull(skb, 2); + + if (pi->sdu_len > pi->imtu) + goto disconnect; + + pi->sdu = bt_skb_alloc(pi->sdu_len, GFP_ATOMIC); + if (!pi->sdu) { + err = -ENOMEM; + break; + } + + memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len); + + pi->conn_state |= L2CAP_CONN_SAR_SDU; + pi->partial_sdu_len = skb->len; + break; + + case L2CAP_SDU_CONTINUE: + if (!(pi->conn_state & L2CAP_CONN_SAR_SDU)) + goto disconnect; + + if (!pi->sdu) + goto disconnect; + + memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len); + + pi->partial_sdu_len += skb->len; + if (pi->partial_sdu_len > pi->sdu_len) + goto drop; + + break; + + case L2CAP_SDU_END: + if (!(pi->conn_state & L2CAP_CONN_SAR_SDU)) + goto disconnect; + + if (!pi->sdu) + goto disconnect; + + memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len); + + pi->conn_state &= ~L2CAP_CONN_SAR_SDU; + pi->partial_sdu_len += skb->len; + + if (pi->partial_sdu_len > pi->imtu) + goto drop; + + if (pi->partial_sdu_len != pi->sdu_len) + goto drop; + + _skb = skb_clone(pi->sdu, GFP_ATOMIC); + err = sock_queue_rcv_skb(sk, _skb); + if (err < 0) + kfree_skb(_skb); + + kfree_skb(pi->sdu); + break; + } + + kfree_skb(skb); + return err; + +drop: + kfree_skb(pi->sdu); + pi->sdu = NULL; + +disconnect: + l2cap_send_disconn_req(pi->conn, sk); + kfree_skb(skb); + return 0; +} + +static int l2cap_streaming_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 control) { struct l2cap_pinfo *pi = l2cap_pi(sk); struct sk_buff *_skb; int err = -EINVAL; + /* + * TODO: We have to notify the userland if some data is lost with the + * 
Streaming Mode. + */ + switch (control & L2CAP_CTRL_SAR) { case L2CAP_SDU_UNSEGMENTED: if (pi->conn_state & L2CAP_CONN_SAR_SDU) { @@ -3429,7 +3528,7 @@ static void l2cap_check_srej_gap(struct sock *sk, u8 tx_seq) skb = skb_dequeue(SREJ_QUEUE(sk)); control = bt_cb(skb)->sar << L2CAP_CTRL_SAR_SHIFT; - l2cap_sar_reassembly_sdu(sk, skb, control); + l2cap_ertm_reassembly_sdu(sk, skb, control); l2cap_pi(sk)->buffer_seq_srej = (l2cap_pi(sk)->buffer_seq_srej + 1) % 64; tx_seq++; @@ -3566,7 +3665,7 @@ expected: pi->buffer_seq = (pi->buffer_seq + 1) % 64; - err = l2cap_sar_reassembly_sdu(sk, skb, rx_control); + err = l2cap_ertm_reassembly_sdu(sk, skb, rx_control); if (err < 0) return err; @@ -3790,8 +3889,10 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk * Receiver will miss it and start proper recovery * procedures and ask retransmission. */ - if (len > pi->mps) + if (len > pi->mps) { + l2cap_send_disconn_req(pi->conn, sk); goto drop; + } if (l2cap_check_fcs(pi, skb)) goto drop; @@ -3813,13 +3914,17 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk } if (__is_iframe(control)) { - if (len < 4) + if (len < 4) { + l2cap_send_disconn_req(pi->conn, sk); goto drop; + } l2cap_data_channel_iframe(sk, control, skb); } else { - if (len != 0) + if (len != 0) { + l2cap_send_disconn_req(pi->conn, sk); goto drop; + } l2cap_data_channel_sframe(sk, control, skb); } @@ -3850,7 +3955,7 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk else pi->expected_tx_seq = (tx_seq + 1) % 64; - l2cap_sar_reassembly_sdu(sk, skb, control); + l2cap_streaming_reassembly_sdu(sk, skb, control); goto done; -- cgit v1.2.2 From 9b53350d3cf5b330c3261d89b5e62a2dc25c5653 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Paulo=20Rechi=20Vita?= Date: Sat, 1 May 2010 16:15:44 -0300 Subject: Bluetooth: Completes the I-frame tx_seq check logic on RECV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add checks for invalid tx_seq and fixes the duplicated tx_seq check. Signed-off-by: João Paulo Rechi Vita Acked-by: Gustavo F. 
Padovan Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 40 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index cfd672419315..481cec22ef96 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3302,7 +3302,7 @@ static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk) } } -static void l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_seq, u8 sar) +static int l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_seq, u8 sar) { struct sk_buff *next_skb; @@ -3312,13 +3312,16 @@ static void l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_ next_skb = skb_peek(SREJ_QUEUE(sk)); if (!next_skb) { __skb_queue_tail(SREJ_QUEUE(sk), skb); - return; + return 0; } do { + if (bt_cb(next_skb)->tx_seq == tx_seq) + return -EINVAL; + if (bt_cb(next_skb)->tx_seq > tx_seq) { __skb_queue_before(SREJ_QUEUE(sk), next_skb, skb); - return; + return 0; } if (skb_queue_is_last(SREJ_QUEUE(sk), next_skb)) @@ -3327,6 +3330,8 @@ static void l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_ } while ((next_skb = skb_queue_next(SREJ_QUEUE(sk), next_skb))); __skb_queue_tail(SREJ_QUEUE(sk), skb); + + return 0; } static int l2cap_ertm_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 control) @@ -3579,6 +3584,7 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str u8 tx_seq = __get_txseq(rx_control); u8 req_seq = __get_reqseq(rx_control); u8 sar = rx_control >> L2CAP_CTRL_SAR_SHIFT; + u8 tx_seq_offset, expected_tx_seq_offset; int num_to_ack = (pi->tx_win/6) + 1; int err = 0; @@ -3598,6 +3604,16 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str if (tx_seq == pi->expected_tx_seq) goto expected; + tx_seq_offset = (tx_seq - pi->buffer_seq) % 64; + if (tx_seq_offset < 0) + tx_seq_offset += 64; + + /* invalid tx_seq */ + if (tx_seq_offset >= pi->tx_win) { + l2cap_send_disconn_req(pi->conn, sk); + goto drop; + } + if (pi->conn_state & L2CAP_CONN_SREJ_SENT) { struct srej_list *first; @@ -3617,7 +3633,10 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str } } else { struct srej_list *l; - l2cap_add_to_srej_queue(sk, skb, tx_seq, sar); + + /* duplicated tx_seq */ + if (l2cap_add_to_srej_queue(sk, skb, tx_seq, sar) < 0) + goto drop; list_for_each_entry(l, SREJ_LIST(sk), list) { if (l->tx_seq == tx_seq) { @@ -3628,6 +3647,15 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str l2cap_send_srejframe(sk, tx_seq); } } else { + expected_tx_seq_offset = + (pi->expected_tx_seq - pi->buffer_seq) % 64; + if (expected_tx_seq_offset < 0) + expected_tx_seq_offset += 64; + + /* duplicated tx_seq */ + if (tx_seq_offset < expected_tx_seq_offset) + goto drop; + pi->conn_state |= L2CAP_CONN_SREJ_SENT; INIT_LIST_HEAD(SREJ_LIST(sk)); @@ -3676,6 +3704,10 @@ expected: l2cap_send_ack(pi); return 0; + +drop: + kfree_skb(skb); + return 0; } static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control) -- cgit v1.2.2 From 1890d36bb556a27684ad29654a9898ab9a5f57ee Mon Sep 17 00:00:00 2001 From: "Gustavo F. 
Padovan" Date: Sat, 1 May 2010 16:15:44 -0300 Subject: Bluetooth: Implement Local Busy Condition handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Supports Local Busy condition handling through a waitqueue that wake ups each 200ms and try to push the packets to the upper layer. If it can push all the queue then it leaves the Local Busy state. The patch modifies the behaviour of l2cap_ertm_reassembly_sdu() to support retry of the push operation. Signed-off-by: Gustavo F. Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 187 ++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 166 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 481cec22ef96..103e4b54a86a 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -68,10 +68,14 @@ static u8 l2cap_fixed_chan[8] = { 0x02, }; static const struct proto_ops l2cap_sock_ops; +static struct workqueue_struct *_busy_wq; + static struct bt_sock_list l2cap_sk_list = { .lock = __RW_LOCK_UNLOCKED(l2cap_sk_list.lock) }; +static void l2cap_busy_work(struct work_struct *work); + static void __l2cap_sock_close(struct sock *sk, int reason); static void l2cap_sock_close(struct sock *sk); static void l2cap_sock_kill(struct sock *sk); @@ -386,9 +390,10 @@ static inline void l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control) static inline void l2cap_send_rr_or_rnr(struct l2cap_pinfo *pi, u16 control) { - if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) + if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) { control |= L2CAP_SUPER_RCV_NOT_READY; - else + pi->conn_state |= L2CAP_CONN_RNR_SENT; + } else control |= L2CAP_SUPER_RCV_READY; control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; @@ -816,6 +821,7 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent) pi->flush_to = L2CAP_DEFAULT_FLUSH_TO; skb_queue_head_init(TX_QUEUE(sk)); skb_queue_head_init(SREJ_QUEUE(sk)); + skb_queue_head_init(BUSY_QUEUE(sk)); INIT_LIST_HEAD(SREJ_LIST(sk)); } @@ -1439,6 +1445,7 @@ static void l2cap_send_ack(struct l2cap_pinfo *pi) if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) { control |= L2CAP_SUPER_RCV_NOT_READY; + pi->conn_state |= L2CAP_CONN_RNR_SENT; l2cap_send_sframe(pi, control); return; } else if (l2cap_ertm_send(sk) == 0) { @@ -2279,6 +2286,9 @@ static inline void l2cap_ertm_init(struct sock *sk) l2cap_ack_timeout, (unsigned long) sk); __skb_queue_head_init(SREJ_QUEUE(sk)); + __skb_queue_head_init(BUSY_QUEUE(sk)); + + INIT_WORK(&l2cap_pi(sk)->busy_work, l2cap_busy_work); } static int l2cap_mode_supported(__u8 mode, __u32 feat_mask) @@ -3046,6 +3056,7 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) { skb_queue_purge(SREJ_QUEUE(sk)); + skb_queue_purge(BUSY_QUEUE(sk)); del_timer(&l2cap_pi(sk)->retrans_timer); del_timer(&l2cap_pi(sk)->monitor_timer); del_timer(&l2cap_pi(sk)->ack_timer); @@ -3077,6 +3088,7 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, struct l2cap_cmd if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) { skb_queue_purge(SREJ_QUEUE(sk)); + skb_queue_purge(BUSY_QUEUE(sk)); del_timer(&l2cap_pi(sk)->retrans_timer); del_timer(&l2cap_pi(sk)->monitor_timer); del_timer(&l2cap_pi(sk)->ack_timer); @@ -3287,6 +3299,7 @@ static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk) if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) { control |= L2CAP_SUPER_RCV_NOT_READY | L2CAP_CTRL_FINAL; 
l2cap_send_sframe(pi, control); + pi->conn_state |= L2CAP_CONN_RNR_SENT; pi->conn_state &= ~L2CAP_CONN_SEND_FBIT; } @@ -3338,7 +3351,7 @@ static int l2cap_ertm_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 c { struct l2cap_pinfo *pi = l2cap_pi(sk); struct sk_buff *_skb; - int err = 0; + int err; switch (control & L2CAP_CTRL_SAR) { case L2CAP_SDU_UNSEGMENTED: @@ -3356,16 +3369,18 @@ static int l2cap_ertm_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 c goto drop; pi->sdu_len = get_unaligned_le16(skb->data); - skb_pull(skb, 2); if (pi->sdu_len > pi->imtu) goto disconnect; pi->sdu = bt_skb_alloc(pi->sdu_len, GFP_ATOMIC); - if (!pi->sdu) { - err = -ENOMEM; - break; - } + if (!pi->sdu) + return -ENOMEM; + + /* pull sdu_len bytes only after alloc, because of Local Busy + * condition we have to be sure that this will be executed + * only once, i.e., when alloc does not fail */ + skb_pull(skb, 2); memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len); @@ -3395,28 +3410,40 @@ static int l2cap_ertm_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 c if (!pi->sdu) goto disconnect; - memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len); + if (!(pi->conn_state & L2CAP_CONN_SAR_RETRY)) { + memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len); - pi->conn_state &= ~L2CAP_CONN_SAR_SDU; - pi->partial_sdu_len += skb->len; + pi->partial_sdu_len += skb->len; - if (pi->partial_sdu_len > pi->imtu) - goto drop; + if (pi->partial_sdu_len > pi->imtu) + goto drop; - if (pi->partial_sdu_len != pi->sdu_len) - goto drop; + if (pi->partial_sdu_len != pi->sdu_len) + goto drop; + } _skb = skb_clone(pi->sdu, GFP_ATOMIC); + if (!_skb) { + pi->conn_state |= L2CAP_CONN_SAR_RETRY; + return -ENOMEM; + } + err = sock_queue_rcv_skb(sk, _skb); - if (err < 0) + if (err < 0) { kfree_skb(_skb); + pi->conn_state |= L2CAP_CONN_SAR_RETRY; + return err; + } + + pi->conn_state &= ~L2CAP_CONN_SAR_RETRY; + pi->conn_state &= ~L2CAP_CONN_SAR_SDU; kfree_skb(pi->sdu); break; } kfree_skb(skb); - return err; + return 0; drop: kfree_skb(pi->sdu); @@ -3428,6 +3455,115 @@ disconnect: return 0; } +static void l2cap_busy_work(struct work_struct *work) +{ + DECLARE_WAITQUEUE(wait, current); + struct l2cap_pinfo *pi = + container_of(work, struct l2cap_pinfo, busy_work); + struct sock *sk = (struct sock *)pi; + int n_tries = 0, timeo = HZ/5, err; + struct sk_buff *skb; + u16 control; + + lock_sock(sk); + + add_wait_queue(sk->sk_sleep, &wait); + while ((skb = skb_peek(BUSY_QUEUE(sk)))) { + set_current_state(TASK_INTERRUPTIBLE); + + if (n_tries++ > L2CAP_LOCAL_BUSY_TRIES) { + err = -EBUSY; + l2cap_send_disconn_req(pi->conn, sk); + goto done; + } + + if (!timeo) + timeo = HZ/5; + + if (signal_pending(current)) { + err = sock_intr_errno(timeo); + goto done; + } + + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); + + err = sock_error(sk); + if (err) + goto done; + + while ((skb = skb_dequeue(BUSY_QUEUE(sk)))) { + control = bt_cb(skb)->sar << L2CAP_CTRL_SAR_SHIFT; + err = l2cap_ertm_reassembly_sdu(sk, skb, control); + if (err < 0) { + skb_queue_head(BUSY_QUEUE(sk), skb); + break; + } + + pi->buffer_seq = (pi->buffer_seq + 1) % 64; + } + + if (!skb) + break; + } + + if (!(pi->conn_state & L2CAP_CONN_RNR_SENT)) + goto done; + + control = pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; + control |= L2CAP_SUPER_RCV_READY | L2CAP_CTRL_POLL; + l2cap_send_sframe(pi, control); + l2cap_pi(sk)->retry_count = 1; + + del_timer(&pi->retrans_timer); + __mod_monitor_timer(); + + l2cap_pi(sk)->conn_state |= 
L2CAP_CONN_WAIT_F; + +done: + pi->conn_state &= ~L2CAP_CONN_LOCAL_BUSY; + pi->conn_state &= ~L2CAP_CONN_RNR_SENT; + + set_current_state(TASK_RUNNING); + remove_wait_queue(sk->sk_sleep, &wait); + + release_sock(sk); +} + +static int l2cap_push_rx_skb(struct sock *sk, struct sk_buff *skb, u16 control) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + int sctrl, err; + + if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) { + bt_cb(skb)->sar = control >> L2CAP_CTRL_SAR_SHIFT; + __skb_queue_tail(BUSY_QUEUE(sk), skb); + return -EBUSY; + } + + err = l2cap_ertm_reassembly_sdu(sk, skb, control); + if (err >= 0) { + pi->buffer_seq = (pi->buffer_seq + 1) % 64; + return err; + } + + /* Busy Condition */ + pi->conn_state |= L2CAP_CONN_LOCAL_BUSY; + bt_cb(skb)->sar = control >> L2CAP_CTRL_SAR_SHIFT; + __skb_queue_tail(BUSY_QUEUE(sk), skb); + + sctrl = pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; + sctrl |= L2CAP_SUPER_RCV_NOT_READY; + l2cap_send_sframe(pi, sctrl); + + pi->conn_state |= L2CAP_CONN_RNR_SENT; + + queue_work(_busy_wq, &pi->busy_work); + + return err; +} + static int l2cap_streaming_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 control) { struct l2cap_pinfo *pi = l2cap_pi(sk); @@ -3614,6 +3750,9 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str goto drop; } + if (pi->conn_state == L2CAP_CONN_LOCAL_BUSY) + goto drop; + if (pi->conn_state & L2CAP_CONN_SREJ_SENT) { struct srej_list *first; @@ -3662,6 +3801,7 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str pi->buffer_seq_srej = pi->buffer_seq; __skb_queue_head_init(SREJ_QUEUE(sk)); + __skb_queue_head_init(BUSY_QUEUE(sk)); l2cap_add_to_srej_queue(sk, skb, tx_seq, sar); pi->conn_state |= L2CAP_CONN_SEND_PBIT; @@ -3691,11 +3831,9 @@ expected: } } - pi->buffer_seq = (pi->buffer_seq + 1) % 64; - - err = l2cap_ertm_reassembly_sdu(sk, skb, rx_control); + err = l2cap_push_rx_skb(sk, skb, rx_control); if (err < 0) - return err; + return 0; __mod_ack_timer(); @@ -4406,6 +4544,10 @@ static int __init l2cap_init(void) if (err < 0) return err; + _busy_wq = create_singlethread_workqueue("l2cap"); + if (!_busy_wq) + goto error; + err = bt_sock_register(BTPROTO_L2CAP, &l2cap_sock_family_ops); if (err < 0) { BT_ERR("L2CAP socket registration failed"); @@ -4440,6 +4582,9 @@ static void __exit l2cap_exit(void) { debugfs_remove(l2cap_debugfs); + flush_workqueue(_busy_wq); + destroy_workqueue(_busy_wq); + if (bt_sock_unregister(BTPROTO_L2CAP) < 0) BT_ERR("L2CAP socket unregistration failed"); -- cgit v1.2.2 From 6161c0382bbab883a634d284f7367a88bbe88534 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:44 -0300 Subject: Bluetooth: Add wait_queue to wait ack of all sent packets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To guarantee that all packets we sent were received we need to wait for theirs ack before shutdown the socket. Signed-off-by: Gustavo F. 
Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 103e4b54a86a..9d514f9dbc0f 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1242,6 +1242,37 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *l return 0; } +static int __l2cap_wait_ack(struct sock *sk) +{ + DECLARE_WAITQUEUE(wait, current); + int err = 0; + int timeo = HZ/5; + + add_wait_queue(sk->sk_sleep, &wait); + while ((l2cap_pi(sk)->unacked_frames > 0 && l2cap_pi(sk)->conn)) { + set_current_state(TASK_INTERRUPTIBLE); + + if (!timeo) + timeo = HZ/5; + + if (signal_pending(current)) { + err = sock_intr_errno(timeo); + break; + } + + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); + + err = sock_error(sk); + if (err) + break; + } + set_current_state(TASK_RUNNING); + remove_wait_queue(sk->sk_sleep, &wait); + return err; +} + static void l2cap_monitor_timeout(unsigned long arg) { struct sock *sk = (void *) arg; @@ -2059,6 +2090,9 @@ static int l2cap_sock_shutdown(struct socket *sock, int how) lock_sock(sk); if (!sk->sk_shutdown) { + if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) + err = __l2cap_wait_ack(sk); + sk->sk_shutdown = SHUTDOWN_MASK; l2cap_sock_clear_timer(sk); __l2cap_sock_close(sk, 0); -- cgit v1.2.2 From dfc909befbfe967bd7f46ef33b6969c1b7f3cf42 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:45 -0300 Subject: Bluetooth: Fix race condition on l2cap_ertm_send() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit l2cap_ertm_send() can be called both from user context and bottom half context. The socket locks for those contexts are different: the user context uses a mutex (which can sleep) while the bottom half uses a spinlock_bh. That creates a race condition when we have interruptions in both contexts at the same time. The best way to solve this is to add a new spinlock to lock l2cap_ertm_send() and the variables it accesses. The other solution was to defer l2cap_ertm_send() with a workqueue, but the sending process already has one deferral in the HCI layer, and it is not a good idea to add another one. The patch refactors the code to create l2cap_retransmit_frames(), which encapsulates the locking of l2cap_ertm_send() for some callers. It also renames l2cap_retransmit_frame() to l2cap_retransmit_one_frame() to avoid confusion. Signed-off-by: Gustavo F.
Padovan Reviewed-by: João Paulo Rechi Vita Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 99 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 66 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 9d514f9dbc0f..fe663e9c6684 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1368,7 +1368,7 @@ static int l2cap_streaming_send(struct sock *sk) return 0; } -static void l2cap_retransmit_frame(struct sock *sk, u8 tx_seq) +static void l2cap_retransmit_one_frame(struct sock *sk, u8 tx_seq) { struct l2cap_pinfo *pi = l2cap_pi(sk); struct sk_buff *skb, *tx_skb; @@ -1467,10 +1467,29 @@ static int l2cap_ertm_send(struct sock *sk) return nsent; } +static int l2cap_retransmit_frames(struct sock *sk) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + int ret; + + spin_lock_bh(&pi->send_lock); + + if (!skb_queue_empty(TX_QUEUE(sk))) + sk->sk_send_head = TX_QUEUE(sk)->next; + + pi->next_tx_seq = pi->expected_ack_seq; + ret = l2cap_ertm_send(sk); + + spin_unlock_bh(&pi->send_lock); + + return ret; +} + static void l2cap_send_ack(struct l2cap_pinfo *pi) { struct sock *sk = (struct sock *)pi; u16 control = 0; + int nframes; control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; @@ -1479,10 +1498,17 @@ static void l2cap_send_ack(struct l2cap_pinfo *pi) pi->conn_state |= L2CAP_CONN_RNR_SENT; l2cap_send_sframe(pi, control); return; - } else if (l2cap_ertm_send(sk) == 0) { - control |= L2CAP_SUPER_RCV_READY; - l2cap_send_sframe(pi, control); } + + spin_lock_bh(&pi->send_lock); + nframes = l2cap_ertm_send(sk); + spin_unlock_bh(&pi->send_lock); + + if (nframes > 0) + return; + + control |= L2CAP_SUPER_RCV_READY; + l2cap_send_sframe(pi, control); } static void l2cap_send_srejtail(struct sock *sk) @@ -1673,8 +1699,10 @@ static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, siz size += buflen; } skb_queue_splice_tail(&sar_queue, TX_QUEUE(sk)); + spin_lock_bh(&pi->send_lock); if (sk->sk_send_head == NULL) sk->sk_send_head = sar_queue.next; + spin_unlock_bh(&pi->send_lock); return size; } @@ -1745,8 +1773,15 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms goto done; } __skb_queue_tail(TX_QUEUE(sk), skb); + + if (pi->mode == L2CAP_MODE_ERTM) + spin_lock_bh(&pi->send_lock); + if (sk->sk_send_head == NULL) sk->sk_send_head = skb; + + if (pi->mode == L2CAP_MODE_ERTM) + spin_unlock_bh(&pi->send_lock); } else { /* Segment SDU into multiples PDUs */ err = l2cap_sar_segment_sdu(sk, msg, len); @@ -1754,10 +1789,13 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms goto done; } - if (pi->mode == L2CAP_MODE_STREAMING) + if (pi->mode == L2CAP_MODE_STREAMING) { err = l2cap_streaming_send(sk); - else + } else { + spin_lock_bh(&pi->send_lock); err = l2cap_ertm_send(sk); + spin_unlock_bh(&pi->send_lock); + } if (err >= 0) err = len; @@ -2321,6 +2359,7 @@ static inline void l2cap_ertm_init(struct sock *sk) __skb_queue_head_init(SREJ_QUEUE(sk)); __skb_queue_head_init(BUSY_QUEUE(sk)); + spin_lock_init(&l2cap_pi(sk)->send_lock); INIT_WORK(&l2cap_pi(sk)->busy_work, l2cap_busy_work); } @@ -3340,7 +3379,9 @@ static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk) if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY && pi->unacked_frames > 0) __mod_retrans_timer(); + spin_lock_bh(&pi->send_lock); l2cap_ertm_send(sk); + spin_unlock_bh(&pi->send_lock); if (!(pi->conn_state & L2CAP_CONN_LOCAL_BUSY) && pi->frames_sent == 0) { @@ -3857,12 +3898,8 @@ expected: 
if (rx_control & L2CAP_CTRL_FINAL) { if (pi->conn_state & L2CAP_CONN_REJ_ACT) pi->conn_state &= ~L2CAP_CONN_REJ_ACT; - else { - if (!skb_queue_empty(TX_QUEUE(sk))) - sk->sk_send_head = TX_QUEUE(sk)->next; - pi->next_tx_seq = pi->expected_ack_seq; - l2cap_ertm_send(sk); - } + else + l2cap_retransmit_frames(sk); } err = l2cap_push_rx_skb(sk, skb, rx_control); @@ -3907,12 +3944,8 @@ static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control) if (pi->conn_state & L2CAP_CONN_REJ_ACT) pi->conn_state &= ~L2CAP_CONN_REJ_ACT; - else { - if (!skb_queue_empty(TX_QUEUE(sk))) - sk->sk_send_head = TX_QUEUE(sk)->next; - pi->next_tx_seq = pi->expected_ack_seq; - l2cap_ertm_send(sk); - } + else + l2cap_retransmit_frames(sk); } else { if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) && @@ -3920,10 +3953,13 @@ static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control) __mod_retrans_timer(); pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; - if (pi->conn_state & L2CAP_CONN_SREJ_SENT) + if (pi->conn_state & L2CAP_CONN_SREJ_SENT) { l2cap_send_ack(pi); - else + } else { + spin_lock_bh(&pi->send_lock); l2cap_ertm_send(sk); + spin_unlock_bh(&pi->send_lock); + } } } @@ -3940,17 +3976,10 @@ static inline void l2cap_data_channel_rejframe(struct sock *sk, u16 rx_control) if (rx_control & L2CAP_CTRL_FINAL) { if (pi->conn_state & L2CAP_CONN_REJ_ACT) pi->conn_state &= ~L2CAP_CONN_REJ_ACT; - else { - if (!skb_queue_empty(TX_QUEUE(sk))) - sk->sk_send_head = TX_QUEUE(sk)->next; - pi->next_tx_seq = pi->expected_ack_seq; - l2cap_ertm_send(sk); - } + else + l2cap_retransmit_frames(sk); } else { - if (!skb_queue_empty(TX_QUEUE(sk))) - sk->sk_send_head = TX_QUEUE(sk)->next; - pi->next_tx_seq = pi->expected_ack_seq; - l2cap_ertm_send(sk); + l2cap_retransmit_frames(sk); if (pi->conn_state & L2CAP_CONN_WAIT_F) pi->conn_state |= L2CAP_CONN_REJ_ACT; @@ -3966,8 +3995,12 @@ static inline void l2cap_data_channel_srejframe(struct sock *sk, u16 rx_control) if (rx_control & L2CAP_CTRL_POLL) { pi->expected_ack_seq = tx_seq; l2cap_drop_acked_frames(sk); - l2cap_retransmit_frame(sk, tx_seq); + l2cap_retransmit_one_frame(sk, tx_seq); + + spin_lock_bh(&pi->send_lock); l2cap_ertm_send(sk); + spin_unlock_bh(&pi->send_lock); + if (pi->conn_state & L2CAP_CONN_WAIT_F) { pi->srej_save_reqseq = tx_seq; pi->conn_state |= L2CAP_CONN_SREJ_ACT; @@ -3977,9 +4010,9 @@ static inline void l2cap_data_channel_srejframe(struct sock *sk, u16 rx_control) pi->srej_save_reqseq == tx_seq) pi->conn_state &= ~L2CAP_CONN_SREJ_ACT; else - l2cap_retransmit_frame(sk, tx_seq); + l2cap_retransmit_one_frame(sk, tx_seq); } else { - l2cap_retransmit_frame(sk, tx_seq); + l2cap_retransmit_one_frame(sk, tx_seq); if (pi->conn_state & L2CAP_CONN_WAIT_F) { pi->srej_save_reqseq = tx_seq; pi->conn_state |= L2CAP_CONN_SREJ_ACT; -- cgit v1.2.2 From 4178ba462a3e8ab5094e69606f01d9e95f2d5ea6 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 1 May 2010 16:15:45 -0300 Subject: Bluetooth: Prevents buffer overflow on l2cap_ertm_reassembly_sdu() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The checks should be done before the the memcpy to avoid buffer overflow. Reported-by: João Paulo Rechi Vita Signed-off-by: Gustavo F. 
Padovan Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index fe663e9c6684..9ef01c32b3a2 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3470,12 +3470,12 @@ static int l2cap_ertm_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 c if (!pi->sdu) goto disconnect; - memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len); - pi->partial_sdu_len += skb->len; if (pi->partial_sdu_len > pi->sdu_len) goto drop; + memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len); + break; case L2CAP_SDU_END: @@ -3486,8 +3486,6 @@ static int l2cap_ertm_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 c goto disconnect; if (!(pi->conn_state & L2CAP_CONN_SAR_RETRY)) { - memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len); - pi->partial_sdu_len += skb->len; if (pi->partial_sdu_len > pi->imtu) @@ -3495,6 +3493,8 @@ static int l2cap_ertm_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 c if (pi->partial_sdu_len != pi->sdu_len) goto drop; + + memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len); } _skb = skb_clone(pi->sdu, GFP_ATOMIC); -- cgit v1.2.2 From 844c0972427ee5f661158160aaca10b22b3dda60 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Tue, 4 May 2010 23:16:01 -0300 Subject: Bluetooth: Fix spec error in the RemoteBusy Logic On receipt of an RR(P=1) with RemoteBusy set to TRUE (in the RECV state table) we have to call sendIorRRorRNR(F=1) and, just after that, set RemoteBusy to False. This leads to a freeze in the sending process, since we are not allowed to send data while RemoteBusy is set to true and nothing calls SendPending-I-Frames after RemoteBusy is set to false (the last action for that event). sendIorRRorRNR() does call SendPending-I-Frames, but at that moment RemoteBusy is still True and we cannot send any frame; afterwards, no one calls SendPending-I-Frames again and the sending process stops. The solution here is to set RemoteBusy to false inside sendIorRRorRNR(), just before it calls SendPending-I-Frames. That makes SendPending-I-Frames able to send frames. This solution is similar to what RR(P=0)(F=0) on the RECV table and RR(P=1) on the SREJ_SENT table do. It doesn't make any sense to call SendPending-I-Frames if we cannot send any frame, i.e., while RemoteBusy is True. Signed-off-by: Gustavo F.
Padovan Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 9ef01c32b3a2..ba49f9a3579e 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3379,6 +3379,8 @@ static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk) if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY && pi->unacked_frames > 0) __mod_retrans_timer(); + pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; + spin_lock_bh(&pi->send_lock); l2cap_ertm_send(sk); spin_unlock_bh(&pi->send_lock); @@ -3936,7 +3938,6 @@ static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control) l2cap_send_srejtail(sk); } else { l2cap_send_i_or_rr_or_rnr(sk); - pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; } } else if (rx_control & L2CAP_CTRL_FINAL) { -- cgit v1.2.2 From f48fd9c8cd746fdb055a97249a209c77dca0f710 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 20 Mar 2010 15:20:04 +0100 Subject: Bluetooth: Create per controller workqueue Instead of having a global workqueue for all controllers, it makes more sense to have a workqueue per controller. Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 14 ++++++++++++++ net/bluetooth/hci_sysfs.c | 16 +++------------- 2 files changed, 17 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 904f1e8a7a3b..5e83f8e0877a 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -928,6 +929,10 @@ int hci_register_dev(struct hci_dev *hdev) write_unlock_bh(&hci_dev_list_lock); + hdev->workqueue = create_singlethread_workqueue(hdev->name); + if (!hdev->workqueue) + goto nomem; + hci_register_sysfs(hdev); hdev->rfkill = rfkill_alloc(hdev->name, &hdev->dev, @@ -942,6 +947,13 @@ int hci_register_dev(struct hci_dev *hdev) hci_notify(hdev, HCI_DEV_REG); return id; + +nomem: + write_lock_bh(&hci_dev_list_lock); + list_del(&hdev->list); + write_unlock_bh(&hci_dev_list_lock); + + return -ENOMEM; } EXPORT_SYMBOL(hci_register_dev); @@ -970,6 +982,8 @@ int hci_unregister_dev(struct hci_dev *hdev) hci_unregister_sysfs(hdev); + destroy_workqueue(hdev->workqueue); + __hci_dev_put(hdev); return 0; diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 0e8e1a59856c..a978449ab04d 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -14,8 +14,6 @@ static struct class *bt_class; struct dentry *bt_debugfs = NULL; EXPORT_SYMBOL_GPL(bt_debugfs); -static struct workqueue_struct *bt_workq; - static inline char *link_typetostr(int type) { switch (type) { @@ -161,14 +159,14 @@ void hci_conn_add_sysfs(struct hci_conn *conn) { BT_DBG("conn %p", conn); - queue_work(bt_workq, &conn->work_add); + queue_work(conn->hdev->workqueue, &conn->work_add); } void hci_conn_del_sysfs(struct hci_conn *conn) { BT_DBG("conn %p", conn); - queue_work(bt_workq, &conn->work_del); + queue_work(conn->hdev->workqueue, &conn->work_del); } static inline char *host_bustostr(int bus) @@ -487,17 +485,11 @@ void hci_unregister_sysfs(struct hci_dev *hdev) int __init bt_sysfs_init(void) { - bt_workq = create_singlethread_workqueue("bluetooth"); - if (!bt_workq) - return -ENOMEM; - bt_debugfs = debugfs_create_dir("bluetooth", NULL); bt_class = class_create(THIS_MODULE, "bluetooth"); - if (IS_ERR(bt_class)) { - destroy_workqueue(bt_workq); + if (IS_ERR(bt_class)) return PTR_ERR(bt_class); - } return 
0; } @@ -507,6 +499,4 @@ void bt_sysfs_cleanup(void) class_destroy(bt_class); debugfs_remove_recursive(bt_debugfs); - - destroy_workqueue(bt_workq); } -- cgit v1.2.2 From 7b767cad296d17cfe071734f71a3b7b479534c8c Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Tue, 9 Mar 2010 21:38:03 +0200 Subject: Bluetooth: Use strict_strtoul instead of simple_strtoul Use strict_strtoul as suggested by checkpatch.pl for more strict input checking. Signed-off-by: Tomas Winkler Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_sysfs.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index a978449ab04d..463ffa4fe042 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -281,11 +281,9 @@ static ssize_t show_idle_timeout(struct device *dev, struct device_attribute *at static ssize_t store_idle_timeout(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hci_dev *hdev = dev_get_drvdata(dev); - char *ptr; - __u32 val; + unsigned long val; - val = simple_strtoul(buf, &ptr, 10); - if (ptr == buf) + if (strict_strtoul(buf, 0, &val) < 0) return -EINVAL; if (val != 0 && (val < 500 || val > 3600000)) @@ -305,11 +303,9 @@ static ssize_t show_sniff_max_interval(struct device *dev, struct device_attribu static ssize_t store_sniff_max_interval(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hci_dev *hdev = dev_get_drvdata(dev); - char *ptr; - __u16 val; + unsigned long val; - val = simple_strtoul(buf, &ptr, 10); - if (ptr == buf) + if (strict_strtoul(buf, 0, &val) < 0) return -EINVAL; if (val < 0x0002 || val > 0xFFFE || val % 2) @@ -332,11 +328,9 @@ static ssize_t show_sniff_min_interval(struct device *dev, struct device_attribu static ssize_t store_sniff_min_interval(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hci_dev *hdev = dev_get_drvdata(dev); - char *ptr; - __u16 val; + unsigned long val; - val = simple_strtoul(buf, &ptr, 10); - if (ptr == buf) + if (strict_strtoul(buf, 0, &val) < 0) return -EINVAL; if (val < 0x0002 || val > 0xFFFE || val % 2) -- cgit v1.2.2 From 2b0b05ddc04b6d45e71cd36405df512075786f1e Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 10 May 2010 11:33:10 +0200 Subject: Bluetooth: Fix issues where sk_sleep() helper is needed now There were some left-overs that used sk->sk_sleep instead of the new sk_sleep() helper. 
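The conversion below follows one simple pattern; as a rough, self-contained sketch (the function name wait_for_ack_sketch and the HZ/5 timeout are invented for illustration and are not part of the patch), waiting on a socket now goes through sk_sleep(sk) instead of dereferencing sk->sk_sleep directly:

#include <linux/sched.h>
#include <linux/wait.h>
#include <net/sock.h>

/* Sketch only: sleep briefly on the socket's wait queue using the
 * sk_sleep() accessor rather than the old sk->sk_sleep pointer. */
static int wait_for_ack_sketch(struct sock *sk)
{
	DECLARE_WAITQUEUE(wait, current);
	long timeo = HZ / 5;			/* arbitrary value for the example */
	int err = 0;

	add_wait_queue(sk_sleep(sk), &wait);	/* was: sk->sk_sleep */
	set_current_state(TASK_INTERRUPTIBLE);

	if (signal_pending(current))
		err = sock_intr_errno(timeo);
	else
		schedule_timeout(timeo);

	set_current_state(TASK_RUNNING);
	remove_wait_queue(sk_sleep(sk), &wait);	/* was: sk->sk_sleep */
	return err;
}

The hunks that follow apply exactly this substitution to __l2cap_wait_ack() and l2cap_busy_work().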
Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index ba49f9a3579e..673a36886716 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1248,7 +1248,7 @@ static int __l2cap_wait_ack(struct sock *sk) int err = 0; int timeo = HZ/5; - add_wait_queue(sk->sk_sleep, &wait); + add_wait_queue(sk_sleep(sk), &wait); while ((l2cap_pi(sk)->unacked_frames > 0 && l2cap_pi(sk)->conn)) { set_current_state(TASK_INTERRUPTIBLE); @@ -1269,7 +1269,7 @@ static int __l2cap_wait_ack(struct sock *sk) break; } set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); return err; } @@ -3544,7 +3544,7 @@ static void l2cap_busy_work(struct work_struct *work) lock_sock(sk); - add_wait_queue(sk->sk_sleep, &wait); + add_wait_queue(sk_sleep(sk), &wait); while ((skb = skb_peek(BUSY_QUEUE(sk)))) { set_current_state(TASK_INTERRUPTIBLE); @@ -3603,7 +3603,7 @@ done: pi->conn_state &= ~L2CAP_CONN_RNR_SENT; set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); release_sock(sk); } -- cgit v1.2.2 From bbd725435ddb1cac732f7a8c23c21ff67f24c60f Mon Sep 17 00:00:00 2001 From: Andreas Meissner Date: Mon, 10 May 2010 04:47:49 -0700 Subject: IPv4: unresolved multicast route cleanup Fixes the expiration timer for unresolved multicast route entries. In case new multicast routing requests come in faster than the expiration timeout occurs (e.g. zapping through multicast TV streams), the timer is prevented from being called in time for already existing entries. As the single timer is reset to its default whenever a new entry is made, the timeouts for existing unresolved entries are missed and/or not updated. As a consequence, new requests are denied when the limit of unresolved entries has been reached, because old entries live longer than they are supposed to. The solution is to reset the timer only for the first unresolved entry in the multicast routing cache. All other timers are already set and updated correctly within the timer function itself by now. Signed-off-by: Andreas Meissner Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9d4f6d1340a4..ec19a890c9a0 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -754,7 +754,8 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb) c->next = mfc_unres_queue; mfc_unres_queue = c; - mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires); + if (atomic_read(&net->ipv4.cache_resolve_queue_len) == 1) + mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires); } /* -- cgit v1.2.2 From f0ecde1466f21edf577b809735f4f35f354777a0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 10 May 2010 04:59:07 -0700 Subject: net: Fix FDDI and TR config checks in ipv4 arp and LLC. Need to check both CONFIG_FOO and CONFIG_FOO_MODULE Signed-off-by: David S.
Miller --- net/ipv4/arp.c | 6 +++--- net/llc/llc_sap.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 6e747065c202..80769f1f9fab 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -661,13 +661,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, #endif #endif -#ifdef CONFIG_FDDI +#if defined(CONFIG_FDDI) || defined(CONFIG_FDDI_MODULE) case ARPHRD_FDDI: arp->ar_hrd = htons(ARPHRD_ETHER); arp->ar_pro = htons(ETH_P_IP); break; #endif -#ifdef CONFIG_TR +#if defined(CONFIG_TR) || defined(CONFIG_TR_MODULE) case ARPHRD_IEEE802_TR: arp->ar_hrd = htons(ARPHRD_IEEE802); arp->ar_pro = htons(ETH_P_IP); @@ -1051,7 +1051,7 @@ static int arp_req_set(struct net *net, struct arpreq *r, return -EINVAL; } switch (dev->type) { -#ifdef CONFIG_FDDI +#if defined(CONFIG_FDDI) || defined(CONFIG_FDDI_MODULE) case ARPHRD_FDDI: /* * According to RFC 1390, FDDI devices should accept ARP diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index a432f0ec051c..94e7fca75b85 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -31,7 +31,7 @@ static int llc_mac_header_len(unsigned short devtype) case ARPHRD_ETHER: case ARPHRD_LOOPBACK: return sizeof(struct ethhdr); -#ifdef CONFIG_TR +#if defined(CONFIG_TR) || defined(CONFIG_TR_MODULE) case ARPHRD_IEEE802_TR: return sizeof(struct trh_hdr); #endif -- cgit v1.2.2 From 3b254c54ec46eb022cb26ee6ab37fae23f5f7d6a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 10 May 2010 17:45:56 +0200 Subject: netfilter: nf_conntrack_proto: fix warning with CONFIG_PROVE_RCU =================================================== [ INFO: suspicious rcu_dereference_check() usage. ] --------------------------------------------------- include/net/netfilter/nf_conntrack_l3proto.h:92 invoked rcu_dereference_check() without protection! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 2 locks held by iptables/3197: #0: (sk_lock-AF_INET){+.+.+.}, at: [] ip_setsockopt+0x7c/0xa0 #1: (&xt[i].mutex){+.+.+.}, at: [] xt_find_table_lock+0x3e/0x110 stack backtrace: Pid: 3197, comm: iptables Not tainted 2.6.34-rc4 #2 Call Trace: [] lockdep_rcu_dereference+0xb8/0xc0 [] nf_ct_l3proto_module_put+0x6b/0x70 [] state_mt_destroy+0x11/0x20 [] cleanup_match+0x2f/0x50 [] cleanup_entry+0x33/0x90 [] ? __do_replace+0x1a3/0x210 [] __do_replace+0x19c/0x210 [] do_ipt_set_ctl+0x16a/0x1b0 [] nf_sockopt+0x60/0xa0 ... The __nf_ct_l3proto_find() call doesn't actually need rcu read side protection since the caller holds a reference to the protocol. Use rcu_read_lock() anyways to avoid the warning. 
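In sketch form, the workaround amounts to wrapping a debug-checked dereference in an RCU read-side critical section even though the object cannot go away; the names below (proto_entry, proto_table, lookup_proto_sketch) are stand-ins for illustration, not the real netfilter symbols:

#include <linux/kernel.h>
#include <linux/rcupdate.h>

struct proto_entry {
	int id;
};

static struct proto_entry *proto_table[16];	/* stand-in for the real table */

static struct proto_entry *lookup_proto_sketch(unsigned short num)
{
	struct proto_entry *p;

	if (num >= ARRAY_SIZE(proto_table))
		return NULL;

	/* The caller already holds a reference that keeps the entry alive,
	 * but the rcu_dereference() debug check cannot know that, so take
	 * the read lock around the lookup anyway to keep lockdep quiet. */
	rcu_read_lock();
	p = rcu_dereference(proto_table[num]);
	rcu_read_unlock();

	return p;
}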
Kernel bugzilla #15781: https://bugzilla.kernel.org/show_bug.cgi?id=15781 Reported-by: Christian Casteyde Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_proto.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index a6defc793601..5886ba1d52a0 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -117,9 +117,13 @@ void nf_ct_l3proto_module_put(unsigned short l3proto) { struct nf_conntrack_l3proto *p; - /* rcu_read_lock not necessary since the caller holds a reference */ + /* rcu_read_lock not necessary since the caller holds a reference, but + * taken anyways to avoid lockdep warnings in __nf_ct_l3proto_find() + */ + rcu_read_lock(); p = __nf_ct_l3proto_find(l3proto); module_put(p->me); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put); -- cgit v1.2.2 From b56f2d55c6c22b0c5774b3b22e336fb6cc5f4094 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 10 May 2010 18:47:57 +0200 Subject: netfilter: use rcu_dereference_protected() Restore the rcu_dereference() calls in conntrack/expectation notifier and logger registration/unregistration, but use the _protected variant, which will be required by the upcoming __rcu annotations. Based on patch by Eric Dumazet Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_ecache.c | 22 ++++++++++++++++++---- net/netfilter/nf_log.c | 10 ++++++++-- 2 files changed, 26 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index a94ac3ad02cb..cdcc7649476b 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -82,9 +82,12 @@ EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new) { int ret = 0; + struct nf_ct_event_notifier *notify; mutex_lock(&nf_ct_ecache_mutex); - if (nf_conntrack_event_cb != NULL) { + notify = rcu_dereference_protected(nf_conntrack_event_cb, + lockdep_is_held(&nf_ct_ecache_mutex)); + if (notify != NULL) { ret = -EBUSY; goto out_unlock; } @@ -100,8 +103,12 @@ EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier); void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *new) { + struct nf_ct_event_notifier *notify; + mutex_lock(&nf_ct_ecache_mutex); - BUG_ON(nf_conntrack_event_cb != new); + notify = rcu_dereference_protected(nf_conntrack_event_cb, + lockdep_is_held(&nf_ct_ecache_mutex)); + BUG_ON(notify != new); rcu_assign_pointer(nf_conntrack_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); } @@ -110,9 +117,12 @@ EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *new) { int ret = 0; + struct nf_exp_event_notifier *notify; mutex_lock(&nf_ct_ecache_mutex); - if (nf_expect_event_cb != NULL) { + notify = rcu_dereference_protected(nf_expect_event_cb, + lockdep_is_held(&nf_ct_ecache_mutex)); + if (notify != NULL) { ret = -EBUSY; goto out_unlock; } @@ -128,8 +138,12 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier); void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new) { + struct nf_exp_event_notifier *notify; + mutex_lock(&nf_ct_ecache_mutex); - BUG_ON(nf_expect_event_cb != new); + notify = rcu_dereference_protected(nf_expect_event_cb, + lockdep_is_held(&nf_ct_ecache_mutex)); + BUG_ON(notify != new); rcu_assign_pointer(nf_expect_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); 
} diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 908f59935fbb..7df37fd786bc 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -35,6 +35,7 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger) /* return EEXIST if the same logger is registred, 0 on success. */ int nf_log_register(u_int8_t pf, struct nf_logger *logger) { + const struct nf_logger *llog; int i; if (pf >= ARRAY_SIZE(nf_loggers)) @@ -51,7 +52,9 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger) } else { /* register at end of list to honor first register win */ list_add_tail(&logger->list[pf], &nf_loggers_l[pf]); - if (nf_loggers[pf] == NULL) + llog = rcu_dereference_protected(nf_loggers[pf], + lockdep_is_held(&nf_log_mutex)); + if (llog == NULL) rcu_assign_pointer(nf_loggers[pf], logger); } @@ -63,11 +66,14 @@ EXPORT_SYMBOL(nf_log_register); void nf_log_unregister(struct nf_logger *logger) { + const struct nf_logger *c_logger; int i; mutex_lock(&nf_log_mutex); for (i = 0; i < ARRAY_SIZE(nf_loggers); i++) { - if (nf_loggers[i] == logger) + c_logger = rcu_dereference_protected(nf_loggers[i], + lockdep_is_held(&nf_log_mutex)); + if (c_logger == logger) rcu_assign_pointer(nf_loggers[i], NULL); list_del(&logger->list[i]); } -- cgit v1.2.2 From ed77134bfccf5e75b6cbadab268e559dbe6a4ebb Mon Sep 17 00:00:00 2001 From: Mark Gross Date: Thu, 6 May 2010 01:59:26 +0200 Subject: PM QOS update This patch changes the string based list management to a handle base implementation to help with the hot path use of pm-qos, it also renames much of the API to use "request" as opposed to "requirement" that was used in the initial implementation. I did this because request more accurately represents what it actually does. Also, I added a string based ABI for users wanting to use a string interface. So if the user writes 0xDDDDDDDD formatted hex it will be accepted by the interface. (someone asked me for it and I don't think it hurts anything.) This patch updates some documentation input I got from Randy. Signed-off-by: markgross Signed-off-by: Rafael J. Wysocki --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 4aefa6dc3091..29de1965ff74 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -495,7 +495,7 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) s32 beaconint_us; if (latency < 0) - latency = pm_qos_requirement(PM_QOS_NETWORK_LATENCY); + latency = pm_qos_request(PM_QOS_NETWORK_LATENCY); beaconint_us = ieee80211_tu_to_usec( found->vif.bss_conf.beacon_int); -- cgit v1.2.2 From c476efbcde5ba58b81ac752f4a894d6db8e17d94 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 11 May 2010 14:40:48 +0200 Subject: ipv6: ip6mr: move unres_queue and timer to per-namespace data The unres_queue is currently shared between all namespaces. Following patches will additionally allow to create multiple multicast routing tables in each namespace. Having a single shared queue for all these users seems to excessive, move the queue and the cleanup timer to the per-namespace data to unshare it. As a side-effect, this fixes a bug in the seq file iteration functions: the first entry returned is always from the current namespace, entries returned after that may belong to any namespace. 
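To make the shape of that change concrete, here is a minimal sketch of the per-namespace pattern; every name in it (mcast_pernet_state, mcast_net_ops, the 10*HZ rearm interval) is invented for illustration, and the real patch open-codes its state in ip6mr_net_init()/ip6mr_net_exit() rather than using net_generic():

#include <linux/jiffies.h>
#include <linux/list.h>
#include <linux/timer.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

/* State that used to be a single global (a queue plus its expiry timer)
 * becomes per network namespace, set up and torn down by pernet hooks. */
struct mcast_pernet_state {
	struct list_head unres_queue;
	struct timer_list expire_timer;
};

static int mcast_net_id;

static void expire_process_sketch(unsigned long arg)
{
	struct net *net = (struct net *)arg;
	struct mcast_pernet_state *st = net_generic(net, mcast_net_id);

	/* Each namespace walks only its own unresolved entries and rearms
	 * only its own timer. */
	if (!list_empty(&st->unres_queue))
		mod_timer(&st->expire_timer, jiffies + 10 * HZ);
}

static int __net_init mcast_net_init(struct net *net)
{
	struct mcast_pernet_state *st = net_generic(net, mcast_net_id);

	INIT_LIST_HEAD(&st->unres_queue);
	setup_timer(&st->expire_timer, expire_process_sketch,
		    (unsigned long)net);
	return 0;
}

static void __net_exit mcast_net_exit(struct net *net)
{
	struct mcast_pernet_state *st = net_generic(net, mcast_net_id);

	del_timer_sync(&st->expire_timer);
}

static struct pernet_operations mcast_net_ops = {
	.init = mcast_net_init,
	.exit = mcast_net_exit,
	.id   = &mcast_net_id,
	.size = sizeof(struct mcast_pernet_state),
};

Registering mcast_net_ops with register_pernet_subsys() gives each namespace its own queue and timer, which is also what keeps the seq-file iteration within a single namespace.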
Signed-off-by: Patrick McHardy --- net/ipv6/ip6mr.c | 74 +++++++++++++++++++++++++------------------------------- 1 file changed, 33 insertions(+), 41 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index e0b530ca394c..7236030e403e 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -63,8 +63,6 @@ static DEFINE_RWLOCK(mrt_lock); #define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL) -static struct mfc6_cache *mfc_unres_queue; /* Queue of unresolved entries */ - /* Special spinlock for queue of unresolved entries */ static DEFINE_SPINLOCK(mfc_unres_lock); @@ -84,8 +82,6 @@ static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm); static void mroute_clean_tables(struct net *net); -static struct timer_list ipmr_expire_timer; - #ifdef CONFIG_PROC_FS @@ -110,11 +106,10 @@ static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net, return mfc; read_unlock(&mrt_lock); - it->cache = &mfc_unres_queue; + it->cache = &net->ipv6.mfc6_unres_queue; spin_lock_bh(&mfc_unres_lock); - for (mfc = mfc_unres_queue; mfc; mfc = mfc->next) - if (net_eq(mfc6_net(mfc), net) && - pos-- == 0) + for (mfc = net->ipv6.mfc6_unres_queue; mfc; mfc = mfc->next) + if (pos-- == 0) return mfc; spin_unlock_bh(&mfc_unres_lock); @@ -244,7 +239,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (mfc->next) return mfc->next; - if (it->cache == &mfc_unres_queue) + if (it->cache == &net->ipv6.mfc6_unres_queue) goto end_of_list; BUG_ON(it->cache != net->ipv6.mfc6_cache_array); @@ -257,11 +252,11 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) /* exhausted cache_array, show unresolved */ read_unlock(&mrt_lock); - it->cache = &mfc_unres_queue; + it->cache = &net->ipv6.mfc6_unres_queue; it->ct = 0; spin_lock_bh(&mfc_unres_lock); - mfc = mfc_unres_queue; + mfc = net->ipv6.mfc6_unres_queue; if (mfc) return mfc; @@ -277,7 +272,7 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) struct ipmr_mfc_iter *it = seq->private; struct net *net = seq_file_net(seq); - if (it->cache == &mfc_unres_queue) + if (it->cache == &net->ipv6.mfc6_unres_queue) spin_unlock_bh(&mfc_unres_lock); else if (it->cache == net->ipv6.mfc6_cache_array) read_unlock(&mrt_lock); @@ -301,7 +296,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) &mfc->mf6c_mcastgrp, &mfc->mf6c_origin, mfc->mf6c_parent); - if (it->cache != &mfc_unres_queue) { + if (it->cache != &net->ipv6.mfc6_unres_queue) { seq_printf(seq, " %8lu %8lu %8lu", mfc->mfc_un.res.pkt, mfc->mfc_un.res.bytes, @@ -559,15 +554,15 @@ static void ip6mr_destroy_unres(struct mfc6_cache *c) } -/* Single timer process for all the unresolved queue. */ +/* Timer process for all the unresolved queue. 
*/ -static void ipmr_do_expire_process(unsigned long dummy) +static void ipmr_do_expire_process(struct net *net) { unsigned long now = jiffies; unsigned long expires = 10 * HZ; struct mfc6_cache *c, **cp; - cp = &mfc_unres_queue; + cp = &net->ipv6.mfc6_unres_queue; while ((c = *cp) != NULL) { if (time_after(c->mfc_un.unres.expires, now)) { @@ -583,19 +578,21 @@ static void ipmr_do_expire_process(unsigned long dummy) ip6mr_destroy_unres(c); } - if (mfc_unres_queue != NULL) - mod_timer(&ipmr_expire_timer, jiffies + expires); + if (net->ipv6.mfc6_unres_queue != NULL) + mod_timer(&net->ipv6.ipmr_expire_timer, jiffies + expires); } -static void ipmr_expire_process(unsigned long dummy) +static void ipmr_expire_process(unsigned long arg) { + struct net *net = (struct net *)arg; + if (!spin_trylock(&mfc_unres_lock)) { - mod_timer(&ipmr_expire_timer, jiffies + 1); + mod_timer(&net->ipv6.ipmr_expire_timer, jiffies + 1); return; } - if (mfc_unres_queue != NULL) - ipmr_do_expire_process(dummy); + if (net->ipv6.mfc6_unres_queue != NULL) + ipmr_do_expire_process(net); spin_unlock(&mfc_unres_lock); } @@ -880,9 +877,8 @@ ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb) struct mfc6_cache *c; spin_lock_bh(&mfc_unres_lock); - for (c = mfc_unres_queue; c; c = c->next) { - if (net_eq(mfc6_net(c), net) && - ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) && + for (c = net->ipv6.mfc6_unres_queue; c; c = c->next) { + if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) && ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) break; } @@ -923,10 +919,10 @@ ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb) } atomic_inc(&net->ipv6.cache_resolve_queue_len); - c->next = mfc_unres_queue; - mfc_unres_queue = c; + c->next = net->ipv6.mfc6_unres_queue; + net->ipv6.mfc6_unres_queue = c; - ipmr_do_expire_process(1); + ipmr_do_expire_process(net); } /* @@ -1019,6 +1015,9 @@ static int __net_init ip6mr_net_init(struct net *net) goto fail_mfc6_cache; } + setup_timer(&net->ipv6.ipmr_expire_timer, ipmr_expire_process, + (unsigned long)net); + #ifdef CONFIG_IPV6_PIMSM_V2 net->ipv6.mroute_reg_vif_num = -1; #endif @@ -1050,6 +1049,7 @@ static void __net_exit ip6mr_net_exit(struct net *net) proc_net_remove(net, "ip6_mr_cache"); proc_net_remove(net, "ip6_mr_vif"); #endif + del_timer(&net->ipv6.ipmr_expire_timer); mroute_clean_tables(net); kfree(net->ipv6.mfc6_cache_array); kfree(net->ipv6.vif6_table); @@ -1075,7 +1075,6 @@ int __init ip6_mr_init(void) if (err) goto reg_pernet_fail; - setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0); err = register_netdevice_notifier(&ip6_mr_notifier); if (err) goto reg_notif_fail; @@ -1092,7 +1091,6 @@ add_proto_fail: unregister_netdevice_notifier(&ip6_mr_notifier); #endif reg_notif_fail: - del_timer(&ipmr_expire_timer); unregister_pernet_subsys(&ip6mr_net_ops); reg_pernet_fail: kmem_cache_destroy(mrt_cachep); @@ -1102,7 +1100,6 @@ reg_pernet_fail: void ip6_mr_cleanup(void) { unregister_netdevice_notifier(&ip6_mr_notifier); - del_timer(&ipmr_expire_timer); unregister_pernet_subsys(&ip6mr_net_ops); kmem_cache_destroy(mrt_cachep); } @@ -1167,18 +1164,17 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock) * need to send on the frames and tidy up. 
*/ spin_lock_bh(&mfc_unres_lock); - for (cp = &mfc_unres_queue; (uc = *cp) != NULL; + for (cp = &net->ipv6.mfc6_unres_queue; (uc = *cp) != NULL; cp = &uc->next) { - if (net_eq(mfc6_net(uc), net) && - ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) && + if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) && ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) { *cp = uc->next; atomic_dec(&net->ipv6.cache_resolve_queue_len); break; } } - if (mfc_unres_queue == NULL) - del_timer(&ipmr_expire_timer); + if (net->ipv6.mfc6_unres_queue == NULL) + del_timer(&net->ipv6.ipmr_expire_timer); spin_unlock_bh(&mfc_unres_lock); if (uc) { @@ -1230,12 +1226,8 @@ static void mroute_clean_tables(struct net *net) struct mfc6_cache *c, **cp; spin_lock_bh(&mfc_unres_lock); - cp = &mfc_unres_queue; + cp = &net->ipv6.mfc6_unres_queue; while ((c = *cp) != NULL) { - if (!net_eq(mfc6_net(c), net)) { - cp = &c->next; - continue; - } *cp = c->next; ip6mr_destroy_unres(c); } -- cgit v1.2.2 From b5aa30b19121de49021fba57aa1f6e4c787fcf67 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 11 May 2010 14:40:50 +0200 Subject: ipv6: ip6mr: remove net pointer from struct mfc6_cache Now that cache entries in unres_queue don't need to be distinguished by their network namespace pointer anymore, we can remove it from struct mfc6_cache add pass the namespace as function argument to the functions that need it. Signed-off-by: Patrick McHardy --- net/ipv6/ip6mr.c | 63 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 31 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 7236030e403e..b3783a436bbd 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -76,10 +76,12 @@ static DEFINE_SPINLOCK(mfc_unres_lock); static struct kmem_cache *mrt_cachep __read_mostly; -static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache); +static int ip6_mr_forward(struct net *net, struct sk_buff *skb, + struct mfc6_cache *cache); static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi, int assert); -static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm); +static int ip6mr_fill_mroute(struct net *net, struct sk_buff *skb, + struct mfc6_cache *c, struct rtmsg *rtm); static void mroute_clean_tables(struct net *net); @@ -523,7 +525,6 @@ static int mif6_delete(struct net *net, int vifi, struct list_head *head) static inline void ip6mr_cache_free(struct mfc6_cache *c) { - release_net(mfc6_net(c)); kmem_cache_free(mrt_cachep, c); } @@ -531,10 +532,9 @@ static inline void ip6mr_cache_free(struct mfc6_cache *c) and reporting error to netlink readers. */ -static void ip6mr_destroy_unres(struct mfc6_cache *c) +static void ip6mr_destroy_unres(struct net *net, struct mfc6_cache *c) { struct sk_buff *skb; - struct net *net = mfc6_net(c); atomic_dec(&net->ipv6.cache_resolve_queue_len); @@ -575,7 +575,7 @@ static void ipmr_do_expire_process(struct net *net) } *cp = c->next; - ip6mr_destroy_unres(c); + ip6mr_destroy_unres(net, c); } if (net->ipv6.mfc6_unres_queue != NULL) @@ -599,10 +599,10 @@ static void ipmr_expire_process(unsigned long arg) /* Fill oifs list. It is called under write locked mrt_lock. 
*/ -static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls) +static void ip6mr_update_thresholds(struct net *net, struct mfc6_cache *cache, + unsigned char *ttls) { int vifi; - struct net *net = mfc6_net(cache); cache->mfc_un.res.minvif = MAXMIFS; cache->mfc_un.res.maxvif = 0; @@ -717,24 +717,22 @@ static struct mfc6_cache *ip6mr_cache_find(struct net *net, /* * Allocate a multicast cache entry */ -static struct mfc6_cache *ip6mr_cache_alloc(struct net *net) +static struct mfc6_cache *ip6mr_cache_alloc(void) { struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); if (c == NULL) return NULL; c->mfc_un.res.minvif = MAXMIFS; - mfc6_net_set(c, net); return c; } -static struct mfc6_cache *ip6mr_cache_alloc_unres(struct net *net) +static struct mfc6_cache *ip6mr_cache_alloc_unres(void) { struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); if (c == NULL) return NULL; skb_queue_head_init(&c->mfc_un.unres.unresolved); c->mfc_un.unres.expires = jiffies + 10 * HZ; - mfc6_net_set(c, net); return c; } @@ -742,7 +740,8 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(struct net *net) * A cache entry has gone into a resolved state from queued */ -static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c) +static void ip6mr_cache_resolve(struct net *net, struct mfc6_cache *uc, + struct mfc6_cache *c) { struct sk_buff *skb; @@ -755,7 +754,7 @@ static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c) int err; struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); - if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) { + if (ip6mr_fill_mroute(net, skb, c, NLMSG_DATA(nlh)) > 0) { nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh; } else { nlh->nlmsg_type = NLMSG_ERROR; @@ -763,9 +762,9 @@ static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c) skb_trim(skb, nlh->nlmsg_len); ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE; } - err = rtnl_unicast(skb, mfc6_net(uc), NETLINK_CB(skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(skb).pid); } else - ip6_mr_forward(skb, c); + ip6_mr_forward(net, skb, c); } } @@ -889,7 +888,7 @@ ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb) */ if (atomic_read(&net->ipv6.cache_resolve_queue_len) >= 10 || - (c = ip6mr_cache_alloc_unres(net)) == NULL) { + (c = ip6mr_cache_alloc_unres()) == NULL) { spin_unlock_bh(&mfc_unres_lock); kfree_skb(skb); @@ -1133,7 +1132,7 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock) if (c != NULL) { write_lock_bh(&mrt_lock); c->mf6c_parent = mfc->mf6cc_parent; - ip6mr_update_thresholds(c, ttls); + ip6mr_update_thresholds(net, c, ttls); if (!mrtsock) c->mfc_flags |= MFC_STATIC; write_unlock_bh(&mrt_lock); @@ -1143,14 +1142,14 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock) if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) return -EINVAL; - c = ip6mr_cache_alloc(net); + c = ip6mr_cache_alloc(); if (c == NULL) return -ENOMEM; c->mf6c_origin = mfc->mf6cc_origin.sin6_addr; c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr; c->mf6c_parent = mfc->mf6cc_parent; - ip6mr_update_thresholds(c, ttls); + ip6mr_update_thresholds(net, c, ttls); if (!mrtsock) c->mfc_flags |= MFC_STATIC; @@ -1178,7 +1177,7 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock) spin_unlock_bh(&mfc_unres_lock); if (uc) { - ip6mr_cache_resolve(uc, c); + ip6mr_cache_resolve(net, uc, c); ip6mr_cache_free(uc); } return 0; @@ 
-1229,7 +1228,7 @@ static void mroute_clean_tables(struct net *net) cp = &net->ipv6.mfc6_unres_queue; while ((c = *cp) != NULL) { *cp = c->next; - ip6mr_destroy_unres(c); + ip6mr_destroy_unres(net, c); } spin_unlock_bh(&mfc_unres_lock); } @@ -1497,10 +1496,10 @@ static inline int ip6mr_forward2_finish(struct sk_buff *skb) * Processing handlers for ip6mr_forward */ -static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi) +static int ip6mr_forward2(struct net *net, struct sk_buff *skb, + struct mfc6_cache *c, int vifi) { struct ipv6hdr *ipv6h; - struct net *net = mfc6_net(c); struct mif_device *vif = &net->ipv6.vif6_table[vifi]; struct net_device *dev; struct dst_entry *dst; @@ -1581,11 +1580,11 @@ static int ip6mr_find_vif(struct net_device *dev) return ct; } -static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache) +static int ip6_mr_forward(struct net *net, struct sk_buff *skb, + struct mfc6_cache *cache) { int psend = -1; int vif, ct; - struct net *net = mfc6_net(cache); vif = cache->mf6c_parent; cache->mfc_un.res.pkt++; @@ -1627,13 +1626,13 @@ static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache) if (psend != -1) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) - ip6mr_forward2(skb2, cache, psend); + ip6mr_forward2(net, skb2, cache, psend); } psend = ct; } } if (psend != -1) { - ip6mr_forward2(skb, cache, psend); + ip6mr_forward2(net, skb, cache, psend); return 0; } @@ -1674,7 +1673,7 @@ int ip6_mr_input(struct sk_buff *skb) return -ENODEV; } - ip6_mr_forward(skb, cache); + ip6_mr_forward(net, skb, cache); read_unlock(&mrt_lock); @@ -1683,11 +1682,11 @@ int ip6_mr_input(struct sk_buff *skb) static int -ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm) +ip6mr_fill_mroute(struct net *net, struct sk_buff *skb, struct mfc6_cache *c, + struct rtmsg *rtm) { int ct; struct rtnexthop *nhp; - struct net *net = mfc6_net(c); u8 *b = skb_tail_pointer(skb); struct rtattr *mp_head; @@ -1781,7 +1780,7 @@ int ip6mr_get_route(struct net *net, if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) cache->mfc_flags |= MFC_NOTIFY; - err = ip6mr_fill_mroute(skb, cache, rtm); + err = ip6mr_fill_mroute(net, skb, cache, rtm); read_unlock(&mrt_lock); return err; } -- cgit v1.2.2 From f30a77842129b5656360cc1f5db48a3fcfb64528 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 11 May 2010 14:40:51 +0200 Subject: ipv6: ip6mr: convert struct mfc_cache to struct list_head Signed-off-by: Patrick McHardy --- net/ipv6/ip6mr.c | 127 +++++++++++++++++++++++++++---------------------------- 1 file changed, 62 insertions(+), 65 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index b3783a436bbd..08e09042ad1c 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -89,7 +89,7 @@ static void mroute_clean_tables(struct net *net); struct ipmr_mfc_iter { struct seq_net_private p; - struct mfc6_cache **cache; + struct list_head *cache; int ct; }; @@ -99,18 +99,18 @@ static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net, { struct mfc6_cache *mfc; - it->cache = net->ipv6.mfc6_cache_array; read_lock(&mrt_lock); - for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) - for (mfc = net->ipv6.mfc6_cache_array[it->ct]; - mfc; mfc = mfc->next) + for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) { + it->cache = &net->ipv6.mfc6_cache_array[it->ct]; + list_for_each_entry(mfc, it->cache, list) if (pos-- == 0) return mfc; + } read_unlock(&mrt_lock); - it->cache = &net->ipv6.mfc6_unres_queue; 
spin_lock_bh(&mfc_unres_lock); - for (mfc = net->ipv6.mfc6_unres_queue; mfc; mfc = mfc->next) + it->cache = &net->ipv6.mfc6_unres_queue; + list_for_each_entry(mfc, it->cache, list) if (pos-- == 0) return mfc; spin_unlock_bh(&mfc_unres_lock); @@ -119,9 +119,6 @@ static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net, return NULL; } - - - /* * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif */ @@ -238,18 +235,19 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (v == SEQ_START_TOKEN) return ipmr_mfc_seq_idx(net, seq->private, 0); - if (mfc->next) - return mfc->next; + if (mfc->list.next != it->cache) + return list_entry(mfc->list.next, struct mfc6_cache, list); if (it->cache == &net->ipv6.mfc6_unres_queue) goto end_of_list; - BUG_ON(it->cache != net->ipv6.mfc6_cache_array); + BUG_ON(it->cache != &net->ipv6.mfc6_cache_array[it->ct]); while (++it->ct < MFC6_LINES) { - mfc = net->ipv6.mfc6_cache_array[it->ct]; - if (mfc) - return mfc; + it->cache = &net->ipv6.mfc6_cache_array[it->ct]; + if (list_empty(it->cache)) + continue; + return list_first_entry(it->cache, struct mfc6_cache, list); } /* exhausted cache_array, show unresolved */ @@ -258,9 +256,8 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) it->ct = 0; spin_lock_bh(&mfc_unres_lock); - mfc = net->ipv6.mfc6_unres_queue; - if (mfc) - return mfc; + if (!list_empty(it->cache)) + return list_first_entry(it->cache, struct mfc6_cache, list); end_of_list: spin_unlock_bh(&mfc_unres_lock); @@ -560,25 +557,22 @@ static void ipmr_do_expire_process(struct net *net) { unsigned long now = jiffies; unsigned long expires = 10 * HZ; - struct mfc6_cache *c, **cp; - - cp = &net->ipv6.mfc6_unres_queue; + struct mfc6_cache *c, *next; - while ((c = *cp) != NULL) { + list_for_each_entry_safe(c, next, &net->ipv6.mfc6_unres_queue, list) { if (time_after(c->mfc_un.unres.expires, now)) { /* not yet... 
*/ unsigned long interval = c->mfc_un.unres.expires - now; if (interval < expires) expires = interval; - cp = &c->next; continue; } - *cp = c->next; + list_del(&c->list); ip6mr_destroy_unres(net, c); } - if (net->ipv6.mfc6_unres_queue != NULL) + if (!list_empty(&net->ipv6.mfc6_unres_queue)) mod_timer(&net->ipv6.ipmr_expire_timer, jiffies + expires); } @@ -591,7 +585,7 @@ static void ipmr_expire_process(unsigned long arg) return; } - if (net->ipv6.mfc6_unres_queue != NULL) + if (!list_empty(&net->ipv6.mfc6_unres_queue)) ipmr_do_expire_process(net); spin_unlock(&mfc_unres_lock); @@ -706,12 +700,12 @@ static struct mfc6_cache *ip6mr_cache_find(struct net *net, int line = MFC6_HASH(mcastgrp, origin); struct mfc6_cache *c; - for (c = net->ipv6.mfc6_cache_array[line]; c; c = c->next) { + list_for_each_entry(c, &net->ipv6.mfc6_cache_array[line], list) { if (ipv6_addr_equal(&c->mf6c_origin, origin) && ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) - break; + return c; } - return c; + return NULL; } /* @@ -872,17 +866,20 @@ static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi, static int ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb) { + bool found = false; int err; struct mfc6_cache *c; spin_lock_bh(&mfc_unres_lock); - for (c = net->ipv6.mfc6_unres_queue; c; c = c->next) { + list_for_each_entry(c, &net->ipv6.mfc6_unres_queue, list) { if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) && - ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) + ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) { + found = true; break; + } } - if (c == NULL) { + if (!found) { /* * Create a new entry if allowable */ @@ -918,8 +915,7 @@ ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb) } atomic_inc(&net->ipv6.cache_resolve_queue_len); - c->next = net->ipv6.mfc6_unres_queue; - net->ipv6.mfc6_unres_queue = c; + list_add(&c->list, &net->ipv6.mfc6_unres_queue); ipmr_do_expire_process(net); } @@ -946,16 +942,15 @@ ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb) static int ip6mr_mfc_delete(struct net *net, struct mf6cctl *mfc) { int line; - struct mfc6_cache *c, **cp; + struct mfc6_cache *c, *next; line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr); - for (cp = &net->ipv6.mfc6_cache_array[line]; - (c = *cp) != NULL; cp = &c->next) { + list_for_each_entry_safe(c, next, &net->ipv6.mfc6_cache_array[line], list) { if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { write_lock_bh(&mrt_lock); - *cp = c->next; + list_del(&c->list); write_unlock_bh(&mrt_lock); ip6mr_cache_free(c); @@ -997,7 +992,9 @@ static struct notifier_block ip6_mr_notifier = { static int __net_init ip6mr_net_init(struct net *net) { + unsigned int i; int err = 0; + net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device), GFP_KERNEL); if (!net->ipv6.vif6_table) { @@ -1007,13 +1004,18 @@ static int __net_init ip6mr_net_init(struct net *net) /* Forwarding cache */ net->ipv6.mfc6_cache_array = kcalloc(MFC6_LINES, - sizeof(struct mfc6_cache *), + sizeof(struct list_head), GFP_KERNEL); if (!net->ipv6.mfc6_cache_array) { err = -ENOMEM; goto fail_mfc6_cache; } + for (i = 0; i < MFC6_LINES; i++) + INIT_LIST_HEAD(&net->ipv6.mfc6_cache_array[i]); + + INIT_LIST_HEAD(&net->ipv6.mfc6_unres_queue); + setup_timer(&net->ipv6.ipmr_expire_timer, ipmr_expire_process, (unsigned long)net); @@ -1105,8 +1107,9 @@ void ip6_mr_cleanup(void) 
static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock) { + bool found = false; int line; - struct mfc6_cache *uc, *c, **cp; + struct mfc6_cache *uc, *c; unsigned char ttls[MAXMIFS]; int i; @@ -1122,14 +1125,15 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock) line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr); - for (cp = &net->ipv6.mfc6_cache_array[line]; - (c = *cp) != NULL; cp = &c->next) { + list_for_each_entry(c, &net->ipv6.mfc6_cache_array[line], list) { if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && - ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) + ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { + found = true; break; + } } - if (c != NULL) { + if (found) { write_lock_bh(&mrt_lock); c->mf6c_parent = mfc->mf6cc_parent; ip6mr_update_thresholds(net, c, ttls); @@ -1154,29 +1158,29 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock) c->mfc_flags |= MFC_STATIC; write_lock_bh(&mrt_lock); - c->next = net->ipv6.mfc6_cache_array[line]; - net->ipv6.mfc6_cache_array[line] = c; + list_add(&c->list, &net->ipv6.mfc6_cache_array[line]); write_unlock_bh(&mrt_lock); /* * Check to see if we resolved a queued list. If so we * need to send on the frames and tidy up. */ + found = false; spin_lock_bh(&mfc_unres_lock); - for (cp = &net->ipv6.mfc6_unres_queue; (uc = *cp) != NULL; - cp = &uc->next) { + list_for_each_entry(uc, &net->ipv6.mfc6_unres_queue, list) { if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) && ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) { - *cp = uc->next; + list_del(&uc->list); atomic_dec(&net->ipv6.cache_resolve_queue_len); + found = true; break; } } - if (net->ipv6.mfc6_unres_queue == NULL) + if (list_empty(&net->ipv6.mfc6_unres_queue)) del_timer(&net->ipv6.ipmr_expire_timer); spin_unlock_bh(&mfc_unres_lock); - if (uc) { + if (found) { ip6mr_cache_resolve(net, uc, c); ip6mr_cache_free(uc); } @@ -1191,6 +1195,7 @@ static void mroute_clean_tables(struct net *net) { int i; LIST_HEAD(list); + struct mfc6_cache *c, *next; /* * Shut down all active vif entries @@ -1205,16 +1210,11 @@ static void mroute_clean_tables(struct net *net) * Wipe the cache */ for (i = 0; i < MFC6_LINES; i++) { - struct mfc6_cache *c, **cp; - - cp = &net->ipv6.mfc6_cache_array[i]; - while ((c = *cp) != NULL) { - if (c->mfc_flags & MFC_STATIC) { - cp = &c->next; + list_for_each_entry_safe(c, next, &net->ipv6.mfc6_cache_array[i], list) { + if (c->mfc_flags & MFC_STATIC) continue; - } write_lock_bh(&mrt_lock); - *cp = c->next; + list_del(&c->list); write_unlock_bh(&mrt_lock); ip6mr_cache_free(c); @@ -1222,12 +1222,9 @@ static void mroute_clean_tables(struct net *net) } if (atomic_read(&net->ipv6.cache_resolve_queue_len) != 0) { - struct mfc6_cache *c, **cp; - spin_lock_bh(&mfc_unres_lock); - cp = &net->ipv6.mfc6_unres_queue; - while ((c = *cp) != NULL) { - *cp = c->next; + list_for_each_entry_safe(c, next, &net->ipv6.mfc6_unres_queue, list) { + list_del(&c->list); ip6mr_destroy_unres(net, c); } spin_unlock_bh(&mfc_unres_lock); -- cgit v1.2.2 From 6bd521433942d85e80f7a731a88cc91a327f38e0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 11 May 2010 14:40:53 +0200 Subject: ipv6: ip6mr: move mroute data into seperate structure Signed-off-by: Patrick McHardy --- net/ipv6/ip6mr.c | 390 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 214 insertions(+), 176 deletions(-) (limited to 'net') diff --git 
a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 08e09042ad1c..9419fceeed41 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -51,6 +51,24 @@ #include #include +struct mr6_table { +#ifdef CONFIG_NET_NS + struct net *net; +#endif + struct sock *mroute6_sk; + struct timer_list ipmr_expire_timer; + struct list_head mfc6_unres_queue; + struct list_head mfc6_cache_array[MFC6_LINES]; + struct mif_device vif6_table[MAXMIFS]; + int maxvif; + atomic_t cache_resolve_queue_len; + int mroute_do_assert; + int mroute_do_pim; +#ifdef CONFIG_IPV6_PIMSM_V2 + int mroute_reg_vif_num; +#endif +}; + /* Big lock, protecting vif table, mrt cache and mroute socket state. Note that the changes are semaphored via rtnl_lock. */ @@ -61,7 +79,7 @@ static DEFINE_RWLOCK(mrt_lock); * Multicast router control variables */ -#define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL) +#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL) /* Special spinlock for queue of unresolved entries */ static DEFINE_SPINLOCK(mfc_unres_lock); @@ -76,13 +94,13 @@ static DEFINE_SPINLOCK(mfc_unres_lock); static struct kmem_cache *mrt_cachep __read_mostly; -static int ip6_mr_forward(struct net *net, struct sk_buff *skb, - struct mfc6_cache *cache); -static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, +static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, + struct sk_buff *skb, struct mfc6_cache *cache); +static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, mifi_t mifi, int assert); -static int ip6mr_fill_mroute(struct net *net, struct sk_buff *skb, +static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm); -static void mroute_clean_tables(struct net *net); +static void mroute_clean_tables(struct mr6_table *mrt); #ifdef CONFIG_PROC_FS @@ -97,11 +115,12 @@ struct ipmr_mfc_iter { static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net, struct ipmr_mfc_iter *it, loff_t pos) { + struct mr6_table *mrt = net->ipv6.mrt6; struct mfc6_cache *mfc; read_lock(&mrt_lock); for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) { - it->cache = &net->ipv6.mfc6_cache_array[it->ct]; + it->cache = &mrt->mfc6_cache_array[it->ct]; list_for_each_entry(mfc, it->cache, list) if (pos-- == 0) return mfc; @@ -109,7 +128,7 @@ static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net, read_unlock(&mrt_lock); spin_lock_bh(&mfc_unres_lock); - it->cache = &net->ipv6.mfc6_unres_queue; + it->cache = &mrt->mfc6_unres_queue; list_for_each_entry(mfc, it->cache, list) if (pos-- == 0) return mfc; @@ -132,11 +151,13 @@ static struct mif_device *ip6mr_vif_seq_idx(struct net *net, struct ipmr_vif_iter *iter, loff_t pos) { - for (iter->ct = 0; iter->ct < net->ipv6.maxvif; ++iter->ct) { - if (!MIF_EXISTS(net, iter->ct)) + struct mr6_table *mrt = net->ipv6.mrt6; + + for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) { + if (!MIF_EXISTS(mrt, iter->ct)) continue; if (pos-- == 0) - return &net->ipv6.vif6_table[iter->ct]; + return &mrt->vif6_table[iter->ct]; } return NULL; } @@ -155,15 +176,16 @@ static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ipmr_vif_iter *iter = seq->private; struct net *net = seq_file_net(seq); + struct mr6_table *mrt = net->ipv6.mrt6; ++*pos; if (v == SEQ_START_TOKEN) return ip6mr_vif_seq_idx(net, iter, 0); - while (++iter->ct < net->ipv6.maxvif) { - if (!MIF_EXISTS(net, iter->ct)) + while (++iter->ct < mrt->maxvif) { + if (!MIF_EXISTS(mrt, iter->ct)) continue; - return 
&net->ipv6.vif6_table[iter->ct]; + return &mrt->vif6_table[iter->ct]; } return NULL; } @@ -177,6 +199,7 @@ static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v) static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) { struct net *net = seq_file_net(seq); + struct mr6_table *mrt = net->ipv6.mrt6; if (v == SEQ_START_TOKEN) { seq_puts(seq, @@ -187,7 +210,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%2td %-10s %8ld %7ld %8ld %7ld %05X\n", - vif - net->ipv6.vif6_table, + vif - mrt->vif6_table, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out, vif->flags); @@ -229,6 +252,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) struct mfc6_cache *mfc = v; struct ipmr_mfc_iter *it = seq->private; struct net *net = seq_file_net(seq); + struct mr6_table *mrt = net->ipv6.mrt6; ++*pos; @@ -238,13 +262,13 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (mfc->list.next != it->cache) return list_entry(mfc->list.next, struct mfc6_cache, list); - if (it->cache == &net->ipv6.mfc6_unres_queue) + if (it->cache == &mrt->mfc6_unres_queue) goto end_of_list; - BUG_ON(it->cache != &net->ipv6.mfc6_cache_array[it->ct]); + BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]); while (++it->ct < MFC6_LINES) { - it->cache = &net->ipv6.mfc6_cache_array[it->ct]; + it->cache = &mrt->mfc6_cache_array[it->ct]; if (list_empty(it->cache)) continue; return list_first_entry(it->cache, struct mfc6_cache, list); @@ -252,7 +276,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) /* exhausted cache_array, show unresolved */ read_unlock(&mrt_lock); - it->cache = &net->ipv6.mfc6_unres_queue; + it->cache = &mrt->mfc6_unres_queue; it->ct = 0; spin_lock_bh(&mfc_unres_lock); @@ -270,10 +294,11 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) { struct ipmr_mfc_iter *it = seq->private; struct net *net = seq_file_net(seq); + struct mr6_table *mrt = net->ipv6.mrt6; - if (it->cache == &net->ipv6.mfc6_unres_queue) + if (it->cache == &mrt->mfc6_unres_queue) spin_unlock_bh(&mfc_unres_lock); - else if (it->cache == net->ipv6.mfc6_cache_array) + else if (it->cache == mrt->mfc6_cache_array) read_unlock(&mrt_lock); } @@ -281,6 +306,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) { int n; struct net *net = seq_file_net(seq); + struct mr6_table *mrt = net->ipv6.mrt6; if (v == SEQ_START_TOKEN) { seq_puts(seq, @@ -295,14 +321,14 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) &mfc->mf6c_mcastgrp, &mfc->mf6c_origin, mfc->mf6c_parent); - if (it->cache != &net->ipv6.mfc6_unres_queue) { + if (it->cache != &mrt->mfc6_unres_queue) { seq_printf(seq, " %8lu %8lu %8lu", mfc->mfc_un.res.pkt, mfc->mfc_un.res.bytes, mfc->mfc_un.res.wrong_if); for (n = mfc->mfc_un.res.minvif; n < mfc->mfc_un.res.maxvif; n++) { - if (MIF_EXISTS(net, n) && + if (MIF_EXISTS(mrt, n) && mfc->mfc_un.res.ttls[n] < 255) seq_printf(seq, " %2d:%-3d", @@ -349,7 +375,8 @@ static int pim6_rcv(struct sk_buff *skb) struct ipv6hdr *encap; struct net_device *reg_dev = NULL; struct net *net = dev_net(skb->dev); - int reg_vif_num = net->ipv6.mroute_reg_vif_num; + struct mr6_table *mrt = net->ipv6.mrt6; + int reg_vif_num = mrt->mroute_reg_vif_num; if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) goto drop; @@ -374,7 +401,7 @@ static int pim6_rcv(struct sk_buff *skb) read_lock(&mrt_lock); if (reg_vif_num >= 0) - reg_dev = net->ipv6.vif6_table[reg_vif_num].dev; + reg_dev = mrt->vif6_table[reg_vif_num].dev; 
if (reg_dev) dev_hold(reg_dev); read_unlock(&mrt_lock); @@ -411,12 +438,12 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { struct net *net = dev_net(dev); + struct mr6_table *mrt = net->ipv6.mrt6; read_lock(&mrt_lock); dev->stats.tx_bytes += skb->len; dev->stats.tx_packets++; - ip6mr_cache_report(net, skb, net->ipv6.mroute_reg_vif_num, - MRT6MSG_WHOLEPKT); + ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT); read_unlock(&mrt_lock); kfree_skb(skb); return NETDEV_TX_OK; @@ -472,15 +499,16 @@ failure: * Delete a VIF entry */ -static int mif6_delete(struct net *net, int vifi, struct list_head *head) +static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head) { struct mif_device *v; struct net_device *dev; struct inet6_dev *in6_dev; - if (vifi < 0 || vifi >= net->ipv6.maxvif) + + if (vifi < 0 || vifi >= mrt->maxvif) return -EADDRNOTAVAIL; - v = &net->ipv6.vif6_table[vifi]; + v = &mrt->vif6_table[vifi]; write_lock_bh(&mrt_lock); dev = v->dev; @@ -492,17 +520,17 @@ static int mif6_delete(struct net *net, int vifi, struct list_head *head) } #ifdef CONFIG_IPV6_PIMSM_V2 - if (vifi == net->ipv6.mroute_reg_vif_num) - net->ipv6.mroute_reg_vif_num = -1; + if (vifi == mrt->mroute_reg_vif_num) + mrt->mroute_reg_vif_num = -1; #endif - if (vifi + 1 == net->ipv6.maxvif) { + if (vifi + 1 == mrt->maxvif) { int tmp; for (tmp = vifi - 1; tmp >= 0; tmp--) { - if (MIF_EXISTS(net, tmp)) + if (MIF_EXISTS(mrt, tmp)) break; } - net->ipv6.maxvif = tmp + 1; + mrt->maxvif = tmp + 1; } write_unlock_bh(&mrt_lock); @@ -529,11 +557,12 @@ static inline void ip6mr_cache_free(struct mfc6_cache *c) and reporting error to netlink readers. */ -static void ip6mr_destroy_unres(struct net *net, struct mfc6_cache *c) +static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c) { + struct net *net = read_pnet(&mrt->net); struct sk_buff *skb; - atomic_dec(&net->ipv6.cache_resolve_queue_len); + atomic_dec(&mrt->cache_resolve_queue_len); while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) { if (ipv6_hdr(skb)->version == 0) { @@ -553,13 +582,13 @@ static void ip6mr_destroy_unres(struct net *net, struct mfc6_cache *c) /* Timer process for all the unresolved queue. */ -static void ipmr_do_expire_process(struct net *net) +static void ipmr_do_expire_process(struct mr6_table *mrt) { unsigned long now = jiffies; unsigned long expires = 10 * HZ; struct mfc6_cache *c, *next; - list_for_each_entry_safe(c, next, &net->ipv6.mfc6_unres_queue, list) { + list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) { if (time_after(c->mfc_un.unres.expires, now)) { /* not yet... 
*/ unsigned long interval = c->mfc_un.unres.expires - now; @@ -569,31 +598,31 @@ static void ipmr_do_expire_process(struct net *net) } list_del(&c->list); - ip6mr_destroy_unres(net, c); + ip6mr_destroy_unres(mrt, c); } - if (!list_empty(&net->ipv6.mfc6_unres_queue)) - mod_timer(&net->ipv6.ipmr_expire_timer, jiffies + expires); + if (!list_empty(&mrt->mfc6_unres_queue)) + mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); } static void ipmr_expire_process(unsigned long arg) { - struct net *net = (struct net *)arg; + struct mr6_table *mrt = (struct mr6_table *)arg; if (!spin_trylock(&mfc_unres_lock)) { - mod_timer(&net->ipv6.ipmr_expire_timer, jiffies + 1); + mod_timer(&mrt->ipmr_expire_timer, jiffies + 1); return; } - if (!list_empty(&net->ipv6.mfc6_unres_queue)) - ipmr_do_expire_process(net); + if (!list_empty(&mrt->mfc6_unres_queue)) + ipmr_do_expire_process(mrt); spin_unlock(&mfc_unres_lock); } /* Fill oifs list. It is called under write locked mrt_lock. */ -static void ip6mr_update_thresholds(struct net *net, struct mfc6_cache *cache, +static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache, unsigned char *ttls) { int vifi; @@ -602,8 +631,8 @@ static void ip6mr_update_thresholds(struct net *net, struct mfc6_cache *cache, cache->mfc_un.res.maxvif = 0; memset(cache->mfc_un.res.ttls, 255, MAXMIFS); - for (vifi = 0; vifi < net->ipv6.maxvif; vifi++) { - if (MIF_EXISTS(net, vifi) && + for (vifi = 0; vifi < mrt->maxvif; vifi++) { + if (MIF_EXISTS(mrt, vifi) && ttls[vifi] && ttls[vifi] < 255) { cache->mfc_un.res.ttls[vifi] = ttls[vifi]; if (cache->mfc_un.res.minvif > vifi) @@ -614,16 +643,17 @@ static void ip6mr_update_thresholds(struct net *net, struct mfc6_cache *cache, } } -static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock) +static int mif6_add(struct net *net, struct mr6_table *mrt, + struct mif6ctl *vifc, int mrtsock) { int vifi = vifc->mif6c_mifi; - struct mif_device *v = &net->ipv6.vif6_table[vifi]; + struct mif_device *v = &mrt->vif6_table[vifi]; struct net_device *dev; struct inet6_dev *in6_dev; int err; /* Is vif busy ? 
*/ - if (MIF_EXISTS(net, vifi)) + if (MIF_EXISTS(mrt, vifi)) return -EADDRINUSE; switch (vifc->mif6c_flags) { @@ -633,7 +663,7 @@ static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock) * Special Purpose VIF in PIM * All the packets will be sent to the daemon */ - if (net->ipv6.mroute_reg_vif_num >= 0) + if (mrt->mroute_reg_vif_num >= 0) return -EADDRINUSE; dev = ip6mr_reg_vif(net); if (!dev) @@ -685,22 +715,22 @@ static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock) v->dev = dev; #ifdef CONFIG_IPV6_PIMSM_V2 if (v->flags & MIFF_REGISTER) - net->ipv6.mroute_reg_vif_num = vifi; + mrt->mroute_reg_vif_num = vifi; #endif - if (vifi + 1 > net->ipv6.maxvif) - net->ipv6.maxvif = vifi + 1; + if (vifi + 1 > mrt->maxvif) + mrt->maxvif = vifi + 1; write_unlock_bh(&mrt_lock); return 0; } -static struct mfc6_cache *ip6mr_cache_find(struct net *net, +static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt, struct in6_addr *origin, struct in6_addr *mcastgrp) { int line = MFC6_HASH(mcastgrp, origin); struct mfc6_cache *c; - list_for_each_entry(c, &net->ipv6.mfc6_cache_array[line], list) { + list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) { if (ipv6_addr_equal(&c->mf6c_origin, origin) && ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) return c; @@ -734,8 +764,8 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void) * A cache entry has gone into a resolved state from queued */ -static void ip6mr_cache_resolve(struct net *net, struct mfc6_cache *uc, - struct mfc6_cache *c) +static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt, + struct mfc6_cache *uc, struct mfc6_cache *c) { struct sk_buff *skb; @@ -748,7 +778,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mfc6_cache *uc, int err; struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); - if (ip6mr_fill_mroute(net, skb, c, NLMSG_DATA(nlh)) > 0) { + if (ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh; } else { nlh->nlmsg_type = NLMSG_ERROR; @@ -758,7 +788,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mfc6_cache *uc, } err = rtnl_unicast(skb, net, NETLINK_CB(skb).pid); } else - ip6_mr_forward(net, skb, c); + ip6_mr_forward(net, mrt, skb, c); } } @@ -769,8 +799,8 @@ static void ip6mr_cache_resolve(struct net *net, struct mfc6_cache *uc, * Called under mrt_lock. 
*/ -static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi, - int assert) +static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, + mifi_t mifi, int assert) { struct sk_buff *skb; struct mrt6msg *msg; @@ -806,7 +836,7 @@ static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi, msg = (struct mrt6msg *)skb_transport_header(skb); msg->im6_mbz = 0; msg->im6_msgtype = MRT6MSG_WHOLEPKT; - msg->im6_mif = net->ipv6.mroute_reg_vif_num; + msg->im6_mif = mrt->mroute_reg_vif_num; msg->im6_pad = 0; ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr); ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr); @@ -841,7 +871,7 @@ static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi, skb->ip_summed = CHECKSUM_UNNECESSARY; } - if (net->ipv6.mroute6_sk == NULL) { + if (mrt->mroute6_sk == NULL) { kfree_skb(skb); return -EINVAL; } @@ -849,7 +879,7 @@ static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi, /* * Deliver to user space multicast routing algorithms */ - ret = sock_queue_rcv_skb(net->ipv6.mroute6_sk, skb); + ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb); if (ret < 0) { if (net_ratelimit()) printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n"); @@ -864,14 +894,14 @@ static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi, */ static int -ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb) +ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb) { bool found = false; int err; struct mfc6_cache *c; spin_lock_bh(&mfc_unres_lock); - list_for_each_entry(c, &net->ipv6.mfc6_unres_queue, list) { + list_for_each_entry(c, &mrt->mfc6_unres_queue, list) { if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) && ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) { found = true; @@ -884,7 +914,7 @@ ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb) * Create a new entry if allowable */ - if (atomic_read(&net->ipv6.cache_resolve_queue_len) >= 10 || + if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 || (c = ip6mr_cache_alloc_unres()) == NULL) { spin_unlock_bh(&mfc_unres_lock); @@ -902,7 +932,7 @@ ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb) /* * Reflect first query at pim6sd */ - err = ip6mr_cache_report(net, skb, mifi, MRT6MSG_NOCACHE); + err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE); if (err < 0) { /* If the report failed throw the cache entry out - Brad Parker @@ -914,10 +944,10 @@ ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb) return err; } - atomic_inc(&net->ipv6.cache_resolve_queue_len); - list_add(&c->list, &net->ipv6.mfc6_unres_queue); + atomic_inc(&mrt->cache_resolve_queue_len); + list_add(&c->list, &mrt->mfc6_unres_queue); - ipmr_do_expire_process(net); + ipmr_do_expire_process(mrt); } /* @@ -939,14 +969,14 @@ ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb) * MFC6 cache manipulation by user space */ -static int ip6mr_mfc_delete(struct net *net, struct mf6cctl *mfc) +static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc) { int line; struct mfc6_cache *c, *next; line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr); - list_for_each_entry_safe(c, next, &net->ipv6.mfc6_cache_array[line], list) { + list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) { if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && 
ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { write_lock_bh(&mrt_lock); @@ -965,6 +995,7 @@ static int ip6mr_device_event(struct notifier_block *this, { struct net_device *dev = ptr; struct net *net = dev_net(dev); + struct mr6_table *mrt = net->ipv6.mrt6; struct mif_device *v; int ct; LIST_HEAD(list); @@ -972,10 +1003,10 @@ static int ip6mr_device_event(struct notifier_block *this, if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; - v = &net->ipv6.vif6_table[0]; - for (ct = 0; ct < net->ipv6.maxvif; ct++, v++) { + v = &mrt->vif6_table[0]; + for (ct = 0; ct < mrt->maxvif; ct++, v++) { if (v->dev == dev) - mif6_delete(net, ct, &list); + mif6_delete(mrt, ct, &list); } unregister_netdevice_many(&list); @@ -992,35 +1023,28 @@ static struct notifier_block ip6_mr_notifier = { static int __net_init ip6mr_net_init(struct net *net) { + struct mr6_table *mrt; unsigned int i; int err = 0; - net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device), - GFP_KERNEL); - if (!net->ipv6.vif6_table) { + mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); + if (mrt == NULL) { err = -ENOMEM; goto fail; } - /* Forwarding cache */ - net->ipv6.mfc6_cache_array = kcalloc(MFC6_LINES, - sizeof(struct list_head), - GFP_KERNEL); - if (!net->ipv6.mfc6_cache_array) { - err = -ENOMEM; - goto fail_mfc6_cache; - } + write_pnet(&mrt->net, net); for (i = 0; i < MFC6_LINES; i++) - INIT_LIST_HEAD(&net->ipv6.mfc6_cache_array[i]); + INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]); - INIT_LIST_HEAD(&net->ipv6.mfc6_unres_queue); + INIT_LIST_HEAD(&mrt->mfc6_unres_queue); - setup_timer(&net->ipv6.ipmr_expire_timer, ipmr_expire_process, - (unsigned long)net); + setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, + (unsigned long)mrt); #ifdef CONFIG_IPV6_PIMSM_V2 - net->ipv6.mroute_reg_vif_num = -1; + mrt->mroute_reg_vif_num = -1; #endif #ifdef CONFIG_PROC_FS @@ -1030,30 +1054,31 @@ static int __net_init ip6mr_net_init(struct net *net) if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops)) goto proc_cache_fail; #endif + + net->ipv6.mrt6 = mrt; return 0; #ifdef CONFIG_PROC_FS proc_cache_fail: proc_net_remove(net, "ip6_mr_vif"); proc_vif_fail: - kfree(net->ipv6.mfc6_cache_array); + kfree(mrt); #endif -fail_mfc6_cache: - kfree(net->ipv6.vif6_table); fail: return err; } static void __net_exit ip6mr_net_exit(struct net *net) { + struct mr6_table *mrt = net->ipv6.mrt6; + #ifdef CONFIG_PROC_FS proc_net_remove(net, "ip6_mr_cache"); proc_net_remove(net, "ip6_mr_vif"); #endif - del_timer(&net->ipv6.ipmr_expire_timer); - mroute_clean_tables(net); - kfree(net->ipv6.mfc6_cache_array); - kfree(net->ipv6.vif6_table); + del_timer(&mrt->ipmr_expire_timer); + mroute_clean_tables(mrt); + kfree(mrt); } static struct pernet_operations ip6mr_net_ops = { @@ -1105,7 +1130,8 @@ void ip6_mr_cleanup(void) kmem_cache_destroy(mrt_cachep); } -static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock) +static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, + struct mf6cctl *mfc, int mrtsock) { bool found = false; int line; @@ -1125,7 +1151,7 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock) line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr); - list_for_each_entry(c, &net->ipv6.mfc6_cache_array[line], list) { + list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) { if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { found = true; @@ -1136,7 +1162,7 @@ 
static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock) if (found) { write_lock_bh(&mrt_lock); c->mf6c_parent = mfc->mf6cc_parent; - ip6mr_update_thresholds(net, c, ttls); + ip6mr_update_thresholds(mrt, c, ttls); if (!mrtsock) c->mfc_flags |= MFC_STATIC; write_unlock_bh(&mrt_lock); @@ -1153,12 +1179,12 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock) c->mf6c_origin = mfc->mf6cc_origin.sin6_addr; c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr; c->mf6c_parent = mfc->mf6cc_parent; - ip6mr_update_thresholds(net, c, ttls); + ip6mr_update_thresholds(mrt, c, ttls); if (!mrtsock) c->mfc_flags |= MFC_STATIC; write_lock_bh(&mrt_lock); - list_add(&c->list, &net->ipv6.mfc6_cache_array[line]); + list_add(&c->list, &mrt->mfc6_cache_array[line]); write_unlock_bh(&mrt_lock); /* @@ -1167,21 +1193,21 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock) */ found = false; spin_lock_bh(&mfc_unres_lock); - list_for_each_entry(uc, &net->ipv6.mfc6_unres_queue, list) { + list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) { if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) && ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) { list_del(&uc->list); - atomic_dec(&net->ipv6.cache_resolve_queue_len); + atomic_dec(&mrt->cache_resolve_queue_len); found = true; break; } } - if (list_empty(&net->ipv6.mfc6_unres_queue)) - del_timer(&net->ipv6.ipmr_expire_timer); + if (list_empty(&mrt->mfc6_unres_queue)) + del_timer(&mrt->ipmr_expire_timer); spin_unlock_bh(&mfc_unres_lock); if (found) { - ip6mr_cache_resolve(net, uc, c); + ip6mr_cache_resolve(net, mrt, uc, c); ip6mr_cache_free(uc); } return 0; @@ -1191,7 +1217,7 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock) * Close the multicast socket, and clear the vif tables etc */ -static void mroute_clean_tables(struct net *net) +static void mroute_clean_tables(struct mr6_table *mrt) { int i; LIST_HEAD(list); @@ -1200,9 +1226,9 @@ static void mroute_clean_tables(struct net *net) /* * Shut down all active vif entries */ - for (i = 0; i < net->ipv6.maxvif; i++) { - if (!(net->ipv6.vif6_table[i].flags & VIFF_STATIC)) - mif6_delete(net, i, &list); + for (i = 0; i < mrt->maxvif; i++) { + if (!(mrt->vif6_table[i].flags & VIFF_STATIC)) + mif6_delete(mrt, i, &list); } unregister_netdevice_many(&list); @@ -1210,7 +1236,7 @@ static void mroute_clean_tables(struct net *net) * Wipe the cache */ for (i = 0; i < MFC6_LINES; i++) { - list_for_each_entry_safe(c, next, &net->ipv6.mfc6_cache_array[i], list) { + list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) { if (c->mfc_flags & MFC_STATIC) continue; write_lock_bh(&mrt_lock); @@ -1221,25 +1247,25 @@ static void mroute_clean_tables(struct net *net) } } - if (atomic_read(&net->ipv6.cache_resolve_queue_len) != 0) { + if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { spin_lock_bh(&mfc_unres_lock); - list_for_each_entry_safe(c, next, &net->ipv6.mfc6_unres_queue, list) { + list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) { list_del(&c->list); - ip6mr_destroy_unres(net, c); + ip6mr_destroy_unres(mrt, c); } spin_unlock_bh(&mfc_unres_lock); } } -static int ip6mr_sk_init(struct sock *sk) +static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk) { int err = 0; struct net *net = sock_net(sk); rtnl_lock(); write_lock_bh(&mrt_lock); - if (likely(net->ipv6.mroute6_sk == NULL)) { - net->ipv6.mroute6_sk = sk; + if (likely(mrt->mroute6_sk == NULL)) { + mrt->mroute6_sk = sk; 
net->ipv6.devconf_all->mc_forwarding++; } else @@ -1255,15 +1281,16 @@ int ip6mr_sk_done(struct sock *sk) { int err = 0; struct net *net = sock_net(sk); + struct mr6_table *mrt = net->ipv6.mrt6; rtnl_lock(); - if (sk == net->ipv6.mroute6_sk) { + if (sk == mrt->mroute6_sk) { write_lock_bh(&mrt_lock); - net->ipv6.mroute6_sk = NULL; + mrt->mroute6_sk = NULL; net->ipv6.devconf_all->mc_forwarding--; write_unlock_bh(&mrt_lock); - mroute_clean_tables(net); + mroute_clean_tables(mrt); } else err = -EACCES; rtnl_unlock(); @@ -1271,6 +1298,13 @@ int ip6mr_sk_done(struct sock *sk) return err; } +struct sock *mroute6_socket(struct net *net) +{ + struct mr6_table *mrt = net->ipv6.mrt6; + + return mrt->mroute6_sk; +} + /* * Socket options and virtual interface manipulation. The whole * virtual interface system is a complete heap, but unfortunately @@ -1285,9 +1319,10 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns struct mf6cctl mfc; mifi_t mifi; struct net *net = sock_net(sk); + struct mr6_table *mrt = net->ipv6.mrt6; if (optname != MRT6_INIT) { - if (sk != net->ipv6.mroute6_sk && !capable(CAP_NET_ADMIN)) + if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN)) return -EACCES; } @@ -1299,7 +1334,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns if (optlen < sizeof(int)) return -EINVAL; - return ip6mr_sk_init(sk); + return ip6mr_sk_init(mrt, sk); case MRT6_DONE: return ip6mr_sk_done(sk); @@ -1312,7 +1347,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns if (vif.mif6c_mifi >= MAXMIFS) return -ENFILE; rtnl_lock(); - ret = mif6_add(net, &vif, sk == net->ipv6.mroute6_sk); + ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk); rtnl_unlock(); return ret; @@ -1322,7 +1357,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns if (copy_from_user(&mifi, optval, sizeof(mifi_t))) return -EFAULT; rtnl_lock(); - ret = mif6_delete(net, mifi, NULL); + ret = mif6_delete(mrt, mifi, NULL); rtnl_unlock(); return ret; @@ -1338,10 +1373,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns return -EFAULT; rtnl_lock(); if (optname == MRT6_DEL_MFC) - ret = ip6mr_mfc_delete(net, &mfc); + ret = ip6mr_mfc_delete(mrt, &mfc); else - ret = ip6mr_mfc_add(net, &mfc, - sk == net->ipv6.mroute6_sk); + ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk); rtnl_unlock(); return ret; @@ -1353,7 +1387,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns int v; if (get_user(v, (int __user *)optval)) return -EFAULT; - net->ipv6.mroute_do_assert = !!v; + mrt->mroute_do_assert = !!v; return 0; } @@ -1366,9 +1400,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns v = !!v; rtnl_lock(); ret = 0; - if (v != net->ipv6.mroute_do_pim) { - net->ipv6.mroute_do_pim = v; - net->ipv6.mroute_do_assert = v; + if (v != mrt->mroute_do_pim) { + mrt->mroute_do_pim = v; + mrt->mroute_do_assert = v; } rtnl_unlock(); return ret; @@ -1394,6 +1428,7 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int olr; int val; struct net *net = sock_net(sk); + struct mr6_table *mrt = net->ipv6.mrt6; switch (optname) { case MRT6_VERSION: @@ -1401,11 +1436,11 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, break; #ifdef CONFIG_IPV6_PIMSM_V2 case MRT6_PIM: - val = net->ipv6.mroute_do_pim; + val = mrt->mroute_do_pim; break; #endif case MRT6_ASSERT: - val = 
net->ipv6.mroute_do_assert; + val = mrt->mroute_do_assert; break; default: return -ENOPROTOOPT; @@ -1436,16 +1471,17 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) struct mif_device *vif; struct mfc6_cache *c; struct net *net = sock_net(sk); + struct mr6_table *mrt = net->ipv6.mrt6; switch (cmd) { case SIOCGETMIFCNT_IN6: if (copy_from_user(&vr, arg, sizeof(vr))) return -EFAULT; - if (vr.mifi >= net->ipv6.maxvif) + if (vr.mifi >= mrt->maxvif) return -EINVAL; read_lock(&mrt_lock); - vif = &net->ipv6.vif6_table[vr.mifi]; - if (MIF_EXISTS(net, vr.mifi)) { + vif = &mrt->vif6_table[vr.mifi]; + if (MIF_EXISTS(mrt, vr.mifi)) { vr.icount = vif->pkt_in; vr.ocount = vif->pkt_out; vr.ibytes = vif->bytes_in; @@ -1463,7 +1499,7 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) return -EFAULT; read_lock(&mrt_lock); - c = ip6mr_cache_find(net, &sr.src.sin6_addr, &sr.grp.sin6_addr); + c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr); if (c) { sr.pktcnt = c->mfc_un.res.pkt; sr.bytecnt = c->mfc_un.res.bytes; @@ -1493,11 +1529,11 @@ static inline int ip6mr_forward2_finish(struct sk_buff *skb) * Processing handlers for ip6mr_forward */ -static int ip6mr_forward2(struct net *net, struct sk_buff *skb, - struct mfc6_cache *c, int vifi) +static int ip6mr_forward2(struct net *net, struct mr6_table *mrt, + struct sk_buff *skb, struct mfc6_cache *c, int vifi) { struct ipv6hdr *ipv6h; - struct mif_device *vif = &net->ipv6.vif6_table[vifi]; + struct mif_device *vif = &mrt->vif6_table[vifi]; struct net_device *dev; struct dst_entry *dst; struct flowi fl; @@ -1511,7 +1547,7 @@ static int ip6mr_forward2(struct net *net, struct sk_buff *skb, vif->bytes_out += skb->len; vif->dev->stats.tx_bytes += skb->len; vif->dev->stats.tx_packets++; - ip6mr_cache_report(net, skb, vifi, MRT6MSG_WHOLEPKT); + ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT); goto out_free; } #endif @@ -1566,19 +1602,19 @@ out_free: return 0; } -static int ip6mr_find_vif(struct net_device *dev) +static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev) { - struct net *net = dev_net(dev); int ct; - for (ct = net->ipv6.maxvif - 1; ct >= 0; ct--) { - if (net->ipv6.vif6_table[ct].dev == dev) + + for (ct = mrt->maxvif - 1; ct >= 0; ct--) { + if (mrt->vif6_table[ct].dev == dev) break; } return ct; } -static int ip6_mr_forward(struct net *net, struct sk_buff *skb, - struct mfc6_cache *cache) +static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, + struct sk_buff *skb, struct mfc6_cache *cache) { int psend = -1; int vif, ct; @@ -1590,30 +1626,30 @@ static int ip6_mr_forward(struct net *net, struct sk_buff *skb, /* * Wrong interface: drop packet and (maybe) send PIM assert. */ - if (net->ipv6.vif6_table[vif].dev != skb->dev) { + if (mrt->vif6_table[vif].dev != skb->dev) { int true_vifi; cache->mfc_un.res.wrong_if++; - true_vifi = ip6mr_find_vif(skb->dev); + true_vifi = ip6mr_find_vif(mrt, skb->dev); - if (true_vifi >= 0 && net->ipv6.mroute_do_assert && + if (true_vifi >= 0 && mrt->mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, so that we cannot check that packet arrived on an oif. It is bad, but otherwise we would need to move pretty large chunk of pimd to kernel. Ough... 
--ANK */ - (net->ipv6.mroute_do_pim || + (mrt->mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) && time_after(jiffies, cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { cache->mfc_un.res.last_assert = jiffies; - ip6mr_cache_report(net, skb, true_vifi, MRT6MSG_WRONGMIF); + ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF); } goto dont_forward; } - net->ipv6.vif6_table[vif].pkt_in++; - net->ipv6.vif6_table[vif].bytes_in += skb->len; + mrt->vif6_table[vif].pkt_in++; + mrt->vif6_table[vif].bytes_in += skb->len; /* * Forward the frame @@ -1623,13 +1659,13 @@ static int ip6_mr_forward(struct net *net, struct sk_buff *skb, if (psend != -1) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) - ip6mr_forward2(net, skb2, cache, psend); + ip6mr_forward2(net, mrt, skb2, cache, psend); } psend = ct; } } if (psend != -1) { - ip6mr_forward2(net, skb, cache, psend); + ip6mr_forward2(net, mrt, skb, cache, psend); return 0; } @@ -1647,9 +1683,10 @@ int ip6_mr_input(struct sk_buff *skb) { struct mfc6_cache *cache; struct net *net = dev_net(skb->dev); + struct mr6_table *mrt = net->ipv6.mrt6; read_lock(&mrt_lock); - cache = ip6mr_cache_find(net, + cache = ip6mr_cache_find(mrt, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); /* @@ -1658,9 +1695,9 @@ int ip6_mr_input(struct sk_buff *skb) if (cache == NULL) { int vif; - vif = ip6mr_find_vif(skb->dev); + vif = ip6mr_find_vif(mrt, skb->dev); if (vif >= 0) { - int err = ip6mr_cache_unresolved(net, vif, skb); + int err = ip6mr_cache_unresolved(mrt, vif, skb); read_unlock(&mrt_lock); return err; @@ -1670,7 +1707,7 @@ int ip6_mr_input(struct sk_buff *skb) return -ENODEV; } - ip6_mr_forward(net, skb, cache); + ip6_mr_forward(net, mrt, skb, cache); read_unlock(&mrt_lock); @@ -1679,8 +1716,8 @@ int ip6_mr_input(struct sk_buff *skb) static int -ip6mr_fill_mroute(struct net *net, struct sk_buff *skb, struct mfc6_cache *c, - struct rtmsg *rtm) +ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, + struct mfc6_cache *c, struct rtmsg *rtm) { int ct; struct rtnexthop *nhp; @@ -1691,19 +1728,19 @@ ip6mr_fill_mroute(struct net *net, struct sk_buff *skb, struct mfc6_cache *c, if (c->mf6c_parent > MAXMIFS) return -ENOENT; - if (MIF_EXISTS(net, c->mf6c_parent)) - RTA_PUT(skb, RTA_IIF, 4, &net->ipv6.vif6_table[c->mf6c_parent].dev->ifindex); + if (MIF_EXISTS(mrt, c->mf6c_parent)) + RTA_PUT(skb, RTA_IIF, 4, &mrt->vif6_table[c->mf6c_parent].dev->ifindex); mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0)); for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { - if (MIF_EXISTS(net, ct) && c->mfc_un.res.ttls[ct] < 255) { + if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) goto rtattr_failure; nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); nhp->rtnh_flags = 0; nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; - nhp->rtnh_ifindex = net->ipv6.vif6_table[ct].dev->ifindex; + nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex; nhp->rtnh_len = sizeof(*nhp); } } @@ -1721,11 +1758,12 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, int nowait) { int err; + struct mr6_table *mrt = net->ipv6.mrt6; struct mfc6_cache *cache; struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); read_lock(&mrt_lock); - cache = ip6mr_cache_find(net, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); + cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); if (!cache) { struct sk_buff *skb2; @@ -1739,7 +1777,7 @@ int ip6mr_get_route(struct net 
*net, } dev = skb->dev; - if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) { + if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) { read_unlock(&mrt_lock); return -ENODEV; } @@ -1768,7 +1806,7 @@ int ip6mr_get_route(struct net *net, ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr); ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr); - err = ip6mr_cache_unresolved(net, vif, skb2); + err = ip6mr_cache_unresolved(mrt, vif, skb2); read_unlock(&mrt_lock); return err; @@ -1777,7 +1815,7 @@ int ip6mr_get_route(struct net *net, if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) cache->mfc_flags |= MFC_NOTIFY; - err = ip6mr_fill_mroute(net, skb, cache, rtm); + err = ip6mr_fill_mroute(mrt, skb, cache, rtm); read_unlock(&mrt_lock); return err; } -- cgit v1.2.2 From d1db275dd3f6e4182c4c4b4a1ac6287925d60569 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 11 May 2010 14:40:55 +0200 Subject: ipv6: ip6mr: support multiple tables This patch adds support for multiple independant multicast routing instances, named "tables". Userspace multicast routing daemons can bind to a specific table instance by issuing a setsockopt call using a new option MRT6_TABLE. The table number is stored in the raw socket data and affects all following ip6mr setsockopt(), getsockopt() and ioctl() calls. By default, a single table (RT6_TABLE_DFLT) is created with a default routing rule pointing to it. Newly created pim6reg devices have the table number appended ("pim6regX"), with the exception of devices created in the default table, which are named just "pim6reg" for compatibility reasons. Packets are directed to a specific table instance using routing rules, similar to how regular routing rules work. Currently iif, oif and mark are supported as keys, source and destination addresses could be supported additionally. Example usage: - bind pimd/xorp/... to a specific table: uint32_t table = 123; setsockopt(fd, SOL_IPV6, MRT6_TABLE, &table, sizeof(table)); - create routing rules directing packets to the new table: # ip -6 mrule add iif eth0 lookup 123 # ip -6 mrule add oif eth0 lookup 123 Signed-off-by: Patrick McHardy --- net/ipv6/Kconfig | 14 ++ net/ipv6/ip6_output.c | 2 +- net/ipv6/ip6mr.c | 428 ++++++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 377 insertions(+), 67 deletions(-) (limited to 'net') diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index a578096152ab..36d7437ac054 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -229,6 +229,20 @@ config IPV6_MROUTE Experimental support for IPv6 multicast forwarding. If unsure, say N. +config IPV6_MROUTE_MULTIPLE_TABLES + bool "IPv6: multicast policy routing" + depends on IPV6_MROUTE + select FIB_RULES + help + Normally, a multicast router runs a userspace daemon and decides + what to do with a multicast packet based on the source and + destination addresses. If you say Y here, the multicast router + will also be able to take interfaces and packet marks into + account and run multiple instances of userspace daemons + simultaneously, each one handling a single table. + + If unsure, say N. 
+ config IPV6_PIMSM_V2 bool "IPv6: PIM-SM version 2 support (EXPERIMENTAL)" depends on IPV6_MROUTE diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 5173acaeb501..cd963f64e27c 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -108,7 +108,7 @@ static int ip6_finish_output2(struct sk_buff *skb) struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) && - ((mroute6_socket(dev_net(dev)) && + ((mroute6_socket(dev_net(dev), skb) && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, &ipv6_hdr(skb)->saddr))) { diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 9419fceeed41..c2920a1a6db3 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -52,9 +53,11 @@ #include struct mr6_table { + struct list_head list; #ifdef CONFIG_NET_NS struct net *net; #endif + u32 id; struct sock *mroute6_sk; struct timer_list ipmr_expire_timer; struct list_head mfc6_unres_queue; @@ -69,6 +72,14 @@ struct mr6_table { #endif }; +struct ip6mr_rule { + struct fib_rule common; +}; + +struct ip6mr_result { + struct mr6_table *mrt; +}; + /* Big lock, protecting vif table, mrt cache and mroute socket state. Note that the changes are semaphored via rtnl_lock. */ @@ -94,6 +105,9 @@ static DEFINE_SPINLOCK(mfc_unres_lock); static struct kmem_cache *mrt_cachep __read_mostly; +static struct mr6_table *ip6mr_new_table(struct net *net, u32 id); +static void ip6mr_free_table(struct mr6_table *mrt); + static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, struct sk_buff *skb, struct mfc6_cache *cache); static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, @@ -101,12 +115,220 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm); static void mroute_clean_tables(struct mr6_table *mrt); +static void ipmr_expire_process(unsigned long arg); + +#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES +#define ip6mr_for_each_table(mrt, met) \ + list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list) + +static struct mr6_table *ip6mr_get_table(struct net *net, u32 id) +{ + struct mr6_table *mrt; + ip6mr_for_each_table(mrt, net) { + if (mrt->id == id) + return mrt; + } + return NULL; +} + +static int ip6mr_fib_lookup(struct net *net, struct flowi *flp, + struct mr6_table **mrt) +{ + struct ip6mr_result res; + struct fib_lookup_arg arg = { .result = &res, }; + int err; + + err = fib_rules_lookup(net->ipv6.mr6_rules_ops, flp, 0, &arg); + if (err < 0) + return err; + *mrt = res.mrt; + return 0; +} + +static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp, + int flags, struct fib_lookup_arg *arg) +{ + struct ip6mr_result *res = arg->result; + struct mr6_table *mrt; + + switch (rule->action) { + case FR_ACT_TO_TBL: + break; + case FR_ACT_UNREACHABLE: + return -ENETUNREACH; + case FR_ACT_PROHIBIT: + return -EACCES; + case FR_ACT_BLACKHOLE: + default: + return -EINVAL; + } + + mrt = ip6mr_get_table(rule->fr_net, rule->table); + if (mrt == NULL) + return -EAGAIN; + res->mrt = mrt; + return 0; +} + +static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags) +{ + return 1; +} + +static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = { + FRA_GENERIC_POLICY, +}; + +static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb, + struct fib_rule_hdr 
*frh, struct nlattr **tb) +{ + return 0; +} + +static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, + struct nlattr **tb) +{ + return 1; +} + +static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, + struct fib_rule_hdr *frh) +{ + frh->dst_len = 0; + frh->src_len = 0; + frh->tos = 0; + return 0; +} + +static const struct fib_rules_ops __net_initdata ip6mr_rules_ops_template = { + .family = RTNL_FAMILY_IP6MR, + .rule_size = sizeof(struct ip6mr_rule), + .addr_size = sizeof(struct in6_addr), + .action = ip6mr_rule_action, + .match = ip6mr_rule_match, + .configure = ip6mr_rule_configure, + .compare = ip6mr_rule_compare, + .default_pref = fib_default_rule_pref, + .fill = ip6mr_rule_fill, + .nlgroup = RTNLGRP_IPV6_RULE, + .policy = ip6mr_rule_policy, + .owner = THIS_MODULE, +}; + +static int __net_init ip6mr_rules_init(struct net *net) +{ + struct fib_rules_ops *ops; + struct mr6_table *mrt; + int err; + + ops = fib_rules_register(&ip6mr_rules_ops_template, net); + if (IS_ERR(ops)) + return PTR_ERR(ops); + + INIT_LIST_HEAD(&net->ipv6.mr6_tables); + + mrt = ip6mr_new_table(net, RT6_TABLE_DFLT); + if (mrt == NULL) { + err = -ENOMEM; + goto err1; + } + + err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0); + if (err < 0) + goto err2; + + net->ipv6.mr6_rules_ops = ops; + return 0; + +err2: + kfree(mrt); +err1: + fib_rules_unregister(ops); + return err; +} + +static void __net_exit ip6mr_rules_exit(struct net *net) +{ + struct mr6_table *mrt, *next; + + list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) + ip6mr_free_table(mrt); + fib_rules_unregister(net->ipv6.mr6_rules_ops); +} +#else +#define ip6mr_for_each_table(mrt, net) \ + for (mrt = net->ipv6.mrt6; mrt; mrt = NULL) + +static struct mr6_table *ip6mr_get_table(struct net *net, u32 id) +{ + return net->ipv6.mrt6; +} + +static int ip6mr_fib_lookup(struct net *net, struct flowi *flp, + struct mr6_table **mrt) +{ + *mrt = net->ipv6.mrt6; + return 0; +} + +static int __net_init ip6mr_rules_init(struct net *net) +{ + net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT); + return net->ipv6.mrt6 ? 
0 : -ENOMEM; +} + +static void __net_exit ip6mr_rules_exit(struct net *net) +{ + ip6mr_free_table(net->ipv6.mrt6); +} +#endif + +static struct mr6_table *ip6mr_new_table(struct net *net, u32 id) +{ + struct mr6_table *mrt; + unsigned int i; + + mrt = ip6mr_get_table(net, id); + if (mrt != NULL) + return mrt; + + mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); + if (mrt == NULL) + return NULL; + mrt->id = id; + write_pnet(&mrt->net, net); + + /* Forwarding cache */ + for (i = 0; i < MFC6_LINES; i++) + INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]); + + INIT_LIST_HEAD(&mrt->mfc6_unres_queue); + + setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, + (unsigned long)mrt); + +#ifdef CONFIG_IPV6_PIMSM_V2 + mrt->mroute_reg_vif_num = -1; +#endif +#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES + list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables); +#endif + return mrt; +} + +static void ip6mr_free_table(struct mr6_table *mrt) +{ + del_timer(&mrt->ipmr_expire_timer); + mroute_clean_tables(mrt); + kfree(mrt); +} #ifdef CONFIG_PROC_FS struct ipmr_mfc_iter { struct seq_net_private p; + struct mr6_table *mrt; struct list_head *cache; int ct; }; @@ -115,7 +337,7 @@ struct ipmr_mfc_iter { static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net, struct ipmr_mfc_iter *it, loff_t pos) { - struct mr6_table *mrt = net->ipv6.mrt6; + struct mr6_table *mrt = it->mrt; struct mfc6_cache *mfc; read_lock(&mrt_lock); @@ -144,6 +366,7 @@ static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net, struct ipmr_vif_iter { struct seq_net_private p; + struct mr6_table *mrt; int ct; }; @@ -151,7 +374,7 @@ static struct mif_device *ip6mr_vif_seq_idx(struct net *net, struct ipmr_vif_iter *iter, loff_t pos) { - struct mr6_table *mrt = net->ipv6.mrt6; + struct mr6_table *mrt = iter->mrt; for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) { if (!MIF_EXISTS(mrt, iter->ct)) @@ -165,7 +388,15 @@ static struct mif_device *ip6mr_vif_seq_idx(struct net *net, static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) __acquires(mrt_lock) { + struct ipmr_vif_iter *iter = seq->private; struct net *net = seq_file_net(seq); + struct mr6_table *mrt; + + mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); + if (mrt == NULL) + return ERR_PTR(-ENOENT); + + iter->mrt = mrt; read_lock(&mrt_lock); return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1) @@ -176,7 +407,7 @@ static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ipmr_vif_iter *iter = seq->private; struct net *net = seq_file_net(seq); - struct mr6_table *mrt = net->ipv6.mrt6; + struct mr6_table *mrt = iter->mrt; ++*pos; if (v == SEQ_START_TOKEN) @@ -198,8 +429,8 @@ static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v) static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) { - struct net *net = seq_file_net(seq); - struct mr6_table *mrt = net->ipv6.mrt6; + struct ipmr_vif_iter *iter = seq->private; + struct mr6_table *mrt = iter->mrt; if (v == SEQ_START_TOKEN) { seq_puts(seq, @@ -241,8 +472,15 @@ static const struct file_operations ip6mr_vif_fops = { static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) { + struct ipmr_mfc_iter *it = seq->private; struct net *net = seq_file_net(seq); + struct mr6_table *mrt; + + mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); + if (mrt == NULL) + return ERR_PTR(-ENOENT); + it->mrt = mrt; return *pos ? 
ipmr_mfc_seq_idx(net, seq->private, *pos - 1) : SEQ_START_TOKEN; } @@ -252,7 +490,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) struct mfc6_cache *mfc = v; struct ipmr_mfc_iter *it = seq->private; struct net *net = seq_file_net(seq); - struct mr6_table *mrt = net->ipv6.mrt6; + struct mr6_table *mrt = it->mrt; ++*pos; @@ -293,8 +531,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) { struct ipmr_mfc_iter *it = seq->private; - struct net *net = seq_file_net(seq); - struct mr6_table *mrt = net->ipv6.mrt6; + struct mr6_table *mrt = it->mrt; if (it->cache == &mrt->mfc6_unres_queue) spin_unlock_bh(&mfc_unres_lock); @@ -305,8 +542,6 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) { int n; - struct net *net = seq_file_net(seq); - struct mr6_table *mrt = net->ipv6.mrt6; if (v == SEQ_START_TOKEN) { seq_puts(seq, @@ -316,6 +551,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) } else { const struct mfc6_cache *mfc = v; const struct ipmr_mfc_iter *it = seq->private; + struct mr6_table *mrt = it->mrt; seq_printf(seq, "%pI6 %pI6 %-3hd", &mfc->mf6c_mcastgrp, &mfc->mf6c_origin, @@ -375,8 +611,12 @@ static int pim6_rcv(struct sk_buff *skb) struct ipv6hdr *encap; struct net_device *reg_dev = NULL; struct net *net = dev_net(skb->dev); - struct mr6_table *mrt = net->ipv6.mrt6; - int reg_vif_num = mrt->mroute_reg_vif_num; + struct mr6_table *mrt; + struct flowi fl = { + .iif = skb->dev->ifindex, + .mark = skb->mark, + }; + int reg_vif_num; if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) goto drop; @@ -399,6 +639,10 @@ static int pim6_rcv(struct sk_buff *skb) ntohs(encap->payload_len) + sizeof(*pim) > skb->len) goto drop; + if (ip6mr_fib_lookup(net, &fl, &mrt) < 0) + goto drop; + reg_vif_num = mrt->mroute_reg_vif_num; + read_lock(&mrt_lock); if (reg_vif_num >= 0) reg_dev = mrt->vif6_table[reg_vif_num].dev; @@ -438,7 +682,17 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { struct net *net = dev_net(dev); - struct mr6_table *mrt = net->ipv6.mrt6; + struct mr6_table *mrt; + struct flowi fl = { + .oif = dev->ifindex, + .iif = skb->skb_iif, + .mark = skb->mark, + }; + int err; + + err = ip6mr_fib_lookup(net, &fl, &mrt); + if (err < 0) + return err; read_lock(&mrt_lock); dev->stats.tx_bytes += skb->len; @@ -463,11 +717,17 @@ static void reg_vif_setup(struct net_device *dev) dev->features |= NETIF_F_NETNS_LOCAL; } -static struct net_device *ip6mr_reg_vif(struct net *net) +static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt) { struct net_device *dev; + char name[IFNAMSIZ]; + + if (mrt->id == RT6_TABLE_DFLT) + sprintf(name, "pim6reg"); + else + sprintf(name, "pim6reg%u", mrt->id); - dev = alloc_netdev(0, "pim6reg", reg_vif_setup); + dev = alloc_netdev(0, name, reg_vif_setup); if (dev == NULL) return NULL; @@ -665,7 +925,7 @@ static int mif6_add(struct net *net, struct mr6_table *mrt, */ if (mrt->mroute_reg_vif_num >= 0) return -EADDRINUSE; - dev = ip6mr_reg_vif(net); + dev = ip6mr_reg_vif(net, mrt); if (!dev) return -ENOBUFS; err = dev_set_allmulti(dev, 1); @@ -995,7 +1255,7 @@ static int ip6mr_device_event(struct notifier_block *this, { struct net_device *dev = ptr; struct net *net = dev_net(dev); - struct mr6_table *mrt = net->ipv6.mrt6; + struct mr6_table *mrt; struct mif_device *v; int ct; LIST_HEAD(list); @@ -1003,10 +1263,12 @@ static 
int ip6mr_device_event(struct notifier_block *this, if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; - v = &mrt->vif6_table[0]; - for (ct = 0; ct < mrt->maxvif; ct++, v++) { - if (v->dev == dev) - mif6_delete(mrt, ct, &list); + ip6mr_for_each_table(mrt, net) { + v = &mrt->vif6_table[0]; + for (ct = 0; ct < mrt->maxvif; ct++, v++) { + if (v->dev == dev) + mif6_delete(mrt, ct, &list); + } } unregister_netdevice_many(&list); @@ -1023,29 +1285,11 @@ static struct notifier_block ip6_mr_notifier = { static int __net_init ip6mr_net_init(struct net *net) { - struct mr6_table *mrt; - unsigned int i; - int err = 0; + int err; - mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); - if (mrt == NULL) { - err = -ENOMEM; + err = ip6mr_rules_init(net); + if (err < 0) goto fail; - } - - write_pnet(&mrt->net, net); - - for (i = 0; i < MFC6_LINES; i++) - INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]); - - INIT_LIST_HEAD(&mrt->mfc6_unres_queue); - - setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, - (unsigned long)mrt); - -#ifdef CONFIG_IPV6_PIMSM_V2 - mrt->mroute_reg_vif_num = -1; -#endif #ifdef CONFIG_PROC_FS err = -ENOMEM; @@ -1055,14 +1299,13 @@ static int __net_init ip6mr_net_init(struct net *net) goto proc_cache_fail; #endif - net->ipv6.mrt6 = mrt; return 0; #ifdef CONFIG_PROC_FS proc_cache_fail: proc_net_remove(net, "ip6_mr_vif"); proc_vif_fail: - kfree(mrt); + ip6mr_rules_exit(net); #endif fail: return err; @@ -1070,15 +1313,11 @@ fail: static void __net_exit ip6mr_net_exit(struct net *net) { - struct mr6_table *mrt = net->ipv6.mrt6; - #ifdef CONFIG_PROC_FS proc_net_remove(net, "ip6_mr_cache"); proc_net_remove(net, "ip6_mr_vif"); #endif - del_timer(&mrt->ipmr_expire_timer); - mroute_clean_tables(mrt); - kfree(mrt); + ip6mr_rules_exit(net); } static struct pernet_operations ip6mr_net_ops = { @@ -1279,28 +1518,39 @@ static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk) int ip6mr_sk_done(struct sock *sk) { - int err = 0; + int err = -EACCES; struct net *net = sock_net(sk); - struct mr6_table *mrt = net->ipv6.mrt6; + struct mr6_table *mrt; rtnl_lock(); - if (sk == mrt->mroute6_sk) { - write_lock_bh(&mrt_lock); - mrt->mroute6_sk = NULL; - net->ipv6.devconf_all->mc_forwarding--; - write_unlock_bh(&mrt_lock); + ip6mr_for_each_table(mrt, net) { + if (sk == mrt->mroute6_sk) { + write_lock_bh(&mrt_lock); + mrt->mroute6_sk = NULL; + net->ipv6.devconf_all->mc_forwarding--; + write_unlock_bh(&mrt_lock); - mroute_clean_tables(mrt); - } else - err = -EACCES; + mroute_clean_tables(mrt); + err = 0; + break; + } + } rtnl_unlock(); return err; } -struct sock *mroute6_socket(struct net *net) +struct sock *mroute6_socket(struct net *net, struct sk_buff *skb) { - struct mr6_table *mrt = net->ipv6.mrt6; + struct mr6_table *mrt; + struct flowi fl = { + .iif = skb->skb_iif, + .oif = skb->dev->ifindex, + .mark = skb->mark, + }; + + if (ip6mr_fib_lookup(net, &fl, &mrt) < 0) + return NULL; return mrt->mroute6_sk; } @@ -1319,7 +1569,11 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns struct mf6cctl mfc; mifi_t mifi; struct net *net = sock_net(sk); - struct mr6_table *mrt = net->ipv6.mrt6; + struct mr6_table *mrt; + + mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? 
: RT6_TABLE_DFLT); + if (mrt == NULL) + return -ENOENT; if (optname != MRT6_INIT) { if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN)) @@ -1408,6 +1662,27 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns return ret; } +#endif +#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES + case MRT6_TABLE: + { + u32 v; + + if (optlen != sizeof(u32)) + return -EINVAL; + if (get_user(v, (u32 __user *)optval)) + return -EFAULT; + if (sk == mrt->mroute6_sk) + return -EBUSY; + + rtnl_lock(); + ret = 0; + if (!ip6mr_new_table(net, v)) + ret = -ENOMEM; + raw6_sk(sk)->ip6mr_table = v; + rtnl_unlock(); + return ret; + } #endif /* * Spurious command, or MRT6_VERSION which you cannot @@ -1428,7 +1703,11 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int olr; int val; struct net *net = sock_net(sk); - struct mr6_table *mrt = net->ipv6.mrt6; + struct mr6_table *mrt; + + mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); + if (mrt == NULL) + return -ENOENT; switch (optname) { case MRT6_VERSION: @@ -1471,7 +1750,11 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) struct mif_device *vif; struct mfc6_cache *c; struct net *net = sock_net(sk); - struct mr6_table *mrt = net->ipv6.mrt6; + struct mr6_table *mrt; + + mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); + if (mrt == NULL) + return -ENOENT; switch (cmd) { case SIOCGETMIFCNT_IN6: @@ -1683,7 +1966,16 @@ int ip6_mr_input(struct sk_buff *skb) { struct mfc6_cache *cache; struct net *net = dev_net(skb->dev); - struct mr6_table *mrt = net->ipv6.mrt6; + struct mr6_table *mrt; + struct flowi fl = { + .iif = skb->dev->ifindex, + .mark = skb->mark, + }; + int err; + + err = ip6mr_fib_lookup(net, &fl, &mrt); + if (err < 0) + return err; read_lock(&mrt_lock); cache = ip6mr_cache_find(mrt, @@ -1758,10 +2050,14 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, int nowait) { int err; - struct mr6_table *mrt = net->ipv6.mrt6; + struct mr6_table *mrt; struct mfc6_cache *cache; struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); + mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); + if (mrt == NULL) + return -ENOENT; + read_lock(&mrt_lock); cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); -- cgit v1.2.2 From 5b285cac3570a935aaa28312c1ea28f9e01c5452 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 11 May 2010 14:40:56 +0200 Subject: ipv6: ip6mr: add support for dumping routing tables over netlink The ip6mr /proc interface (ip6_mr_cache) can't be extended to dump routes from any tables but the main table in a backwards compatible fashion since the output format ends in a variable amount of output interfaces. Introduce a new netlink interface to dump multicast routes from all tables, similar to the netlink interface for regular routes. 
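
For illustration only, a minimal userspace dump request could look roughly like the sketch below. It is not part of this patch; it assumes RTNL_FAMILY_IP6MR and RTM_GETROUTE as exported through <linux/rtnetlink.h>, and it keeps error handling to a minimum:

/*
 * Sketch: ask rtnetlink for a dump of the IPv6 multicast routing tables.
 * Assumes RTNL_FAMILY_IP6MR is available in <linux/rtnetlink.h>.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

int main(void)
{
	struct {
		struct nlmsghdr nlh;
		struct rtmsg rtm;
	} req;
	char buf[16384];
	struct nlmsghdr *nlh;
	int fd, len, done = 0;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (fd < 0)
		return 1;

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len   = NLMSG_LENGTH(sizeof(struct rtmsg));
	req.nlh.nlmsg_type  = RTM_GETROUTE;
	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	req.rtm.rtm_family  = RTNL_FAMILY_IP6MR;	/* selects the ip6mr dump handler */

	if (send(fd, &req, req.nlh.nlmsg_len, 0) < 0)
		return 1;

	while (!done && (len = recv(fd, buf, sizeof(buf), 0)) > 0) {
		for (nlh = (struct nlmsghdr *)buf; NLMSG_OK(nlh, len);
		     nlh = NLMSG_NEXT(nlh, len)) {
			if (nlh->nlmsg_type == NLMSG_DONE) {
				done = 1;
				break;
			}
			/* Each RTM_NEWROUTE reply carries RTA_SRC (origin),
			 * RTA_DST (group), RTA_TABLE and an RTA_MULTIPATH
			 * nexthop list of output mifs. */
			if (nlh->nlmsg_type == RTM_NEWROUTE)
				printf("mfc entry, %u bytes\n", nlh->nlmsg_len);
		}
	}
	close(fd);
	return 0;
}

On the kernel side a single dump walks every mr6_table and every mfc6_cache_array chain, with cb->args[] carrying the (table, hash, entry) position between recv() calls, so one request covers all tables.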
Signed-off-by: Patrick McHardy --- net/ipv6/ip6mr.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 89 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index c2920a1a6db3..163850e22b11 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -112,8 +112,10 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, struct sk_buff *skb, struct mfc6_cache *cache); static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, mifi_t mifi, int assert); -static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, - struct mfc6_cache *c, struct rtmsg *rtm); +static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, + struct mfc6_cache *c, struct rtmsg *rtm); +static int ip6mr_rtm_dumproute(struct sk_buff *skb, + struct netlink_callback *cb); static void mroute_clean_tables(struct mr6_table *mrt); static void ipmr_expire_process(unsigned long arg); @@ -1038,7 +1040,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt, int err; struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); - if (ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { + if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh; } else { nlh->nlmsg_type = NLMSG_ERROR; @@ -1350,6 +1352,7 @@ int __init ip6_mr_init(void) goto add_proto_fail; } #endif + rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL, ip6mr_rtm_dumproute); return 0; #ifdef CONFIG_IPV6_PIMSM_V2 add_proto_fail: @@ -2007,9 +2010,8 @@ int ip6_mr_input(struct sk_buff *skb) } -static int -ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, - struct mfc6_cache *c, struct rtmsg *rtm) +static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, + struct mfc6_cache *c, struct rtmsg *rtm) { int ct; struct rtnexthop *nhp; @@ -2111,8 +2113,88 @@ int ip6mr_get_route(struct net *net, if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) cache->mfc_flags |= MFC_NOTIFY; - err = ip6mr_fill_mroute(mrt, skb, cache, rtm); + err = __ip6mr_fill_mroute(mrt, skb, cache, rtm); read_unlock(&mrt_lock); return err; } +static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, + u32 pid, u32 seq, struct mfc6_cache *c) +{ + struct nlmsghdr *nlh; + struct rtmsg *rtm; + + nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); + if (nlh == NULL) + return -EMSGSIZE; + + rtm = nlmsg_data(nlh); + rtm->rtm_family = RTNL_FAMILY_IPMR; + rtm->rtm_dst_len = 128; + rtm->rtm_src_len = 128; + rtm->rtm_tos = 0; + rtm->rtm_table = mrt->id; + NLA_PUT_U32(skb, RTA_TABLE, mrt->id); + rtm->rtm_scope = RT_SCOPE_UNIVERSE; + rtm->rtm_protocol = RTPROT_UNSPEC; + rtm->rtm_flags = 0; + + NLA_PUT(skb, RTA_SRC, 16, &c->mf6c_origin); + NLA_PUT(skb, RTA_DST, 16, &c->mf6c_mcastgrp); + + if (__ip6mr_fill_mroute(mrt, skb, c, rtm) < 0) + goto nla_put_failure; + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct mr6_table *mrt; + struct mfc6_cache *mfc; + unsigned int t = 0, s_t; + unsigned int h = 0, s_h; + unsigned int e = 0, s_e; + + s_t = cb->args[0]; + s_h = cb->args[1]; + s_e = cb->args[2]; + + read_lock(&mrt_lock); + ip6mr_for_each_table(mrt, net) { + if (t < s_t) + goto next_table; + if (t > s_t) + s_h = 0; + for (h = s_h; h < MFC6_LINES; h++) { + 
list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) { + if (e < s_e) + goto next_entry; + if (ip6mr_fill_mroute(mrt, skb, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + mfc) < 0) + goto done; +next_entry: + e++; + } + e = s_e = 0; + } + s_h = 0; +next_table: + t++; + } +done: + read_unlock(&mrt_lock); + + cb->args[2] = e; + cb->args[1] = h; + cb->args[0] = t; + + return skb->len; +} -- cgit v1.2.2 From de74c16996287250f0d947663127f80c6beebd3c Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sun, 5 Jul 2009 18:26:37 +0200 Subject: netfilter: xtables: combine struct xt_match_param and xt_target_param The structures carried - besides match/target - almost the same data. It is possible to combine them, as extensions are evaluated serially, and so, the callers end up a little smaller. text data bss filename -15318 740 104 net/ipv4/netfilter/ip_tables.o +15286 740 104 net/ipv4/netfilter/ip_tables.o -15333 540 152 net/ipv6/netfilter/ip6_tables.o +15269 540 152 net/ipv6/netfilter/ip6_tables.o Signed-off-by: Jan Engelhardt --- net/bridge/netfilter/ebtables.c | 30 +++++++++++++++--------------- net/ipv4/netfilter/arp_tables.c | 16 ++++++++-------- net/ipv4/netfilter/ip_tables.c | 32 +++++++++++++++----------------- net/ipv6/netfilter/ip6_tables.c | 27 +++++++++++++-------------- 4 files changed, 51 insertions(+), 54 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 1d8c2c0a7470..290d43541d46 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -86,7 +86,7 @@ static struct xt_target ebt_standard_target = { static inline int ebt_do_watcher(const struct ebt_entry_watcher *w, struct sk_buff *skb, - struct xt_target_param *par) + struct xt_action_param *par) { par->target = w->u.watcher; par->targinfo = w->data; @@ -95,8 +95,9 @@ ebt_do_watcher(const struct ebt_entry_watcher *w, struct sk_buff *skb, return 0; } -static inline int ebt_do_match (struct ebt_entry_match *m, - const struct sk_buff *skb, struct xt_match_param *par) +static inline int +ebt_do_match(struct ebt_entry_match *m, const struct sk_buff *skb, + struct xt_action_param *par) { par->match = m->u.match; par->matchinfo = m->data; @@ -186,14 +187,13 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, const char *base; const struct ebt_table_info *private; bool hotdrop = false; - struct xt_match_param mtpar; - struct xt_target_param tgpar; + struct xt_action_param acpar; - mtpar.family = tgpar.family = NFPROTO_BRIDGE; - mtpar.in = tgpar.in = in; - mtpar.out = tgpar.out = out; - mtpar.hotdrop = &hotdrop; - mtpar.hooknum = tgpar.hooknum = hook; + acpar.family = NFPROTO_BRIDGE; + acpar.in = in; + acpar.out = out; + acpar.hotdrop = &hotdrop; + acpar.hooknum = hook; read_lock_bh(&table->lock); private = table->private; @@ -214,7 +214,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, if (ebt_basic_match(point, eth_hdr(skb), in, out)) goto letscontinue; - if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &mtpar) != 0) + if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &acpar) != 0) goto letscontinue; if (hotdrop) { read_unlock_bh(&table->lock); @@ -227,7 +227,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, /* these should only watch: not modify, nor tell us what to do with the packet */ - EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, &tgpar); + EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, &acpar); t = (struct ebt_entry_target *) (((char *)point) + point->target_offset); @@ 
-235,9 +235,9 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, if (!t->u.target->target) verdict = ((struct ebt_standard_target *)t)->verdict; else { - tgpar.target = t->u.target; - tgpar.targinfo = t->data; - verdict = t->u.target->target(skb, &tgpar); + acpar.target = t->u.target; + acpar.targinfo = t->data; + verdict = t->u.target->target(skb, &acpar); } if (verdict == EBT_ACCEPT) { read_unlock_bh(&table->lock); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 07a699059390..73d924b88f89 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -265,7 +265,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, const char *indev, *outdev; void *table_base; const struct xt_table_info *private; - struct xt_target_param tgpar; + struct xt_action_param acpar; if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) return NF_DROP; @@ -280,10 +280,10 @@ unsigned int arpt_do_table(struct sk_buff *skb, e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); - tgpar.in = in; - tgpar.out = out; - tgpar.hooknum = hook; - tgpar.family = NFPROTO_ARP; + acpar.in = in; + acpar.out = out; + acpar.hooknum = hook; + acpar.family = NFPROTO_ARP; arp = arp_hdr(skb); do { @@ -333,9 +333,9 @@ unsigned int arpt_do_table(struct sk_buff *skb, /* Targets which reenter must return * abs. verdicts */ - tgpar.target = t->u.kernel.target; - tgpar.targinfo = t->data; - verdict = t->u.kernel.target->target(skb, &tgpar); + acpar.target = t->u.kernel.target; + acpar.targinfo = t->data; + verdict = t->u.kernel.target->target(skb, &acpar); /* Target might have changed stuff. */ arp = arp_hdr(skb); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 265cedf88660..e1a53c2da032 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -316,8 +316,7 @@ ipt_do_table(struct sk_buff *skb, struct ipt_entry *e, **jumpstack; unsigned int *stackptr, origptr, cpu; const struct xt_table_info *private; - struct xt_match_param mtpar; - struct xt_target_param tgpar; + struct xt_action_param acpar; /* Initialization */ ip = ip_hdr(skb); @@ -329,13 +328,13 @@ ipt_do_table(struct sk_buff *skb, * things we don't know, ie. tcp syn flag or ports). If the * rule is also a fragment-specific rule, non-fragments won't * match it. 
*/ - mtpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; - mtpar.thoff = ip_hdrlen(skb); - mtpar.hotdrop = &hotdrop; - mtpar.in = tgpar.in = in; - mtpar.out = tgpar.out = out; - mtpar.family = tgpar.family = NFPROTO_IPV4; - mtpar.hooknum = tgpar.hooknum = hook; + acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; + acpar.thoff = ip_hdrlen(skb); + acpar.hotdrop = &hotdrop; + acpar.in = in; + acpar.out = out; + acpar.family = NFPROTO_IPV4; + acpar.hooknum = hook; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); xt_info_rdlock_bh(); @@ -358,16 +357,16 @@ ipt_do_table(struct sk_buff *skb, IP_NF_ASSERT(e); if (!ip_packet_match(ip, indev, outdev, - &e->ip, mtpar.fragoff)) { + &e->ip, acpar.fragoff)) { no_match: e = ipt_next_entry(e); continue; } xt_ematch_foreach(ematch, e) { - mtpar.match = ematch->u.kernel.match; - mtpar.matchinfo = ematch->data; - if (!mtpar.match->match(skb, &mtpar)) + acpar.match = ematch->u.kernel.match; + acpar.matchinfo = ematch->data; + if (!acpar.match->match(skb, &acpar)) goto no_match; } @@ -422,11 +421,10 @@ ipt_do_table(struct sk_buff *skb, continue; } - tgpar.target = t->u.kernel.target; - tgpar.targinfo = t->data; + acpar.target = t->u.kernel.target; + acpar.targinfo = t->data; - - verdict = t->u.kernel.target->target(skb, &tgpar); + verdict = t->u.kernel.target->target(skb, &acpar); /* Target might have changed stuff. */ ip = ip_hdr(skb); if (verdict == IPT_CONTINUE) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index f8ac4a0b5899..076308c1acd7 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -345,8 +345,7 @@ ip6t_do_table(struct sk_buff *skb, struct ip6t_entry *e, **jumpstack; unsigned int *stackptr, origptr, cpu; const struct xt_table_info *private; - struct xt_match_param mtpar; - struct xt_target_param tgpar; + struct xt_action_param acpar; /* Initialization */ indev = in ? in->name : nulldevname; @@ -357,11 +356,11 @@ ip6t_do_table(struct sk_buff *skb, * things we don't know, ie. tcp syn flag or ports). If the * rule is also a fragment-specific rule, non-fragments won't * match it. 
*/ - mtpar.hotdrop = &hotdrop; - mtpar.in = tgpar.in = in; - mtpar.out = tgpar.out = out; - mtpar.family = tgpar.family = NFPROTO_IPV6; - mtpar.hooknum = tgpar.hooknum = hook; + acpar.hotdrop = &hotdrop; + acpar.in = in; + acpar.out = out; + acpar.family = NFPROTO_IPV6; + acpar.hooknum = hook; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); @@ -381,16 +380,16 @@ ip6t_do_table(struct sk_buff *skb, IP_NF_ASSERT(e); if (!ip6_packet_match(skb, indev, outdev, &e->ipv6, - &mtpar.thoff, &mtpar.fragoff, &hotdrop)) { + &acpar.thoff, &acpar.fragoff, &hotdrop)) { no_match: e = ip6t_next_entry(e); continue; } xt_ematch_foreach(ematch, e) { - mtpar.match = ematch->u.kernel.match; - mtpar.matchinfo = ematch->data; - if (!mtpar.match->match(skb, &mtpar)) + acpar.match = ematch->u.kernel.match; + acpar.matchinfo = ematch->data; + if (!acpar.match->match(skb, &acpar)) goto no_match; } @@ -439,10 +438,10 @@ ip6t_do_table(struct sk_buff *skb, continue; } - tgpar.target = t->u.kernel.target; - tgpar.targinfo = t->data; + acpar.target = t->u.kernel.target; + acpar.targinfo = t->data; - verdict = t->u.kernel.target->target(skb, &tgpar); + verdict = t->u.kernel.target->target(skb, &acpar); if (verdict == IP6T_CONTINUE) e = ip6t_next_entry(e); else -- cgit v1.2.2 From 4b560b447df83368df44bd3712c0c39b1d79ba04 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sun, 5 Jul 2009 19:43:26 +0200 Subject: netfilter: xtables: substitute temporary defines by final name Signed-off-by: Jan Engelhardt --- net/bridge/netfilter/ebt_802_3.c | 2 +- net/bridge/netfilter/ebt_among.c | 2 +- net/bridge/netfilter/ebt_arp.c | 2 +- net/bridge/netfilter/ebt_arpreply.c | 2 +- net/bridge/netfilter/ebt_dnat.c | 2 +- net/bridge/netfilter/ebt_ip.c | 2 +- net/bridge/netfilter/ebt_ip6.c | 2 +- net/bridge/netfilter/ebt_limit.c | 2 +- net/bridge/netfilter/ebt_log.c | 2 +- net/bridge/netfilter/ebt_mark.c | 2 +- net/bridge/netfilter/ebt_mark_m.c | 2 +- net/bridge/netfilter/ebt_nflog.c | 2 +- net/bridge/netfilter/ebt_pkttype.c | 2 +- net/bridge/netfilter/ebt_redirect.c | 2 +- net/bridge/netfilter/ebt_snat.c | 2 +- net/bridge/netfilter/ebt_stp.c | 2 +- net/bridge/netfilter/ebt_ulog.c | 2 +- net/bridge/netfilter/ebt_vlan.c | 2 +- net/ipv4/netfilter/arp_tables.c | 2 +- net/ipv4/netfilter/arpt_mangle.c | 2 +- net/ipv4/netfilter/ip_tables.c | 4 ++-- net/ipv4/netfilter/ipt_CLUSTERIP.c | 2 +- net/ipv4/netfilter/ipt_ECN.c | 2 +- net/ipv4/netfilter/ipt_LOG.c | 2 +- net/ipv4/netfilter/ipt_MASQUERADE.c | 2 +- net/ipv4/netfilter/ipt_NETMAP.c | 2 +- net/ipv4/netfilter/ipt_REDIRECT.c | 2 +- net/ipv4/netfilter/ipt_REJECT.c | 2 +- net/ipv4/netfilter/ipt_ULOG.c | 2 +- net/ipv4/netfilter/ipt_addrtype.c | 4 ++-- net/ipv4/netfilter/ipt_ah.c | 2 +- net/ipv4/netfilter/ipt_ecn.c | 3 ++- net/ipv4/netfilter/nf_nat_rule.c | 4 ++-- net/ipv6/netfilter/ip6_tables.c | 4 ++-- net/ipv6/netfilter/ip6t_LOG.c | 2 +- net/ipv6/netfilter/ip6t_REJECT.c | 2 +- net/ipv6/netfilter/ip6t_ah.c | 3 ++- net/ipv6/netfilter/ip6t_eui64.c | 2 +- net/ipv6/netfilter/ip6t_frag.c | 2 +- net/ipv6/netfilter/ip6t_hbh.c | 2 +- net/ipv6/netfilter/ip6t_ipv6header.c | 2 +- net/ipv6/netfilter/ip6t_mh.c | 3 ++- net/ipv6/netfilter/ip6t_rt.c | 3 ++- net/netfilter/xt_CLASSIFY.c | 2 +- net/netfilter/xt_CONNSECMARK.c | 2 +- net/netfilter/xt_CT.c | 2 +- net/netfilter/xt_DSCP.c | 8 ++++---- net/netfilter/xt_HL.c | 4 ++-- net/netfilter/xt_LED.c | 2 +- net/netfilter/xt_NFLOG.c | 2 +- net/netfilter/xt_NFQUEUE.c | 4 ++-- net/netfilter/xt_NOTRACK.c | 2 +- net/netfilter/xt_RATEEST.c | 2 +- net/netfilter/xt_SECMARK.c | 2 +- 
net/netfilter/xt_TCPMSS.c | 4 ++-- net/netfilter/xt_TCPOPTSTRIP.c | 4 ++-- net/netfilter/xt_TEE.c | 4 ++-- net/netfilter/xt_TPROXY.c | 2 +- net/netfilter/xt_TRACE.c | 2 +- net/netfilter/xt_cluster.c | 2 +- net/netfilter/xt_comment.c | 2 +- net/netfilter/xt_connbytes.c | 2 +- net/netfilter/xt_connlimit.c | 2 +- net/netfilter/xt_connmark.c | 4 ++-- net/netfilter/xt_conntrack.c | 6 +++--- net/netfilter/xt_dccp.c | 2 +- net/netfilter/xt_dscp.c | 7 ++++--- net/netfilter/xt_esp.c | 3 ++- net/netfilter/xt_hashlimit.c | 2 +- net/netfilter/xt_helper.c | 2 +- net/netfilter/xt_hl.c | 6 ++++-- net/netfilter/xt_iprange.c | 4 ++-- net/netfilter/xt_length.c | 4 ++-- net/netfilter/xt_limit.c | 2 +- net/netfilter/xt_mac.c | 3 ++- net/netfilter/xt_mark.c | 4 ++-- net/netfilter/xt_multiport.c | 2 +- net/netfilter/xt_osf.c | 4 ++-- net/netfilter/xt_owner.c | 2 +- net/netfilter/xt_physdev.c | 2 +- net/netfilter/xt_pkttype.c | 2 +- net/netfilter/xt_policy.c | 2 +- net/netfilter/xt_quota.c | 2 +- net/netfilter/xt_rateest.c | 2 +- net/netfilter/xt_realm.c | 2 +- net/netfilter/xt_recent.c | 2 +- net/netfilter/xt_sctp.c | 2 +- net/netfilter/xt_socket.c | 6 +++--- net/netfilter/xt_state.c | 2 +- net/netfilter/xt_statistic.c | 2 +- net/netfilter/xt_string.c | 2 +- net/netfilter/xt_tcpmss.c | 2 +- net/netfilter/xt_tcpudp.c | 6 ++++-- net/netfilter/xt_time.c | 2 +- net/netfilter/xt_u32.c | 3 ++- net/sched/act_ipt.c | 2 +- 96 files changed, 133 insertions(+), 121 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c index f7de8dbc3422..4b0df00c82ec 100644 --- a/net/bridge/netfilter/ebt_802_3.c +++ b/net/bridge/netfilter/ebt_802_3.c @@ -13,7 +13,7 @@ #include static bool -ebt_802_3_mt(const struct sk_buff *skb, const struct xt_match_param *par) +ebt_802_3_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_802_3_info *info = par->matchinfo; const struct ebt_802_3_hdr *hdr = ebt_802_3_hdr(skb); diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c index 20068e03fa81..c04f9461f734 100644 --- a/net/bridge/netfilter/ebt_among.c +++ b/net/bridge/netfilter/ebt_among.c @@ -129,7 +129,7 @@ static int get_ip_src(const struct sk_buff *skb, __be32 *addr) } static bool -ebt_among_mt(const struct sk_buff *skb, const struct xt_match_param *par) +ebt_among_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_among_info *info = par->matchinfo; const char *dmac, *smac; diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c index 952150cd5e7d..6203f4dea2e2 100644 --- a/net/bridge/netfilter/ebt_arp.c +++ b/net/bridge/netfilter/ebt_arp.c @@ -16,7 +16,7 @@ #include static bool -ebt_arp_mt(const struct sk_buff *skb, const struct xt_match_param *par) +ebt_arp_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_arp_info *info = par->matchinfo; const struct arphdr *ah; diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c index 4581adb27583..070cf134a22f 100644 --- a/net/bridge/netfilter/ebt_arpreply.c +++ b/net/bridge/netfilter/ebt_arpreply.c @@ -16,7 +16,7 @@ #include static unsigned int -ebt_arpreply_tg(struct sk_buff *skb, const struct xt_target_param *par) +ebt_arpreply_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_arpreply_info *info = par->targinfo; const __be32 *siptr, *diptr; diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c index 
59d5b7c8a557..c59f7bfae6e2 100644 --- a/net/bridge/netfilter/ebt_dnat.c +++ b/net/bridge/netfilter/ebt_dnat.c @@ -15,7 +15,7 @@ #include static unsigned int -ebt_dnat_tg(struct sk_buff *skb, const struct xt_target_param *par) +ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nat_info *info = par->targinfo; diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c index a1c76c7e5219..a0cde7442b55 100644 --- a/net/bridge/netfilter/ebt_ip.c +++ b/net/bridge/netfilter/ebt_ip.c @@ -25,7 +25,7 @@ struct tcpudphdr { }; static bool -ebt_ip_mt(const struct sk_buff *skb, const struct xt_match_param *par) +ebt_ip_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_ip_info *info = par->matchinfo; const struct iphdr *ih; diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c index 33f8413f05ad..c451dc2ff822 100644 --- a/net/bridge/netfilter/ebt_ip6.c +++ b/net/bridge/netfilter/ebt_ip6.c @@ -28,7 +28,7 @@ struct tcpudphdr { }; static bool -ebt_ip6_mt(const struct sk_buff *skb, const struct xt_match_param *par) +ebt_ip6_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_ip6_info *info = par->matchinfo; const struct ipv6hdr *ih6; diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c index 4b0e2e53fa57..760923f08067 100644 --- a/net/bridge/netfilter/ebt_limit.c +++ b/net/bridge/netfilter/ebt_limit.c @@ -32,7 +32,7 @@ static DEFINE_SPINLOCK(limit_lock); #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ) static bool -ebt_limit_mt(const struct sk_buff *skb, const struct xt_match_param *par) +ebt_limit_mt(const struct sk_buff *skb, const struct xt_action_param *par) { struct ebt_limit_info *info = (void *)par->matchinfo; unsigned long now = jiffies; diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index c46024156539..6e5a8bb9b940 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -171,7 +171,7 @@ out: } static unsigned int -ebt_log_tg(struct sk_buff *skb, const struct xt_target_param *par) +ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_log_info *info = par->targinfo; struct nf_loginfo li; diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c index 126e536ff8f4..66697cbd0a8b 100644 --- a/net/bridge/netfilter/ebt_mark.c +++ b/net/bridge/netfilter/ebt_mark.c @@ -19,7 +19,7 @@ #include static unsigned int -ebt_mark_tg(struct sk_buff *skb, const struct xt_target_param *par) +ebt_mark_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_mark_t_info *info = par->targinfo; int action = info->target & -16; diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c index e4366c0a1a43..de901634fa31 100644 --- a/net/bridge/netfilter/ebt_mark_m.c +++ b/net/bridge/netfilter/ebt_mark_m.c @@ -13,7 +13,7 @@ #include static bool -ebt_mark_mt(const struct sk_buff *skb, const struct xt_match_param *par) +ebt_mark_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_mark_m_info *info = par->matchinfo; diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c index 22e2ad5f23e8..5be68bbcc341 100644 --- a/net/bridge/netfilter/ebt_nflog.c +++ b/net/bridge/netfilter/ebt_nflog.c @@ -20,7 +20,7 @@ #include static unsigned int -ebt_nflog_tg(struct sk_buff *skb, const struct xt_target_param *par) +ebt_nflog_tg(struct sk_buff *skb, const struct 
xt_action_param *par) { const struct ebt_nflog_info *info = par->targinfo; struct nf_loginfo li; diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c index f34bcc3197bd..7ba67c4b677a 100644 --- a/net/bridge/netfilter/ebt_pkttype.c +++ b/net/bridge/netfilter/ebt_pkttype.c @@ -13,7 +13,7 @@ #include static bool -ebt_pkttype_mt(const struct sk_buff *skb, const struct xt_match_param *par) +ebt_pkttype_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_pkttype_info *info = par->matchinfo; diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c index a6044a6f2383..9e19166ba453 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -16,7 +16,7 @@ #include static unsigned int -ebt_redirect_tg(struct sk_buff *skb, const struct xt_target_param *par) +ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_redirect_info *info = par->targinfo; diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c index 79caca34ae2b..f8f0bd1a1d51 100644 --- a/net/bridge/netfilter/ebt_snat.c +++ b/net/bridge/netfilter/ebt_snat.c @@ -17,7 +17,7 @@ #include static unsigned int -ebt_snat_tg(struct sk_buff *skb, const struct xt_target_param *par) +ebt_snat_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nat_info *info = par->targinfo; diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c index 02f28fdda393..3cd6070a1137 100644 --- a/net/bridge/netfilter/ebt_stp.c +++ b/net/bridge/netfilter/ebt_stp.c @@ -120,7 +120,7 @@ static bool ebt_filter_config(const struct ebt_stp_info *info, } static bool -ebt_stp_mt(const struct sk_buff *skb, const struct xt_match_param *par) +ebt_stp_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_stp_info *info = par->matchinfo; const struct stp_header *sp; diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 852f37c27659..ae3c7cef1484 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -243,7 +243,7 @@ static void ebt_log_packet(u_int8_t pf, unsigned int hooknum, } static unsigned int -ebt_ulog_tg(struct sk_buff *skb, const struct xt_target_param *par) +ebt_ulog_tg(struct sk_buff *skb, const struct xt_action_param *par) { ebt_ulog_packet(par->hooknum, skb, par->in, par->out, par->targinfo, NULL); diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c index bf8ae5c7a0c5..e4ab62533c74 100644 --- a/net/bridge/netfilter/ebt_vlan.c +++ b/net/bridge/netfilter/ebt_vlan.c @@ -36,7 +36,7 @@ MODULE_LICENSE("GPL"); #define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return false; } static bool -ebt_vlan_mt(const struct sk_buff *skb, const struct xt_match_param *par) +ebt_vlan_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_vlan_info *info = par->matchinfo; const struct vlan_hdr *fp; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 73d924b88f89..9e7d089f168a 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -224,7 +224,7 @@ static inline int arp_checkentry(const struct arpt_arp *arp) } static unsigned int -arpt_error(struct sk_buff *skb, const struct xt_target_param *par) +arpt_error(struct sk_buff *skb, const struct xt_action_param *par) { if (net_ratelimit()) printk("arp_tables: 
error: '%s'\n", diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index 4b51a027f307..e1be7dd1171b 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -9,7 +9,7 @@ MODULE_AUTHOR("Bart De Schuymer "); MODULE_DESCRIPTION("arptables arp payload mangle target"); static unsigned int -target(struct sk_buff *skb, const struct xt_target_param *par) +target(struct sk_buff *skb, const struct xt_action_param *par) { const struct arpt_mangle *mangle = par->targinfo; const struct arphdr *arp; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index e1a53c2da032..3ab1b81e799b 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -165,7 +165,7 @@ ip_checkentry(const struct ipt_ip *ip) } static unsigned int -ipt_error(struct sk_buff *skb, const struct xt_target_param *par) +ipt_error(struct sk_buff *skb, const struct xt_action_param *par) { if (net_ratelimit()) pr_info("error: `%s'\n", (const char *)par->targinfo); @@ -2138,7 +2138,7 @@ icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, } static bool -icmp_match(const struct sk_buff *skb, const struct xt_match_param *par) +icmp_match(const struct sk_buff *skb, const struct xt_action_param *par) { const struct icmphdr *ic; struct icmphdr _icmph; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 8815d458de46..f91c94b9a790 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -282,7 +282,7 @@ clusterip_responsible(const struct clusterip_config *config, u_int32_t hash) ***********************************************************************/ static unsigned int -clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par) +clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; struct nf_conn *ct; diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 563049f31aef..4bf3dc49ad1e 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -77,7 +77,7 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo) } static unsigned int -ecn_tg(struct sk_buff *skb, const struct xt_target_param *par) +ecn_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ipt_ECN_info *einfo = par->targinfo; diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 3bd35f370817..5234f4f3499a 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -425,7 +425,7 @@ ipt_log_packet(u_int8_t pf, } static unsigned int -log_tg(struct sk_buff *skb, const struct xt_target_param *par) +log_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ipt_log_info *loginfo = par->targinfo; struct nf_loginfo li; diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 02b1bc477998..d2ed9dc74ebc 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -44,7 +44,7 @@ static int masquerade_tg_check(const struct xt_tgchk_param *par) } static unsigned int -masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par) +masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) { struct nf_conn *ct; struct nf_conn_nat *nat; diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 708c7f8f7eea..f43867d1697f 100644 --- 
a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -38,7 +38,7 @@ static int netmap_tg_check(const struct xt_tgchk_param *par) } static unsigned int -netmap_tg(struct sk_buff *skb, const struct xt_target_param *par) +netmap_tg(struct sk_buff *skb, const struct xt_action_param *par) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 3cf101916523..18a0656505a0 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -42,7 +42,7 @@ static int redirect_tg_check(const struct xt_tgchk_param *par) } static unsigned int -redirect_tg(struct sk_buff *skb, const struct xt_target_param *par) +redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index a86135a28058..f5f4a888e4ec 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -136,7 +136,7 @@ static inline void send_unreach(struct sk_buff *skb_in, int code) } static unsigned int -reject_tg(struct sk_buff *skb, const struct xt_target_param *par) +reject_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ipt_reject_info *reject = par->targinfo; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 8f60749e87a3..446e0f467a17 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -276,7 +276,7 @@ alloc_failure: } static unsigned int -ulog_tg(struct sk_buff *skb, const struct xt_target_param *par) +ulog_tg(struct sk_buff *skb, const struct xt_action_param *par) { ipt_ulog_packet(par->hooknum, skb, par->in, par->out, par->targinfo, NULL); diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c index e4b8f2bf8aaa..24ec548515e4 100644 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ b/net/ipv4/netfilter/ipt_addrtype.c @@ -30,7 +30,7 @@ static inline bool match_type(struct net *net, const struct net_device *dev, } static bool -addrtype_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) +addrtype_mt_v0(const struct sk_buff *skb, const struct xt_action_param *par) { struct net *net = dev_net(par->in ? par->in : par->out); const struct ipt_addrtype_info *info = par->matchinfo; @@ -48,7 +48,7 @@ addrtype_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) } static bool -addrtype_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) +addrtype_mt_v1(const struct sk_buff *skb, const struct xt_action_param *par) { struct net *net = dev_net(par->in ? 
par->in : par->out); const struct ipt_addrtype_info_v1 *info = par->matchinfo; diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index 9f9810204892..48a8293bc1d1 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c @@ -30,7 +30,7 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) return r; } -static bool ah_mt(const struct sk_buff *skb, const struct xt_match_param *par) +static bool ah_mt(const struct sk_buff *skb, const struct xt_action_param *par) { struct ip_auth_hdr _ahdr; const struct ip_auth_hdr *ah; diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index 32e24100d8d1..744d13ee296e 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -67,7 +67,8 @@ static inline bool match_tcp(const struct sk_buff *skb, return true; } -static bool ecn_mt(const struct sk_buff *skb, const struct xt_match_param *par) +static bool ecn_mt(const struct sk_buff *skb, + const struct xt_action_param *par) { const struct ipt_ecn_info *info = par->matchinfo; diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index b48a0fc3d9ed..98ed78281aee 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -39,7 +39,7 @@ static const struct xt_table nat_table = { /* Source NAT */ static unsigned int -ipt_snat_target(struct sk_buff *skb, const struct xt_target_param *par) +ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; @@ -58,7 +58,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_target_param *par) } static unsigned int -ipt_dnat_target(struct sk_buff *skb, const struct xt_target_param *par) +ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 076308c1acd7..c3bc999a8bb6 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -197,7 +197,7 @@ ip6_checkentry(const struct ip6t_ip6 *ipv6) } static unsigned int -ip6t_error(struct sk_buff *skb, const struct xt_target_param *par) +ip6t_error(struct sk_buff *skb, const struct xt_action_param *par) { if (net_ratelimit()) pr_info("error: `%s'\n", (const char *)par->targinfo); @@ -2154,7 +2154,7 @@ icmp6_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, } static bool -icmp6_match(const struct sk_buff *skb, const struct xt_match_param *par) +icmp6_match(const struct sk_buff *skb, const struct xt_action_param *par) { const struct icmp6hdr *ic; struct icmp6hdr _icmph; diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 1f47a525f484..af4ee11f2066 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -436,7 +436,7 @@ ip6t_log_packet(u_int8_t pf, } static unsigned int -log_tg6(struct sk_buff *skb, const struct xt_target_param *par) +log_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct ip6t_log_info *loginfo = par->targinfo; struct nf_loginfo li; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index af1d6494ac39..47d227713758 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -175,7 +175,7 @@ send_unreach(struct net *net, struct sk_buff *skb_in, unsigned char code, } static unsigned int -reject_tg6(struct sk_buff *skb, const struct xt_target_param *par) +reject_tg6(struct 
sk_buff *skb, const struct xt_action_param *par) { const struct ip6t_reject_info *reject = par->targinfo; struct net *net = dev_net((par->in != NULL) ? par->in : par->out); diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index 1580693c86c1..4fe71898381d 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -36,7 +36,8 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) return r; } -static bool ah_mt6(const struct sk_buff *skb, const struct xt_match_param *par) +static bool ah_mt6(const struct sk_buff *skb, + const struct xt_action_param *par) { struct ip_auth_hdr _ah; const struct ip_auth_hdr *ah; diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c index ca287f6d2bce..2fd2be1795e0 100644 --- a/net/ipv6/netfilter/ip6t_eui64.c +++ b/net/ipv6/netfilter/ip6t_eui64.c @@ -20,7 +20,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Andras Kis-Szabo "); static bool -eui64_mt6(const struct sk_buff *skb, const struct xt_match_param *par) +eui64_mt6(const struct sk_buff *skb, const struct xt_action_param *par) { unsigned char eui64[8]; diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index a5daf0ffb4ec..8401aa82ea0b 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -35,7 +35,7 @@ id_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) } static bool -frag_mt6(const struct sk_buff *skb, const struct xt_match_param *par) +frag_mt6(const struct sk_buff *skb, const struct xt_action_param *par) { struct frag_hdr _frag; const struct frag_hdr *fh; diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index e424e7c8f824..d19d5cf47a38 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -44,7 +44,7 @@ MODULE_ALIAS("ip6t_dst"); static struct xt_match hbh_mt6_reg[] __read_mostly; static bool -hbh_mt6(const struct sk_buff *skb, const struct xt_match_param *par) +hbh_mt6(const struct sk_buff *skb, const struct xt_action_param *par) { struct ipv6_opt_hdr _optsh; const struct ipv6_opt_hdr *oh; diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index 46fbabb493fa..8e88bb8311de 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -27,7 +27,7 @@ MODULE_DESCRIPTION("Xtables: IPv6 header types match"); MODULE_AUTHOR("Andras Kis-Szabo "); static bool -ipv6header_mt6(const struct sk_buff *skb, const struct xt_match_param *par) +ipv6header_mt6(const struct sk_buff *skb, const struct xt_action_param *par) { const struct ip6t_ipv6header_info *info = par->matchinfo; unsigned int temp; diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c index c9f443e0138f..4a60788873fd 100644 --- a/net/ipv6/netfilter/ip6t_mh.c +++ b/net/ipv6/netfilter/ip6t_mh.c @@ -32,7 +32,8 @@ type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert) return (type >= min && type <= max) ^ invert; } -static bool mh_mt6(const struct sk_buff *skb, const struct xt_match_param *par) +static bool mh_mt6(const struct sk_buff *skb, + const struct xt_action_param *par) { struct ip6_mh _mh; const struct ip6_mh *mh; diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 09322720d2a6..793c27200881 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -36,7 +36,8 @@ segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) return r; } -static bool rt_mt6(const struct sk_buff *skb, const struct 
xt_match_param *par) +static bool rt_mt6(const struct sk_buff *skb, + const struct xt_action_param *par) { struct ipv6_rt_hdr _route; const struct ipv6_rt_hdr *rh; diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c index 011bc80dd2a1..c2c0e4abeb99 100644 --- a/net/netfilter/xt_CLASSIFY.c +++ b/net/netfilter/xt_CLASSIFY.c @@ -27,7 +27,7 @@ MODULE_ALIAS("ipt_CLASSIFY"); MODULE_ALIAS("ip6t_CLASSIFY"); static unsigned int -classify_tg(struct sk_buff *skb, const struct xt_target_param *par) +classify_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_classify_target_info *clinfo = par->targinfo; diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index e953e302141d..e04dc282e3bb 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -64,7 +64,7 @@ static void secmark_restore(struct sk_buff *skb) } static unsigned int -connsecmark_tg(struct sk_buff *skb, const struct xt_target_param *par) +connsecmark_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_connsecmark_target_info *info = par->targinfo; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index c8f547829bad..562bf3266e04 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -20,7 +20,7 @@ #include static unsigned int xt_ct_target(struct sk_buff *skb, - const struct xt_target_param *par) + const struct xt_action_param *par) { const struct xt_ct_target_info *info = par->targinfo; struct nf_conn *ct = info->ct; diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index 969634f293e5..0a229191e55b 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -28,7 +28,7 @@ MODULE_ALIAS("ipt_TOS"); MODULE_ALIAS("ip6t_TOS"); static unsigned int -dscp_tg(struct sk_buff *skb, const struct xt_target_param *par) +dscp_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_DSCP_info *dinfo = par->targinfo; u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; @@ -45,7 +45,7 @@ dscp_tg(struct sk_buff *skb, const struct xt_target_param *par) } static unsigned int -dscp_tg6(struct sk_buff *skb, const struct xt_target_param *par) +dscp_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_DSCP_info *dinfo = par->targinfo; u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; @@ -72,7 +72,7 @@ static int dscp_tg_check(const struct xt_tgchk_param *par) } static unsigned int -tos_tg(struct sk_buff *skb, const struct xt_target_param *par) +tos_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tos_target_info *info = par->targinfo; struct iphdr *iph = ip_hdr(skb); @@ -92,7 +92,7 @@ tos_tg(struct sk_buff *skb, const struct xt_target_param *par) } static unsigned int -tos_tg6(struct sk_buff *skb, const struct xt_target_param *par) +tos_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tos_target_info *info = par->targinfo; struct ipv6hdr *iph = ipv6_hdr(skb); diff --git a/net/netfilter/xt_HL.c b/net/netfilter/xt_HL.c index 77b99f732711..95b084800fcc 100644 --- a/net/netfilter/xt_HL.c +++ b/net/netfilter/xt_HL.c @@ -26,7 +26,7 @@ MODULE_DESCRIPTION("Xtables: Hoplimit/TTL Limit field modification target"); MODULE_LICENSE("GPL"); static unsigned int -ttl_tg(struct sk_buff *skb, const struct xt_target_param *par) +ttl_tg(struct sk_buff *skb, const struct xt_action_param *par) { struct iphdr *iph; const struct ipt_TTL_info *info = par->targinfo; @@ -66,7 +66,7 @@ ttl_tg(struct 
sk_buff *skb, const struct xt_target_param *par) } static unsigned int -hl_tg6(struct sk_buff *skb, const struct xt_target_param *par) +hl_tg6(struct sk_buff *skb, const struct xt_action_param *par) { struct ipv6hdr *ip6h; const struct ip6t_HL_info *info = par->targinfo; diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c index ab6f8ff9c9a7..a4140509eea1 100644 --- a/net/netfilter/xt_LED.c +++ b/net/netfilter/xt_LED.c @@ -49,7 +49,7 @@ struct xt_led_info_internal { }; static unsigned int -led_tg(struct sk_buff *skb, const struct xt_target_param *par) +led_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_led_info *ledinfo = par->targinfo; struct xt_led_info_internal *ledinternal = ledinfo->internal_data; diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index 42dd8747b421..a17dd0f589b2 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -22,7 +22,7 @@ MODULE_ALIAS("ipt_NFLOG"); MODULE_ALIAS("ip6t_NFLOG"); static unsigned int -nflog_tg(struct sk_buff *skb, const struct xt_target_param *par) +nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_nflog_info *info = par->targinfo; struct nf_loginfo li; diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index f9217cb56fe3..039cce1bde3d 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -31,7 +31,7 @@ static u32 jhash_initval __read_mostly; static bool rnd_inited __read_mostly; static unsigned int -nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par) +nfqueue_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_NFQ_info *tinfo = par->targinfo; @@ -65,7 +65,7 @@ static u32 hash_v6(const struct sk_buff *skb) #endif static unsigned int -nfqueue_tg_v1(struct sk_buff *skb, const struct xt_target_param *par) +nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_NFQ_info_v1 *info = par->targinfo; u32 queue = info->queuenum; diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c index e7a0a54fd4ea..512b9123252f 100644 --- a/net/netfilter/xt_NOTRACK.c +++ b/net/netfilter/xt_NOTRACK.c @@ -13,7 +13,7 @@ MODULE_ALIAS("ipt_NOTRACK"); MODULE_ALIAS("ip6t_NOTRACK"); static unsigned int -notrack_tg(struct sk_buff *skb, const struct xt_target_param *par) +notrack_tg(struct sk_buff *skb, const struct xt_action_param *par) { /* Previously seen (loopback)? Ignore. 
*/ if (skb->nfct != NULL) diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index a02193f06e39..69c01e10f8af 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -73,7 +73,7 @@ void xt_rateest_put(struct xt_rateest *est) EXPORT_SYMBOL_GPL(xt_rateest_put); static unsigned int -xt_rateest_tg(struct sk_buff *skb, const struct xt_target_param *par) +xt_rateest_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_rateest_target_info *info = par->targinfo; struct gnet_stats_basic_packed *stats = &info->est->bstats; diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index a91d4a7d5a2c..23b2d6c486b5 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -30,7 +30,7 @@ MODULE_ALIAS("ip6t_SECMARK"); static u8 mode; static unsigned int -secmark_tg(struct sk_buff *skb, const struct xt_target_param *par) +secmark_tg(struct sk_buff *skb, const struct xt_action_param *par) { u32 secmark = 0; const struct xt_secmark_target_info *info = par->targinfo; diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index d04606459c9d..62ec021fbd50 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -172,7 +172,7 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, } static unsigned int -tcpmss_tg4(struct sk_buff *skb, const struct xt_target_param *par) +tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par) { struct iphdr *iph = ip_hdr(skb); __be16 newlen; @@ -195,7 +195,7 @@ tcpmss_tg4(struct sk_buff *skb, const struct xt_target_param *par) #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) static unsigned int -tcpmss_tg6(struct sk_buff *skb, const struct xt_target_param *par) +tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par) { struct ipv6hdr *ipv6h = ipv6_hdr(skb); u8 nexthdr; diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c index e8b57609ddc0..9dc9ecfdd546 100644 --- a/net/netfilter/xt_TCPOPTSTRIP.c +++ b/net/netfilter/xt_TCPOPTSTRIP.c @@ -74,7 +74,7 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb, } static unsigned int -tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_target_param *par) +tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_action_param *par) { return tcpoptstrip_mangle_packet(skb, par->targinfo, ip_hdrlen(skb), sizeof(struct iphdr) + sizeof(struct tcphdr)); @@ -82,7 +82,7 @@ tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_target_param *par) #if defined(CONFIG_IP6_NF_MANGLE) || defined(CONFIG_IP6_NF_MANGLE_MODULE) static unsigned int -tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_target_param *par) +tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_action_param *par) { struct ipv6hdr *ipv6h = ipv6_hdr(skb); int tcphoff; diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 49da6c05f4e0..d7920d9f49e9 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -84,7 +84,7 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info) } static unsigned int -tee_tg4(struct sk_buff *skb, const struct xt_target_param *par) +tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tee_tginfo *info = par->targinfo; struct iphdr *iph; @@ -165,7 +165,7 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info) } static unsigned int -tee_tg6(struct sk_buff *skb, const struct xt_target_param *par) +tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tee_tginfo 
*info = par->targinfo; diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 4f246ddc5c48..e1a0dedac258 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -25,7 +25,7 @@ #include static unsigned int -tproxy_tg(struct sk_buff *skb, const struct xt_target_param *par) +tproxy_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct iphdr *iph = ip_hdr(skb); const struct xt_tproxy_target_info *tgi = par->targinfo; diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c index fbb04b86c46b..df48967af382 100644 --- a/net/netfilter/xt_TRACE.c +++ b/net/netfilter/xt_TRACE.c @@ -11,7 +11,7 @@ MODULE_ALIAS("ipt_TRACE"); MODULE_ALIAS("ip6t_TRACE"); static unsigned int -trace_tg(struct sk_buff *skb, const struct xt_target_param *par) +trace_tg(struct sk_buff *skb, const struct xt_action_param *par) { skb->nf_trace = 1; return XT_CONTINUE; diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index 6c941e1c6b9e..67fc317118c0 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -86,7 +86,7 @@ xt_cluster_is_multicast_addr(const struct sk_buff *skb, u_int8_t family) } static bool -xt_cluster_mt(const struct sk_buff *skb, const struct xt_match_param *par) +xt_cluster_mt(const struct sk_buff *skb, const struct xt_action_param *par) { struct sk_buff *pskb = (struct sk_buff *)skb; const struct xt_cluster_match_info *info = par->matchinfo; diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c index e82179832acd..1dbb3e13c059 100644 --- a/net/netfilter/xt_comment.c +++ b/net/netfilter/xt_comment.c @@ -16,7 +16,7 @@ MODULE_ALIAS("ipt_comment"); MODULE_ALIAS("ip6t_comment"); static bool -comment_mt(const struct sk_buff *skb, const struct xt_match_param *par) +comment_mt(const struct sk_buff *skb, const struct xt_action_param *par) { /* We always match */ return true; diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index ff738a5f963a..5e5cf15f011f 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -18,7 +18,7 @@ MODULE_ALIAS("ipt_connbytes"); MODULE_ALIAS("ip6t_connbytes"); static bool -connbytes_mt(const struct sk_buff *skb, const struct xt_match_param *par) +connbytes_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_connbytes_info *sinfo = par->matchinfo; const struct nf_conn *ct; diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 326bc1b81681..6eec1913298e 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -173,7 +173,7 @@ static int count_them(struct net *net, } static bool -connlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) +connlimit_mt(const struct sk_buff *skb, const struct xt_action_param *par) { struct net *net = dev_net(par->in ? 
par->in : par->out); const struct xt_connlimit_info *info = par->matchinfo; diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index ae1015484ae2..b43cfc7f1bb5 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -37,7 +37,7 @@ MODULE_ALIAS("ipt_connmark"); MODULE_ALIAS("ip6t_connmark"); static unsigned int -connmark_tg(struct sk_buff *skb, const struct xt_target_param *par) +connmark_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_connmark_tginfo1 *info = par->targinfo; enum ip_conntrack_info ctinfo; @@ -91,7 +91,7 @@ static void connmark_tg_destroy(const struct xt_tgdtor_param *par) } static bool -connmark_mt(const struct sk_buff *skb, const struct xt_match_param *par) +connmark_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_connmark_mtinfo1 *info = par->matchinfo; enum ip_conntrack_info ctinfo; diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 3348706ce56d..14a96f48001d 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -113,7 +113,7 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo2 *info, } static bool -conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par, +conntrack_mt(const struct sk_buff *skb, const struct xt_action_param *par, u16 state_mask, u16 status_mask) { const struct xt_conntrack_mtinfo2 *info = par->matchinfo; @@ -191,7 +191,7 @@ conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par, } static bool -conntrack_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) +conntrack_mt_v1(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_conntrack_mtinfo1 *info = par->matchinfo; @@ -199,7 +199,7 @@ conntrack_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) } static bool -conntrack_mt_v2(const struct sk_buff *skb, const struct xt_match_param *par) +conntrack_mt_v2(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_conntrack_mtinfo2 *info = par->matchinfo; diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index 0d260aec487f..7a4d4e8edc10 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c @@ -96,7 +96,7 @@ match_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff, } static bool -dccp_mt(const struct sk_buff *skb, const struct xt_match_param *par) +dccp_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_dccp_info *info = par->matchinfo; const struct dccp_hdr *dh; diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c index 9db51fddbdb8..2133b509d157 100644 --- a/net/netfilter/xt_dscp.c +++ b/net/netfilter/xt_dscp.c @@ -25,7 +25,7 @@ MODULE_ALIAS("ipt_tos"); MODULE_ALIAS("ip6t_tos"); static bool -dscp_mt(const struct sk_buff *skb, const struct xt_match_param *par) +dscp_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_dscp_info *info = par->matchinfo; u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; @@ -34,7 +34,7 @@ dscp_mt(const struct sk_buff *skb, const struct xt_match_param *par) } static bool -dscp_mt6(const struct sk_buff *skb, const struct xt_match_param *par) +dscp_mt6(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_dscp_info *info = par->matchinfo; u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; @@ -54,7 +54,8 @@ static int dscp_mt_check(const struct xt_mtchk_param *par) return 0; } -static bool 
tos_mt(const struct sk_buff *skb, const struct xt_match_param *par) +static bool tos_mt(const struct sk_buff *skb, + const struct xt_action_param *par) { const struct xt_tos_match_info *info = par->matchinfo; diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c index 143bfdc8e38f..39caafff9485 100644 --- a/net/netfilter/xt_esp.c +++ b/net/netfilter/xt_esp.c @@ -36,7 +36,8 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) return r; } -static bool esp_mt(const struct sk_buff *skb, const struct xt_match_param *par) +static bool esp_mt(const struct sk_buff *skb, + const struct xt_action_param *par) { const struct ip_esp_hdr *eh; struct ip_esp_hdr _esp; diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 0c366d387c8c..700c21e0804c 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -516,7 +516,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, } static bool -hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) +hashlimit_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_hashlimit_mtinfo1 *info = par->matchinfo; struct xt_hashlimit_htable *hinfo = info->hinfo; diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index b8b3e13dc71e..e941bd26a519 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -24,7 +24,7 @@ MODULE_ALIAS("ip6t_helper"); static bool -helper_mt(const struct sk_buff *skb, const struct xt_match_param *par) +helper_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_helper_info *info = par->matchinfo; const struct nf_conn *ct; diff --git a/net/netfilter/xt_hl.c b/net/netfilter/xt_hl.c index be53f7299623..335c34a4fd1c 100644 --- a/net/netfilter/xt_hl.c +++ b/net/netfilter/xt_hl.c @@ -25,7 +25,8 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_ttl"); MODULE_ALIAS("ip6t_hl"); -static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par) +static bool ttl_mt(const struct sk_buff *skb, + const struct xt_action_param *par) { const struct ipt_ttl_info *info = par->matchinfo; const u8 ttl = ip_hdr(skb)->ttl; @@ -44,7 +45,8 @@ static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par) return false; } -static bool hl_mt6(const struct sk_buff *skb, const struct xt_match_param *par) +static bool hl_mt6(const struct sk_buff *skb, + const struct xt_action_param *par) { const struct ip6t_hl_info *info = par->matchinfo; const struct ipv6hdr *ip6h = ipv6_hdr(skb); diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c index 8471d9715bde..9578860a9217 100644 --- a/net/netfilter/xt_iprange.c +++ b/net/netfilter/xt_iprange.c @@ -17,7 +17,7 @@ #include static bool -iprange_mt4(const struct sk_buff *skb, const struct xt_match_param *par) +iprange_mt4(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_iprange_mtinfo *info = par->matchinfo; const struct iphdr *iph = ip_hdr(skb); @@ -68,7 +68,7 @@ iprange_ipv6_sub(const struct in6_addr *a, const struct in6_addr *b) } static bool -iprange_mt6(const struct sk_buff *skb, const struct xt_match_param *par) +iprange_mt6(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_iprange_mtinfo *info = par->matchinfo; const struct ipv6hdr *iph = ipv6_hdr(skb); diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c index c4871ca6c86d..842149b4122c 100644 --- a/net/netfilter/xt_length.c +++ b/net/netfilter/xt_length.c @@ -21,7 +21,7 @@ 
MODULE_ALIAS("ipt_length"); MODULE_ALIAS("ip6t_length"); static bool -length_mt(const struct sk_buff *skb, const struct xt_match_param *par) +length_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_length_info *info = par->matchinfo; u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len); @@ -30,7 +30,7 @@ length_mt(const struct sk_buff *skb, const struct xt_match_param *par) } static bool -length_mt6(const struct sk_buff *skb, const struct xt_match_param *par) +length_mt6(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_length_info *info = par->matchinfo; const u_int16_t pktlen = ntohs(ipv6_hdr(skb)->payload_len) + diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 88215dca19cb..7dcfe8602c83 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -65,7 +65,7 @@ static DEFINE_SPINLOCK(limit_lock); #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ) static bool -limit_mt(const struct sk_buff *skb, const struct xt_match_param *par) +limit_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_rateinfo *r = par->matchinfo; struct xt_limit_priv *priv = r->master; diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c index b971ce93773e..36c49644ce35 100644 --- a/net/netfilter/xt_mac.c +++ b/net/netfilter/xt_mac.c @@ -25,7 +25,8 @@ MODULE_DESCRIPTION("Xtables: MAC address match"); MODULE_ALIAS("ipt_mac"); MODULE_ALIAS("ip6t_mac"); -static bool mac_mt(const struct sk_buff *skb, const struct xt_match_param *par) +static bool mac_mt(const struct sk_buff *skb, + const struct xt_action_param *par) { const struct xt_mac_info *info = par->matchinfo; bool ret; diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c index 035c468a0040..3c8347076d55 100644 --- a/net/netfilter/xt_mark.c +++ b/net/netfilter/xt_mark.c @@ -25,7 +25,7 @@ MODULE_ALIAS("ipt_MARK"); MODULE_ALIAS("ip6t_MARK"); static unsigned int -mark_tg(struct sk_buff *skb, const struct xt_target_param *par) +mark_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_mark_tginfo2 *info = par->targinfo; @@ -34,7 +34,7 @@ mark_tg(struct sk_buff *skb, const struct xt_target_param *par) } static bool -mark_mt(const struct sk_buff *skb, const struct xt_match_param *par) +mark_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_mark_mtinfo1 *info = par->matchinfo; diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c index b21f90432247..3140fd4049fc 100644 --- a/net/netfilter/xt_multiport.c +++ b/net/netfilter/xt_multiport.c @@ -72,7 +72,7 @@ ports_match_v1(const struct xt_multiport_v1 *minfo, } static bool -multiport_mt(const struct sk_buff *skb, const struct xt_match_param *par) +multiport_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const __be16 *pptr; __be16 _ports[2]; diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 8dcde13a0781..37aa55860a96 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -193,8 +193,8 @@ static inline int xt_osf_ttl(const struct sk_buff *skb, const struct xt_osf_info return ip->ttl == f_ttl; } -static bool xt_osf_match_packet(const struct sk_buff *skb, - const struct xt_match_param *p) +static bool +xt_osf_match_packet(const struct sk_buff *skb, const struct xt_action_param *p) { const struct xt_osf_info *info = p->matchinfo; const struct iphdr *ip = ip_hdr(skb); diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index d24c76dffee2..3dd1391d385a 100644 
--- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -18,7 +18,7 @@ #include static bool -owner_mt(const struct sk_buff *skb, const struct xt_match_param *par) +owner_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_owner_match_info *info = par->matchinfo; const struct file *filp; diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index d0bdf3dd4d25..298cd290b06a 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -22,7 +22,7 @@ MODULE_ALIAS("ip6t_physdev"); static bool -physdev_mt(const struct sk_buff *skb, const struct xt_match_param *par) +physdev_mt(const struct sk_buff *skb, const struct xt_action_param *par) { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); const struct xt_physdev_info *info = par->matchinfo; diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c index 69da1d3a1d85..d95f2149df93 100644 --- a/net/netfilter/xt_pkttype.c +++ b/net/netfilter/xt_pkttype.c @@ -23,7 +23,7 @@ MODULE_ALIAS("ipt_pkttype"); MODULE_ALIAS("ip6t_pkttype"); static bool -pkttype_mt(const struct sk_buff *skb, const struct xt_match_param *par) +pkttype_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_pkttype_info *info = par->matchinfo; u_int8_t type; diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index 1fa239c1fb93..1abfc7ad4277 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -110,7 +110,7 @@ match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info, } static bool -policy_mt(const struct sk_buff *skb, const struct xt_match_param *par) +policy_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_policy_info *info = par->matchinfo; int ret; diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index 7c95d69f6f06..e79e07c75da8 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -23,7 +23,7 @@ MODULE_ALIAS("ip6t_quota"); static DEFINE_SPINLOCK(quota_lock); static bool -quota_mt(const struct sk_buff *skb, const struct xt_match_param *par) +quota_mt(const struct sk_buff *skb, const struct xt_action_param *par) { struct xt_quota_info *q = (void *)par->matchinfo; struct xt_quota_priv *priv = q->master; diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index 23805f8a444b..53f7a4d12e1e 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -15,7 +15,7 @@ static bool -xt_rateest_mt(const struct sk_buff *skb, const struct xt_match_param *par) +xt_rateest_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_rateest_match_info *info = par->matchinfo; struct gnet_stats_rate_est *r; diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c index 484d1689bfde..b063c783901a 100644 --- a/net/netfilter/xt_realm.c +++ b/net/netfilter/xt_realm.c @@ -22,7 +22,7 @@ MODULE_DESCRIPTION("Xtables: Routing realm match"); MODULE_ALIAS("ipt_realm"); static bool -realm_mt(const struct sk_buff *skb, const struct xt_match_param *par) +realm_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_realm_info *info = par->matchinfo; const struct dst_entry *dst = skb_dst(skb); diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index b88d63b9c76a..503b7f199f14 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -224,7 +224,7 @@ static void recent_table_flush(struct recent_table *t) } static bool 
-recent_mt(const struct sk_buff *skb, const struct xt_match_param *par) +recent_mt(const struct sk_buff *skb, const struct xt_action_param *par) { struct net *net = dev_net(par->in ? par->in : par->out); struct recent_net *recent_net = recent_pernet(net); diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index c3694df54672..da4c3cb31403 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -114,7 +114,7 @@ match_packet(const struct sk_buff *skb, } static bool -sctp_mt(const struct sk_buff *skb, const struct xt_match_param *par) +sctp_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_sctp_info *info = par->matchinfo; const sctp_sctphdr_t *sh; diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index a9b16867e1f7..2665e32d5db1 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -88,7 +88,7 @@ extract_icmp_fields(const struct sk_buff *skb, static bool -socket_match(const struct sk_buff *skb, const struct xt_match_param *par, +socket_match(const struct sk_buff *skb, const struct xt_action_param *par, const struct xt_socket_mtinfo1 *info) { const struct iphdr *iph = ip_hdr(skb); @@ -174,13 +174,13 @@ socket_match(const struct sk_buff *skb, const struct xt_match_param *par, } static bool -socket_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) +socket_mt_v0(const struct sk_buff *skb, const struct xt_action_param *par) { return socket_match(skb, par, NULL); } static bool -socket_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) +socket_mt_v1(const struct sk_buff *skb, const struct xt_action_param *par) { return socket_match(skb, par, par->matchinfo); } diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index bb1271852d50..344cca661099 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -21,7 +21,7 @@ MODULE_ALIAS("ipt_state"); MODULE_ALIAS("ip6t_state"); static bool -state_mt(const struct sk_buff *skb, const struct xt_match_param *par) +state_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_state_info *sinfo = par->matchinfo; enum ip_conntrack_info ctinfo; diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index 5aeca1d023d8..ee4540a2cf33 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c @@ -30,7 +30,7 @@ MODULE_ALIAS("ip6t_statistic"); static DEFINE_SPINLOCK(nth_lock); static bool -statistic_mt(const struct sk_buff *skb, const struct xt_match_param *par) +statistic_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_statistic_info *info = par->matchinfo; bool ret = info->flags & XT_STATISTIC_INVERT; diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index f6d5112175e6..fd5dc5016a17 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -23,7 +23,7 @@ MODULE_ALIAS("ipt_string"); MODULE_ALIAS("ip6t_string"); static bool -string_mt(const struct sk_buff *skb, const struct xt_match_param *par) +string_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_string_info *conf = par->matchinfo; struct ts_state state; diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c index 4809b34b10f8..5c8a7b435d66 100644 --- a/net/netfilter/xt_tcpmss.c +++ b/net/netfilter/xt_tcpmss.c @@ -25,7 +25,7 @@ MODULE_ALIAS("ipt_tcpmss"); MODULE_ALIAS("ip6t_tcpmss"); static bool -tcpmss_mt(const struct sk_buff *skb, const struct xt_match_param *par) +tcpmss_mt(const struct 
sk_buff *skb, const struct xt_action_param *par) { const struct xt_tcpmss_match_info *info = par->matchinfo; const struct tcphdr *th; diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index efa2ede24ae6..19c31d4c2ba6 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c @@ -62,7 +62,8 @@ tcp_find_option(u_int8_t option, return invert; } -static bool tcp_mt(const struct sk_buff *skb, const struct xt_match_param *par) +static bool tcp_mt(const struct sk_buff *skb, + const struct xt_action_param *par) { const struct tcphdr *th; struct tcphdr _tcph; @@ -128,7 +129,8 @@ static int tcp_mt_check(const struct xt_mtchk_param *par) return (tcpinfo->invflags & ~XT_TCP_INV_MASK) ? -EINVAL : 0; } -static bool udp_mt(const struct sk_buff *skb, const struct xt_match_param *par) +static bool udp_mt(const struct sk_buff *skb, + const struct xt_action_param *par) { const struct udphdr *uh; struct udphdr _udph; diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index d8556fdda440..ffdb8fac0be1 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -152,7 +152,7 @@ static void localtime_3(struct xtm *r, time_t time) } static bool -time_mt(const struct sk_buff *skb, const struct xt_match_param *par) +time_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_time_info *info = par->matchinfo; unsigned int packet_time; diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c index d7c05f03a7e7..f62797b1ddb5 100644 --- a/net/netfilter/xt_u32.c +++ b/net/netfilter/xt_u32.c @@ -86,7 +86,8 @@ static bool u32_match_it(const struct xt_u32 *data, return true; } -static bool u32_mt(const struct sk_buff *skb, const struct xt_match_param *par) +static bool u32_mt(const struct sk_buff *skb, + const struct xt_action_param *par) { const struct xt_u32 *data = par->matchinfo; bool ret; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 03f80a0fa167..1f9595467c17 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -199,7 +199,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, { int ret = 0, result = 0; struct tcf_ipt *ipt = a->priv; - struct xt_target_param par; + struct xt_action_param par; if (skb_cloned(skb)) { if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) -- cgit v1.2.2 From 62fc8051083a334578c3f4b3488808f210b4565f Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 7 Jul 2009 20:42:08 +0200 Subject: netfilter: xtables: deconstify struct xt_action_param for matches In future, layer-3 matches will be an xt module of their own, and need to set the fragoff and thoff fields. Adding more pointers would needlessly increase memory requirements (esp. so for 64-bit, where pointers are wider).
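
Illustration only, not part of the patch: with the parameter block now passed non-const to matches, a future layer-3 match could fill in the shared offsets itself, much as the ip_tables.c hunks later in this series already do for IPv4. The match name below is hypothetical; the fragoff/thoff fields are the ones struct xt_action_param carries.

#include <linux/ip.h>
#include <linux/skbuff.h>
#include <net/ip.h>
#include <linux/netfilter/x_tables.h>

/* Hypothetical layer-3 match: with a writable par it can record the
 * fragment and transport-header offsets for later matches in the same
 * rule, instead of each of them re-parsing the IPv4 header. */
static bool l3off_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
	const struct iphdr *iph = ip_hdr(skb);

	par->fragoff = ntohs(iph->frag_off) & IP_OFFSET;
	par->thoff   = ip_hdrlen(skb);
	return true;	/* the match decision itself is not the point here */
}
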
Signed-off-by: Jan Engelhardt --- net/bridge/netfilter/ebt_802_3.c | 2 +- net/bridge/netfilter/ebt_among.c | 2 +- net/bridge/netfilter/ebt_arp.c | 2 +- net/bridge/netfilter/ebt_ip.c | 2 +- net/bridge/netfilter/ebt_ip6.c | 2 +- net/bridge/netfilter/ebt_limit.c | 2 +- net/bridge/netfilter/ebt_mark_m.c | 2 +- net/bridge/netfilter/ebt_pkttype.c | 2 +- net/bridge/netfilter/ebt_stp.c | 2 +- net/bridge/netfilter/ebt_vlan.c | 2 +- net/ipv4/netfilter/ip_tables.c | 2 +- net/ipv4/netfilter/ipt_addrtype.c | 4 ++-- net/ipv4/netfilter/ipt_ah.c | 2 +- net/ipv4/netfilter/ipt_ecn.c | 3 +-- net/ipv6/netfilter/ip6_tables.c | 2 +- net/ipv6/netfilter/ip6t_ah.c | 3 +-- net/ipv6/netfilter/ip6t_eui64.c | 2 +- net/ipv6/netfilter/ip6t_frag.c | 2 +- net/ipv6/netfilter/ip6t_hbh.c | 2 +- net/ipv6/netfilter/ip6t_ipv6header.c | 2 +- net/ipv6/netfilter/ip6t_mh.c | 3 +-- net/ipv6/netfilter/ip6t_rt.c | 3 +-- net/netfilter/xt_cluster.c | 2 +- net/netfilter/xt_comment.c | 2 +- net/netfilter/xt_connbytes.c | 2 +- net/netfilter/xt_connlimit.c | 2 +- net/netfilter/xt_connmark.c | 2 +- net/netfilter/xt_conntrack.c | 6 +++--- net/netfilter/xt_dccp.c | 2 +- net/netfilter/xt_dscp.c | 7 +++---- net/netfilter/xt_esp.c | 3 +-- net/netfilter/xt_hashlimit.c | 2 +- net/netfilter/xt_helper.c | 2 +- net/netfilter/xt_hl.c | 6 ++---- net/netfilter/xt_iprange.c | 4 ++-- net/netfilter/xt_length.c | 4 ++-- net/netfilter/xt_limit.c | 2 +- net/netfilter/xt_mac.c | 3 +-- net/netfilter/xt_mark.c | 2 +- net/netfilter/xt_multiport.c | 2 +- net/netfilter/xt_osf.c | 2 +- net/netfilter/xt_owner.c | 2 +- net/netfilter/xt_physdev.c | 2 +- net/netfilter/xt_pkttype.c | 2 +- net/netfilter/xt_policy.c | 2 +- net/netfilter/xt_quota.c | 2 +- net/netfilter/xt_rateest.c | 2 +- net/netfilter/xt_realm.c | 2 +- net/netfilter/xt_recent.c | 2 +- net/netfilter/xt_sctp.c | 2 +- net/netfilter/xt_socket.c | 6 +++--- net/netfilter/xt_state.c | 2 +- net/netfilter/xt_statistic.c | 2 +- net/netfilter/xt_string.c | 2 +- net/netfilter/xt_tcpmss.c | 2 +- net/netfilter/xt_tcpudp.c | 6 ++---- net/netfilter/xt_time.c | 2 +- net/netfilter/xt_u32.c | 3 +-- 58 files changed, 69 insertions(+), 81 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c index 4b0df00c82ec..2a449b7ab8fa 100644 --- a/net/bridge/netfilter/ebt_802_3.c +++ b/net/bridge/netfilter/ebt_802_3.c @@ -13,7 +13,7 @@ #include static bool -ebt_802_3_mt(const struct sk_buff *skb, const struct xt_action_param *par) +ebt_802_3_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct ebt_802_3_info *info = par->matchinfo; const struct ebt_802_3_hdr *hdr = ebt_802_3_hdr(skb); diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c index c04f9461f734..8b84c581be30 100644 --- a/net/bridge/netfilter/ebt_among.c +++ b/net/bridge/netfilter/ebt_among.c @@ -129,7 +129,7 @@ static int get_ip_src(const struct sk_buff *skb, __be32 *addr) } static bool -ebt_among_mt(const struct sk_buff *skb, const struct xt_action_param *par) +ebt_among_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct ebt_among_info *info = par->matchinfo; const char *dmac, *smac; diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c index 6203f4dea2e2..cd457b891b27 100644 --- a/net/bridge/netfilter/ebt_arp.c +++ b/net/bridge/netfilter/ebt_arp.c @@ -16,7 +16,7 @@ #include static bool -ebt_arp_mt(const struct sk_buff *skb, const struct xt_action_param *par) +ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par) 
{ const struct ebt_arp_info *info = par->matchinfo; const struct arphdr *ah; diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c index a0cde7442b55..23bca62d58d2 100644 --- a/net/bridge/netfilter/ebt_ip.c +++ b/net/bridge/netfilter/ebt_ip.c @@ -25,7 +25,7 @@ struct tcpudphdr { }; static bool -ebt_ip_mt(const struct sk_buff *skb, const struct xt_action_param *par) +ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct ebt_ip_info *info = par->matchinfo; const struct iphdr *ih; diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c index c451dc2ff822..50a46afc2bcc 100644 --- a/net/bridge/netfilter/ebt_ip6.c +++ b/net/bridge/netfilter/ebt_ip6.c @@ -28,7 +28,7 @@ struct tcpudphdr { }; static bool -ebt_ip6_mt(const struct sk_buff *skb, const struct xt_action_param *par) +ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct ebt_ip6_info *info = par->matchinfo; const struct ipv6hdr *ih6; diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c index 760923f08067..517e78befcb2 100644 --- a/net/bridge/netfilter/ebt_limit.c +++ b/net/bridge/netfilter/ebt_limit.c @@ -32,7 +32,7 @@ static DEFINE_SPINLOCK(limit_lock); #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ) static bool -ebt_limit_mt(const struct sk_buff *skb, const struct xt_action_param *par) +ebt_limit_mt(const struct sk_buff *skb, struct xt_action_param *par) { struct ebt_limit_info *info = (void *)par->matchinfo; unsigned long now = jiffies; diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c index de901634fa31..d98baefc4c7e 100644 --- a/net/bridge/netfilter/ebt_mark_m.c +++ b/net/bridge/netfilter/ebt_mark_m.c @@ -13,7 +13,7 @@ #include static bool -ebt_mark_mt(const struct sk_buff *skb, const struct xt_action_param *par) +ebt_mark_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct ebt_mark_m_info *info = par->matchinfo; diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c index 7ba67c4b677a..496a56515307 100644 --- a/net/bridge/netfilter/ebt_pkttype.c +++ b/net/bridge/netfilter/ebt_pkttype.c @@ -13,7 +13,7 @@ #include static bool -ebt_pkttype_mt(const struct sk_buff *skb, const struct xt_action_param *par) +ebt_pkttype_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct ebt_pkttype_info *info = par->matchinfo; diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c index 3cd6070a1137..5b33a2e634a6 100644 --- a/net/bridge/netfilter/ebt_stp.c +++ b/net/bridge/netfilter/ebt_stp.c @@ -120,7 +120,7 @@ static bool ebt_filter_config(const struct ebt_stp_info *info, } static bool -ebt_stp_mt(const struct sk_buff *skb, const struct xt_action_param *par) +ebt_stp_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct ebt_stp_info *info = par->matchinfo; const struct stp_header *sp; diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c index e4ab62533c74..87b53b3a921d 100644 --- a/net/bridge/netfilter/ebt_vlan.c +++ b/net/bridge/netfilter/ebt_vlan.c @@ -36,7 +36,7 @@ MODULE_LICENSE("GPL"); #define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return false; } static bool -ebt_vlan_mt(const struct sk_buff *skb, const struct xt_action_param *par) +ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct ebt_vlan_info *info = par->matchinfo; const struct vlan_hdr *fp; diff --git 
a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 3ab1b81e799b..4e674f2824a7 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -2138,7 +2138,7 @@ icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, } static bool -icmp_match(const struct sk_buff *skb, const struct xt_action_param *par) +icmp_match(const struct sk_buff *skb, struct xt_action_param *par) { const struct icmphdr *ic; struct icmphdr _icmph; diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c index 24ec548515e4..db8bff0fb86d 100644 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ b/net/ipv4/netfilter/ipt_addrtype.c @@ -30,7 +30,7 @@ static inline bool match_type(struct net *net, const struct net_device *dev, } static bool -addrtype_mt_v0(const struct sk_buff *skb, const struct xt_action_param *par) +addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) { struct net *net = dev_net(par->in ? par->in : par->out); const struct ipt_addrtype_info *info = par->matchinfo; @@ -48,7 +48,7 @@ addrtype_mt_v0(const struct sk_buff *skb, const struct xt_action_param *par) } static bool -addrtype_mt_v1(const struct sk_buff *skb, const struct xt_action_param *par) +addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) { struct net *net = dev_net(par->in ? par->in : par->out); const struct ipt_addrtype_info_v1 *info = par->matchinfo; diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index 48a8293bc1d1..c9d83dc2d6fa 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c @@ -30,7 +30,7 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) return r; } -static bool ah_mt(const struct sk_buff *skb, const struct xt_action_param *par) +static bool ah_mt(const struct sk_buff *skb, struct xt_action_param *par) { struct ip_auth_hdr _ahdr; const struct ip_auth_hdr *ah; diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index 744d13ee296e..b79dddc9edd6 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -67,8 +67,7 @@ static inline bool match_tcp(const struct sk_buff *skb, return true; } -static bool ecn_mt(const struct sk_buff *skb, - const struct xt_action_param *par) +static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct ipt_ecn_info *info = par->matchinfo; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index c3bc999a8bb6..4549f8d6f88f 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -2154,7 +2154,7 @@ icmp6_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, } static bool -icmp6_match(const struct sk_buff *skb, const struct xt_action_param *par) +icmp6_match(const struct sk_buff *skb, struct xt_action_param *par) { const struct icmp6hdr *ic; struct icmp6hdr _icmph; diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index 4fe71898381d..c89887f35a46 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -36,8 +36,7 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) return r; } -static bool ah_mt6(const struct sk_buff *skb, - const struct xt_action_param *par) +static bool ah_mt6(const struct sk_buff *skb, struct xt_action_param *par) { struct ip_auth_hdr _ah; const struct ip_auth_hdr *ah; diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c index 2fd2be1795e0..f32fce34145a 100644 --- 
a/net/ipv6/netfilter/ip6t_eui64.c +++ b/net/ipv6/netfilter/ip6t_eui64.c @@ -20,7 +20,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Andras Kis-Szabo "); static bool -eui64_mt6(const struct sk_buff *skb, const struct xt_action_param *par) +eui64_mt6(const struct sk_buff *skb, struct xt_action_param *par) { unsigned char eui64[8]; diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index 8401aa82ea0b..fcc8c72f218e 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -35,7 +35,7 @@ id_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) } static bool -frag_mt6(const struct sk_buff *skb, const struct xt_action_param *par) +frag_mt6(const struct sk_buff *skb, struct xt_action_param *par) { struct frag_hdr _frag; const struct frag_hdr *fh; diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index d19d5cf47a38..f8aebc098d71 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -44,7 +44,7 @@ MODULE_ALIAS("ip6t_dst"); static struct xt_match hbh_mt6_reg[] __read_mostly; static bool -hbh_mt6(const struct sk_buff *skb, const struct xt_action_param *par) +hbh_mt6(const struct sk_buff *skb, struct xt_action_param *par) { struct ipv6_opt_hdr _optsh; const struct ipv6_opt_hdr *oh; diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index 8e88bb8311de..54bd9790603f 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -27,7 +27,7 @@ MODULE_DESCRIPTION("Xtables: IPv6 header types match"); MODULE_AUTHOR("Andras Kis-Szabo "); static bool -ipv6header_mt6(const struct sk_buff *skb, const struct xt_action_param *par) +ipv6header_mt6(const struct sk_buff *skb, struct xt_action_param *par) { const struct ip6t_ipv6header_info *info = par->matchinfo; unsigned int temp; diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c index 4a60788873fd..eb1c3d65271a 100644 --- a/net/ipv6/netfilter/ip6t_mh.c +++ b/net/ipv6/netfilter/ip6t_mh.c @@ -32,8 +32,7 @@ type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert) return (type >= min && type <= max) ^ invert; } -static bool mh_mt6(const struct sk_buff *skb, - const struct xt_action_param *par) +static bool mh_mt6(const struct sk_buff *skb, struct xt_action_param *par) { struct ip6_mh _mh; const struct ip6_mh *mh; diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 793c27200881..ee584693ee35 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -36,8 +36,7 @@ segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) return r; } -static bool rt_mt6(const struct sk_buff *skb, - const struct xt_action_param *par) +static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par) { struct ipv6_rt_hdr _route; const struct ipv6_rt_hdr *rh; diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index 67fc317118c0..30b95a1c1c89 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -86,7 +86,7 @@ xt_cluster_is_multicast_addr(const struct sk_buff *skb, u_int8_t family) } static bool -xt_cluster_mt(const struct sk_buff *skb, const struct xt_action_param *par) +xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par) { struct sk_buff *pskb = (struct sk_buff *)skb; const struct xt_cluster_match_info *info = par->matchinfo; diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c index 1dbb3e13c059..5c861d2f21ca 100644 --- 
a/net/netfilter/xt_comment.c +++ b/net/netfilter/xt_comment.c @@ -16,7 +16,7 @@ MODULE_ALIAS("ipt_comment"); MODULE_ALIAS("ip6t_comment"); static bool -comment_mt(const struct sk_buff *skb, const struct xt_action_param *par) +comment_mt(const struct sk_buff *skb, struct xt_action_param *par) { /* We always match */ return true; diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index 5e5cf15f011f..73517835303d 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -18,7 +18,7 @@ MODULE_ALIAS("ipt_connbytes"); MODULE_ALIAS("ip6t_connbytes"); static bool -connbytes_mt(const struct sk_buff *skb, const struct xt_action_param *par) +connbytes_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_connbytes_info *sinfo = par->matchinfo; const struct nf_conn *ct; diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 6eec1913298e..f130fd9817be 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -173,7 +173,7 @@ static int count_them(struct net *net, } static bool -connlimit_mt(const struct sk_buff *skb, const struct xt_action_param *par) +connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) { struct net *net = dev_net(par->in ? par->in : par->out); const struct xt_connlimit_info *info = par->matchinfo; diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index b43cfc7f1bb5..7278145e6a68 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -91,7 +91,7 @@ static void connmark_tg_destroy(const struct xt_tgdtor_param *par) } static bool -connmark_mt(const struct sk_buff *skb, const struct xt_action_param *par) +connmark_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_connmark_mtinfo1 *info = par->matchinfo; enum ip_conntrack_info ctinfo; diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 14a96f48001d..39681f10291c 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -113,7 +113,7 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo2 *info, } static bool -conntrack_mt(const struct sk_buff *skb, const struct xt_action_param *par, +conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, u16 state_mask, u16 status_mask) { const struct xt_conntrack_mtinfo2 *info = par->matchinfo; @@ -191,7 +191,7 @@ conntrack_mt(const struct sk_buff *skb, const struct xt_action_param *par, } static bool -conntrack_mt_v1(const struct sk_buff *skb, const struct xt_action_param *par) +conntrack_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_conntrack_mtinfo1 *info = par->matchinfo; @@ -199,7 +199,7 @@ conntrack_mt_v1(const struct sk_buff *skb, const struct xt_action_param *par) } static bool -conntrack_mt_v2(const struct sk_buff *skb, const struct xt_action_param *par) +conntrack_mt_v2(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_conntrack_mtinfo2 *info = par->matchinfo; diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index 7a4d4e8edc10..cc2c2919439f 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c @@ -96,7 +96,7 @@ match_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff, } static bool -dccp_mt(const struct sk_buff *skb, const struct xt_action_param *par) +dccp_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_dccp_info *info = par->matchinfo; const struct dccp_hdr *dh; diff --git a/net/netfilter/xt_dscp.c 
b/net/netfilter/xt_dscp.c index 2133b509d157..64670fc5d0e1 100644 --- a/net/netfilter/xt_dscp.c +++ b/net/netfilter/xt_dscp.c @@ -25,7 +25,7 @@ MODULE_ALIAS("ipt_tos"); MODULE_ALIAS("ip6t_tos"); static bool -dscp_mt(const struct sk_buff *skb, const struct xt_action_param *par) +dscp_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_dscp_info *info = par->matchinfo; u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; @@ -34,7 +34,7 @@ dscp_mt(const struct sk_buff *skb, const struct xt_action_param *par) } static bool -dscp_mt6(const struct sk_buff *skb, const struct xt_action_param *par) +dscp_mt6(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_dscp_info *info = par->matchinfo; u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; @@ -54,8 +54,7 @@ static int dscp_mt_check(const struct xt_mtchk_param *par) return 0; } -static bool tos_mt(const struct sk_buff *skb, - const struct xt_action_param *par) +static bool tos_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_tos_match_info *info = par->matchinfo; diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c index 39caafff9485..7c2d80020554 100644 --- a/net/netfilter/xt_esp.c +++ b/net/netfilter/xt_esp.c @@ -36,8 +36,7 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) return r; } -static bool esp_mt(const struct sk_buff *skb, - const struct xt_action_param *par) +static bool esp_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct ip_esp_hdr *eh; struct ip_esp_hdr _esp; diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 700c21e0804c..25a6e548ec27 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -516,7 +516,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, } static bool -hashlimit_mt(const struct sk_buff *skb, const struct xt_action_param *par) +hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_hashlimit_mtinfo1 *info = par->matchinfo; struct xt_hashlimit_htable *hinfo = info->hinfo; diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index e941bd26a519..9f4ab00c8050 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -24,7 +24,7 @@ MODULE_ALIAS("ip6t_helper"); static bool -helper_mt(const struct sk_buff *skb, const struct xt_action_param *par) +helper_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_helper_info *info = par->matchinfo; const struct nf_conn *ct; diff --git a/net/netfilter/xt_hl.c b/net/netfilter/xt_hl.c index 335c34a4fd1c..7d12221ead89 100644 --- a/net/netfilter/xt_hl.c +++ b/net/netfilter/xt_hl.c @@ -25,8 +25,7 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_ttl"); MODULE_ALIAS("ip6t_hl"); -static bool ttl_mt(const struct sk_buff *skb, - const struct xt_action_param *par) +static bool ttl_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct ipt_ttl_info *info = par->matchinfo; const u8 ttl = ip_hdr(skb)->ttl; @@ -45,8 +44,7 @@ static bool ttl_mt(const struct sk_buff *skb, return false; } -static bool hl_mt6(const struct sk_buff *skb, - const struct xt_action_param *par) +static bool hl_mt6(const struct sk_buff *skb, struct xt_action_param *par) { const struct ip6t_hl_info *info = par->matchinfo; const struct ipv6hdr *ip6h = ipv6_hdr(skb); diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c index 9578860a9217..88f7c3511c72 100644 --- a/net/netfilter/xt_iprange.c +++ 
b/net/netfilter/xt_iprange.c @@ -17,7 +17,7 @@ #include static bool -iprange_mt4(const struct sk_buff *skb, const struct xt_action_param *par) +iprange_mt4(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_iprange_mtinfo *info = par->matchinfo; const struct iphdr *iph = ip_hdr(skb); @@ -68,7 +68,7 @@ iprange_ipv6_sub(const struct in6_addr *a, const struct in6_addr *b) } static bool -iprange_mt6(const struct sk_buff *skb, const struct xt_action_param *par) +iprange_mt6(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_iprange_mtinfo *info = par->matchinfo; const struct ipv6hdr *iph = ipv6_hdr(skb); diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c index 842149b4122c..176e5570a999 100644 --- a/net/netfilter/xt_length.c +++ b/net/netfilter/xt_length.c @@ -21,7 +21,7 @@ MODULE_ALIAS("ipt_length"); MODULE_ALIAS("ip6t_length"); static bool -length_mt(const struct sk_buff *skb, const struct xt_action_param *par) +length_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_length_info *info = par->matchinfo; u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len); @@ -30,7 +30,7 @@ length_mt(const struct sk_buff *skb, const struct xt_action_param *par) } static bool -length_mt6(const struct sk_buff *skb, const struct xt_action_param *par) +length_mt6(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_length_info *info = par->matchinfo; const u_int16_t pktlen = ntohs(ipv6_hdr(skb)->payload_len) + diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 7dcfe8602c83..32b7a579a032 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -65,7 +65,7 @@ static DEFINE_SPINLOCK(limit_lock); #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ) static bool -limit_mt(const struct sk_buff *skb, const struct xt_action_param *par) +limit_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_rateinfo *r = par->matchinfo; struct xt_limit_priv *priv = r->master; diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c index 36c49644ce35..8160f6b1435d 100644 --- a/net/netfilter/xt_mac.c +++ b/net/netfilter/xt_mac.c @@ -25,8 +25,7 @@ MODULE_DESCRIPTION("Xtables: MAC address match"); MODULE_ALIAS("ipt_mac"); MODULE_ALIAS("ip6t_mac"); -static bool mac_mt(const struct sk_buff *skb, - const struct xt_action_param *par) +static bool mac_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_mac_info *info = par->matchinfo; bool ret; diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c index 3c8347076d55..23345238711b 100644 --- a/net/netfilter/xt_mark.c +++ b/net/netfilter/xt_mark.c @@ -34,7 +34,7 @@ mark_tg(struct sk_buff *skb, const struct xt_action_param *par) } static bool -mark_mt(const struct sk_buff *skb, const struct xt_action_param *par) +mark_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_mark_mtinfo1 *info = par->matchinfo; diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c index 3140fd4049fc..52beb68256c8 100644 --- a/net/netfilter/xt_multiport.c +++ b/net/netfilter/xt_multiport.c @@ -72,7 +72,7 @@ ports_match_v1(const struct xt_multiport_v1 *minfo, } static bool -multiport_mt(const struct sk_buff *skb, const struct xt_action_param *par) +multiport_mt(const struct sk_buff *skb, struct xt_action_param *par) { const __be16 *pptr; __be16 _ports[2]; diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 37aa55860a96..4327e101c047 100644 --- a/net/netfilter/xt_osf.c 
+++ b/net/netfilter/xt_osf.c @@ -194,7 +194,7 @@ static inline int xt_osf_ttl(const struct sk_buff *skb, const struct xt_osf_info } static bool -xt_osf_match_packet(const struct sk_buff *skb, const struct xt_action_param *p) +xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) { const struct xt_osf_info *info = p->matchinfo; const struct iphdr *ip = ip_hdr(skb); diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index 3dd1391d385a..772d7389b337 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -18,7 +18,7 @@ #include static bool -owner_mt(const struct sk_buff *skb, const struct xt_action_param *par) +owner_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_owner_match_info *info = par->matchinfo; const struct file *filp; diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 298cd290b06a..d7ca16b8b8df 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -22,7 +22,7 @@ MODULE_ALIAS("ip6t_physdev"); static bool -physdev_mt(const struct sk_buff *skb, const struct xt_action_param *par) +physdev_mt(const struct sk_buff *skb, struct xt_action_param *par) { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); const struct xt_physdev_info *info = par->matchinfo; diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c index d95f2149df93..5b645cb598fc 100644 --- a/net/netfilter/xt_pkttype.c +++ b/net/netfilter/xt_pkttype.c @@ -23,7 +23,7 @@ MODULE_ALIAS("ipt_pkttype"); MODULE_ALIAS("ip6t_pkttype"); static bool -pkttype_mt(const struct sk_buff *skb, const struct xt_action_param *par) +pkttype_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_pkttype_info *info = par->matchinfo; u_int8_t type; diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index 1abfc7ad4277..f23e97bb42d7 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -110,7 +110,7 @@ match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info, } static bool -policy_mt(const struct sk_buff *skb, const struct xt_action_param *par) +policy_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_policy_info *info = par->matchinfo; int ret; diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index e79e07c75da8..b4f7dfea5980 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -23,7 +23,7 @@ MODULE_ALIAS("ip6t_quota"); static DEFINE_SPINLOCK(quota_lock); static bool -quota_mt(const struct sk_buff *skb, const struct xt_action_param *par) +quota_mt(const struct sk_buff *skb, struct xt_action_param *par) { struct xt_quota_info *q = (void *)par->matchinfo; struct xt_quota_priv *priv = q->master; diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index 53f7a4d12e1e..76a083184d8e 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -15,7 +15,7 @@ static bool -xt_rateest_mt(const struct sk_buff *skb, const struct xt_action_param *par) +xt_rateest_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_rateest_match_info *info = par->matchinfo; struct gnet_stats_rate_est *r; diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c index b063c783901a..459a7b256eb2 100644 --- a/net/netfilter/xt_realm.c +++ b/net/netfilter/xt_realm.c @@ -22,7 +22,7 @@ MODULE_DESCRIPTION("Xtables: Routing realm match"); MODULE_ALIAS("ipt_realm"); static bool -realm_mt(const struct sk_buff *skb, const struct 
xt_action_param *par) +realm_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_realm_info *info = par->matchinfo; const struct dst_entry *dst = skb_dst(skb); diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 503b7f199f14..2808a7e33947 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -224,7 +224,7 @@ static void recent_table_flush(struct recent_table *t) } static bool -recent_mt(const struct sk_buff *skb, const struct xt_action_param *par) +recent_mt(const struct sk_buff *skb, struct xt_action_param *par) { struct net *net = dev_net(par->in ? par->in : par->out); struct recent_net *recent_net = recent_pernet(net); diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index da4c3cb31403..94d8b5deb2d0 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -114,7 +114,7 @@ match_packet(const struct sk_buff *skb, } static bool -sctp_mt(const struct sk_buff *skb, const struct xt_action_param *par) +sctp_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_sctp_info *info = par->matchinfo; const sctp_sctphdr_t *sh; diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 2665e32d5db1..3d54c236a1ba 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -88,7 +88,7 @@ extract_icmp_fields(const struct sk_buff *skb, static bool -socket_match(const struct sk_buff *skb, const struct xt_action_param *par, +socket_match(const struct sk_buff *skb, struct xt_action_param *par, const struct xt_socket_mtinfo1 *info) { const struct iphdr *iph = ip_hdr(skb); @@ -174,13 +174,13 @@ socket_match(const struct sk_buff *skb, const struct xt_action_param *par, } static bool -socket_mt_v0(const struct sk_buff *skb, const struct xt_action_param *par) +socket_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) { return socket_match(skb, par, NULL); } static bool -socket_mt_v1(const struct sk_buff *skb, const struct xt_action_param *par) +socket_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) { return socket_match(skb, par, par->matchinfo); } diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index 344cca661099..e12e053d3782 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -21,7 +21,7 @@ MODULE_ALIAS("ipt_state"); MODULE_ALIAS("ip6t_state"); static bool -state_mt(const struct sk_buff *skb, const struct xt_action_param *par) +state_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_state_info *sinfo = par->matchinfo; enum ip_conntrack_info ctinfo; diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index ee4540a2cf33..96e62b8fd6b1 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c @@ -30,7 +30,7 @@ MODULE_ALIAS("ip6t_statistic"); static DEFINE_SPINLOCK(nth_lock); static bool -statistic_mt(const struct sk_buff *skb, const struct xt_action_param *par) +statistic_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_statistic_info *info = par->matchinfo; bool ret = info->flags & XT_STATISTIC_INVERT; diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index fd5dc5016a17..d3c48b14ab94 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -23,7 +23,7 @@ MODULE_ALIAS("ipt_string"); MODULE_ALIAS("ip6t_string"); static bool -string_mt(const struct sk_buff *skb, const struct xt_action_param *par) +string_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct 
xt_string_info *conf = par->matchinfo; struct ts_state state; diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c index 5c8a7b435d66..f90728f6b8a4 100644 --- a/net/netfilter/xt_tcpmss.c +++ b/net/netfilter/xt_tcpmss.c @@ -25,7 +25,7 @@ MODULE_ALIAS("ipt_tcpmss"); MODULE_ALIAS("ip6t_tcpmss"); static bool -tcpmss_mt(const struct sk_buff *skb, const struct xt_action_param *par) +tcpmss_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_tcpmss_match_info *info = par->matchinfo; const struct tcphdr *th; diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index 19c31d4c2ba6..dedde33c9db6 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c @@ -62,8 +62,7 @@ tcp_find_option(u_int8_t option, return invert; } -static bool tcp_mt(const struct sk_buff *skb, - const struct xt_action_param *par) +static bool tcp_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct tcphdr *th; struct tcphdr _tcph; @@ -129,8 +128,7 @@ static int tcp_mt_check(const struct xt_mtchk_param *par) return (tcpinfo->invflags & ~XT_TCP_INV_MASK) ? -EINVAL : 0; } -static bool udp_mt(const struct sk_buff *skb, - const struct xt_action_param *par) +static bool udp_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct udphdr *uh; struct udphdr _udph; diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index ffdb8fac0be1..79234bb19d05 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -152,7 +152,7 @@ static void localtime_3(struct xtm *r, time_t time) } static bool -time_mt(const struct sk_buff *skb, const struct xt_action_param *par) +time_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_time_info *info = par->matchinfo; unsigned int packet_time; diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c index f62797b1ddb5..a95b50342dbb 100644 --- a/net/netfilter/xt_u32.c +++ b/net/netfilter/xt_u32.c @@ -86,8 +86,7 @@ static bool u32_match_it(const struct xt_u32 *data, return true; } -static bool u32_mt(const struct sk_buff *skb, - const struct xt_action_param *par) +static bool u32_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_u32 *data = par->matchinfo; bool ret; -- cgit v1.2.2 From b4ba26119b06052888696491f614201817491a0d Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 7 Jul 2009 20:54:30 +0200 Subject: netfilter: xtables: change hotdrop pointer to direct modification Since xt_action_param is writable, let's use it. The pointer to 'bool hotdrop' always worried me (8 bytes (64-bit) to write 1 byte!).
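
Illustration only, not part of the patch: the conversion pattern is a direct store into the parameter block, read back by the table walker, instead of writing through a bool pointer. The two functions below are hypothetical sketches; the real conversions (ip_tables.c, ip6_tables.c and the xt/ip6t matches) follow in the diff.

#include <linux/skbuff.h>
#include <linux/tcp.h>
#include <linux/netfilter.h>
#include <linux/netfilter/x_tables.h>

/* Hypothetical match: flag packets too short to examine. */
static bool tiny_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
	if (skb->len < sizeof(struct tcphdr)) {
		par->hotdrop = true;	/* was: *par->hotdrop = true; */
		return false;
	}
	return true;
}

/* Hypothetical caller, mirroring what the reworked ipt_do_table() does. */
static unsigned int tiny_walk(struct sk_buff *skb, struct xt_action_param *acpar)
{
	acpar->hotdrop = false;	/* was: bool hotdrop = false; acpar.hotdrop = &hotdrop; */
	if (!tiny_mt(skb, acpar) && acpar->hotdrop)
		return NF_DROP;
	return NF_ACCEPT;
}
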
Surprisingly results in a reduction in size: text data bss filename 5457066 692730 357892 vmlinux.o-prev 5456554 692730 357892 vmlinux.o Signed-off-by: Jan Engelhardt --- net/bridge/netfilter/ebtables.c | 5 ++--- net/ipv4/netfilter/arp_tables.c | 6 +++--- net/ipv4/netfilter/ip_tables.c | 9 ++++----- net/ipv4/netfilter/ipt_ah.c | 2 +- net/ipv4/netfilter/ipt_ecn.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 11 +++++------ net/ipv6/netfilter/ip6t_ah.c | 4 ++-- net/ipv6/netfilter/ip6t_eui64.c | 2 +- net/ipv6/netfilter/ip6t_frag.c | 4 ++-- net/ipv6/netfilter/ip6t_hbh.c | 4 ++-- net/ipv6/netfilter/ip6t_mh.c | 4 ++-- net/ipv6/netfilter/ip6t_rt.c | 4 ++-- net/netfilter/xt_connlimit.c | 4 ++-- net/netfilter/xt_dccp.c | 4 ++-- net/netfilter/xt_esp.c | 2 +- net/netfilter/xt_hashlimit.c | 2 +- net/netfilter/xt_multiport.c | 2 +- net/netfilter/xt_recent.c | 2 +- net/netfilter/xt_sctp.c | 4 ++-- net/netfilter/xt_tcpmss.c | 2 +- net/netfilter/xt_tcpudp.c | 10 +++++----- 21 files changed, 43 insertions(+), 46 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 290d43541d46..59ca00e40dec 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -186,13 +186,12 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, struct ebt_entries *chaininfo; const char *base; const struct ebt_table_info *private; - bool hotdrop = false; struct xt_action_param acpar; acpar.family = NFPROTO_BRIDGE; acpar.in = in; acpar.out = out; - acpar.hotdrop = &hotdrop; + acpar.hotdrop = false; acpar.hooknum = hook; read_lock_bh(&table->lock); @@ -216,7 +215,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &acpar) != 0) goto letscontinue; - if (hotdrop) { + if (acpar.hotdrop) { read_unlock_bh(&table->lock); return NF_DROP; } diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 9e7d089f168a..8cc56d26e937 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -260,7 +260,6 @@ unsigned int arpt_do_table(struct sk_buff *skb, static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); unsigned int verdict = NF_DROP; const struct arphdr *arp; - bool hotdrop = false; struct arpt_entry *e, *back; const char *indev, *outdev; void *table_base; @@ -284,6 +283,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, acpar.out = out; acpar.hooknum = hook; acpar.family = NFPROTO_ARP; + acpar.hotdrop = false; arp = arp_hdr(skb); do { @@ -345,10 +345,10 @@ unsigned int arpt_do_table(struct sk_buff *skb, else /* Verdict */ break; - } while (!hotdrop); + } while (!acpar.hotdrop); xt_info_rdunlock_bh(); - if (hotdrop) + if (acpar.hotdrop) return NF_DROP; else return verdict; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 4e674f2824a7..607f89f16b76 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -308,7 +308,6 @@ ipt_do_table(struct sk_buff *skb, { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); const struct iphdr *ip; - bool hotdrop = false; /* Initializing verdict to NF_DROP keeps gcc happy. */ unsigned int verdict = NF_DROP; const char *indev, *outdev; @@ -330,7 +329,7 @@ ipt_do_table(struct sk_buff *skb, * match it. 
*/ acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; acpar.thoff = ip_hdrlen(skb); - acpar.hotdrop = &hotdrop; + acpar.hotdrop = false; acpar.in = in; acpar.out = out; acpar.family = NFPROTO_IPV4; @@ -432,7 +431,7 @@ ipt_do_table(struct sk_buff *skb, else /* Verdict */ break; - } while (!hotdrop); + } while (!acpar.hotdrop); xt_info_rdunlock_bh(); pr_debug("Exiting %s; resetting sp from %u to %u\n", __func__, *stackptr, origptr); @@ -440,7 +439,7 @@ ipt_do_table(struct sk_buff *skb, #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; #else - if (hotdrop) + if (acpar.hotdrop) return NF_DROP; else return verdict; #endif @@ -2154,7 +2153,7 @@ icmp_match(const struct sk_buff *skb, struct xt_action_param *par) * can't. Hence, no choice but to drop. */ duprintf("Dropping evil ICMP tinygram.\n"); - *par->hotdrop = true; + par->hotdrop = true; return false; } diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index c9d83dc2d6fa..14a2aa8b8a14 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c @@ -46,7 +46,7 @@ static bool ah_mt(const struct sk_buff *skb, struct xt_action_param *par) * can't. Hence, no choice but to drop. */ pr_debug("Dropping evil AH tinygram.\n"); - *par->hotdrop = true; + par->hotdrop = true; return 0; } diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index b79dddc9edd6..af6e9c778345 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -78,7 +78,7 @@ static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par) if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) { if (ip_hdr(skb)->protocol != IPPROTO_TCP) return false; - if (!match_tcp(skb, info, par->hotdrop)) + if (!match_tcp(skb, info, &par->hotdrop)) return false; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 4549f8d6f88f..557fac9689c0 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -337,7 +337,6 @@ ip6t_do_table(struct sk_buff *skb, struct xt_table *table) { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); - bool hotdrop = false; /* Initializing verdict to NF_DROP keeps gcc happy. */ unsigned int verdict = NF_DROP; const char *indev, *outdev; @@ -356,7 +355,7 @@ ip6t_do_table(struct sk_buff *skb, * things we don't know, ie. tcp syn flag or ports). If the * rule is also a fragment-specific rule, non-fragments won't * match it. */ - acpar.hotdrop = &hotdrop; + acpar.hotdrop = false; acpar.in = in; acpar.out = out; acpar.family = NFPROTO_IPV6; @@ -380,7 +379,7 @@ ip6t_do_table(struct sk_buff *skb, IP_NF_ASSERT(e); if (!ip6_packet_match(skb, indev, outdev, &e->ipv6, - &acpar.thoff, &acpar.fragoff, &hotdrop)) { + &acpar.thoff, &acpar.fragoff, &acpar.hotdrop)) { no_match: e = ip6t_next_entry(e); continue; @@ -447,7 +446,7 @@ ip6t_do_table(struct sk_buff *skb, else /* Verdict */ break; - } while (!hotdrop); + } while (!acpar.hotdrop); xt_info_rdunlock_bh(); *stackptr = origptr; @@ -455,7 +454,7 @@ ip6t_do_table(struct sk_buff *skb, #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; #else - if (hotdrop) + if (acpar.hotdrop) return NF_DROP; else return verdict; #endif @@ -2170,7 +2169,7 @@ icmp6_match(const struct sk_buff *skb, struct xt_action_param *par) * can't. Hence, no choice but to drop. 
*/ duprintf("Dropping evil ICMP tinygram.\n"); - *par->hotdrop = true; + par->hotdrop = true; return false; } diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index c89887f35a46..89cccc5a9c92 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -48,13 +48,13 @@ static bool ah_mt6(const struct sk_buff *skb, struct xt_action_param *par) err = ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL); if (err < 0) { if (err != -ENOENT) - *par->hotdrop = true; + par->hotdrop = true; return false; } ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah); if (ah == NULL) { - *par->hotdrop = true; + par->hotdrop = true; return false; } diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c index f32fce34145a..aab0706908c5 100644 --- a/net/ipv6/netfilter/ip6t_eui64.c +++ b/net/ipv6/netfilter/ip6t_eui64.c @@ -27,7 +27,7 @@ eui64_mt6(const struct sk_buff *skb, struct xt_action_param *par) if (!(skb_mac_header(skb) >= skb->head && skb_mac_header(skb) + ETH_HLEN <= skb->data) && par->fragoff != 0) { - *par->hotdrop = true; + par->hotdrop = true; return false; } diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index fcc8c72f218e..eda898fda6ca 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -46,13 +46,13 @@ frag_mt6(const struct sk_buff *skb, struct xt_action_param *par) err = ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL); if (err < 0) { if (err != -ENOENT) - *par->hotdrop = true; + par->hotdrop = true; return false; } fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag); if (fh == NULL) { - *par->hotdrop = true; + par->hotdrop = true; return false; } diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index f8aebc098d71..59df051eaef6 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -65,13 +65,13 @@ hbh_mt6(const struct sk_buff *skb, struct xt_action_param *par) NEXTHDR_HOP : NEXTHDR_DEST, NULL); if (err < 0) { if (err != -ENOENT) - *par->hotdrop = true; + par->hotdrop = true; return false; } oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); if (oh == NULL) { - *par->hotdrop = true; + par->hotdrop = true; return false; } diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c index eb1c3d65271a..0c90c66b1992 100644 --- a/net/ipv6/netfilter/ip6t_mh.c +++ b/net/ipv6/netfilter/ip6t_mh.c @@ -47,14 +47,14 @@ static bool mh_mt6(const struct sk_buff *skb, struct xt_action_param *par) /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. 
*/ pr_debug("Dropping evil MH tinygram.\n"); - *par->hotdrop = true; + par->hotdrop = true; return false; } if (mh->ip6mh_proto != IPPROTO_NONE) { pr_debug("Dropping invalid MH Payload Proto: %u\n", mh->ip6mh_proto); - *par->hotdrop = true; + par->hotdrop = true; return false; } diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index ee584693ee35..d8488c50a8e0 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -52,13 +52,13 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par) err = ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL); if (err < 0) { if (err != -ENOENT) - *par->hotdrop = true; + par->hotdrop = true; return false; } rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route); if (rh == NULL) { - *par->hotdrop = true; + par->hotdrop = true; return false; } diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index f130fd9817be..5c5b6b921b84 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -206,14 +206,14 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) if (connections < 0) { /* kmalloc failed, drop it entirely */ - *par->hotdrop = true; + par->hotdrop = true; return false; } return (connections > info->limit) ^ info->inverse; hotdrop: - *par->hotdrop = true; + par->hotdrop = true; return false; } diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index cc2c2919439f..b63d2a3d80ba 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c @@ -107,7 +107,7 @@ dccp_mt(const struct sk_buff *skb, struct xt_action_param *par) dh = skb_header_pointer(skb, par->thoff, sizeof(_dh), &_dh); if (dh == NULL) { - *par->hotdrop = true; + par->hotdrop = true; return false; } @@ -120,7 +120,7 @@ dccp_mt(const struct sk_buff *skb, struct xt_action_param *par) && DCCHECK(match_types(dh, info->typemask), XT_DCCP_TYPE, info->flags, info->invflags) && DCCHECK(match_option(info->option, skb, par->thoff, dh, - par->hotdrop), + &par->hotdrop), XT_DCCP_OPTION, info->flags, info->invflags); } diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c index 7c2d80020554..171ba82b5902 100644 --- a/net/netfilter/xt_esp.c +++ b/net/netfilter/xt_esp.c @@ -52,7 +52,7 @@ static bool esp_mt(const struct sk_buff *skb, struct xt_action_param *par) * can't. Hence, no choice but to drop. */ pr_debug("Dropping evil ESP tinygram.\n"); - *par->hotdrop = true; + par->hotdrop = true; return false; } diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 25a6e548ec27..b46a8390896d 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -562,7 +562,7 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) return info->cfg.mode & XT_HASHLIMIT_INVERT; hotdrop: - *par->hotdrop = true; + par->hotdrop = true; return false; } diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c index 52beb68256c8..ac1d3c3d09e7 100644 --- a/net/netfilter/xt_multiport.c +++ b/net/netfilter/xt_multiport.c @@ -87,7 +87,7 @@ multiport_mt(const struct sk_buff *skb, struct xt_action_param *par) * can't. Hence, no choice but to drop. 
*/ pr_debug("Dropping evil offset=0 tinygram.\n"); - *par->hotdrop = true; + par->hotdrop = true; return false; } diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 2808a7e33947..76aec6a44762 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -268,7 +268,7 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par) goto out; e = recent_entry_init(t, &addr, par->family, ttl); if (e == NULL) - *par->hotdrop = true; + par->hotdrop = true; ret = !ret; goto out; } diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index 94d8b5deb2d0..c04fcf385c59 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -128,7 +128,7 @@ sctp_mt(const struct sk_buff *skb, struct xt_action_param *par) sh = skb_header_pointer(skb, par->thoff, sizeof(_sh), &_sh); if (sh == NULL) { pr_debug("Dropping evil TCP offset=0 tinygram.\n"); - *par->hotdrop = true; + par->hotdrop = true; return false; } pr_debug("spt: %d\tdpt: %d\n", ntohs(sh->source), ntohs(sh->dest)); @@ -140,7 +140,7 @@ sctp_mt(const struct sk_buff *skb, struct xt_action_param *par) && ntohs(sh->dest) <= info->dpts[1], XT_SCTP_DEST_PORTS, info->flags, info->invflags) && SCCHECK(match_packet(skb, par->thoff + sizeof(sctp_sctphdr_t), - info, par->hotdrop), + info, &par->hotdrop), XT_SCTP_CHUNK_TYPES, info->flags, info->invflags); } diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c index f90728f6b8a4..c53d4d18eadf 100644 --- a/net/netfilter/xt_tcpmss.c +++ b/net/netfilter/xt_tcpmss.c @@ -73,7 +73,7 @@ out: return info->invert; dropit: - *par->hotdrop = true; + par->hotdrop = true; return false; } diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index dedde33c9db6..c14d4645daa3 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c @@ -77,7 +77,7 @@ static bool tcp_mt(const struct sk_buff *skb, struct xt_action_param *par) */ if (par->fragoff == 1) { pr_debug("Dropping evil TCP offset=1 frag.\n"); - *par->hotdrop = true; + par->hotdrop = true; } /* Must not be a fragment. */ return false; @@ -90,7 +90,7 @@ static bool tcp_mt(const struct sk_buff *skb, struct xt_action_param *par) /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ pr_debug("Dropping evil TCP offset=0 tinygram.\n"); - *par->hotdrop = true; + par->hotdrop = true; return false; } @@ -108,13 +108,13 @@ static bool tcp_mt(const struct sk_buff *skb, struct xt_action_param *par) return false; if (tcpinfo->option) { if (th->doff * 4 < sizeof(_tcph)) { - *par->hotdrop = true; + par->hotdrop = true; return false; } if (!tcp_find_option(tcpinfo->option, skb, par->thoff, th->doff*4 - sizeof(_tcph), tcpinfo->invflags & XT_TCP_INV_OPTION, - par->hotdrop)) + &par->hotdrop)) return false; } return true; @@ -143,7 +143,7 @@ static bool udp_mt(const struct sk_buff *skb, struct xt_action_param *par) /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ pr_debug("Dropping evil UDP tinygram.\n"); - *par->hotdrop = true; + par->hotdrop = true; return false; } -- cgit v1.2.2 From 4538506be386f9736b83bf9892f829adbbb70fea Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 4 Jul 2009 12:50:00 +0200 Subject: netfilter: xtables: combine built-in extension structs Prepare the arrays for use with the multiregister function. The future layer-3 xt matches can then be easily added to it without needing more (un)register code. 
Signed-off-by: Jan Engelhardt --- net/ipv4/netfilter/arp_tables.c | 41 ++++++++++++-------------- net/ipv4/netfilter/ip_tables.c | 65 +++++++++++++++++++---------------------- net/ipv6/netfilter/ip6_tables.c | 64 +++++++++++++++++++--------------------- 3 files changed, 78 insertions(+), 92 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 8cc56d26e937..03352fcba172 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1828,22 +1828,23 @@ void arpt_unregister_table(struct xt_table *table) } /* The built-in targets: standard (NULL) and error. */ -static struct xt_target arpt_standard_target __read_mostly = { - .name = ARPT_STANDARD_TARGET, - .targetsize = sizeof(int), - .family = NFPROTO_ARP, +static struct xt_target arpt_builtin_tg[] __read_mostly = { + { + .name = ARPT_STANDARD_TARGET, + .targetsize = sizeof(int), + .family = NFPROTO_ARP, #ifdef CONFIG_COMPAT - .compatsize = sizeof(compat_int_t), - .compat_from_user = compat_standard_from_user, - .compat_to_user = compat_standard_to_user, + .compatsize = sizeof(compat_int_t), + .compat_from_user = compat_standard_from_user, + .compat_to_user = compat_standard_to_user, #endif -}; - -static struct xt_target arpt_error_target __read_mostly = { - .name = ARPT_ERROR_TARGET, - .target = arpt_error, - .targetsize = ARPT_FUNCTION_MAXNAMELEN, - .family = NFPROTO_ARP, + }, + { + .name = ARPT_ERROR_TARGET, + .target = arpt_error, + .targetsize = ARPT_FUNCTION_MAXNAMELEN, + .family = NFPROTO_ARP, + }, }; static struct nf_sockopt_ops arpt_sockopts = { @@ -1887,12 +1888,9 @@ static int __init arp_tables_init(void) goto err1; /* Noone else will be downing sem now, so we won't sleep */ - ret = xt_register_target(&arpt_standard_target); + ret = xt_register_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg)); if (ret < 0) goto err2; - ret = xt_register_target(&arpt_error_target); - if (ret < 0) - goto err3; /* Register setsockopt */ ret = nf_register_sockopt(&arpt_sockopts); @@ -1903,9 +1901,7 @@ static int __init arp_tables_init(void) return 0; err4: - xt_unregister_target(&arpt_error_target); -err3: - xt_unregister_target(&arpt_standard_target); + xt_unregister_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg)); err2: unregister_pernet_subsys(&arp_tables_net_ops); err1: @@ -1915,8 +1911,7 @@ err1: static void __exit arp_tables_fini(void) { nf_unregister_sockopt(&arpt_sockopts); - xt_unregister_target(&arpt_error_target); - xt_unregister_target(&arpt_standard_target); + xt_unregister_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg)); unregister_pernet_subsys(&arp_tables_net_ops); } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 607f89f16b76..49b9e4fb5460 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -2172,23 +2172,23 @@ static int icmp_checkentry(const struct xt_mtchk_param *par) return (icmpinfo->invflags & ~IPT_ICMP_INV) ? -EINVAL : 0; } -/* The built-in targets: standard (NULL) and error. 
*/ -static struct xt_target ipt_standard_target __read_mostly = { - .name = IPT_STANDARD_TARGET, - .targetsize = sizeof(int), - .family = NFPROTO_IPV4, +static struct xt_target ipt_builtin_tg[] __read_mostly = { + { + .name = IPT_STANDARD_TARGET, + .targetsize = sizeof(int), + .family = NFPROTO_IPV4, #ifdef CONFIG_COMPAT - .compatsize = sizeof(compat_int_t), - .compat_from_user = compat_standard_from_user, - .compat_to_user = compat_standard_to_user, + .compatsize = sizeof(compat_int_t), + .compat_from_user = compat_standard_from_user, + .compat_to_user = compat_standard_to_user, #endif -}; - -static struct xt_target ipt_error_target __read_mostly = { - .name = IPT_ERROR_TARGET, - .target = ipt_error, - .targetsize = IPT_FUNCTION_MAXNAMELEN, - .family = NFPROTO_IPV4, + }, + { + .name = IPT_ERROR_TARGET, + .target = ipt_error, + .targetsize = IPT_FUNCTION_MAXNAMELEN, + .family = NFPROTO_IPV4, + }, }; static struct nf_sockopt_ops ipt_sockopts = { @@ -2208,13 +2208,15 @@ static struct nf_sockopt_ops ipt_sockopts = { .owner = THIS_MODULE, }; -static struct xt_match icmp_matchstruct __read_mostly = { - .name = "icmp", - .match = icmp_match, - .matchsize = sizeof(struct ipt_icmp), - .checkentry = icmp_checkentry, - .proto = IPPROTO_ICMP, - .family = NFPROTO_IPV4, +static struct xt_match ipt_builtin_mt[] __read_mostly = { + { + .name = "icmp", + .match = icmp_match, + .matchsize = sizeof(struct ipt_icmp), + .checkentry = icmp_checkentry, + .proto = IPPROTO_ICMP, + .family = NFPROTO_IPV4, + }, }; static int __net_init ip_tables_net_init(struct net *net) @@ -2241,13 +2243,10 @@ static int __init ip_tables_init(void) goto err1; /* Noone else will be downing sem now, so we won't sleep */ - ret = xt_register_target(&ipt_standard_target); + ret = xt_register_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); if (ret < 0) goto err2; - ret = xt_register_target(&ipt_error_target); - if (ret < 0) - goto err3; - ret = xt_register_match(&icmp_matchstruct); + ret = xt_register_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt)); if (ret < 0) goto err4; @@ -2260,11 +2259,9 @@ static int __init ip_tables_init(void) return 0; err5: - xt_unregister_match(&icmp_matchstruct); + xt_unregister_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt)); err4: - xt_unregister_target(&ipt_error_target); -err3: - xt_unregister_target(&ipt_standard_target); + xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); err2: unregister_pernet_subsys(&ip_tables_net_ops); err1: @@ -2275,10 +2272,8 @@ static void __exit ip_tables_fini(void) { nf_unregister_sockopt(&ipt_sockopts); - xt_unregister_match(&icmp_matchstruct); - xt_unregister_target(&ipt_error_target); - xt_unregister_target(&ipt_standard_target); - + xt_unregister_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt)); + xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); unregister_pernet_subsys(&ip_tables_net_ops); } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 557fac9689c0..56782336474f 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -2190,22 +2190,23 @@ static int icmp6_checkentry(const struct xt_mtchk_param *par) } /* The built-in targets: standard (NULL) and error. 
*/ -static struct xt_target ip6t_standard_target __read_mostly = { - .name = IP6T_STANDARD_TARGET, - .targetsize = sizeof(int), - .family = NFPROTO_IPV6, +static struct xt_target ip6t_builtin_tg[] __read_mostly = { + { + .name = IP6T_STANDARD_TARGET, + .targetsize = sizeof(int), + .family = NFPROTO_IPV6, #ifdef CONFIG_COMPAT - .compatsize = sizeof(compat_int_t), - .compat_from_user = compat_standard_from_user, - .compat_to_user = compat_standard_to_user, + .compatsize = sizeof(compat_int_t), + .compat_from_user = compat_standard_from_user, + .compat_to_user = compat_standard_to_user, #endif -}; - -static struct xt_target ip6t_error_target __read_mostly = { - .name = IP6T_ERROR_TARGET, - .target = ip6t_error, - .targetsize = IP6T_FUNCTION_MAXNAMELEN, - .family = NFPROTO_IPV6, + }, + { + .name = IP6T_ERROR_TARGET, + .target = ip6t_error, + .targetsize = IP6T_FUNCTION_MAXNAMELEN, + .family = NFPROTO_IPV6, + }, }; static struct nf_sockopt_ops ip6t_sockopts = { @@ -2225,13 +2226,15 @@ static struct nf_sockopt_ops ip6t_sockopts = { .owner = THIS_MODULE, }; -static struct xt_match icmp6_matchstruct __read_mostly = { - .name = "icmp6", - .match = icmp6_match, - .matchsize = sizeof(struct ip6t_icmp), - .checkentry = icmp6_checkentry, - .proto = IPPROTO_ICMPV6, - .family = NFPROTO_IPV6, +static struct xt_match ip6t_builtin_mt[] __read_mostly = { + { + .name = "icmp6", + .match = icmp6_match, + .matchsize = sizeof(struct ip6t_icmp), + .checkentry = icmp6_checkentry, + .proto = IPPROTO_ICMPV6, + .family = NFPROTO_IPV6, + }, }; static int __net_init ip6_tables_net_init(struct net *net) @@ -2258,13 +2261,10 @@ static int __init ip6_tables_init(void) goto err1; /* Noone else will be downing sem now, so we won't sleep */ - ret = xt_register_target(&ip6t_standard_target); + ret = xt_register_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg)); if (ret < 0) goto err2; - ret = xt_register_target(&ip6t_error_target); - if (ret < 0) - goto err3; - ret = xt_register_match(&icmp6_matchstruct); + ret = xt_register_matches(ip6t_builtin_mt, ARRAY_SIZE(ip6t_builtin_mt)); if (ret < 0) goto err4; @@ -2277,11 +2277,9 @@ static int __init ip6_tables_init(void) return 0; err5: - xt_unregister_match(&icmp6_matchstruct); + xt_unregister_matches(ip6t_builtin_mt, ARRAY_SIZE(ip6t_builtin_mt)); err4: - xt_unregister_target(&ip6t_error_target); -err3: - xt_unregister_target(&ip6t_standard_target); + xt_unregister_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg)); err2: unregister_pernet_subsys(&ip6_tables_net_ops); err1: @@ -2292,10 +2290,8 @@ static void __exit ip6_tables_fini(void) { nf_unregister_sockopt(&ip6t_sockopts); - xt_unregister_match(&icmp6_matchstruct); - xt_unregister_target(&ip6t_error_target); - xt_unregister_target(&ip6t_standard_target); - + xt_unregister_matches(ip6t_builtin_mt, ARRAY_SIZE(ip6t_builtin_mt)); + xt_unregister_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg)); unregister_pernet_subsys(&ip6_tables_net_ops); } -- cgit v1.2.2 From d3e56c0ad8a2dab7ffd6179b64f00702149e5c9a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 22 Apr 2010 02:50:10 -0700 Subject: wimax: checking ERR_PTR vs null stch_skb is allocated with wimax_gnl_re_state_change_alloc(). That function returns ERR_PTRs on failure and doesn't return NULL. 
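In other words, the result must be tested with IS_ERR() rather than compared against NULL: an ERR_PTR()-encoded failure is a non-NULL pointer, so the old "if (stch_skb)" test treated the error case as success. A minimal sketch of the convention follows; the consume() helper is hypothetical and only illustrates the check, while the real fix is the one-line change to __wimax_state_change() below.

#include <linux/err.h>
#include <linux/skbuff.h>

/* Hypothetical consumer of a pointer that reports failure via ERR_PTR(). */
static int consume(struct sk_buff *skb)
{
	if (IS_ERR(skb))		/* correct: ERR_PTR(-ENOMEM) etc. is non-NULL */
		return PTR_ERR(skb);	/* recover the negative errno */

	/* skb is a valid buffer from here on */
	return 0;
}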
Signed-off-by: Dan Carpenter --- net/wimax/stack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wimax/stack.c b/net/wimax/stack.c index 1ed65dbdab03..62b1a6662209 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -315,7 +315,7 @@ void __wimax_state_change(struct wimax_dev *wimax_dev, enum wimax_st new_state) BUG(); } __wimax_state_set(wimax_dev, new_state); - if (stch_skb) + if (!IS_ERR(stch_skb)) wimax_gnl_re_state_change_send(wimax_dev, stch_skb, header); out: d_fnend(3, dev, "(wimax_dev %p new_state %u [old %u]) = void\n", -- cgit v1.2.2 From b8d92c9c141ee3dc9b3537b1f0ffb4a54ea8d9b2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 11 May 2010 12:42:04 +0200 Subject: mac80211: don't process work item with wrong frame When we process a frame, we currently just match it to the work struct by the MAC addresses, and not by the work type. This means that we can end up doing the work for an association request item when (for whatever reason) we receive another frame type, for example a probe response. Processing the wrong type of frame will lead to completely invalid data being processed, and will lead to various problems like thinking the association was successful even if the AP never sent an assocation response. Fix this by making each processing function check that it is invoked for the right work struct type only and continue processing otherwise (and drop frames that we didn't expect). This bug was uncovered during the debugging for https://bugzilla.kernel.org/show_bug.cgi?id=15862 but doesn't seem to be the cause for any of the various problems reported there. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/work.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/work.c b/net/mac80211/work.c index 1e1ea3007b06..b0ba58589ca3 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -32,6 +32,7 @@ #define IEEE80211_MAX_PROBE_TRIES 5 enum work_action { + WORK_ACT_MISMATCH, WORK_ACT_NONE, WORK_ACT_TIMEOUT, WORK_ACT_DONE, @@ -574,7 +575,7 @@ ieee80211_rx_mgmt_auth(struct ieee80211_work *wk, u16 auth_alg, auth_transaction, status_code; if (wk->type != IEEE80211_WORK_AUTH) - return WORK_ACT_NONE; + return WORK_ACT_MISMATCH; if (len < 24 + 6) return WORK_ACT_NONE; @@ -625,6 +626,9 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_work *wk, struct ieee802_11_elems elems; u8 *pos; + if (wk->type != IEEE80211_WORK_ASSOC) + return WORK_ACT_MISMATCH; + /* * AssocResp and ReassocResp have identical structure, so process both * of them in this function. 
@@ -680,6 +684,12 @@ ieee80211_rx_mgmt_probe_resp(struct ieee80211_work *wk, ASSERT_WORK_MTX(local); + if (wk->type != IEEE80211_WORK_DIRECT_PROBE) + return WORK_ACT_MISMATCH; + + if (len < 24 + 12) + return WORK_ACT_NONE; + baselen = (u8 *) mgmt->u.probe_resp.variable - (u8 *) mgmt; if (baselen > len) return WORK_ACT_NONE; @@ -694,7 +704,7 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local, struct ieee80211_rx_status *rx_status; struct ieee80211_mgmt *mgmt; struct ieee80211_work *wk; - enum work_action rma = WORK_ACT_NONE; + enum work_action rma; u16 fc; rx_status = (struct ieee80211_rx_status *) skb->cb; @@ -741,7 +751,17 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local, break; default: WARN_ON(1); + rma = WORK_ACT_NONE; } + + /* + * We've either received an unexpected frame, or we have + * multiple work items and need to match the frame to the + * right one. + */ + if (rma == WORK_ACT_MISMATCH) + continue; + /* * We've processed this frame for that work, so it can't * belong to another work struct. @@ -751,6 +771,9 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local, } switch (rma) { + case WORK_ACT_MISMATCH: + /* ignore this unmatched frame */ + break; case WORK_ACT_NONE: break; case WORK_ACT_DONE: -- cgit v1.2.2 From 5ce6e438d5d9ed8ed775cd1e94f92002c8da2bad Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 11 May 2010 16:20:57 +0200 Subject: mac80211: add offload channel switch support This adds support for offloading the channel switch operation to devices that support such, typically by having specific firmware API for it. The reasons for this could be that the firmware provides better timing or that regulatory enforcement done by the device requires special handling of CSAs. In order to allow drivers to specify the timing to the device, the new channel_switch callback will pass through the received frame's mactime, where available. Signed-off-by: Wey-Yi Guy Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/driver-ops.h | 11 +++++++++ net/mac80211/driver-trace.h | 49 +++++++++++++++++++++++++++++++++++++++ net/mac80211/ieee80211_i.h | 3 ++- net/mac80211/mlme.c | 56 +++++++++++++++++++++++++++++++++++++++++---- 4 files changed, 114 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 997008e236ff..5662bb5190c3 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -373,4 +373,15 @@ static inline void drv_flush(struct ieee80211_local *local, bool drop) if (local->ops->flush) local->ops->flush(&local->hw, drop); } + +static inline void drv_channel_switch(struct ieee80211_local *local, + struct ieee80211_channel_switch *ch_switch) +{ + might_sleep(); + + local->ops->channel_switch(&local->hw, ch_switch); + + trace_drv_channel_switch(local, ch_switch); +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index ce734b58d07a..6a9b2342a9c2 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -774,6 +774,34 @@ TRACE_EVENT(drv_flush, ) ); +TRACE_EVENT(drv_channel_switch, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_channel_switch *ch_switch), + + TP_ARGS(local, ch_switch), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(u64, timestamp) + __field(bool, block_tx) + __field(u16, freq) + __field(u8, count) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->timestamp = ch_switch->timestamp; + __entry->block_tx = ch_switch->block_tx; + __entry->freq = ch_switch->channel->center_freq; + __entry->count = ch_switch->count; + ), + + TP_printk( + LOCAL_PR_FMT " new freq:%u count:%d", + LOCAL_PR_ARG, __entry->freq, __entry->count + ) +); + /* * Tracing for API calls that drivers call. */ @@ -992,6 +1020,27 @@ TRACE_EVENT(api_sta_block_awake, ) ); +TRACE_EVENT(api_chswitch_done, + TP_PROTO(struct ieee80211_sub_if_data *sdata, bool success), + + TP_ARGS(sdata, success), + + TP_STRUCT__entry( + VIF_ENTRY + __field(bool, success) + ), + + TP_fast_assign( + VIF_ASSIGN; + __entry->success = success; + ), + + TP_printk( + VIF_PR_FMT " success=%d", + VIF_PR_ARG, __entry->success + ) +); + /* * Tracing for internal functions * (which may also be called in response to driver calls) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 69e7f4131f46..1c8e24706685 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -999,7 +999,8 @@ int ieee80211_max_network_latency(struct notifier_block *nb, unsigned long data, void *dummy); void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel_sw_ie *sw_elem, - struct ieee80211_bss *bss); + struct ieee80211_bss *bss, + u64 timestamp); void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 7bfb0ebaaf00..6b74489fb9c6 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -341,7 +341,11 @@ static void ieee80211_chswitch_work(struct work_struct *work) goto out; sdata->local->oper_channel = sdata->local->csa_channel; - ieee80211_hw_config(sdata->local, IEEE80211_CONF_CHANGE_CHANNEL); + if (!sdata->local->ops->channel_switch) { + /* call "hw_config" only if doing sw channel switch */ + ieee80211_hw_config(sdata->local, + IEEE80211_CONF_CHANGE_CHANNEL); + } /* XXX: shouldn't really modify cfg80211-owned data! 
*/ ifmgd->associated->channel = sdata->local->oper_channel; @@ -353,6 +357,29 @@ static void ieee80211_chswitch_work(struct work_struct *work) mutex_unlock(&ifmgd->mtx); } +void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success) +{ + struct ieee80211_sub_if_data *sdata; + struct ieee80211_if_managed *ifmgd; + + sdata = vif_to_sdata(vif); + ifmgd = &sdata->u.mgd; + + trace_api_chswitch_done(sdata, success); + if (!success) { + /* + * If the channel switch was not successful, stay + * around on the old channel. We currently lack + * good handling of this situation, possibly we + * should just drop the association. + */ + sdata->local->csa_channel = sdata->local->oper_channel; + } + + ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work); +} +EXPORT_SYMBOL(ieee80211_chswitch_done); + static void ieee80211_chswitch_timer(unsigned long data) { struct ieee80211_sub_if_data *sdata = @@ -369,7 +396,8 @@ static void ieee80211_chswitch_timer(unsigned long data) void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel_sw_ie *sw_elem, - struct ieee80211_bss *bss) + struct ieee80211_bss *bss, + u64 timestamp) { struct cfg80211_bss *cbss = container_of((void *)bss, struct cfg80211_bss, priv); @@ -397,6 +425,24 @@ void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, sdata->local->csa_channel = new_ch; + if (sdata->local->ops->channel_switch) { + /* use driver's channel switch callback */ + struct ieee80211_channel_switch ch_switch; + memset(&ch_switch, 0, sizeof(ch_switch)); + ch_switch.timestamp = timestamp; + if (sw_elem->mode) { + ch_switch.block_tx = true; + ieee80211_stop_queues_by_reason(&sdata->local->hw, + IEEE80211_QUEUE_STOP_REASON_CSA); + } + ch_switch.channel = new_ch; + ch_switch.count = sw_elem->count; + ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED; + drv_channel_switch(sdata->local, &ch_switch); + return; + } + + /* channel switch handled in software */ if (sw_elem->count <= 1) { ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work); } else { @@ -1316,7 +1362,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, ETH_ALEN) == 0)) { struct ieee80211_channel_sw_ie *sw_elem = (struct ieee80211_channel_sw_ie *)elems->ch_switch_elem; - ieee80211_sta_process_chanswitch(sdata, sw_elem, bss); + ieee80211_sta_process_chanswitch(sdata, sw_elem, + bss, rx_status->mactime); } } @@ -1648,7 +1695,8 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ieee80211_sta_process_chanswitch(sdata, &mgmt->u.action.u.chan_switch.sw_elem, - (void *)ifmgd->associated->priv); + (void *)ifmgd->associated->priv, + rx_status->mactime); break; } mutex_unlock(&ifmgd->mtx); -- cgit v1.2.2 From 9feaddc77b2a2ee460f39d1420f9675db972659e Mon Sep 17 00:00:00 2001 From: Wey-Yi Guy Date: Wed, 5 May 2010 20:34:02 -0700 Subject: mac80211: check channel switch mode for future frames transmit Check the mode in channel switch ie for either 0 or 1 on transmission. A channel switch mode set to 1 means that the STA in a BSS to which the frame containing the element is addressed shall transmit no further frames within the BSS until the scheduled channel switch. Signed-off-by: Wey-Yi Guy Signed-off-by: John W. 
Linville --- net/mac80211/mlme.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 6b74489fb9c6..a444d03f6774 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -446,7 +446,8 @@ void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, if (sw_elem->count <= 1) { ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work); } else { - ieee80211_stop_queues_by_reason(&sdata->local->hw, + if (sw_elem->mode) + ieee80211_stop_queues_by_reason(&sdata->local->hw, IEEE80211_QUEUE_STOP_REASON_CSA); ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED; mod_timer(&ifmgd->chswitch_timer, -- cgit v1.2.2 From 058897a4e93a6fc6d331e2ef591b2d6571431265 Mon Sep 17 00:00:00 2001 From: Abhijeet Kolekar Date: Tue, 11 May 2010 11:22:11 -0700 Subject: mac80211: fix paged defragmentation Paged RX skb patch broke the defragmentation. We need to read hdr again after linearization. It fixes following bug http://bugzilla.intellinuxwireless.org/show_bug.cgi?id=2194 Signed-off-by: Zhu, Yi Signed-off-by: Abhijeet Kolekar Signed-off-by: John W. Linville --- net/mac80211/rx.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index e4f325f68fd3..8fa99554f4e2 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1252,6 +1252,12 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) if (skb_linearize(rx->skb)) return RX_DROP_UNUSABLE; + /* + * skb_linearize() might change the skb->data and + * previously cached variables (in this case, hdr) need to + * be refreshed with the new data. + */ + hdr = (struct ieee80211_hdr *)rx->skb->data; seq = (sc & IEEE80211_SCTL_SEQ) >> 4; if (frag == 0) { -- cgit v1.2.2 From 9b7ce2b76265b3bf133aa1919e5022302981dff6 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 12 May 2010 10:11:35 +0000 Subject: netfilter: xtables: add missing depends for xt_TEE Aviod these link-time errors when IPV6=m, XT_TEE=y: net/built-in.o: In function `tee_tg_route6': xt_TEE.c:(.text+0x45ca5): undefined reference to `ip6_route_output' net/built-in.o: In function `tee_tg6': xt_TEE.c:(.text+0x45d79): undefined reference to `ip6_local_out' Signed-off-by: Jan Engelhardt Acked-by: Randy Dunlap Signed-off-by: David S. Miller --- net/netfilter/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 673a6c8f0e95..e223f47b8bae 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -505,6 +505,7 @@ config NETFILTER_XT_TARGET_RATEEST config NETFILTER_XT_TARGET_TEE tristate '"TEE" - packet cloning to alternate destiantion' depends on NETFILTER_ADVANCED + depends on (IPV6 || IPV6=n) ---help--- This option adds a "TEE" target with which a packet can be cloned and this clone be rerouted to another nexthop. -- cgit v1.2.2 From df4ef33716232077564024baf0e5f2c74a295dfd Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 11 May 2010 14:30:03 +0000 Subject: tipc: Eliminate obsolete port's "congested_link" field Eliminate a field of the TIPC port structure that is populated, but never referenced. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker Signed-off-by: David S. 
Miller --- net/tipc/link.c | 2 -- net/tipc/port.c | 1 - net/tipc/port.h | 2 -- 3 files changed, 5 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index c76e82e5f982..0b86f6aef3d8 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -561,7 +561,6 @@ static int link_schedule_port(struct link *l_ptr, u32 origport, u32 sz) goto exit; if (!list_empty(&p_ptr->wait_list)) goto exit; - p_ptr->congested_link = l_ptr; p_ptr->publ.congested = 1; p_ptr->waiting_pkts = 1 + ((sz - 1) / link_max_pkt(l_ptr)); list_add_tail(&p_ptr->wait_list, &l_ptr->waiting_ports); @@ -592,7 +591,6 @@ void tipc_link_wakeup_ports(struct link *l_ptr, int all) if (win <= 0) break; list_del_init(&p_ptr->wait_list); - p_ptr->congested_link = NULL; spin_lock_bh(p_ptr->publ.lock); p_ptr->publ.congested = 0; p_ptr->wakeup(&p_ptr->publ); diff --git a/net/tipc/port.c b/net/tipc/port.c index e70d27ea6578..c703ecbe09d7 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -247,7 +247,6 @@ struct tipc_port *tipc_createport_raw(void *usr_handle, p_ptr->sent = 1; INIT_LIST_HEAD(&p_ptr->wait_list); INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list); - p_ptr->congested_link = NULL; p_ptr->dispatcher = dispatcher; p_ptr->wakeup = wakeup; p_ptr->user_port = NULL; diff --git a/net/tipc/port.h b/net/tipc/port.h index ff31ee4a1dc3..8d1652aab298 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -75,7 +75,6 @@ struct user_port { * @wakeup: ptr to routine to call when port is no longer congested * @user_port: ptr to user port associated with port (if any) * @wait_list: adjacent ports in list of ports waiting on link congestion - * @congested_link: ptr to congested link port is waiting on * @waiting_pkts: * @sent: * @acked: @@ -95,7 +94,6 @@ struct port { void (*wakeup)(struct tipc_port *); struct user_port *user_port; struct list_head wait_list; - struct link *congested_link; u32 waiting_pkts; u32 sent; u32 acked; -- cgit v1.2.2 From b82834e66aabb6e26c2b792a46d44bab346c46fb Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 11 May 2010 14:30:04 +0000 Subject: tipc: Eliminate unused argument in print statement Eliminate an argument in a print statement that has no corresponding format specification. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/link.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index 0b86f6aef3d8..c95038f42652 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -3328,9 +3328,7 @@ static void link_print(struct link *l_ptr, struct print_buf *buf, if (l_ptr->next_out) tipc_printf(buf, "%u..", msg_seqno(buf_msg(l_ptr->next_out))); - tipc_printf(buf, "%u]", - msg_seqno(buf_msg - (l_ptr->last_out)), l_ptr->out_queue_size); + tipc_printf(buf, "%u]", msg_seqno(buf_msg(l_ptr->last_out))); if ((mod(msg_seqno(buf_msg(l_ptr->last_out)) - msg_seqno(buf_msg(l_ptr->first_out))) != (l_ptr->out_queue_size - 1)) || -- cgit v1.2.2 From 289464e4fc2ebdef20be5f6b58414136f75107e2 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 11 May 2010 14:30:05 +0000 Subject: tipc: Prune unused data structures from configuration service Eliminate some unused data structures in the TIPC configuration service that relate to the handling of link subscriptions, which were not supported when TIPC 1.5 was introduced. If and when support for link subscriptions is offered in TIPC, these elements may need to be re-introduced. 
Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/config.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/tipc/config.c b/net/tipc/config.c index ca3544d030c7..7370241412cb 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -56,9 +56,6 @@ struct subscr_data { struct manager { u32 user_ref; u32 port_ref; - u32 subscr_ref; - u32 link_subscriptions; - struct list_head link_subscribers; }; static struct manager mng = { 0}; @@ -70,12 +67,6 @@ static int req_tlv_space; /* request message TLV area size */ static int rep_headroom; /* reply message headroom to use */ -void tipc_cfg_link_event(u32 addr, char *name, int up) -{ - /* TIPC DOESN'T HANDLE LINK EVENT SUBSCRIPTIONS AT THE MOMENT */ -} - - struct sk_buff *tipc_cfg_reply_alloc(int payload_size) { struct sk_buff *buf; @@ -130,12 +121,24 @@ struct sk_buff *tipc_cfg_reply_string_type(u16 tlv_type, char *string) } - - #if 0 /* Now obsolete code for handling commands not yet implemented the new way */ +/* + * Some of this code assumed that the manager structure contains two added + * fields: + * u32 link_subscriptions; + * struct list_head link_subscribers; + * which are currently not present. These fields may need to be re-introduced + * if and when support for link subscriptions is added. + */ + +void tipc_cfg_link_event(u32 addr, char *name, int up) +{ + /* TIPC DOESN'T HANDLE LINK EVENT SUBSCRIPTIONS AT THE MOMENT */ +} + int tipc_cfg_cmd(const struct tipc_cmd_msg * msg, char *data, u32 sz, @@ -667,9 +670,6 @@ int tipc_cfg_init(void) struct tipc_name_seq seq; int res; - memset(&mng, 0, sizeof(mng)); - INIT_LIST_HEAD(&mng.link_subscribers); - res = tipc_attach(&mng.user_ref, NULL, NULL); if (res) goto failed; -- cgit v1.2.2 From 9ccc2eb4e12a39bd8430952b76c56c6267018500 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 11 May 2010 14:30:06 +0000 Subject: tipc: Eliminate unnecessary initialization in native API send routines Eliminate a couple of instances where TIPC's native API send routines were doing pointless initialization of local variables. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/port.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/tipc/port.c b/net/tipc/port.c index c703ecbe09d7..7641db667e0b 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -1452,7 +1452,7 @@ int tipc_forward2name(u32 ref, struct port *p_ptr; struct tipc_msg *msg; u32 destnode = domain; - u32 destport = 0; + u32 destport; int res; p_ptr = tipc_port_deref(ref); @@ -1524,7 +1524,7 @@ int tipc_forward_buf2name(u32 ref, struct port *p_ptr; struct tipc_msg *msg; u32 destnode = domain; - u32 destport = 0; + u32 destport; int res; p_ptr = (struct port *)tipc_ref_deref(ref); -- cgit v1.2.2 From 3aec9cc936217a30dbb45a9b6808a39571674e66 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 11 May 2010 14:30:07 +0000 Subject: tipc: Rename "multicast-link" to "broadcast-link" Make a cosmetic change to the name displayed for the broadcast link, to better reflect its true nature. Since TIPC utilizes this link to distribute name table information, in addition to multicast messages sent by user applications, the prior name "multicast-link" is no longer appropriate. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker Signed-off-by: David S. 
Miller --- net/tipc/bcast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 90a051912c03..a18f26deb659 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -119,7 +119,7 @@ static struct bclink *bclink = NULL; static struct link *bcl = NULL; static DEFINE_SPINLOCK(bc_lock); -const char tipc_bclink_name[] = "multicast-link"; +const char tipc_bclink_name[] = "broadcast-link"; static u32 buf_seqno(struct sk_buff *buf) -- cgit v1.2.2 From 107e7be628821dcb78c43adce0331e8ddb40eabd Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 11 May 2010 14:30:08 +0000 Subject: tipc: Add support for "-s" configuration option Provide initial support for displaying overall TIPC status/statistics information at runtime. Currently, only version info for the TIPC kernel module is displayed. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/config.c | 40 +++++++++++++++++++++++++++++++++++++++- net/tipc/core.c | 2 -- net/tipc/core.h | 3 +++ 3 files changed, 42 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/tipc/config.c b/net/tipc/config.c index 7370241412cb..961d1b097146 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -246,13 +246,48 @@ static void cfg_cmd_event(struct tipc_cmd_msg *msg, default: rv = tipc_cfg_cmd(msg, data, sz, (u32 *)&msg_sect[1].iov_len, orig); } - exit: +exit: rmsg.result_len = htonl(msg_sect[1].iov_len); rmsg.retval = htonl(rv); tipc_cfg_respond(msg_sect, 2u, orig); } #endif +#define MAX_STATS_INFO 2000 + +static struct sk_buff *tipc_show_stats(void) +{ + struct sk_buff *buf; + struct tlv_desc *rep_tlv; + struct print_buf pb; + int str_len; + u32 value; + + if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) + return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); + + value = ntohl(*(u32 *)TLV_DATA(req_tlv_area)); + if (value != 0) + return tipc_cfg_reply_error_string("unsupported argument"); + + buf = tipc_cfg_reply_alloc(TLV_SPACE(MAX_STATS_INFO)); + if (buf == NULL) + return NULL; + + rep_tlv = (struct tlv_desc *)buf->data; + tipc_printbuf_init(&pb, (char *)TLV_DATA(rep_tlv), MAX_STATS_INFO); + + tipc_printf(&pb, "TIPC version " TIPC_MOD_VER "\n"); + + /* Use additional tipc_printf()'s to return more info ... 
*/ + + str_len = tipc_printbuf_validate(&pb); + skb_put(buf, TLV_SPACE(str_len)); + TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len); + + return buf; +} + static struct sk_buff *cfg_enable_bearer(void) { struct tipc_bearer_config *args; @@ -536,6 +571,9 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_DUMP_LOG: rep_tlv_buf = tipc_log_dump(); break; + case TIPC_CMD_SHOW_STATS: + rep_tlv_buf = tipc_show_stats(); + break; case TIPC_CMD_SET_LINK_TOL: case TIPC_CMD_SET_LINK_PRI: case TIPC_CMD_SET_LINK_WINDOW: diff --git a/net/tipc/core.c b/net/tipc/core.c index 4e84c8431f32..b47d1842a970 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -49,8 +49,6 @@ #include "config.h" -#define TIPC_MOD_VER "2.0.0" - #ifndef CONFIG_TIPC_ZONES #define CONFIG_TIPC_ZONES 3 #endif diff --git a/net/tipc/core.h b/net/tipc/core.h index c58a1d16563a..1e149f55f3e2 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -59,6 +59,9 @@ #include #include + +#define TIPC_MOD_VER "2.0.0" + /* * TIPC sanity test macros */ -- cgit v1.2.2 From 15e979da7c9ddddd55d2eb81e962dbb2aac51ad1 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 11 May 2010 14:30:10 +0000 Subject: tipc: remove abstraction for link_max_pkt This is just a straight return of a field; there is no value in the abstraction of hiding it behind a function. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/link.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index c95038f42652..441b26a57850 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -232,11 +232,6 @@ static int link_congested(struct link *l_ptr) return (l_ptr->out_queue_size >= l_ptr->queue_limit[0]); } -static u32 link_max_pkt(struct link *l_ptr) -{ - return l_ptr->max_pkt; -} - static void link_init_max_pkt(struct link *l_ptr) { u32 max_pkt; @@ -562,7 +557,7 @@ static int link_schedule_port(struct link *l_ptr, u32 origport, u32 sz) if (!list_empty(&p_ptr->wait_list)) goto exit; p_ptr->publ.congested = 1; - p_ptr->waiting_pkts = 1 + ((sz - 1) / link_max_pkt(l_ptr)); + p_ptr->waiting_pkts = 1 + ((sz - 1) / l_ptr->max_pkt); list_add_tail(&p_ptr->wait_list, &l_ptr->waiting_ports); l_ptr->stats.link_congs++; exit: @@ -1015,7 +1010,7 @@ static int link_bundle_buf(struct link *l_ptr, return 0; if (skb_tailroom(bundler) < (pad + size)) return 0; - if (link_max_pkt(l_ptr) < (to_pos + size)) + if (l_ptr->max_pkt < (to_pos + size)) return 0; skb_put(bundler, pad + size); @@ -1062,7 +1057,7 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf) u32 queue_size = l_ptr->out_queue_size; u32 imp = msg_tot_importance(msg); u32 queue_limit = l_ptr->queue_limit[imp]; - u32 max_packet = link_max_pkt(l_ptr); + u32 max_packet = l_ptr->max_pkt; msg_set_prevnode(msg, tipc_own_addr); /* If routed message */ @@ -1193,7 +1188,7 @@ static int link_send_buf_fast(struct link *l_ptr, struct sk_buff *buf, int res = msg_data_sz(msg); if (likely(!link_congested(l_ptr))) { - if (likely(msg_size(msg) <= link_max_pkt(l_ptr))) { + if (likely(msg_size(msg) <= l_ptr->max_pkt)) { if (likely(list_empty(&l_ptr->b_ptr->cong_links))) { link_add_to_outqueue(l_ptr, buf, msg); if (likely(tipc_bearer_send(l_ptr->b_ptr, buf, @@ -1210,7 +1205,7 @@ static int link_send_buf_fast(struct link *l_ptr, struct sk_buff *buf, } } else - *used_max_pkt = link_max_pkt(l_ptr); + *used_max_pkt = l_ptr->max_pkt; } return 
tipc_link_send_buf(l_ptr, buf); /* All other cases */ } @@ -1317,7 +1312,7 @@ exit: * then re-try fast path or fragment the message */ - sender->publ.max_pkt = link_max_pkt(l_ptr); + sender->publ.max_pkt = l_ptr->max_pkt; tipc_node_unlock(node); read_unlock_bh(&tipc_net_lock); @@ -1480,8 +1475,8 @@ error: tipc_node_unlock(node); goto reject; } - if (link_max_pkt(l_ptr) < max_pkt) { - sender->publ.max_pkt = link_max_pkt(l_ptr); + if (l_ptr->max_pkt < max_pkt) { + sender->publ.max_pkt = l_ptr->max_pkt; tipc_node_unlock(node); for (; buf_chain; buf_chain = buf) { buf = buf_chain->next; @@ -2679,7 +2674,7 @@ int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf) u32 dsz = msg_data_sz(inmsg); unchar *crs = buf->data; u32 rest = insize; - u32 pack_sz = link_max_pkt(l_ptr); + u32 pack_sz = l_ptr->max_pkt; u32 fragm_sz = pack_sz - INT_H_SIZE; u32 fragm_no = 1; u32 destaddr; @@ -3125,7 +3120,7 @@ static int tipc_link_stats(const char *name, char *buf, const u32 buf_size) tipc_printf(&pb, "Link <%s>\n" " %s MTU:%u Priority:%u Tolerance:%u ms" " Window:%u packets\n", - l_ptr->name, status, link_max_pkt(l_ptr), + l_ptr->name, status, l_ptr->max_pkt, l_ptr->priority, l_ptr->tolerance, l_ptr->queue_limit[0]); tipc_printf(&pb, " RX packets:%u fragments:%u/%u bundles:%u/%u\n", l_ptr->next_in_no - l_ptr->stats.recv_info, @@ -3270,7 +3265,7 @@ u32 tipc_link_get_max_pkt(u32 dest, u32 selector) tipc_node_lock(n_ptr); l_ptr = n_ptr->active_links[selector & 1]; if (l_ptr) - res = link_max_pkt(l_ptr); + res = l_ptr->max_pkt; tipc_node_unlock(n_ptr); } read_unlock_bh(&tipc_net_lock); -- cgit v1.2.2 From 01fee256a675f6492fc6945bbb9b59640d8705d4 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 11 May 2010 14:30:11 +0000 Subject: tipc: Relocate trivial link status functions to header file Rather than live in link.c where they can only be used in that file alone, these helper routines are better served by being in link.h Relocated are the following: link_working_working link_working_unknown link_reset_unknown link_reset_reset link_blocked link_congested Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker Signed-off-by: David S. 
Miller --- net/tipc/discover.c | 2 +- net/tipc/link.c | 30 ------------------------------ net/tipc/link.h | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 74b7d1e28aec..ce1390a0cd00 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -224,7 +224,7 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr) memcpy(addr, &media_addr, sizeof(*addr)); tipc_link_reset(link); } - link_fully_up = (link->state == WORKING_WORKING); + link_fully_up = link_working_working(link); spin_unlock_bh(&n_ptr->lock); if ((type == DSC_RESP_MSG) || link_fully_up) return; diff --git a/net/tipc/link.c b/net/tipc/link.c index 441b26a57850..e8320bf78d5a 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -202,36 +202,6 @@ static unsigned int align(unsigned int i) return (i + 3) & ~3u; } -static int link_working_working(struct link *l_ptr) -{ - return (l_ptr->state == WORKING_WORKING); -} - -static int link_working_unknown(struct link *l_ptr) -{ - return (l_ptr->state == WORKING_UNKNOWN); -} - -static int link_reset_unknown(struct link *l_ptr) -{ - return (l_ptr->state == RESET_UNKNOWN); -} - -static int link_reset_reset(struct link *l_ptr) -{ - return (l_ptr->state == RESET_RESET); -} - -static int link_blocked(struct link *l_ptr) -{ - return (l_ptr->exp_msg_count || l_ptr->blocked); -} - -static int link_congested(struct link *l_ptr) -{ - return (l_ptr->out_queue_size >= l_ptr->queue_limit[0]); -} - static void link_init_max_pkt(struct link *l_ptr) { u32 max_pkt; diff --git a/net/tipc/link.h b/net/tipc/link.h index 6a51e38ad25c..2e5385c47d30 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -292,4 +292,39 @@ static inline u32 lesser(u32 left, u32 right) return less_eq(left, right) ? left : right; } + +/* + * Link status checking routines + */ + +static inline int link_working_working(struct link *l_ptr) +{ + return (l_ptr->state == WORKING_WORKING); +} + +static inline int link_working_unknown(struct link *l_ptr) +{ + return (l_ptr->state == WORKING_UNKNOWN); +} + +static inline int link_reset_unknown(struct link *l_ptr) +{ + return (l_ptr->state == RESET_UNKNOWN); +} + +static inline int link_reset_reset(struct link *l_ptr) +{ + return (l_ptr->state == RESET_RESET); +} + +static inline int link_blocked(struct link *l_ptr) +{ + return (l_ptr->exp_msg_count || l_ptr->blocked); +} + +static inline int link_congested(struct link *l_ptr) +{ + return (l_ptr->out_queue_size >= l_ptr->queue_limit[0]); +} + #endif -- cgit v1.2.2 From c68ca7b72017f8f52e7aed0d2a6ecfaede133b6b Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 11 May 2010 14:30:12 +0000 Subject: tipc: add tipc_ prefix to fcns targeted for un-inlining These functions have enough code in them such that they seem like sensible targets for un-inlining. Prior to doing that, this adds the tipc_ prefix to the functions, so that in the event of a panic dump or similar, the subsystem from which the functions come from is immediately clear. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker Signed-off-by: David S. 
Miller --- net/tipc/addr.h | 8 ++++---- net/tipc/bcast.c | 2 +- net/tipc/bcast.h | 12 ++++++------ net/tipc/bearer.c | 4 ++-- net/tipc/cluster.c | 2 +- net/tipc/discover.c | 6 +++--- net/tipc/link.c | 20 ++++++++++---------- net/tipc/msg.h | 14 +++++++------- net/tipc/name_distr.c | 2 +- net/tipc/name_table.c | 2 +- net/tipc/net.c | 4 ++-- net/tipc/node.c | 12 ++++++------ net/tipc/port.c | 22 +++++++++++----------- 13 files changed, 55 insertions(+), 55 deletions(-) (limited to 'net') diff --git a/net/tipc/addr.h b/net/tipc/addr.h index 3ba67e6ce03e..4d4aee0e4232 100644 --- a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -67,7 +67,7 @@ static inline int may_route(u32 addr) return(addr ^ tipc_own_addr) >> 11; } -static inline int in_scope(u32 domain, u32 addr) +static inline int tipc_in_scope(u32 domain, u32 addr) { if (!domain || (domain == addr)) return 1; @@ -79,10 +79,10 @@ static inline int in_scope(u32 domain, u32 addr) } /** - * addr_scope - convert message lookup domain to equivalent 2-bit scope value + * tipc_addr_scope - convert message lookup domain to a 2-bit scope value */ -static inline int addr_scope(u32 domain) +static inline int tipc_addr_scope(u32 domain) { if (likely(!domain)) return TIPC_ZONE_SCOPE; @@ -110,7 +110,7 @@ static inline int addr_domain(int sc) return tipc_addr(tipc_zone(tipc_own_addr), 0, 0); } -static inline char *addr_string_fill(char *string, u32 addr) +static inline char *tipc_addr_string_fill(char *string, u32 addr) { snprintf(string, 16, "<%u.%u.%u>", tipc_zone(addr), tipc_cluster(addr), tipc_node(addr)); diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index a18f26deb659..a8f22e78c3f3 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -275,7 +275,7 @@ static void bclink_send_nack(struct tipc_node *n_ptr) buf = buf_acquire(INT_H_SIZE); if (buf) { msg = buf_msg(buf); - msg_init(msg, BCAST_PROTOCOL, STATE_MSG, + tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, n_ptr->addr); msg_set_mc_netid(msg, tipc_net_id); msg_set_bcast_ack(msg, mod(n_ptr->bclink.last_in)); diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 4c1771e95c99..2b1c4a755dfa 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -74,7 +74,7 @@ extern const char tipc_bclink_name[]; /** - * nmap_add - add a node to a node map + * tipc_nmap_add - add a node to a node map */ static inline void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node) @@ -90,7 +90,7 @@ static inline void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node) } /** - * nmap_remove - remove a node from a node map + * tipc_nmap_remove - remove a node from a node map */ static inline void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node) @@ -106,7 +106,7 @@ static inline void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node) } /** - * nmap_equal - test for equality of node maps + * tipc_nmap_equal - test for equality of node maps */ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b) @@ -115,7 +115,7 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, struct tipc_node_m } /** - * nmap_diff - find differences between node maps + * tipc_nmap_diff - find differences between node maps * @nm_a: input node map A * @nm_b: input node map B * @nm_diff: output node map A-B (i.e. 
nodes of A that are not in B) @@ -143,7 +143,7 @@ static inline void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_m } /** - * port_list_add - add a port to a port list, ensuring no duplicates + * tipc_port_list_add - add a port to a port list, ensuring no duplicates */ static inline void tipc_port_list_add(struct port_list *pl_ptr, u32 port) @@ -176,7 +176,7 @@ static inline void tipc_port_list_add(struct port_list *pl_ptr, u32 port) } /** - * port_list_free - free dynamically created entries in port_list chain + * tipc_port_list_free - free dynamically created entries in port_list chain * * Note: First item is on stack, so it doesn't need to be released */ diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 78091375ca12..ccec12f0ccc9 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -493,7 +493,7 @@ int tipc_enable_bearer(const char *name, u32 bcast_scope, u32 priority) return -EINVAL; } if (!tipc_addr_domain_valid(bcast_scope) || - !in_scope(bcast_scope, tipc_own_addr)) { + !tipc_in_scope(bcast_scope, tipc_own_addr)) { warn("Bearer <%s> rejected, illegal broadcast scope\n", name); return -EINVAL; } @@ -571,7 +571,7 @@ restart: spin_lock_init(&b_ptr->publ.lock); write_unlock_bh(&tipc_net_lock); info("Enabled bearer <%s>, discovery domain %s, priority %u\n", - name, addr_string_fill(addr_string, bcast_scope), priority); + name, tipc_addr_string_fill(addr_string, bcast_scope), priority); return 0; failed: write_unlock_bh(&tipc_net_lock); diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c index a7eac00cd363..e68f705381bc 100644 --- a/net/tipc/cluster.c +++ b/net/tipc/cluster.c @@ -238,7 +238,7 @@ static struct sk_buff *tipc_cltr_prepare_routing_msg(u32 data_size, u32 dest) if (buf) { msg = buf_msg(buf); memset((char *)msg, 0, size); - msg_init(msg, ROUTE_DISTRIBUTOR, 0, INT_H_SIZE, dest); + tipc_msg_init(msg, ROUTE_DISTRIBUTOR, 0, INT_H_SIZE, dest); } return buf; } diff --git a/net/tipc/discover.c b/net/tipc/discover.c index ce1390a0cd00..fc1fcf5e6b53 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -120,7 +120,7 @@ static struct sk_buff *tipc_disc_init_msg(u32 type, if (buf) { msg = buf_msg(buf); - msg_init(msg, LINK_CONFIG, type, DSC_H_SIZE, dest_domain); + tipc_msg_init(msg, LINK_CONFIG, type, DSC_H_SIZE, dest_domain); msg_set_non_seq(msg, 1); msg_set_req_links(msg, req_links); msg_set_dest_domain(msg, dest_domain); @@ -144,7 +144,7 @@ static void disc_dupl_alert(struct bearer *b_ptr, u32 node_addr, char media_addr_str[64]; struct print_buf pb; - addr_string_fill(node_addr_str, node_addr); + tipc_addr_string_fill(node_addr_str, node_addr); tipc_printbuf_init(&pb, media_addr_str, sizeof(media_addr_str)); tipc_media_addr_printf(&pb, media_addr); tipc_printbuf_validate(&pb); @@ -183,7 +183,7 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr) disc_dupl_alert(b_ptr, tipc_own_addr, &media_addr); return; } - if (!in_scope(dest, tipc_own_addr)) + if (!tipc_in_scope(dest, tipc_own_addr)) return; if (is_slave(tipc_own_addr) && is_slave(orig)) return; diff --git a/net/tipc/link.c b/net/tipc/link.c index e8320bf78d5a..a3616b99529b 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -433,7 +433,7 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer, l_ptr->pmsg = (struct tipc_msg *)&l_ptr->proto_msg; msg = l_ptr->pmsg; - msg_init(msg, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, l_ptr->addr); + tipc_msg_init(msg, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, l_ptr->addr); msg_set_size(msg, sizeof(l_ptr->proto_msg)); msg_set_session(msg, 
(tipc_random & 0xffff)); msg_set_bearer_id(msg, b_ptr->identity); @@ -1025,7 +1025,7 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf) u32 size = msg_size(msg); u32 dsz = msg_data_sz(msg); u32 queue_size = l_ptr->out_queue_size; - u32 imp = msg_tot_importance(msg); + u32 imp = tipc_msg_tot_importance(msg); u32 queue_limit = l_ptr->queue_limit[imp]; u32 max_packet = l_ptr->max_pkt; @@ -1090,7 +1090,7 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf) struct tipc_msg bundler_hdr; if (bundler) { - msg_init(&bundler_hdr, MSG_BUNDLER, OPEN_MSG, + tipc_msg_init(&bundler_hdr, MSG_BUNDLER, OPEN_MSG, INT_H_SIZE, l_ptr->addr); skb_copy_to_linear_data(bundler, &bundler_hdr, INT_H_SIZE); @@ -1243,7 +1243,7 @@ again: * (Must not hold any locks while building message.) */ - res = msg_build(hdr, msg_sect, num_sect, sender->publ.max_pkt, + res = tipc_msg_build(hdr, msg_sect, num_sect, sender->publ.max_pkt, !sender->user_port, &buf); read_lock_bh(&tipc_net_lock); @@ -1354,7 +1354,7 @@ again: /* Prepare reusable fragment header: */ msg_dbg(hdr, ">FRAGMENTING>"); - msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT, + tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT, INT_H_SIZE, msg_destnode(hdr)); msg_set_link_selector(&fragm_hdr, sender->publ.ref); msg_set_size(&fragm_hdr, max_pkt); @@ -1613,7 +1613,7 @@ static void link_reset_all(unsigned long addr) tipc_node_lock(n_ptr); warn("Resetting all links to %s\n", - addr_string_fill(addr_string, n_ptr->addr)); + tipc_addr_string_fill(addr_string, n_ptr->addr)); for (i = 0; i < MAX_BEARERS; i++) { if (n_ptr->links[i]) { @@ -1655,7 +1655,7 @@ static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf) n_ptr = l_ptr->owner->next; tipc_node_lock(n_ptr); - addr_string_fill(addr_string, n_ptr->addr); + tipc_addr_string_fill(addr_string, n_ptr->addr); tipc_printf(TIPC_OUTPUT, "Multicast link info for %s\n", addr_string); tipc_printf(TIPC_OUTPUT, "Supported: %d, ", n_ptr->bclink.supported); tipc_printf(TIPC_OUTPUT, "Acked: %u\n", n_ptr->bclink.acked); @@ -2398,7 +2398,7 @@ void tipc_link_changeover(struct link *l_ptr) return; } - msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL, + tipc_msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL, ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr); msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); msg_set_msgcnt(&tunnel_hdr, msgcount); @@ -2453,7 +2453,7 @@ void tipc_link_send_duplicate(struct link *l_ptr, struct link *tunnel) struct sk_buff *iter; struct tipc_msg tunnel_hdr; - msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL, + tipc_msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL, DUPLICATE_MSG, INT_H_SIZE, l_ptr->addr); msg_set_msgcnt(&tunnel_hdr, l_ptr->out_queue_size); msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); @@ -2659,7 +2659,7 @@ int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf) /* Prepare reusable fragment header: */ - msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT, + tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT, INT_H_SIZE, destaddr); msg_set_link_selector(&fragm_hdr, msg_link_selector(inmsg)); msg_set_long_msgno(&fragm_hdr, mod(l_ptr->long_msg_seq_no++)); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 7ee6ae238147..fbcd46f24a9d 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -708,7 +708,7 @@ static inline void msg_set_dataoctet(struct tipc_msg *m, u32 pos) #define DSC_REQ_MSG 0 #define DSC_RESP_MSG 1 -static inline u32 msg_tot_importance(struct tipc_msg *m) +static inline u32 tipc_msg_tot_importance(struct tipc_msg *m) { if 
(likely(msg_isdata(m))) { if (likely(msg_orignode(m) == tipc_own_addr)) @@ -722,7 +722,7 @@ static inline u32 msg_tot_importance(struct tipc_msg *m) } -static inline void msg_init(struct tipc_msg *m, u32 user, u32 type, +static inline void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode) { memset(m, 0, hsize); @@ -739,10 +739,10 @@ static inline void msg_init(struct tipc_msg *m, u32 user, u32 type, } /** - * msg_calc_data_size - determine total data size for message + * tipc_msg_calc_data_size - determine total data size for message */ -static inline int msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect) +static inline int tipc_msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect) { int dsz = 0; int i; @@ -753,20 +753,20 @@ static inline int msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect) } /** - * msg_build - create message using specified header and data + * tipc_msg_build - create message using specified header and data * * Note: Caller must not hold any locks in case copy_from_user() is interrupted! * * Returns message data size or errno */ -static inline int msg_build(struct tipc_msg *hdr, +static inline int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, u32 num_sect, int max_size, int usrmem, struct sk_buff** buf) { int dsz, sz, hsz, pos, res, cnt; - dsz = msg_calc_data_size(msg_sect, num_sect); + dsz = tipc_msg_calc_data_size(msg_sect, num_sect); if (unlikely(dsz > TIPC_MAX_USER_MSG_SIZE)) { *buf = NULL; return -EINVAL; diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 10a69894e2fd..6ac3c543250b 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -103,7 +103,7 @@ static struct sk_buff *named_prepare_buf(u32 type, u32 size, u32 dest) if (buf != NULL) { msg = buf_msg(buf); - msg_init(msg, NAME_DISTRIBUTOR, type, LONG_H_SIZE, dest); + tipc_msg_init(msg, NAME_DISTRIBUTOR, type, LONG_H_SIZE, dest); msg_set_size(msg, LONG_H_SIZE + size); } return buf; diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index acab41a48d67..8ba79620db3f 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -627,7 +627,7 @@ u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *destnode) struct name_seq *seq; u32 ref; - if (!in_scope(*destnode, tipc_own_addr)) + if (!tipc_in_scope(*destnode, tipc_own_addr)) return 0; read_lock_bh(&tipc_nametbl_lock); diff --git a/net/tipc/net.c b/net/tipc/net.c index d7cd1e064a80..f61b7694138b 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -219,7 +219,7 @@ void tipc_net_route_msg(struct sk_buff *buf) /* Handle message for this node */ dnode = msg_short(msg) ? 
tipc_own_addr : msg_destnode(msg); - if (in_scope(dnode, tipc_own_addr)) { + if (tipc_in_scope(dnode, tipc_own_addr)) { if (msg_isdata(msg)) { if (msg_mcast(msg)) tipc_port_recv_mcast(buf, NULL); @@ -277,7 +277,7 @@ int tipc_net_start(u32 addr) info("Started in network mode\n"); info("Own node address %s, network identity %u\n", - addr_string_fill(addr_string, tipc_own_addr), tipc_net_id); + tipc_addr_string_fill(addr_string, tipc_own_addr), tipc_net_id); return 0; } diff --git a/net/tipc/node.c b/net/tipc/node.c index 17cc394f424f..b634942caba5 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -268,7 +268,7 @@ struct tipc_node *tipc_node_attach_link(struct link *l_ptr) if (n_ptr->link_cnt >= 2) { err("Attempt to create third link to %s\n", - addr_string_fill(addr_string, n_ptr->addr)); + tipc_addr_string_fill(addr_string, n_ptr->addr)); return NULL; } @@ -280,7 +280,7 @@ struct tipc_node *tipc_node_attach_link(struct link *l_ptr) } err("Attempt to establish second link on <%s> to %s\n", l_ptr->b_ptr->publ.name, - addr_string_fill(addr_string, l_ptr->addr)); + tipc_addr_string_fill(addr_string, l_ptr->addr)); } return NULL; } @@ -439,7 +439,7 @@ static void node_lost_contact(struct tipc_node *n_ptr) return; info("Lost contact with %s\n", - addr_string_fill(addr_string, n_ptr->addr)); + tipc_addr_string_fill(addr_string, n_ptr->addr)); /* Abort link changeover */ for (i = 0; i < MAX_BEARERS; i++) { @@ -602,7 +602,7 @@ u32 tipc_available_nodes(const u32 domain) read_lock_bh(&tipc_net_lock); for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) { - if (!in_scope(domain, n_ptr->addr)) + if (!tipc_in_scope(domain, n_ptr->addr)) continue; if (tipc_node_is_up(n_ptr)) cnt++; @@ -651,7 +651,7 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) /* Add TLVs for all nodes in scope */ for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) { - if (!in_scope(domain, n_ptr->addr)) + if (!tipc_in_scope(domain, n_ptr->addr)) continue; node_info.addr = htonl(n_ptr->addr); node_info.up = htonl(tipc_node_is_up(n_ptr)); @@ -711,7 +711,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) { u32 i; - if (!in_scope(domain, n_ptr->addr)) + if (!tipc_in_scope(domain, n_ptr->addr)) continue; tipc_node_lock(n_ptr); for (i = 0; i < MAX_BEARERS; i++) { diff --git a/net/tipc/port.c b/net/tipc/port.c index 7641db667e0b..0737680e9266 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -116,7 +116,7 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, u32 domain, msg_set_namelower(hdr, seq->lower); msg_set_nameupper(hdr, seq->upper); msg_set_hdr_sz(hdr, MCAST_H_SIZE); - res = msg_build(hdr, msg_sect, num_sect, MAX_MSG_SIZE, + res = tipc_msg_build(hdr, msg_sect, num_sect, MAX_MSG_SIZE, !oport->user_port, &buf); if (unlikely(!buf)) return res; @@ -241,7 +241,7 @@ struct tipc_port *tipc_createport_raw(void *usr_handle, p_ptr->publ.max_pkt = MAX_PKT_DEFAULT; p_ptr->publ.ref = ref; msg = &p_ptr->publ.phdr; - msg_init(msg, importance, TIPC_NAMED_MSG, LONG_H_SIZE, 0); + tipc_msg_init(msg, importance, TIPC_NAMED_MSG, LONG_H_SIZE, 0); msg_set_origport(msg, ref); p_ptr->last_in_seqno = 41; p_ptr->sent = 1; @@ -395,7 +395,7 @@ static struct sk_buff *port_build_proto_msg(u32 destport, u32 destnode, buf = buf_acquire(LONG_H_SIZE); if (buf) { msg = buf_msg(buf); - msg_init(msg, usr, type, LONG_H_SIZE, destnode); + tipc_msg_init(msg, usr, type, LONG_H_SIZE, destnode); msg_set_errcode(msg, err); 
msg_set_destport(msg, destport); msg_set_origport(msg, origport); @@ -439,7 +439,7 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err) return data_sz; } rmsg = buf_msg(rbuf); - msg_init(rmsg, imp, msg_type(msg), hdr_sz, msg_orignode(msg)); + tipc_msg_init(rmsg, imp, msg_type(msg), hdr_sz, msg_orignode(msg)); msg_set_errcode(rmsg, err); msg_set_destport(rmsg, msg_origport(msg)); msg_set_origport(rmsg, msg_destport(msg)); @@ -480,7 +480,7 @@ int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr, struct sk_buff *buf; int res; - res = msg_build(hdr, msg_sect, num_sect, MAX_MSG_SIZE, + res = tipc_msg_build(hdr, msg_sect, num_sect, MAX_MSG_SIZE, !p_ptr->user_port, &buf); if (!buf) return res; @@ -1343,7 +1343,7 @@ int tipc_port_recv_sections(struct port *sender, unsigned int num_sect, struct sk_buff *buf; int res; - res = msg_build(&sender->publ.phdr, msg_sect, num_sect, + res = tipc_msg_build(&sender->publ.phdr, msg_sect, num_sect, MAX_MSG_SIZE, !sender->user_port, &buf); if (likely(buf)) tipc_port_recv_msg(buf); @@ -1383,7 +1383,7 @@ int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect) if (port_unreliable(p_ptr)) { p_ptr->publ.congested = 0; /* Just calculate msg length and return */ - return msg_calc_data_size(msg_sect, num_sect); + return tipc_msg_calc_data_size(msg_sect, num_sect); } return -ELINKCONG; } @@ -1466,7 +1466,7 @@ int tipc_forward2name(u32 ref, msg_set_hdr_sz(msg, LONG_H_SIZE); msg_set_nametype(msg, name->type); msg_set_nameinst(msg, name->instance); - msg_set_lookup_scope(msg, addr_scope(domain)); + msg_set_lookup_scope(msg, tipc_addr_scope(domain)); if (importance <= TIPC_CRITICAL_IMPORTANCE) msg_set_importance(msg,importance); destport = tipc_nametbl_translate(name->type, name->instance, &destnode); @@ -1483,7 +1483,7 @@ int tipc_forward2name(u32 ref, return res; if (port_unreliable(p_ptr)) { /* Just calculate msg length and return */ - return msg_calc_data_size(msg_sect, num_sect); + return tipc_msg_calc_data_size(msg_sect, num_sect); } return -ELINKCONG; } @@ -1539,7 +1539,7 @@ int tipc_forward_buf2name(u32 ref, msg_set_origport(msg, orig->ref); msg_set_nametype(msg, name->type); msg_set_nameinst(msg, name->instance); - msg_set_lookup_scope(msg, addr_scope(domain)); + msg_set_lookup_scope(msg, tipc_addr_scope(domain)); msg_set_hdr_sz(msg, LONG_H_SIZE); msg_set_size(msg, LONG_H_SIZE + dsz); destport = tipc_nametbl_translate(name->type, name->instance, &destnode); @@ -1619,7 +1619,7 @@ int tipc_forward2port(u32 ref, return res; if (port_unreliable(p_ptr)) { /* Just calculate msg length and return */ - return msg_calc_data_size(msg_sect, num_sect); + return tipc_msg_calc_data_size(msg_sect, num_sect); } return -ELINKCONG; } -- cgit v1.2.2 From 80e0c33064bd71bd5791c79f28c59a1aee898993 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 11 May 2010 14:30:13 +0000 Subject: tipc: Reduce footprint by un-inlining address routines Convert address-related inline routines that are more than one line into standard functions, thereby eliminating a significant amount of repeated code. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker Signed-off-by: David S. 
Miller --- net/tipc/addr.c | 32 ++++++++++++++++++++++++++++++++ net/tipc/addr.h | 37 +++---------------------------------- 2 files changed, 35 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/tipc/addr.c b/net/tipc/addr.c index e5207a11edf6..c048543ffbeb 100644 --- a/net/tipc/addr.c +++ b/net/tipc/addr.c @@ -92,3 +92,35 @@ int tipc_addr_node_valid(u32 addr) return (tipc_addr_domain_valid(addr) && tipc_node(addr)); } +int tipc_in_scope(u32 domain, u32 addr) +{ + if (!domain || (domain == addr)) + return 1; + if (domain == (addr & 0xfffff000u)) /* domain */ + return 1; + if (domain == (addr & 0xff000000u)) /* domain */ + return 1; + return 0; +} + +/** + * tipc_addr_scope - convert message lookup domain to a 2-bit scope value + */ + +int tipc_addr_scope(u32 domain) +{ + if (likely(!domain)) + return TIPC_ZONE_SCOPE; + if (tipc_node(domain)) + return TIPC_NODE_SCOPE; + if (tipc_cluster(domain)) + return TIPC_CLUSTER_SCOPE; + return TIPC_ZONE_SCOPE; +} + +char *tipc_addr_string_fill(char *string, u32 addr) +{ + snprintf(string, 16, "<%u.%u.%u>", + tipc_zone(addr), tipc_cluster(addr), tipc_node(addr)); + return string; +} diff --git a/net/tipc/addr.h b/net/tipc/addr.h index 4d4aee0e4232..c1cc5724d8cc 100644 --- a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -67,32 +67,6 @@ static inline int may_route(u32 addr) return(addr ^ tipc_own_addr) >> 11; } -static inline int tipc_in_scope(u32 domain, u32 addr) -{ - if (!domain || (domain == addr)) - return 1; - if (domain == (addr & 0xfffff000u)) /* domain */ - return 1; - if (domain == (addr & 0xff000000u)) /* domain */ - return 1; - return 0; -} - -/** - * tipc_addr_scope - convert message lookup domain to a 2-bit scope value - */ - -static inline int tipc_addr_scope(u32 domain) -{ - if (likely(!domain)) - return TIPC_ZONE_SCOPE; - if (tipc_node(domain)) - return TIPC_NODE_SCOPE; - if (tipc_cluster(domain)) - return TIPC_CLUSTER_SCOPE; - return TIPC_ZONE_SCOPE; -} - /** * addr_domain - convert 2-bit scope value to equivalent message lookup domain * @@ -110,14 +84,9 @@ static inline int addr_domain(int sc) return tipc_addr(tipc_zone(tipc_own_addr), 0, 0); } -static inline char *tipc_addr_string_fill(char *string, u32 addr) -{ - snprintf(string, 16, "<%u.%u.%u>", - tipc_zone(addr), tipc_cluster(addr), tipc_node(addr)); - return string; -} - int tipc_addr_domain_valid(u32); int tipc_addr_node_valid(u32 addr); - +int tipc_in_scope(u32 domain, u32 addr); +int tipc_addr_scope(u32 domain); +char *tipc_addr_string_fill(char *string, u32 addr); #endif -- cgit v1.2.2 From 3e22e62b6204414cf31c414d5a91897e2b718135 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 11 May 2010 14:30:14 +0000 Subject: tipc: Reduce footprint by un-inlining nmap routines Converts nmap inline routines that are more than one line into standard functions, thereby eliminating a significant amount of repeated code. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker Signed-off-by: David S. 
Miller --- net/tipc/bcast.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++ net/tipc/bcast.h | 63 ++++---------------------------------------------------- 2 files changed, 64 insertions(+), 59 deletions(-) (limited to 'net') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index a8f22e78c3f3..1ee6424ef3e0 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -822,3 +822,63 @@ void tipc_bclink_stop(void) spin_unlock_bh(&bc_lock); } + +/** + * tipc_nmap_add - add a node to a node map + */ + +void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node) +{ + int n = tipc_node(node); + int w = n / WSIZE; + u32 mask = (1 << (n % WSIZE)); + + if ((nm_ptr->map[w] & mask) == 0) { + nm_ptr->count++; + nm_ptr->map[w] |= mask; + } +} + +/** + * tipc_nmap_remove - remove a node from a node map + */ + +void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node) +{ + int n = tipc_node(node); + int w = n / WSIZE; + u32 mask = (1 << (n % WSIZE)); + + if ((nm_ptr->map[w] & mask) != 0) { + nm_ptr->map[w] &= ~mask; + nm_ptr->count--; + } +} + +/** + * tipc_nmap_diff - find differences between node maps + * @nm_a: input node map A + * @nm_b: input node map B + * @nm_diff: output node map A-B (i.e. nodes of A that are not in B) + */ + +void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b, + struct tipc_node_map *nm_diff) +{ + int stop = ARRAY_SIZE(nm_a->map); + int w; + int b; + u32 map; + + memset(nm_diff, 0, sizeof(*nm_diff)); + for (w = 0; w < stop; w++) { + map = nm_a->map[w] ^ (nm_a->map[w] & nm_b->map[w]); + nm_diff->map[w] = map; + if (map != 0) { + for (b = 0 ; b < WSIZE; b++) { + if (map & (1 << b)) + nm_diff->count++; + } + } + } +} diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 2b1c4a755dfa..cd779816383f 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -72,38 +72,8 @@ struct tipc_node; extern const char tipc_bclink_name[]; - -/** - * tipc_nmap_add - add a node to a node map - */ - -static inline void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node) -{ - int n = tipc_node(node); - int w = n / WSIZE; - u32 mask = (1 << (n % WSIZE)); - - if ((nm_ptr->map[w] & mask) == 0) { - nm_ptr->count++; - nm_ptr->map[w] |= mask; - } -} - -/** - * tipc_nmap_remove - remove a node from a node map - */ - -static inline void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node) -{ - int n = tipc_node(node); - int w = n / WSIZE; - u32 mask = (1 << (n % WSIZE)); - - if ((nm_ptr->map[w] & mask) != 0) { - nm_ptr->map[w] &= ~mask; - nm_ptr->count--; - } -} +void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node); +void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node); /** * tipc_nmap_equal - test for equality of node maps @@ -114,33 +84,8 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, struct tipc_node_m return !memcmp(nm_a, nm_b, sizeof(*nm_a)); } -/** - * tipc_nmap_diff - find differences between node maps - * @nm_a: input node map A - * @nm_b: input node map B - * @nm_diff: output node map A-B (i.e. 
nodes of A that are not in B) - */ - -static inline void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b, - struct tipc_node_map *nm_diff) -{ - int stop = ARRAY_SIZE(nm_a->map); - int w; - int b; - u32 map; - - memset(nm_diff, 0, sizeof(*nm_diff)); - for (w = 0; w < stop; w++) { - map = nm_a->map[w] ^ (nm_a->map[w] & nm_b->map[w]); - nm_diff->map[w] = map; - if (map != 0) { - for (b = 0 ; b < WSIZE; b++) { - if (map & (1 << b)) - nm_diff->count++; - } - } - } -} +void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b, + struct tipc_node_map *nm_diff); /** * tipc_port_list_add - add a port to a port list, ensuring no duplicates -- cgit v1.2.2 From 43608edc2dbe83057544cf76b765ecdf63d59e8c Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 11 May 2010 14:30:15 +0000 Subject: tipc: Reduce footprint by un-inlining port list routines Converts port list inline routines that are more than one line into standard functions, thereby eliminating a significant amount of repeated code. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/bcast.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ net/tipc/bcast.h | 52 ++-------------------------------------------------- 2 files changed, 52 insertions(+), 50 deletions(-) (limited to 'net') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 1ee6424ef3e0..a008c6689305 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -882,3 +882,53 @@ void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b, } } } + +/** + * tipc_port_list_add - add a port to a port list, ensuring no duplicates + */ + +void tipc_port_list_add(struct port_list *pl_ptr, u32 port) +{ + struct port_list *item = pl_ptr; + int i; + int item_sz = PLSIZE; + int cnt = pl_ptr->count; + + for (; ; cnt -= item_sz, item = item->next) { + if (cnt < PLSIZE) + item_sz = cnt; + for (i = 0; i < item_sz; i++) + if (item->ports[i] == port) + return; + if (i < PLSIZE) { + item->ports[i] = port; + pl_ptr->count++; + return; + } + if (!item->next) { + item->next = kmalloc(sizeof(*item), GFP_ATOMIC); + if (!item->next) { + warn("Incomplete multicast delivery, no memory\n"); + return; + } + item->next->next = NULL; + } + } +} + +/** + * tipc_port_list_free - free dynamically created entries in port_list chain + * + */ + +void tipc_port_list_free(struct port_list *pl_ptr) +{ + struct port_list *item; + struct port_list *next; + + for (item = pl_ptr->next; item; item = next) { + next = item->next; + kfree(item); + } +} + diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index cd779816383f..e8c2b81658c7 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -87,56 +87,8 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, struct tipc_node_m void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b, struct tipc_node_map *nm_diff); -/** - * tipc_port_list_add - add a port to a port list, ensuring no duplicates - */ - -static inline void tipc_port_list_add(struct port_list *pl_ptr, u32 port) -{ - struct port_list *item = pl_ptr; - int i; - int item_sz = PLSIZE; - int cnt = pl_ptr->count; - - for (; ; cnt -= item_sz, item = item->next) { - if (cnt < PLSIZE) - item_sz = cnt; - for (i = 0; i < item_sz; i++) - if (item->ports[i] == port) - return; - if (i < PLSIZE) { - item->ports[i] = port; - pl_ptr->count++; - return; - } - if (!item->next) { - item->next = kmalloc(sizeof(*item), GFP_ATOMIC); - if (!item->next) { - warn("Incomplete multicast delivery, no 
memory\n"); - return; - } - item->next->next = NULL; - } - } -} - -/** - * tipc_port_list_free - free dynamically created entries in port_list chain - * - * Note: First item is on stack, so it doesn't need to be released - */ - -static inline void tipc_port_list_free(struct port_list *pl_ptr) -{ - struct port_list *item; - struct port_list *next; - - for (item = pl_ptr->next; item; item = next) { - next = item->next; - kfree(item); - } -} - +void tipc_port_list_add(struct port_list *pl_ptr, u32 port); +void tipc_port_list_free(struct port_list *pl_ptr); int tipc_bclink_init(void); void tipc_bclink_stop(void); -- cgit v1.2.2 From b274f4ab8e674db1757371a21e7217e0766cb574 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 11 May 2010 14:30:16 +0000 Subject: tipc: Reduce footprint by un-inlining bearer congestion routine Convert bearer congestion inline routine that is more than one line into a standard function, thereby eliminating some repeated code. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/bearer.c | 12 ++++++++++++ net/tipc/bearer.h | 16 ++-------------- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index ccec12f0ccc9..52ae17b2583e 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -467,6 +467,18 @@ int tipc_bearer_resolve_congestion(struct bearer *b_ptr, struct link *l_ptr) return res; } +/** + * tipc_bearer_congested - determines if bearer is currently congested + */ + +int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr) +{ + if (unlikely(b_ptr->publ.blocked)) + return 1; + if (likely(list_empty(&b_ptr->cong_links))) + return 0; + return !tipc_bearer_resolve_congestion(b_ptr, l_ptr); +} /** * tipc_enable_bearer - enable bearer with the given name diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 000228e93f9e..a850b389663e 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -125,6 +125,7 @@ void tipc_bearer_remove_dest(struct bearer *b_ptr, u32 dest); void tipc_bearer_schedule(struct bearer *b_ptr, struct link *l_ptr); struct bearer *tipc_bearer_find_interface(const char *if_name); int tipc_bearer_resolve_congestion(struct bearer *b_ptr, struct link *l_ptr); +int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr); int tipc_bearer_init(void); void tipc_bearer_stop(void); void tipc_bearer_lock_push(struct bearer *b_ptr); @@ -154,17 +155,4 @@ static inline int tipc_bearer_send(struct bearer *b_ptr, struct sk_buff *buf, return !b_ptr->media->send_msg(buf, &b_ptr->publ, dest); } -/** - * tipc_bearer_congested - determines if bearer is currently congested - */ - -static inline int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr) -{ - if (unlikely(b_ptr->publ.blocked)) - return 1; - if (likely(list_empty(&b_ptr->cong_links))) - return 0; - return !tipc_bearer_resolve_congestion(b_ptr, l_ptr); -} - -#endif +#endif /* _TIPC_BEARER_H */ -- cgit v1.2.2 From 3032cca4d5cf885cacc78fae27ddf0c56dbf9963 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 11 May 2010 14:30:17 +0000 Subject: tipc: Reduce footprint by un-inlining buf_acquire routine Convert buf_acquire inline routine that is more than one line into a standard function, thereby eliminating some repeated code. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker Signed-off-by: David S. 
Miller --- net/tipc/core.c | 24 ++++++++++++++++++++++++ net/tipc/core.h | 24 +----------------------- 2 files changed, 25 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/tipc/core.c b/net/tipc/core.c index b47d1842a970..696468117985 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -101,6 +101,30 @@ int tipc_get_mode(void) return tipc_mode; } +/** + * buf_acquire - creates a TIPC message buffer + * @size: message size (including TIPC header) + * + * Returns a new buffer with data pointers set to the specified size. + * + * NOTE: Headroom is reserved to allow prepending of a data link header. + * There may also be unrequested tailroom present at the buffer's end. + */ + +struct sk_buff *buf_acquire(u32 size) +{ + struct sk_buff *skb; + unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u; + + skb = alloc_skb_fclone(buf_size, GFP_ATOMIC); + if (skb) { + skb_reserve(skb, BUF_HEADROOM); + skb_put(skb, size); + skb->next = NULL; + } + return skb; +} + /** * tipc_core_stop_net - shut down TIPC networking sub-systems */ diff --git a/net/tipc/core.h b/net/tipc/core.h index 1e149f55f3e2..188799017abd 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -328,29 +328,7 @@ static inline struct tipc_msg *buf_msg(struct sk_buff *skb) return (struct tipc_msg *)skb->data; } -/** - * buf_acquire - creates a TIPC message buffer - * @size: message size (including TIPC header) - * - * Returns a new buffer with data pointers set to the specified size. - * - * NOTE: Headroom is reserved to allow prepending of a data link header. - * There may also be unrequested tailroom present at the buffer's end. - */ - -static inline struct sk_buff *buf_acquire(u32 size) -{ - struct sk_buff *skb; - unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u; - - skb = alloc_skb_fclone(buf_size, GFP_ATOMIC); - if (skb) { - skb_reserve(skb, BUF_HEADROOM); - skb_put(skb, size); - skb->next = NULL; - } - return skb; -} +extern struct sk_buff *buf_acquire(u32 size); /** * buf_discard - frees a TIPC message buffer -- cgit v1.2.2 From 23461e835b3537dd395828b090fb1cb64a198f85 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 11 May 2010 14:30:18 +0000 Subject: tipc: Reduce footprint by un-inlining tipc_msg_* routines Convert tipc_msg_* inline routines that are more than one line into standard functions, thereby eliminating some repeated code. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker Signed-off-by: David S. 
Miller --- net/tipc/msg.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ net/tipc/msg.h | 99 ++++------------------------------------------------------ 2 files changed, 100 insertions(+), 93 deletions(-) (limited to 'net') diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 73dcd00d674e..381063817b41 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -40,6 +40,100 @@ #include "msg.h" #include "bearer.h" +u32 tipc_msg_tot_importance(struct tipc_msg *m) +{ + if (likely(msg_isdata(m))) { + if (likely(msg_orignode(m) == tipc_own_addr)) + return msg_importance(m); + return msg_importance(m) + 4; + } + if ((msg_user(m) == MSG_FRAGMENTER) && + (msg_type(m) == FIRST_FRAGMENT)) + return msg_importance(msg_get_wrapped(m)); + return msg_importance(m); +} + + +void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, + u32 hsize, u32 destnode) +{ + memset(m, 0, hsize); + msg_set_version(m); + msg_set_user(m, user); + msg_set_hdr_sz(m, hsize); + msg_set_size(m, hsize); + msg_set_prevnode(m, tipc_own_addr); + msg_set_type(m, type); + if (!msg_short(m)) { + msg_set_orignode(m, tipc_own_addr); + msg_set_destnode(m, destnode); + } +} + +/** + * tipc_msg_calc_data_size - determine total data size for message + */ + +int tipc_msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect) +{ + int dsz = 0; + int i; + + for (i = 0; i < num_sect; i++) + dsz += msg_sect[i].iov_len; + return dsz; +} + +/** + * tipc_msg_build - create message using specified header and data + * + * Note: Caller must not hold any locks in case copy_from_user() is interrupted! + * + * Returns message data size or errno + */ + +int tipc_msg_build(struct tipc_msg *hdr, + struct iovec const *msg_sect, u32 num_sect, + int max_size, int usrmem, struct sk_buff** buf) +{ + int dsz, sz, hsz, pos, res, cnt; + + dsz = tipc_msg_calc_data_size(msg_sect, num_sect); + if (unlikely(dsz > TIPC_MAX_USER_MSG_SIZE)) { + *buf = NULL; + return -EINVAL; + } + + pos = hsz = msg_hdr_sz(hdr); + sz = hsz + dsz; + msg_set_size(hdr, sz); + if (unlikely(sz > max_size)) { + *buf = NULL; + return dsz; + } + + *buf = buf_acquire(sz); + if (!(*buf)) + return -ENOMEM; + skb_copy_to_linear_data(*buf, hdr, hsz); + for (res = 1, cnt = 0; res && (cnt < num_sect); cnt++) { + if (likely(usrmem)) + res = !copy_from_user((*buf)->data + pos, + msg_sect[cnt].iov_base, + msg_sect[cnt].iov_len); + else + skb_copy_to_linear_data_offset(*buf, pos, + msg_sect[cnt].iov_base, + msg_sect[cnt].iov_len); + pos += msg_sect[cnt].iov_len; + } + if (likely(res)) + return dsz; + + buf_discard(*buf); + *buf = NULL; + return -EFAULT; +} #ifdef CONFIG_TIPC_DEBUG diff --git a/net/tipc/msg.h b/net/tipc/msg.h index fbcd46f24a9d..995d2da35b01 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -708,100 +708,13 @@ static inline void msg_set_dataoctet(struct tipc_msg *m, u32 pos) #define DSC_REQ_MSG 0 #define DSC_RESP_MSG 1 -static inline u32 tipc_msg_tot_importance(struct tipc_msg *m) -{ - if (likely(msg_isdata(m))) { - if (likely(msg_orignode(m) == tipc_own_addr)) - return msg_importance(m); - return msg_importance(m) + 4; - } - if ((msg_user(m) == MSG_FRAGMENTER) && - (msg_type(m) == FIRST_FRAGMENT)) - return msg_importance(msg_get_wrapped(m)); - return msg_importance(m); -} - - -static inline void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, - u32 hsize, u32 destnode) -{ - memset(m, 0, hsize); - msg_set_version(m); - msg_set_user(m, user); - msg_set_hdr_sz(m, hsize); - msg_set_size(m, hsize); - msg_set_prevnode(m, tipc_own_addr); - msg_set_type(m, type); - if 
(!msg_short(m)) { - msg_set_orignode(m, tipc_own_addr); - msg_set_destnode(m, destnode); - } -} - -/** - * tipc_msg_calc_data_size - determine total data size for message - */ - -static inline int tipc_msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect) -{ - int dsz = 0; - int i; - - for (i = 0; i < num_sect; i++) - dsz += msg_sect[i].iov_len; - return dsz; -} - -/** - * tipc_msg_build - create message using specified header and data - * - * Note: Caller must not hold any locks in case copy_from_user() is interrupted! - * - * Returns message data size or errno - */ - -static inline int tipc_msg_build(struct tipc_msg *hdr, +u32 tipc_msg_tot_importance(struct tipc_msg *m); +void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, + u32 hsize, u32 destnode); +int tipc_msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect); +int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, u32 num_sect, - int max_size, int usrmem, struct sk_buff** buf) -{ - int dsz, sz, hsz, pos, res, cnt; - - dsz = tipc_msg_calc_data_size(msg_sect, num_sect); - if (unlikely(dsz > TIPC_MAX_USER_MSG_SIZE)) { - *buf = NULL; - return -EINVAL; - } - - pos = hsz = msg_hdr_sz(hdr); - sz = hsz + dsz; - msg_set_size(hdr, sz); - if (unlikely(sz > max_size)) { - *buf = NULL; - return dsz; - } - - *buf = buf_acquire(sz); - if (!(*buf)) - return -ENOMEM; - skb_copy_to_linear_data(*buf, hdr, hsz); - for (res = 1, cnt = 0; res && (cnt < num_sect); cnt++) { - if (likely(usrmem)) - res = !copy_from_user((*buf)->data + pos, - msg_sect[cnt].iov_base, - msg_sect[cnt].iov_len); - else - skb_copy_to_linear_data_offset(*buf, pos, - msg_sect[cnt].iov_base, - msg_sect[cnt].iov_len); - pos += msg_sect[cnt].iov_len; - } - if (likely(res)) - return dsz; - - buf_discard(*buf); - *buf = NULL; - return -EFAULT; -} + int max_size, int usrmem, struct sk_buff** buf); static inline void msg_set_media_addr(struct tipc_msg *m, struct tipc_media_addr *a) { -- cgit v1.2.2 From e94c67436efa22af7d8b7d19c885863246042543 Mon Sep 17 00:00:00 2001 From: Bart De Schuymer Date: Thu, 13 May 2010 14:55:34 +0200 Subject: netfilter: bridge-netfilter: fix crash in br_nf_forward_finish() [ 4593.956206] BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 [ 4593.956219] IP: [] br_nf_forward_finish+0x154/0x170 [bridge] [ 4593.956232] PGD 195ece067 PUD 1ba005067 PMD 0 [ 4593.956241] Oops: 0000 [#1] SMP [ 4593.956248] last sysfs file: /sys/devices/LNXSYSTM:00/LNXSYBUS:00/PNP0A08:00/device:08/ATK0110:00/hwmon/hwmon0/temp2_label [ 4593.956253] CPU 3 ... 
[ 4593.956380] Pid: 29512, comm: kvm Not tainted 2.6.34-rc7-net #195 P6T DELUXE/System Product Name [ 4593.956384] RIP: 0010:[] [] br_nf_forward_finish+0x154/0x170 [bridge] [ 4593.956395] RSP: 0018:ffff880001e63b78 EFLAGS: 00010246 [ 4593.956399] RAX: 0000000000000608 RBX: ffff880057181700 RCX: ffff8801b813d000 [ 4593.956402] RDX: 0000000000000000 RSI: 0000000000000002 RDI: ffff880057181700 [ 4593.956406] RBP: ffff880001e63ba8 R08: ffff8801b9d97000 R09: ffffffffa0335650 [ 4593.956410] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801b813d000 [ 4593.956413] R13: ffffffff81ab3940 R14: ffff880057181700 R15: 0000000000000002 [ 4593.956418] FS: 00007fc40d380710(0000) GS:ffff880001e60000(0000) knlGS:0000000000000000 [ 4593.956422] CS: 0010 DS: 002b ES: 002b CR0: 000000008005003b [ 4593.956426] CR2: 0000000000000018 CR3: 00000001ba1d7000 CR4: 00000000000026e0 [ 4593.956429] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 4593.956433] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 4593.956437] Process kvm (pid: 29512, threadinfo ffff8801ba566000, task ffff8801b8003870) [ 4593.956441] Stack: [ 4593.956443] 0000000100000020 ffff880001e63ba0 ffff880001e63ba0 ffff880057181700 [ 4593.956451] <0> ffffffffa0335650 ffffffff81ab3940 ffff880001e63bd8 ffffffffa03350e6 [ 4593.956462] <0> ffff880001e63c40 000000000000024d ffff880057181700 0000000080000000 [ 4593.956474] Call Trace: [ 4593.956478] [ 4593.956488] [] ? br_nf_forward_finish+0x0/0x170 [bridge] [ 4593.956496] [] NF_HOOK_THRESH+0x56/0x60 [bridge] [ 4593.956504] [] br_nf_forward_arp+0x112/0x120 [bridge] [ 4593.956511] [] nf_iterate+0x64/0xa0 [ 4593.956519] [] ? br_forward_finish+0x0/0x60 [bridge] [ 4593.956524] [] nf_hook_slow+0x6c/0x100 [ 4593.956531] [] ? br_forward_finish+0x0/0x60 [bridge] [ 4593.956538] [] ? __br_forward+0x0/0xc0 [bridge] [ 4593.956545] [] __br_forward+0x6d/0xc0 [bridge] [ 4593.956550] [] ? skb_clone+0x3e/0x70 [ 4593.956557] [] deliver_clone+0x32/0x60 [bridge] [ 4593.956564] [] br_flood+0xa6/0xe0 [bridge] [ 4593.956571] [] ? __br_forward+0x0/0xc0 [bridge] Don't call nf_bridge_update_protocol() for ARP traffic as skb->nf_bridge isn't used in the ARP case. Reported-by: Stephen Hemminger Signed-off-by: Bart De Schuymer Signed-off-by: Patrick McHardy --- net/bridge/br_netfilter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 93f80fefa496..44420992f72f 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -643,10 +643,10 @@ static int br_nf_forward_finish(struct sk_buff *skb) skb->pkt_type = PACKET_OTHERHOST; nf_bridge->mask ^= BRNF_PKT_TYPE; } + nf_bridge_update_protocol(skb); } else { in = *((struct net_device **)(skb->cb)); } - nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, in, -- cgit v1.2.2 From af5676039a9479e6ff42c6aab9fac1149ac9597f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 13 May 2010 15:00:20 +0200 Subject: netfilter: change NF_ASSERT to WARN_ON Change netfilter asserts to standard WARN_ON. This has the benefit of backtrace info and also causes netfilter errors to show up on kerneloops.org. 
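[Illustrative note, not part of the patch.] The macro rewrite in the diff below replaces a bare printk() with WARN_ON(!(x)). The sketch here is a self-contained userspace analogue built on glibc's backtrace()/backtrace_symbols_fd(), not the kernel's WARN machinery, and the OLD_NF_ASSERT/NEW_NF_ASSERT/warn_on_failed names are made up for the example; it only shows the difference in output that the commit message cites: the old style prints one untagged line, the new style also dumps the call chain, which is what lets collectors such as kerneloops.org attribute the failure.

#include <stdio.h>
#include <execinfo.h>

/* Old style: one line, no severity, no backtrace. */
#define OLD_NF_ASSERT(x) \
do { \
	if (!(x)) \
		printf("NF_ASSERT: %s:%s:%u\n", __func__, __FILE__, __LINE__); \
} while (0)

static void warn_on_failed(const char *expr, const char *file, unsigned int line)
{
	void *frames[16];
	int n = backtrace(frames, 16);

	fprintf(stderr, "WARNING: %s failed at %s:%u\n", expr, file, line);
	backtrace_symbols_fd(frames, n, 2);	/* 2 == stderr */
}

/* New style: modelled on WARN_ON(!(x)) -- report and dump the stack. */
#define NEW_NF_ASSERT(x) \
do { \
	if (!(x)) \
		warn_on_failed(#x, __FILE__, __LINE__); \
} while (0)

int main(void)
{
	OLD_NF_ASSERT(1 == 2);	/* single line, easy to miss */
	NEW_NF_ASSERT(1 == 2);	/* warning plus stack dump */
	return 0;
}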
Signed-off-by: Stephen Hemminger Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/arp_tables.c | 7 +------ net/ipv4/netfilter/ip_tables.c | 7 +------ net/ipv6/netfilter/ip6_tables.c | 7 +------ 3 files changed, 3 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 03352fcba172..65f2944b5a64 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -49,12 +49,7 @@ MODULE_DESCRIPTION("arptables core"); #endif #ifdef CONFIG_NETFILTER_DEBUG -#define ARP_NF_ASSERT(x) \ -do { \ - if (!(x)) \ - printk("ARP_NF_ASSERT: %s:%s:%u\n", \ - __func__, __FILE__, __LINE__); \ -} while(0) +#define ARP_NF_ASSERT(x) WARN_ON(!(x)) #else #define ARP_NF_ASSERT(x) #endif diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 49b9e4fb5460..ad702bc34e75 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -51,12 +51,7 @@ MODULE_DESCRIPTION("IPv4 packet filter"); #endif #ifdef CONFIG_NETFILTER_DEBUG -#define IP_NF_ASSERT(x) \ -do { \ - if (!(x)) \ - printk("IP_NF_ASSERT: %s:%s:%u\n", \ - __func__, __FILE__, __LINE__); \ -} while(0) +#define IP_NF_ASSERT(x) WARN_ON(!(x)) #else #define IP_NF_ASSERT(x) #endif diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 56782336474f..02a7db025d0f 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -52,12 +52,7 @@ MODULE_DESCRIPTION("IPv6 packet filter"); #endif #ifdef CONFIG_NETFILTER_DEBUG -#define IP_NF_ASSERT(x) \ -do { \ - if (!(x)) \ - printk("IP_NF_ASSERT: %s:%s:%u\n", \ - __func__, __FILE__, __LINE__); \ -} while(0) +#define IP_NF_ASSERT(x) WARN_ON(!(x)) #else #define IP_NF_ASSERT(x) #endif -- cgit v1.2.2 From 654d0fbdc8fe1041918741ed5b6abc8ad6b4c1d8 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 13 May 2010 15:02:08 +0200 Subject: netfilter: cleanup printk messages Make sure all printk messages have a severity level. 
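For a quick illustration of the pattern applied throughout the diff below (this snippet is not part of the patch, and the pr_err() expansion quoted in the comment is a simplified reading of the kernel's printk helpers):

	/* Before: no severity, so the line goes out at the default log level. */
	printk("nf_conntrack_ipv4: can't register tcp.\n");

	/* After, either state the level explicitly ... */
	printk(KERN_ERR "nf_conntrack_ipv4: can't register tcp.\n");

	/* ... or use the pr_*() helpers, which prepend the level for you
	 * (roughly: #define pr_err(fmt, ...) printk(KERN_ERR fmt, ##__VA_ARGS__)). */
	pr_err("nf_conntrack_ipv4: can't register tcp.\n");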
Signed-off-by: Stephen Hemminger Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/arp_tables.c | 4 ++-- net/ipv4/netfilter/ip_tables.c | 2 +- net/ipv4/netfilter/iptable_filter.c | 2 +- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 10 +++++----- net/ipv4/netfilter/nf_nat_h323.c | 16 ++++++++-------- net/ipv4/netfilter/nf_nat_snmp_basic.c | 16 ++++++++-------- net/ipv4/netfilter/nf_nat_standalone.c | 4 ++-- net/ipv6/netfilter/ip6_tables.c | 2 +- net/ipv6/netfilter/ip6table_filter.c | 2 +- net/ipv6/netfilter/ip6table_mangle.c | 2 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 14 +++++++------- net/netfilter/nf_conntrack_amanda.c | 2 +- net/netfilter/nf_conntrack_core.c | 2 +- net/netfilter/nf_conntrack_ftp.c | 4 ++-- net/netfilter/nf_conntrack_h323_main.c | 6 +++--- net/netfilter/nf_conntrack_irc.c | 4 ++-- net/netfilter/nf_conntrack_netlink.c | 12 ++++++------ net/netfilter/nf_conntrack_proto_sctp.c | 4 ++-- net/netfilter/nf_conntrack_sip.c | 4 ++-- net/netfilter/nf_conntrack_standalone.c | 2 +- net/netfilter/nf_conntrack_tftp.c | 4 ++-- net/netfilter/nf_internals.h | 2 +- net/netfilter/nfnetlink.c | 4 ++-- net/netfilter/nfnetlink_log.c | 4 ++-- 24 files changed, 64 insertions(+), 64 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 65f2944b5a64..1ac01b128621 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -222,7 +222,7 @@ static unsigned int arpt_error(struct sk_buff *skb, const struct xt_action_param *par) { if (net_ratelimit()) - printk("arp_tables: error: '%s'\n", + pr_err("arp_tables: error: '%s'\n", (const char *)par->targinfo); return NF_DROP; @@ -385,7 +385,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo, int visited = e->comefrom & (1 << hook); if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) { - printk("arptables: loop hook %u pos %u %08X.\n", + pr_notice("arptables: loop hook %u pos %u %08X.\n", hook, pos, e->comefrom); return 0; } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index ad702bc34e75..63958f3394a5 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -466,7 +466,7 @@ mark_source_chains(const struct xt_table_info *newinfo, int visited = e->comefrom & (1 << hook); if (e->comefrom & (1 << NF_INET_NUMHOOKS)) { - printk("iptables: loop hook %u pos %u %08X.\n", + pr_err("iptables: loop hook %u pos %u %08X.\n", hook, pos, e->comefrom); return 0; } diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 55392466daa4..c37641e819f2 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -89,7 +89,7 @@ static int __init iptable_filter_init(void) int ret; if (forward < 0 || forward > NF_MAX_VERDICT) { - printk("iptables forward must be 0 or 1\n"); + pr_err("iptables forward must be 0 or 1\n"); return -EINVAL; } diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 2bb1f87051c4..5a03c02af999 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -382,32 +382,32 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp4); if (ret < 0) { - printk("nf_conntrack_ipv4: can't register tcp.\n"); + pr_err("nf_conntrack_ipv4: can't register tcp.\n"); goto cleanup_sockopt; } ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp4); if (ret 
< 0) { - printk("nf_conntrack_ipv4: can't register udp.\n"); + pr_err("nf_conntrack_ipv4: can't register udp.\n"); goto cleanup_tcp; } ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmp); if (ret < 0) { - printk("nf_conntrack_ipv4: can't register icmp.\n"); + pr_err("nf_conntrack_ipv4: can't register icmp.\n"); goto cleanup_udp; } ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4); if (ret < 0) { - printk("nf_conntrack_ipv4: can't register ipv4\n"); + pr_err("nf_conntrack_ipv4: can't register ipv4\n"); goto cleanup_icmp; } ret = nf_register_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); if (ret < 0) { - printk("nf_conntrack_ipv4: can't register hooks.\n"); + pr_err("nf_conntrack_ipv4: can't register hooks.\n"); goto cleanup_ipv4; } #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index d4c061874f8f..5045196d853c 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -43,7 +43,7 @@ static int set_addr(struct sk_buff *skb, addroff, sizeof(buf), (char *) &buf, sizeof(buf))) { if (net_ratelimit()) - printk("nf_nat_h323: nf_nat_mangle_tcp_packet" + pr_notice("nf_nat_h323: nf_nat_mangle_tcp_packet" " error\n"); return -1; } @@ -59,7 +59,7 @@ static int set_addr(struct sk_buff *skb, addroff, sizeof(buf), (char *) &buf, sizeof(buf))) { if (net_ratelimit()) - printk("nf_nat_h323: nf_nat_mangle_udp_packet" + pr_notice("nf_nat_h323: nf_nat_mangle_udp_packet" " error\n"); return -1; } @@ -215,7 +215,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, /* Run out of expectations */ if (i >= H323_RTP_CHANNEL_MAX) { if (net_ratelimit()) - printk("nf_nat_h323: out of expectations\n"); + pr_notice("nf_nat_h323: out of expectations\n"); return 0; } @@ -234,7 +234,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, if (nated_port == 0) { /* No port available */ if (net_ratelimit()) - printk("nf_nat_h323: out of RTP ports\n"); + pr_notice("nf_nat_h323: out of RTP ports\n"); return 0; } @@ -291,7 +291,7 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, if (nated_port == 0) { /* No port available */ if (net_ratelimit()) - printk("nf_nat_h323: out of TCP ports\n"); + pr_notice("nf_nat_h323: out of TCP ports\n"); return 0; } @@ -341,7 +341,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, if (nated_port == 0) { /* No port available */ if (net_ratelimit()) - printk("nf_nat_q931: out of TCP ports\n"); + pr_notice("nf_nat_q931: out of TCP ports\n"); return 0; } @@ -425,7 +425,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, if (nated_port == 0) { /* No port available */ if (net_ratelimit()) - printk("nf_nat_ras: out of TCP ports\n"); + pr_notice("nf_nat_ras: out of TCP ports\n"); return 0; } @@ -507,7 +507,7 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, if (nated_port == 0) { /* No port available */ if (net_ratelimit()) - printk("nf_nat_q931: out of TCP ports\n"); + pr_notice("nf_nat_q931: out of TCP ports\n"); return 0; } diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 4d85b6e55f29..1679e2c0963d 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -401,7 +401,7 @@ static unsigned char asn1_octets_decode(struct asn1_ctx *ctx, *octets = kmalloc(eoc - ctx->pointer, GFP_ATOMIC); if (*octets == NULL) { if (net_ratelimit()) - printk("OOM in bsalg 
(%d)\n", __LINE__); + pr_notice("OOM in bsalg (%d)\n", __LINE__); return 0; } @@ -452,7 +452,7 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx, *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC); if (*oid == NULL) { if (net_ratelimit()) - printk("OOM in bsalg (%d)\n", __LINE__); + pr_notice("OOM in bsalg (%d)\n", __LINE__); return 0; } @@ -729,7 +729,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx, if (*obj == NULL) { kfree(id); if (net_ratelimit()) - printk("OOM in bsalg (%d)\n", __LINE__); + pr_notice("OOM in bsalg (%d)\n", __LINE__); return 0; } (*obj)->syntax.l[0] = l; @@ -746,7 +746,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx, kfree(p); kfree(id); if (net_ratelimit()) - printk("OOM in bsalg (%d)\n", __LINE__); + pr_notice("OOM in bsalg (%d)\n", __LINE__); return 0; } memcpy((*obj)->syntax.c, p, len); @@ -761,7 +761,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx, if (*obj == NULL) { kfree(id); if (net_ratelimit()) - printk("OOM in bsalg (%d)\n", __LINE__); + pr_notice("OOM in bsalg (%d)\n", __LINE__); return 0; } if (!asn1_null_decode(ctx, end)) { @@ -782,7 +782,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx, kfree(lp); kfree(id); if (net_ratelimit()) - printk("OOM in bsalg (%d)\n", __LINE__); + pr_notice("OOM in bsalg (%d)\n", __LINE__); return 0; } memcpy((*obj)->syntax.ul, lp, len); @@ -803,7 +803,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx, kfree(p); kfree(id); if (net_ratelimit()) - printk("OOM in bsalg (%d)\n", __LINE__); + pr_notice("OOM in bsalg (%d)\n", __LINE__); return 0; } memcpy((*obj)->syntax.uc, p, len); @@ -821,7 +821,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx, if (*obj == NULL) { kfree(id); if (net_ratelimit()) - printk("OOM in bsalg (%d)\n", __LINE__); + pr_notice("OOM in bsalg (%d)\n", __LINE__); return 0; } (*obj)->syntax.ul[0] = ul; diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 84c7974f5830..beb25819c9c9 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -293,12 +293,12 @@ static int __init nf_nat_standalone_init(void) #endif ret = nf_nat_rule_init(); if (ret < 0) { - printk("nf_nat_init: can't setup rules.\n"); + pr_err("nf_nat_init: can't setup rules.\n"); goto cleanup_decode_session; } ret = nf_register_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops)); if (ret < 0) { - printk("nf_nat_init: can't register hooks.\n"); + pr_err("nf_nat_init: can't register hooks.\n"); goto cleanup_rule_init; } return ret; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 02a7db025d0f..6f517bd83692 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -481,7 +481,7 @@ mark_source_chains(const struct xt_table_info *newinfo, int visited = e->comefrom & (1 << hook); if (e->comefrom & (1 << NF_INET_NUMHOOKS)) { - printk("iptables: loop hook %u pos %u %08X.\n", + pr_err("iptables: loop hook %u pos %u %08X.\n", hook, pos, e->comefrom); return 0; } diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index d6fc9aff3163..c9e37c8fd62c 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -81,7 +81,7 @@ static int __init ip6table_filter_init(void) int ret; if (forward < 0 || forward > NF_MAX_VERDICT) { - printk("iptables forward must be 0 or 1\n"); + pr_err("iptables forward must be 0 or 1\n"); return -EINVAL; } diff 
--git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 6a102b57f356..679a0a3b7b3c 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -43,7 +43,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out) if (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr)) { if (net_ratelimit()) - printk("ip6t_hook: happy cracking.\n"); + pr_warning("ip6t_hook: happy cracking.\n"); return NF_ACCEPT; } #endif diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 996c3f41fecd..ff43461704be 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -280,7 +280,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum, /* root is playing with raw sockets. */ if (skb->len < sizeof(struct ipv6hdr)) { if (net_ratelimit()) - printk("ipv6_conntrack_local: packet too short\n"); + pr_notice("ipv6_conntrack_local: packet too short\n"); return NF_ACCEPT; } return __ipv6_conntrack_in(dev_net(out), hooknum, skb, okfn); @@ -406,37 +406,37 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) ret = nf_ct_frag6_init(); if (ret < 0) { - printk("nf_conntrack_ipv6: can't initialize frag6.\n"); + pr_err("nf_conntrack_ipv6: can't initialize frag6.\n"); return ret; } ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp6); if (ret < 0) { - printk("nf_conntrack_ipv6: can't register tcp.\n"); + pr_err("nf_conntrack_ipv6: can't register tcp.\n"); goto cleanup_frag6; } ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp6); if (ret < 0) { - printk("nf_conntrack_ipv6: can't register udp.\n"); + pr_err("nf_conntrack_ipv6: can't register udp.\n"); goto cleanup_tcp; } ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmpv6); if (ret < 0) { - printk("nf_conntrack_ipv6: can't register icmpv6.\n"); + pr_err("nf_conntrack_ipv6: can't register icmpv6.\n"); goto cleanup_udp; } ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv6); if (ret < 0) { - printk("nf_conntrack_ipv6: can't register ipv6\n"); + pr_err("nf_conntrack_ipv6: can't register ipv6\n"); goto cleanup_icmpv6; } ret = nf_register_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); if (ret < 0) { - printk("nf_conntrack_ipv6: can't register pre-routing defrag " + pr_err("nf_conntrack_ipv6: can't register pre-routing defrag " "hook.\n"); goto cleanup_ipv6; } diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index 372e80f07a81..13fd2c55e329 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -108,7 +108,7 @@ static int amanda_help(struct sk_buff *skb, dataoff = protoff + sizeof(struct udphdr); if (dataoff >= skb->len) { if (net_ratelimit()) - printk("amanda_help: skblen = %u\n", skb->len); + printk(KERN_ERR "amanda_help: skblen = %u\n", skb->len); return NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 3907efb97a7c..b83c530c5e0a 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1335,7 +1335,7 @@ static int nf_conntrack_init_init_net(void) } nf_conntrack_max = max_factor * nf_conntrack_htable_size; - printk("nf_conntrack version %s (%u buckets, %d max)\n", + printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n", NF_CONNTRACK_VERSION, nf_conntrack_htable_size, nf_conntrack_max); diff --git 
a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 2ae3169e7633..e17cb7c7dd8f 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -573,8 +573,8 @@ static int __init nf_conntrack_ftp_init(void) ftp[i][j].tuple.src.l3num, ports[i]); ret = nf_conntrack_helper_register(&ftp[i][j]); if (ret) { - printk("nf_ct_ftp: failed to register helper " - " for pf: %d port: %d\n", + printk(KERN_ERR "nf_ct_ftp: failed to register" + " helper for pf: %d port: %d\n", ftp[i][j].tuple.src.l3num, ports[i]); nf_conntrack_ftp_fini(); return ret; diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 48bf15073a85..6eaee7c8a337 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -607,7 +607,7 @@ static int h245_help(struct sk_buff *skb, unsigned int protoff, drop: spin_unlock_bh(&nf_h323_lock); if (net_ratelimit()) - printk("nf_ct_h245: packet dropped\n"); + pr_info("nf_ct_h245: packet dropped\n"); return NF_DROP; } @@ -1152,7 +1152,7 @@ static int q931_help(struct sk_buff *skb, unsigned int protoff, drop: spin_unlock_bh(&nf_h323_lock); if (net_ratelimit()) - printk("nf_ct_q931: packet dropped\n"); + pr_info("nf_ct_q931: packet dropped\n"); return NF_DROP; } @@ -1727,7 +1727,7 @@ static int ras_help(struct sk_buff *skb, unsigned int protoff, drop: spin_unlock_bh(&nf_h323_lock); if (net_ratelimit()) - printk("nf_ct_ras: packet dropped\n"); + pr_info("nf_ct_ras: packet dropped\n"); return NF_DROP; } diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 7673930ca342..b394aa318776 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -235,7 +235,7 @@ static int __init nf_conntrack_irc_init(void) char *tmpname; if (max_dcc_channels < 1) { - printk("nf_ct_irc: max_dcc_channels must not be zero\n"); + printk(KERN_ERR "nf_ct_irc: max_dcc_channels must not be zero\n"); return -EINVAL; } @@ -267,7 +267,7 @@ static int __init nf_conntrack_irc_init(void) ret = nf_conntrack_helper_register(&irc[i]); if (ret) { - printk("nf_ct_irc: failed to register helper " + printk(KERN_ERR "nf_ct_irc: failed to register helper " "for pf: %u port: %u\n", irc[i].tuple.src.l3num, ports[i]); nf_conntrack_irc_fini(); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 4e55403bf263..961fb6a85294 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2057,29 +2057,29 @@ static int __init ctnetlink_init(void) { int ret; - printk("ctnetlink v%s: registering with nfnetlink.\n", version); + pr_info("ctnetlink v%s: registering with nfnetlink.\n", version); ret = nfnetlink_subsys_register(&ctnl_subsys); if (ret < 0) { - printk("ctnetlink_init: cannot register with nfnetlink.\n"); + pr_err("ctnetlink_init: cannot register with nfnetlink.\n"); goto err_out; } ret = nfnetlink_subsys_register(&ctnl_exp_subsys); if (ret < 0) { - printk("ctnetlink_init: cannot register exp with nfnetlink.\n"); + pr_err("ctnetlink_init: cannot register exp with nfnetlink.\n"); goto err_unreg_subsys; } #ifdef CONFIG_NF_CONNTRACK_EVENTS ret = nf_conntrack_register_notifier(&ctnl_notifier); if (ret < 0) { - printk("ctnetlink_init: cannot register notifier.\n"); + pr_err("ctnetlink_init: cannot register notifier.\n"); goto err_unreg_exp_subsys; } ret = nf_ct_expect_register_notifier(&ctnl_notifier_exp); if (ret < 0) { - printk("ctnetlink_init: cannot expect register notifier.\n"); + 
pr_err("ctnetlink_init: cannot expect register notifier.\n"); goto err_unreg_notifier; } #endif @@ -2100,7 +2100,7 @@ err_out: static void __exit ctnetlink_exit(void) { - printk("ctnetlink: unregistering from nfnetlink.\n"); + pr_info("ctnetlink: unregistering from nfnetlink.\n"); #ifdef CONFIG_NF_CONNTRACK_EVENTS nf_ct_expect_unregister_notifier(&ctnl_notifier_exp); diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index b68ff15ed979..c6049c2d5ea8 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -717,12 +717,12 @@ static int __init nf_conntrack_proto_sctp_init(void) ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_sctp4); if (ret) { - printk("nf_conntrack_l4proto_sctp4: protocol register failed\n"); + pr_err("nf_conntrack_l4proto_sctp4: protocol register failed\n"); goto out; } ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_sctp6); if (ret) { - printk("nf_conntrack_l4proto_sctp6: protocol register failed\n"); + pr_err("nf_conntrack_l4proto_sctp6: protocol register failed\n"); goto cleanup_sctp4; } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index c6cd1b84eddd..b20f4275893c 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1549,8 +1549,8 @@ static int __init nf_conntrack_sip_init(void) ret = nf_conntrack_helper_register(&sip[i][j]); if (ret) { - printk("nf_ct_sip: failed to register helper " - "for pf: %u port: %u\n", + printk(KERN_ERR "nf_ct_sip: failed to register" + " helper for pf: %u port: %u\n", sip[i][j].tuple.src.l3num, ports[i]); nf_conntrack_sip_fini(); return ret; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index ea4a8d384234..eb973fcd67ab 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -446,7 +446,7 @@ out_kmemdup: if (net_eq(net, &init_net)) unregister_sysctl_table(nf_ct_netfilter_header); out: - printk("nf_conntrack: can't register to sysctl.\n"); + printk(KERN_ERR "nf_conntrack: can't register to sysctl.\n"); return -ENOMEM; } diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index 46e646b2e9b9..75466fd72f4f 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -138,8 +138,8 @@ static int __init nf_conntrack_tftp_init(void) ret = nf_conntrack_helper_register(&tftp[i][j]); if (ret) { - printk("nf_ct_tftp: failed to register helper " - "for pf: %u port: %u\n", + printk(KERN_ERR "nf_ct_tftp: failed to register" + " helper for pf: %u port: %u\n", tftp[i][j].tuple.src.l3num, ports[i]); nf_conntrack_tftp_fini(); return ret; diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index bf6609978af7..770f76432ad0 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -6,7 +6,7 @@ #include #ifdef CONFIG_NETFILTER_DEBUG -#define NFDEBUG(format, args...) printk(format , ## args) +#define NFDEBUG(format, args...) printk(KERN_DEBUG format , ## args) #else #define NFDEBUG(format, args...) 
#endif diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 39b0e3100575..b4a4532823e8 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -212,13 +212,13 @@ static struct pernet_operations nfnetlink_net_ops = { static int __init nfnetlink_init(void) { - printk("Netfilter messages via NETLINK v%s.\n", nfversion); + pr_info("Netfilter messages via NETLINK v%s.\n", nfversion); return register_pernet_subsys(&nfnetlink_net_ops); } static void __exit nfnetlink_exit(void) { - printk("Removing netfilter NETLINK layer.\n"); + pr_info("Removing netfilter NETLINK layer.\n"); unregister_pernet_subsys(&nfnetlink_net_ops); } module_init(nfnetlink_init); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 203643fb2c52..fc9a211e629e 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -297,7 +297,7 @@ nfulnl_alloc_skb(unsigned int inst_size, unsigned int pkt_size) n = max(inst_size, pkt_size); skb = alloc_skb(n, GFP_ATOMIC); if (!skb) { - PRINTR("nfnetlink_log: can't alloc whole buffer (%u bytes)\n", + pr_notice("nfnetlink_log: can't alloc whole buffer (%u bytes)\n", inst_size); if (n > pkt_size) { @@ -306,7 +306,7 @@ nfulnl_alloc_skb(unsigned int inst_size, unsigned int pkt_size) skb = alloc_skb(pkt_size, GFP_ATOMIC); if (!skb) - PRINTR("nfnetlink_log: can't even alloc %u " + pr_err("nfnetlink_log: can't even alloc %u " "bytes\n", pkt_size); } } -- cgit v1.2.2 From 736d58e3a2245ac2779fe0f278f8735bcf33ca8d Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 13 May 2010 15:16:27 +0200 Subject: netfilter: remove unnecessary returns from void function()s This patch removes from net/ netfilter files all the unnecessary return; statements that precede the last closing brace of void functions. It does not remove the returns that are immediately preceded by a label as gcc doesn't like that. 
Done via: $ grep -rP --include=*.[ch] -l "return;\n}" net/ | \ xargs perl -i -e 'local $/ ; while (<>) { s/\n[ \t\n]+return;\n}/\n}/g; print; }' Signed-off-by: Joe Perches [Patrick: changed to keep return statements in otherwise empty function bodies] Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ip_queue.c | 1 - net/ipv6/netfilter/ip6_queue.c | 1 - net/netfilter/nf_conntrack_netlink.c | 1 - net/netfilter/nf_queue.c | 1 - net/netfilter/xt_time.c | 1 - 5 files changed, 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index c838238104f5..a4e5fc5df4bf 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -461,7 +461,6 @@ __ipq_rcv_skb(struct sk_buff *skb) if (flags & NLM_F_ACK) netlink_ack(skb, nlh, 0); - return; } static void diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 8656eb75520c..8c201743d96d 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -462,7 +462,6 @@ __ipq_rcv_skb(struct sk_buff *skb) if (flags & NLM_F_ACK) netlink_ack(skb, nlh, 0); - return; } static void diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 961fb6a85294..c42ff6aa441d 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2109,7 +2109,6 @@ static void __exit ctnetlink_exit(void) nfnetlink_subsys_unregister(&ctnl_exp_subsys); nfnetlink_subsys_unregister(&ctnl_subsys); - return; } module_init(ctnetlink_init); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index c49ef219899e..0b1103c0b1f3 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -279,7 +279,6 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) } rcu_read_unlock(); kfree(entry); - return; } EXPORT_SYMBOL(nf_reinject); diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index 79234bb19d05..c48975ff8ea2 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -148,7 +148,6 @@ static void localtime_3(struct xtm *r, time_t time) } r->month = i + 1; - return; } static bool -- cgit v1.2.2 From 38516ab59fbc5b3bb278cf5e1fe2867c70cff32e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 20 Apr 2010 17:04:50 -0400 Subject: tracing: Let tracepoints have data passed to tracepoint callbacks This patch adds data to be passed to tracepoint callbacks. The created functions from DECLARE_TRACE() now need a mandatory data parameter. For example: DECLARE_TRACE(mytracepoint, int value, value) Will create the register function: int register_trace_mytracepoint((void(*)(void *data, int value))probe, void *data); As the first argument, all callbacks (probes) must take a (void *data) parameter. So a callback for the above tracepoint will look like: void myprobe(void *data, int value) { } The callback may choose to ignore the data parameter. This change allows callbacks to register a private data pointer along with the function probe. void mycallback(void *data, int value); register_trace_mytracepoint(mycallback, mydata); Then the mycallback() will receive the "mydata" as the first parameter before the args. A more detailed example: DECLARE_TRACE(mytracepoint, TP_PROTO(int status), TP_ARGS(status)); /* In the C file */ DEFINE_TRACE(mytracepoint, TP_PROTO(int status), TP_ARGS(status)); [...] trace_mytracepoint(status); /* In a file registering this tracepoint */ int my_callback(void *data, int status) { struct my_struct my_data = data; [...] } [...] 
my_data = kmalloc(sizeof(*my_data), GFP_KERNEL); init_my_data(my_data); register_trace_mytracepoint(my_callback, my_data); The same callback can also be registered to the same tracepoint as long as the data registered is different. Note, the data must also be used to unregister the callback: unregister_trace_mytracepoint(my_callback, my_data); Because of the data parameter, tracepoints declared this way can not have no args. That is: DECLARE_TRACE(mytracepoint, TP_PROTO(void), TP_ARGS()); will cause an error. If no arguments are needed, a new macro can be used instead: DECLARE_TRACE_NOARGS(mytracepoint); Since there are no arguments, the proto and args fields are left out. This is part of a series to make the tracepoint footprint smaller: text data bss dec hex filename 4913961 1088356 861512 6863829 68bbd5 vmlinux.orig 4914025 1088868 861512 6864405 68be15 vmlinux.class 4918492 1084612 861512 6864616 68bee8 vmlinux.tracepoint Again, this patch also increases the size of the kernel, but lays the ground work for decreasing it. v5: Fixed net/core/drop_monitor.c to handle these updates. v4: Moved the DECLARE_TRACE() DECLARE_TRACE_NOARGS out of the #ifdef CONFIG_TRACE_POINTS, since the two are the same in both cases. The __DECLARE_TRACE() is what changes. Thanks to Frederic Weisbecker for pointing this out. v3: Made all register_* functions require data to be passed and all callbacks to take a void * parameter as its first argument. This makes the calling functions comply with C standards. Also added more comments to the modifications of DECLARE_TRACE(). v2: Made the DECLARE_TRACE() have the ability to pass arguments and added a new DECLARE_TRACE_NOARGS() for tracepoints that do not need any arguments. Acked-by: Mathieu Desnoyers Acked-by: Masami Hiramatsu Acked-by: Frederic Weisbecker Cc: Neil Horman Cc: David S. Miller Signed-off-by: Steven Rostedt --- net/core/drop_monitor.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index cf208d8042b1..ad41529fb60f 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -172,12 +172,12 @@ out: return; } -static void trace_kfree_skb_hit(struct sk_buff *skb, void *location) +static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location) { trace_drop_common(skb, location); } -static void trace_napi_poll_hit(struct napi_struct *napi) +static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi) { struct dm_hw_stat_delta *new_stat; @@ -225,12 +225,12 @@ static int set_all_monitor_traces(int state) switch (state) { case TRACE_ON: - rc |= register_trace_kfree_skb(trace_kfree_skb_hit); - rc |= register_trace_napi_poll(trace_napi_poll_hit); + rc |= register_trace_kfree_skb(trace_kfree_skb_hit, NULL); + rc |= register_trace_napi_poll(trace_napi_poll_hit, NULL); break; case TRACE_OFF: - rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit); - rc |= unregister_trace_napi_poll(trace_napi_poll_hit); + rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL); + rc |= unregister_trace_napi_poll(trace_napi_poll_hit, NULL); tracepoint_synchronize_unregister(); -- cgit v1.2.2 From 725f2865d4df31ac0768b13ae763beadc4bb8ce9 Mon Sep 17 00:00:00 2001 From: Kevin Coffman Date: Wed, 17 Mar 2010 13:02:46 -0400 Subject: gss_krb5: Introduce encryption type framework Make the client and server code consistent regarding the extra buffer space made available for the auth code when wrapping data. 
Add some comments/documentation about the available buffer space in the xdr_buf head and tail when gss_wrap is called. Add a compile-time check to make sure we are not exceeding the available buffer space. Add a central function to shift head data. Signed-off-by: Kevin Coffman Signed-off-by: Steve Dickson Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 2 +- net/sunrpc/auth_gss/gss_krb5_crypto.c | 38 +++++++++++++++++++++++++++++++++++ net/sunrpc/auth_gss/gss_krb5_wrap.c | 6 ++---- 3 files changed, 41 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index c389ccf6437d..75602ece58eb 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -61,7 +61,7 @@ static const struct rpc_credops gss_nullops; # define RPCDBG_FACILITY RPCDBG_AUTH #endif -#define GSS_CRED_SLACK 1024 +#define GSS_CRED_SLACK (RPC_MAX_AUTH_SIZE * 2) /* length of a krb5 verifier (48), plus data added before arguments when * using integrity (two 4-byte integers): */ #define GSS_VERF_SLACK 100 diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index e9b636176687..746b3e139aed 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -325,3 +325,41 @@ gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf, return xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc); } + +/* + * This function makes the assumption that it was ultimately called + * from gss_wrap(). + * + * The client auth_gss code moves any existing tail data into a + * separate page before calling gss_wrap. + * The server svcauth_gss code ensures that both the head and the + * tail have slack space of RPC_MAX_AUTH_SIZE before calling gss_wrap. + * + * Even with that guarantee, this function may be called more than + * once in the processing of gss_wrap(). The best we can do is + * verify at compile-time (see GSS_KRB5_SLACK_CHECK) that the + * largest expected shift will fit within RPC_MAX_AUTH_SIZE. + * At run-time we can verify that a single invocation of this + * function doesn't attempt to use more the RPC_MAX_AUTH_SIZE. + */ + +int +xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen) +{ + u8 *p; + + if (shiftlen == 0) + return 0; + + BUILD_BUG_ON(GSS_KRB5_MAX_SLACK_NEEDED > RPC_MAX_AUTH_SIZE); + BUG_ON(shiftlen > RPC_MAX_AUTH_SIZE); + + p = buf->head[0].iov_base + base; + + memmove(p + shiftlen, p, buf->head[0].iov_len - base); + + buf->head[0].iov_len += shiftlen; + buf->len += shiftlen; + + return 0; +} diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index a6e905637e03..496281fabb91 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -155,11 +155,9 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, ptr = buf->head[0].iov_base + offset; /* shift data to make room for header. */ + xdr_extend_head(buf, offset, headlen); + /* XXX Would be cleverer to encrypt while copying. */ - /* XXX bounds checking, slack, etc. 
*/ - memmove(ptr + headlen, ptr, buf->head[0].iov_len - offset); - buf->head[0].iov_len += headlen; - buf->len += headlen; BUG_ON((buf->len - offset - headlen) % blocksize); g_make_token_header(&kctx->mech_used, -- cgit v1.2.2 From 7561042fb7870be0b4ee57efddce68bda8968abf Mon Sep 17 00:00:00 2001 From: Kevin Coffman Date: Wed, 17 Mar 2010 13:02:47 -0400 Subject: gss_krb5: Added and improved code comments Signed-off-by: Steve Dickson Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 12 +++++++++--- net/sunrpc/auth_gss/gss_mech_switch.c | 14 ++++++++++++++ net/sunrpc/auth_gss/svcauth_gss.c | 15 +++++++++++++++ 3 files changed, 38 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 75602ece58eb..d64a58b8ed33 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1316,15 +1316,21 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, inpages = snd_buf->pages + first; snd_buf->pages = rqstp->rq_enc_pages; snd_buf->page_base -= first << PAGE_CACHE_SHIFT; - /* Give the tail its own page, in case we need extra space in the - * head when wrapping: */ + /* + * Give the tail its own page, in case we need extra space in the + * head when wrapping: + * + * call_allocate() allocates twice the slack space required + * by the authentication flavor to rq_callsize. + * For GSS, slack is GSS_CRED_SLACK. + */ if (snd_buf->page_len || snd_buf->tail[0].iov_len) { tmp = page_address(rqstp->rq_enc_pages[rqstp->rq_enc_pages_num - 1]); memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len); snd_buf->tail[0].iov_base = tmp; } maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages); - /* RPC_SLACK_SPACE should prevent this ever happening: */ + /* slack space should prevent this ever happening: */ BUG_ON(snd_buf->len > snd_buf->buflen); status = -EIO; /* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 76e4c6f4ac3c..28a84ef41d13 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -285,6 +285,20 @@ gss_verify_mic(struct gss_ctx *context_handle, mic_token); } +/* + * This function is called from both the client and server code. + * Each makes guarantees about how much "slack" space is available + * for the underlying function in "buf"'s head and tail while + * performing the wrap. + * + * The client and server code allocate RPC_MAX_AUTH_SIZE extra + * space in both the head and tail which is available for use by + * the wrap function. + * + * Underlying functions should verify they do not use more than + * RPC_MAX_AUTH_SIZE of extra space in either the head or tail + * when performing the wrap. 
+ */ u32 gss_wrap(struct gss_ctx *ctx_id, int offset, diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index b81e790ef9f4..1d9ac4ac818a 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1315,6 +1315,14 @@ svcauth_gss_wrap_resp_priv(struct svc_rqst *rqstp) inpages = resbuf->pages; /* XXX: Would be better to write some xdr helper functions for * nfs{2,3,4}xdr.c that place the data right, instead of copying: */ + + /* + * If there is currently tail data, make sure there is + * room for the head, tail, and 2 * RPC_MAX_AUTH_SIZE in + * the page, and move the current tail data such that + * there is RPC_MAX_AUTH_SIZE slack space available in + * both the head and tail. + */ if (resbuf->tail[0].iov_base) { BUG_ON(resbuf->tail[0].iov_base >= resbuf->head[0].iov_base + PAGE_SIZE); @@ -1327,6 +1335,13 @@ svcauth_gss_wrap_resp_priv(struct svc_rqst *rqstp) resbuf->tail[0].iov_len); resbuf->tail[0].iov_base += RPC_MAX_AUTH_SIZE; } + /* + * If there is no current tail data, make sure there is + * room for the head data, and 2 * RPC_MAX_AUTH_SIZE in the + * allotted page, and set up tail information such that there + * is RPC_MAX_AUTH_SIZE slack space available in both the + * head and tail. + */ if (resbuf->tail[0].iov_base == NULL) { if (resbuf->head[0].iov_len + 2*RPC_MAX_AUTH_SIZE > PAGE_SIZE) return -ENOMEM; -- cgit v1.2.2 From 54ec3d462f3c2a3fe48a7bd592160bee31360087 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 17 Mar 2010 13:02:48 -0400 Subject: gss_krb5: Don't expect blocksize to always be 8 when calculating padding Signed-off-by: Steve Dickson Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_krb5_wrap.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 496281fabb91..5d6c3b12ea70 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -12,10 +12,7 @@ static inline int gss_krb5_padding(int blocksize, int length) { - /* Most of the code is block-size independent but currently we - * use only 8: */ - BUG_ON(blocksize != 8); - return 8 - (length & 7); + return blocksize - (length % blocksize); } static inline void -- cgit v1.2.2 From 1ac3719a2214c545c7e19d34e272a148ca9a24f1 Mon Sep 17 00:00:00 2001 From: Kevin Coffman Date: Wed, 17 Mar 2010 13:02:49 -0400 Subject: gss_krb5: split up functions in preparation of adding new enctypes Add encryption type to the krb5 context structure and use it to switch to the correct functions depending on the encryption type. 
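As a minimal sketch of the dispatch this refactoring enables (the AES case and gss_get_mic_v2() below are illustrative assumptions, not part of this patch, which only wires up the v1/DES path):

	/*
	 * Illustrative sketch only: how a future encryption type would slot
	 * into the per-enctype switch introduced here.  The v1 case mirrors
	 * this patch; the AES case and gss_get_mic_v2() are hypothetical.
	 */
	u32
	gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
			     struct xdr_netobj *token)
	{
		struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;

		switch (ctx->enctype) {
		default:
			BUG();
		case ENCTYPE_DES_CBC_RAW:
			return gss_get_mic_v1(ctx, text, token);
		case ENCTYPE_AES128_CTS_HMAC_SHA1_96:	/* hypothetical future case */
			return gss_get_mic_v2(ctx, text, token);
		}
	}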
Signed-off-by: Kevin Coffman Signed-off-by: Steve Dickson Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_krb5_mech.c | 1 + net/sunrpc/auth_gss/gss_krb5_seal.c | 20 +++++++++++++++--- net/sunrpc/auth_gss/gss_krb5_unseal.c | 21 ++++++++++++++++--- net/sunrpc/auth_gss/gss_krb5_wrap.c | 38 +++++++++++++++++++++++++++++------ 4 files changed, 68 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 2deb0ed72ff4..0cd940e897ed 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -139,6 +139,7 @@ gss_import_sec_context_kerberos(const void *p, p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate)); if (IS_ERR(p)) goto out_err_free_ctx; + ctx->enctype = ENCTYPE_DES_CBC_RAW; /* The downcall format was designed before we completely understood * the uses of the context fields; so it includes some stuff we * just give some minimal sanity-checking, and some we ignore diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index 88fe6e75ed7e..71c2014e7ebf 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -70,11 +70,10 @@ DEFINE_SPINLOCK(krb5_seq_lock); -u32 -gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, +static u32 +gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text, struct xdr_netobj *token) { - struct krb5_ctx *ctx = gss_ctx->internal_ctx_id; char cksumdata[16]; struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; unsigned char *ptr, *msg_start; @@ -120,3 +119,18 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE; } + +u32 +gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, + struct xdr_netobj *token) +{ + struct krb5_ctx *ctx = gss_ctx->internal_ctx_id; + + switch (ctx->enctype) { + default: + BUG(); + case ENCTYPE_DES_CBC_RAW: + return gss_get_mic_v1(ctx, text, token); + } +} + diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index ce6c247edad0..069d4b59807a 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -70,11 +70,10 @@ /* read_token is a mic token, and message_buffer is the data that the mic was * supposedly taken over. */ -u32 -gss_verify_mic_kerberos(struct gss_ctx *gss_ctx, +static u32 +gss_verify_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *message_buffer, struct xdr_netobj *read_token) { - struct krb5_ctx *ctx = gss_ctx->internal_ctx_id; int signalg; int sealalg; char cksumdata[16]; @@ -135,3 +134,19 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx, return GSS_S_COMPLETE; } + +u32 +gss_verify_mic_kerberos(struct gss_ctx *gss_ctx, + struct xdr_buf *message_buffer, + struct xdr_netobj *read_token) +{ + struct krb5_ctx *ctx = gss_ctx->internal_ctx_id; + + switch (ctx->enctype) { + default: + BUG(); + case ENCTYPE_DES_CBC_RAW: + return gss_verify_mic_v1(ctx, message_buffer, read_token); + } +} + diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 5d6c3b12ea70..b45b59b17ae1 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -124,11 +124,10 @@ make_confounder(char *p, u32 conflen) /* XXX factor out common code with seal/unseal. 
*/ -u32 -gss_wrap_kerberos(struct gss_ctx *ctx, int offset, +static u32 +gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf, struct page **pages) { - struct krb5_ctx *kctx = ctx->internal_ctx_id; char cksumdata[16]; struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; int blocksize = 0, plainlen; @@ -203,10 +202,9 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE; } -u32 -gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf) +static u32 +gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) { - struct krb5_ctx *kctx = ctx->internal_ctx_id; int signalg; int sealalg; char cksumdata[16]; @@ -294,3 +292,31 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf) return GSS_S_COMPLETE; } + +u32 +gss_wrap_kerberos(struct gss_ctx *gctx, int offset, + struct xdr_buf *buf, struct page **pages) +{ + struct krb5_ctx *kctx = gctx->internal_ctx_id; + + switch (kctx->enctype) { + default: + BUG(); + case ENCTYPE_DES_CBC_RAW: + return gss_wrap_kerberos_v1(kctx, offset, buf, pages); + } +} + +u32 +gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, struct xdr_buf *buf) +{ + struct krb5_ctx *kctx = gctx->internal_ctx_id; + + switch (kctx->enctype) { + default: + BUG(); + case ENCTYPE_DES_CBC_RAW: + return gss_unwrap_kerberos_v1(kctx, offset, buf); + } +} + -- cgit v1.2.2 From a8cc1cb7d7a12b0e2855832d10cfbfaffebfad6c Mon Sep 17 00:00:00 2001 From: Kevin Coffman Date: Wed, 17 Mar 2010 13:02:50 -0400 Subject: gss_krb5: prepare for new context format Prepare for new context format by splitting out the old "v1" context processing function Signed-off-by: Kevin Coffman Signed-off-by: Steve Dickson Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_krb5_mech.c | 63 +++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 0cd940e897ed..afe09108e1b0 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -123,53 +123,47 @@ out_err: } static int -gss_import_sec_context_kerberos(const void *p, - size_t len, - struct gss_ctx *ctx_id) +gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx) { - const void *end = (const void *)((const char *)p + len); - struct krb5_ctx *ctx; int tmp; - if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS))) { - p = ERR_PTR(-ENOMEM); - goto out_err; - } - p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate)); if (IS_ERR(p)) - goto out_err_free_ctx; + goto out_err; + + /* Old format supports only DES! 
Any other enctype uses new format */ ctx->enctype = ENCTYPE_DES_CBC_RAW; + /* The downcall format was designed before we completely understood * the uses of the context fields; so it includes some stuff we * just give some minimal sanity-checking, and some we ignore * completely (like the next twenty bytes): */ if (unlikely(p + 20 > end || p + 20 < p)) - goto out_err_free_ctx; + goto out_err; p += 20; p = simple_get_bytes(p, end, &tmp, sizeof(tmp)); if (IS_ERR(p)) - goto out_err_free_ctx; + goto out_err; if (tmp != SGN_ALG_DES_MAC_MD5) { p = ERR_PTR(-ENOSYS); - goto out_err_free_ctx; + goto out_err; } p = simple_get_bytes(p, end, &tmp, sizeof(tmp)); if (IS_ERR(p)) - goto out_err_free_ctx; + goto out_err; if (tmp != SEAL_ALG_DES) { p = ERR_PTR(-ENOSYS); - goto out_err_free_ctx; + goto out_err; } p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime)); if (IS_ERR(p)) - goto out_err_free_ctx; + goto out_err; p = simple_get_bytes(p, end, &ctx->seq_send, sizeof(ctx->seq_send)); if (IS_ERR(p)) - goto out_err_free_ctx; + goto out_err; p = simple_get_netobj(p, end, &ctx->mech_used); if (IS_ERR(p)) - goto out_err_free_ctx; + goto out_err; p = get_key(p, end, &ctx->enc); if (IS_ERR(p)) goto out_err_free_mech; @@ -181,9 +175,6 @@ gss_import_sec_context_kerberos(const void *p, goto out_err_free_key2; } - ctx_id->internal_ctx_id = ctx; - - dprintk("RPC: Successfully imported new context.\n"); return 0; out_err_free_key2: @@ -192,12 +183,36 @@ out_err_free_key1: crypto_free_blkcipher(ctx->enc); out_err_free_mech: kfree(ctx->mech_used.data); -out_err_free_ctx: - kfree(ctx); out_err: return PTR_ERR(p); } +static int +gss_import_sec_context_kerberos(const void *p, size_t len, + struct gss_ctx *ctx_id) +{ + const void *end = (const void *)((const char *)p + len); + struct krb5_ctx *ctx; + int ret; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (ctx == NULL) + return -ENOMEM; + + if (len == 85) + ret = gss_import_v1_context(p, end, ctx); + else + ret = -EINVAL; + + if (ret == 0) + ctx_id->internal_ctx_id = ctx; + else + kfree(ctx); + + dprintk("RPC: %s: returning %d\n", __func__, ret); + return ret; +} + static void gss_delete_sec_context_kerberos(void *internal_ctx) { struct krb5_ctx *kctx = internal_ctx; -- cgit v1.2.2 From 81d4a4333a1dfd6070f046265d928bb4c79aff88 Mon Sep 17 00:00:00 2001 From: Kevin Coffman Date: Wed, 17 Mar 2010 13:02:51 -0400 Subject: gss_krb5: introduce encryption type framework Add enctype framework and change functions to use the generic values from it rather than the values hard-coded for des. Signed-off-by: Kevin Coffman Signed-off-by: Steve Dickson Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_krb5_crypto.c | 18 +++---- net/sunrpc/auth_gss/gss_krb5_mech.c | 90 ++++++++++++++++++++++++++++------- net/sunrpc/auth_gss/gss_krb5_seal.c | 49 +++++++++++-------- net/sunrpc/auth_gss/gss_krb5_unseal.c | 15 +++--- net/sunrpc/auth_gss/gss_krb5_wrap.c | 79 ++++++++++++++++++++++-------- 5 files changed, 182 insertions(+), 69 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 746b3e139aed..ccd5236953f7 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -1,7 +1,7 @@ /* * linux/net/sunrpc/gss_krb5_crypto.c * - * Copyright (c) 2000 The Regents of the University of Michigan. + * Copyright (c) 2000-2008 The Regents of the University of Michigan. * All rights reserved. 
* * Andy Adamson @@ -58,13 +58,13 @@ krb5_encrypt( { u32 ret = -EINVAL; struct scatterlist sg[1]; - u8 local_iv[16] = {0}; + u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0}; struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv }; if (length % crypto_blkcipher_blocksize(tfm) != 0) goto out; - if (crypto_blkcipher_ivsize(tfm) > 16) { + if (crypto_blkcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) { dprintk("RPC: gss_k5encrypt: tfm iv size too large %d\n", crypto_blkcipher_ivsize(tfm)); goto out; @@ -92,13 +92,13 @@ krb5_decrypt( { u32 ret = -EINVAL; struct scatterlist sg[1]; - u8 local_iv[16] = {0}; + u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0}; struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv }; if (length % crypto_blkcipher_blocksize(tfm) != 0) goto out; - if (crypto_blkcipher_ivsize(tfm) > 16) { + if (crypto_blkcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) { dprintk("RPC: gss_k5decrypt: tfm iv size too large %d\n", crypto_blkcipher_ivsize(tfm)); goto out; @@ -157,7 +157,7 @@ out: } struct encryptor_desc { - u8 iv[8]; /* XXX hard-coded blocksize */ + u8 iv[GSS_KRB5_MAX_BLOCKSIZE]; struct blkcipher_desc desc; int pos; struct xdr_buf *outbuf; @@ -198,7 +198,7 @@ encryptor(struct scatterlist *sg, void *data) desc->fraglen += sg->length; desc->pos += sg->length; - fraglen = thislen & 7; /* XXX hardcoded blocksize */ + fraglen = thislen & (crypto_blkcipher_blocksize(desc->desc.tfm) - 1); thislen -= fraglen; if (thislen == 0) @@ -256,7 +256,7 @@ gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf, } struct decryptor_desc { - u8 iv[8]; /* XXX hard-coded blocksize */ + u8 iv[GSS_KRB5_MAX_BLOCKSIZE]; struct blkcipher_desc desc; struct scatterlist frags[4]; int fragno; @@ -278,7 +278,7 @@ decryptor(struct scatterlist *sg, void *data) desc->fragno++; desc->fraglen += sg->length; - fraglen = thislen & 7; /* XXX hardcoded blocksize */ + fraglen = thislen & (crypto_blkcipher_blocksize(desc->desc.tfm) - 1); thislen -= fraglen; if (thislen == 0) diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index afe09108e1b0..a66eb706aeb7 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -1,7 +1,7 @@ /* * linux/net/sunrpc/gss_krb5_mech.c * - * Copyright (c) 2001 The Regents of the University of Michigan. + * Copyright (c) 2001-2008 The Regents of the University of Michigan. * All rights reserved. 
* * Andy Adamson @@ -48,6 +48,50 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif +static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = { + /* + * DES (All DES enctypes are mapped to the same gss functionality) + */ + { + .etype = ENCTYPE_DES_CBC_RAW, + .ctype = CKSUMTYPE_RSA_MD5, + .name = "des-cbc-crc", + .encrypt_name = "cbc(des)", + .cksum_name = "md5", + .encrypt = krb5_encrypt, + .decrypt = krb5_decrypt, + .signalg = SGN_ALG_DES_MAC_MD5, + .sealalg = SEAL_ALG_DES, + .keybytes = 7, + .keylength = 8, + .blocksize = 8, + .cksumlength = 8, + }, +}; + +static const int num_supported_enctypes = + ARRAY_SIZE(supported_gss_krb5_enctypes); + +static int +supported_gss_krb5_enctype(int etype) +{ + int i; + for (i = 0; i < num_supported_enctypes; i++) + if (supported_gss_krb5_enctypes[i].etype == etype) + return 1; + return 0; +} + +static const struct gss_krb5_enctype * +get_gss_krb5_enctype(int etype) +{ + int i; + for (i = 0; i < num_supported_enctypes; i++) + if (supported_gss_krb5_enctypes[i].etype == etype) + return &supported_gss_krb5_enctypes[i]; + return NULL; +} + static const void * simple_get_bytes(const void *p, const void *end, void *res, int len) { @@ -78,35 +122,45 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) } static inline const void * -get_key(const void *p, const void *end, struct crypto_blkcipher **res) +get_key(const void *p, const void *end, + struct krb5_ctx *ctx, struct crypto_blkcipher **res) { struct xdr_netobj key; int alg; - char *alg_name; p = simple_get_bytes(p, end, &alg, sizeof(alg)); if (IS_ERR(p)) goto out_err; + + switch (alg) { + case ENCTYPE_DES_CBC_CRC: + case ENCTYPE_DES_CBC_MD4: + case ENCTYPE_DES_CBC_MD5: + /* Map all these key types to ENCTYPE_DES_CBC_RAW */ + alg = ENCTYPE_DES_CBC_RAW; + break; + } + + if (!supported_gss_krb5_enctype(alg)) { + printk(KERN_WARNING "gss_kerberos_mech: unsupported " + "encryption key algorithm %d\n", alg); + goto out_err; + } p = simple_get_netobj(p, end, &key); if (IS_ERR(p)) goto out_err; - switch (alg) { - case ENCTYPE_DES_CBC_RAW: - alg_name = "cbc(des)"; - break; - default: - printk("gss_kerberos_mech: unsupported algorithm %d\n", alg); - goto out_err_free_key; - } - *res = crypto_alloc_blkcipher(alg_name, 0, CRYPTO_ALG_ASYNC); + *res = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0, + CRYPTO_ALG_ASYNC); if (IS_ERR(*res)) { - printk("gss_kerberos_mech: unable to initialize crypto algorithm %s\n", alg_name); + printk(KERN_WARNING "gss_kerberos_mech: unable to initialize " + "crypto algorithm %s\n", ctx->gk5e->encrypt_name); *res = NULL; goto out_err_free_key; } if (crypto_blkcipher_setkey(*res, key.data, key.len)) { - printk("gss_kerberos_mech: error setting key for crypto algorithm %s\n", alg_name); + printk(KERN_WARNING "gss_kerberos_mech: error setting key for " + "crypto algorithm %s\n", ctx->gk5e->encrypt_name); goto out_err_free_tfm; } @@ -134,6 +188,10 @@ gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx) /* Old format supports only DES! 
Any other enctype uses new format */ ctx->enctype = ENCTYPE_DES_CBC_RAW; + ctx->gk5e = get_gss_krb5_enctype(ctx->enctype); + if (ctx->gk5e == NULL) + goto out_err; + /* The downcall format was designed before we completely understood * the uses of the context fields; so it includes some stuff we * just give some minimal sanity-checking, and some we ignore @@ -164,10 +222,10 @@ gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx) p = simple_get_netobj(p, end, &ctx->mech_used); if (IS_ERR(p)) goto out_err; - p = get_key(p, end, &ctx->enc); + p = get_key(p, end, ctx, &ctx->enc); if (IS_ERR(p)) goto out_err_free_mech; - p = get_key(p, end, &ctx->seq); + p = get_key(p, end, ctx, &ctx->seq); if (IS_ERR(p)) goto out_err_free_key1; if (p != end) { diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index 71c2014e7ebf..46c6f44e5c3f 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -3,7 +3,7 @@ * * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/k5seal.c * - * Copyright (c) 2000 The Regents of the University of Michigan. + * Copyright (c) 2000-2008 The Regents of the University of Michigan. * All rights reserved. * * Andy Adamson @@ -70,36 +70,47 @@ DEFINE_SPINLOCK(krb5_seq_lock); +static char * +setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token) +{ + __be16 *ptr, *krb5_hdr; + int body_size = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength; + + token->len = g_token_size(&ctx->mech_used, body_size); + + ptr = (__be16 *)token->data; + g_make_token_header(&ctx->mech_used, body_size, (unsigned char **)&ptr); + + /* ptr now at start of header described in rfc 1964, section 1.2.1: */ + krb5_hdr = ptr; + *ptr++ = KG_TOK_MIC_MSG; + *ptr++ = cpu_to_le16(ctx->gk5e->signalg); + *ptr++ = SEAL_ALG_NONE; + *ptr++ = 0xffff; + + return (char *)krb5_hdr; +} + static u32 gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text, struct xdr_netobj *token) { - char cksumdata[16]; - struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; - unsigned char *ptr, *msg_start; + char cksumdata[GSS_KRB5_MAX_CKSUM_LEN]; + struct xdr_netobj md5cksum = {.len = sizeof(cksumdata), + .data = cksumdata}; + void *ptr; s32 now; u32 seq_send; - dprintk("RPC: gss_krb5_seal\n"); + dprintk("RPC: %s\n", __func__); BUG_ON(ctx == NULL); now = get_seconds(); - token->len = g_token_size(&ctx->mech_used, GSS_KRB5_TOK_HDR_LEN + 8); - - ptr = token->data; - g_make_token_header(&ctx->mech_used, GSS_KRB5_TOK_HDR_LEN + 8, &ptr); - - /* ptr now at header described in rfc 1964, section 1.2.1: */ - ptr[0] = (unsigned char) ((KG_TOK_MIC_MSG >> 8) & 0xff); - ptr[1] = (unsigned char) (KG_TOK_MIC_MSG & 0xff); - - msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + 8; - - *(__be16 *)(ptr + 2) = htons(SGN_ALG_DES_MAC_MD5); - memset(ptr + 4, 0xff, 4); + ptr = setup_token(ctx, token); - if (make_checksum("md5", ptr, 8, text, 0, &md5cksum)) + if (make_checksum((char *)ctx->gk5e->cksum_name, ptr, 8, + text, 0, &md5cksum)) return GSS_S_FAILURE; if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index 069d4b59807a..10ee641a39d0 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -3,7 +3,7 @@ * * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/k5unseal.c * - * Copyright (c) 2000 The Regents of the University of Michigan. + * Copyright (c) 2000-2008 The Regents of the University of Michigan. * All rights reserved. 
* * Andy Adamson @@ -76,8 +76,9 @@ gss_verify_mic_v1(struct krb5_ctx *ctx, { int signalg; int sealalg; - char cksumdata[16]; - struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; + char cksumdata[GSS_KRB5_MAX_CKSUM_LEN]; + struct xdr_netobj md5cksum = {.len = sizeof(cksumdata), + .data = cksumdata}; s32 now; int direction; u32 seqnum; @@ -97,7 +98,7 @@ gss_verify_mic_v1(struct krb5_ctx *ctx, /* XXX sanity-check bodysize?? */ signalg = ptr[2] + (ptr[3] << 8); - if (signalg != SGN_ALG_DES_MAC_MD5) + if (signalg != ctx->gk5e->signalg) return GSS_S_DEFECTIVE_TOKEN; sealalg = ptr[4] + (ptr[5] << 8); @@ -107,13 +108,15 @@ gss_verify_mic_v1(struct krb5_ctx *ctx, if ((ptr[6] != 0xff) || (ptr[7] != 0xff)) return GSS_S_DEFECTIVE_TOKEN; - if (make_checksum("md5", ptr, 8, message_buffer, 0, &md5cksum)) + if (make_checksum((char *)ctx->gk5e->cksum_name, ptr, 8, + message_buffer, 0, &md5cksum)) return GSS_S_FAILURE; if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, md5cksum.data, 16)) return GSS_S_FAILURE; - if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN, 8)) + if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN, + ctx->gk5e->cksumlength)) return GSS_S_BAD_SIG; /* it got through unscathed. Make sure the context is unexpired */ diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index b45b59b17ae1..7188891bcc33 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -1,3 +1,33 @@ +/* + * COPYRIGHT (c) 2008 + * The Regents of the University of Michigan + * ALL RIGHTS RESERVED + * + * Permission is granted to use, copy, create derivative works + * and redistribute this software and such derivative works + * for any purpose, so long as the name of The University of + * Michigan is not used in any advertising or publicity + * pertaining to the use of distribution of this software + * without specific, written prior authorization. If the + * above copyright notice or any other identification of the + * University of Michigan is included in any copy of any + * portion of this software, then the disclaimer below must + * also be included. + * + * THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION + * FROM THE UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY + * PURPOSE, AND WITHOUT WARRANTY BY THE UNIVERSITY OF + * MICHIGAN OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING + * WITHOUT LIMITATION THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE + * REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE + * FOR ANY DAMAGES, INCLUDING SPECIAL, INDIRECT, INCIDENTAL, OR + * CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM ARISING + * OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN + * IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGES. 
+ */ + #include #include #include @@ -128,8 +158,9 @@ static u32 gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf, struct page **pages) { - char cksumdata[16]; - struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; + char cksumdata[GSS_KRB5_MAX_CKSUM_LEN]; + struct xdr_netobj md5cksum = {.len = sizeof(cksumdata), + .data = cksumdata}; int blocksize = 0, plainlen; unsigned char *ptr, *msg_start; s32 now; @@ -137,7 +168,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct page **tmp_pages; u32 seq_send; - dprintk("RPC: gss_wrap_kerberos\n"); + dprintk("RPC: %s\n", __func__); now = get_seconds(); @@ -146,8 +177,9 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, BUG_ON((buf->len - offset) % blocksize); plainlen = blocksize + buf->len - offset; - headlen = g_token_size(&kctx->mech_used, 24 + plainlen) - - (buf->len - offset); + headlen = g_token_size(&kctx->mech_used, + GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength + plainlen) - + (buf->len - offset); ptr = buf->head[0].iov_base + offset; /* shift data to make room for header. */ @@ -157,25 +189,26 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, BUG_ON((buf->len - offset - headlen) % blocksize); g_make_token_header(&kctx->mech_used, - GSS_KRB5_TOK_HDR_LEN + 8 + plainlen, &ptr); + GSS_KRB5_TOK_HDR_LEN + + kctx->gk5e->cksumlength + plainlen, &ptr); /* ptr now at header described in rfc 1964, section 1.2.1: */ ptr[0] = (unsigned char) ((KG_TOK_WRAP_MSG >> 8) & 0xff); ptr[1] = (unsigned char) (KG_TOK_WRAP_MSG & 0xff); - msg_start = ptr + 24; + msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength; - *(__be16 *)(ptr + 2) = htons(SGN_ALG_DES_MAC_MD5); + *(__be16 *)(ptr + 2) = cpu_to_le16(kctx->gk5e->signalg); memset(ptr + 4, 0xff, 4); - *(__be16 *)(ptr + 4) = htons(SEAL_ALG_DES); + *(__be16 *)(ptr + 4) = cpu_to_le16(kctx->gk5e->sealalg); make_confounder(msg_start, blocksize); /* XXXJBF: UGH!: */ tmp_pages = buf->pages; buf->pages = pages; - if (make_checksum("md5", ptr, 8, buf, + if (make_checksum((char *)kctx->gk5e->cksum_name, ptr, 8, buf, offset + headlen - blocksize, &md5cksum)) return GSS_S_FAILURE; buf->pages = tmp_pages; @@ -207,8 +240,9 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) { int signalg; int sealalg; - char cksumdata[16]; - struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; + char cksumdata[GSS_KRB5_MAX_CKSUM_LEN]; + struct xdr_netobj md5cksum = {.len = sizeof(cksumdata), + .data = cksumdata}; s32 now; int direction; s32 seqnum; @@ -217,6 +251,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) void *data_start, *orig_start; int data_len; int blocksize; + int crypt_offset; dprintk("RPC: gss_unwrap_kerberos\n"); @@ -234,22 +269,27 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) /* get the sign and seal algorithms */ signalg = ptr[2] + (ptr[3] << 8); - if (signalg != SGN_ALG_DES_MAC_MD5) + if (signalg != kctx->gk5e->signalg) return GSS_S_DEFECTIVE_TOKEN; sealalg = ptr[4] + (ptr[5] << 8); - if (sealalg != SEAL_ALG_DES) + if (sealalg != kctx->gk5e->sealalg) return GSS_S_DEFECTIVE_TOKEN; if ((ptr[6] != 0xff) || (ptr[7] != 0xff)) return GSS_S_DEFECTIVE_TOKEN; - if (gss_decrypt_xdr_buf(kctx->enc, buf, - ptr + GSS_KRB5_TOK_HDR_LEN + 8 - (unsigned char *)buf->head[0].iov_base)) + /* + * Data starts after token header and checksum. 
ptr points + * to the beginning of the token header + */ + crypt_offset = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) - + (unsigned char *)buf->head[0].iov_base; + if (gss_decrypt_xdr_buf(kctx->enc, buf, crypt_offset)) return GSS_S_DEFECTIVE_TOKEN; - if (make_checksum("md5", ptr, 8, buf, - ptr + GSS_KRB5_TOK_HDR_LEN + 8 - (unsigned char *)buf->head[0].iov_base, &md5cksum)) + if (make_checksum((char *)kctx->gk5e->cksum_name, ptr, 8, buf, + crypt_offset, &md5cksum)) return GSS_S_FAILURE; if (krb5_encrypt(kctx->seq, NULL, md5cksum.data, @@ -280,7 +320,8 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) * better to copy and encrypt at the same time. */ blocksize = crypto_blkcipher_blocksize(kctx->enc); - data_start = ptr + GSS_KRB5_TOK_HDR_LEN + 8 + blocksize; + data_start = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) + + blocksize; orig_start = buf->head[0].iov_base + offset; data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start; memmove(orig_start, data_start, data_len); -- cgit v1.2.2 From e1f6c07b1160ef28e8754d12e6c03288dd9d5ca8 Mon Sep 17 00:00:00 2001 From: Kevin Coffman Date: Wed, 17 Mar 2010 13:02:52 -0400 Subject: gss_krb5: add ability to have a keyed checksum (hmac) Encryption types besides DES may use a keyed checksum (hmac). Modify the make_checksum() function to allow for a key and take care of enctype-specific processing such as truncating the resulting hash. Signed-off-by: Kevin Coffman Signed-off-by: Steve Dickson Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_krb5_crypto.c | 54 +++++++++++++++++++++++++++++------ net/sunrpc/auth_gss/gss_krb5_mech.c | 1 + net/sunrpc/auth_gss/gss_krb5_seal.c | 13 +++++---- net/sunrpc/auth_gss/gss_krb5_unseal.c | 13 +++++---- net/sunrpc/auth_gss/gss_krb5_wrap.c | 30 +++++++++++-------- 5 files changed, 80 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index ccd5236953f7..cae04d7a45a5 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -123,21 +123,42 @@ checksummer(struct scatterlist *sg, void *data) return crypto_hash_update(desc, sg, sg->length); } -/* checksum the plaintext data and hdrlen bytes of the token header */ -s32 -make_checksum(char *cksumname, char *header, int hdrlen, struct xdr_buf *body, - int body_offset, struct xdr_netobj *cksum) +/* + * checksum the plaintext data and hdrlen bytes of the token header + * The checksum is performed over the first 8 bytes of the + * gss token header and then over the data body + */ +u32 +make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, + struct xdr_buf *body, int body_offset, u8 *cksumkey, + struct xdr_netobj *cksumout) { - struct hash_desc desc; /* XXX add to ctx? 
*/ + struct hash_desc desc; struct scatterlist sg[1]; int err; + u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN]; + unsigned int checksumlen; + + if (cksumout->len < kctx->gk5e->cksumlength) { + dprintk("%s: checksum buffer length, %u, too small for %s\n", + __func__, cksumout->len, kctx->gk5e->name); + return GSS_S_FAILURE; + } - desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC); + desc.tfm = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(desc.tfm)) return GSS_S_FAILURE; - cksum->len = crypto_hash_digestsize(desc.tfm); desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; + checksumlen = crypto_hash_digestsize(desc.tfm); + + if (cksumkey != NULL) { + err = crypto_hash_setkey(desc.tfm, cksumkey, + kctx->gk5e->keylength); + if (err) + goto out; + } + err = crypto_hash_init(&desc); if (err) goto out; @@ -149,8 +170,25 @@ make_checksum(char *cksumname, char *header, int hdrlen, struct xdr_buf *body, checksummer, &desc); if (err) goto out; - err = crypto_hash_final(&desc, cksum->data); + err = crypto_hash_final(&desc, checksumdata); + if (err) + goto out; + switch (kctx->gk5e->ctype) { + case CKSUMTYPE_RSA_MD5: + err = kctx->gk5e->encrypt(kctx->seq, NULL, checksumdata, + checksumdata, checksumlen); + if (err) + goto out; + memcpy(cksumout->data, + checksumdata + checksumlen - kctx->gk5e->cksumlength, + kctx->gk5e->cksumlength); + break; + default: + BUG(); + break; + } + cksumout->len = kctx->gk5e->cksumlength; out: crypto_free_hash(desc.tfm); return err ? GSS_S_FAILURE : 0; diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index a66eb706aeb7..6f93f4752be4 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -66,6 +66,7 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = { .keylength = 8, .blocksize = 8, .cksumlength = 8, + .keyed_cksum = 0, }, }; diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index 46c6f44e5c3f..cd512719092b 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -101,6 +101,7 @@ gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text, void *ptr; s32 now; u32 seq_send; + u8 *cksumkey; dprintk("RPC: %s\n", __func__); BUG_ON(ctx == NULL); @@ -109,15 +110,15 @@ gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text, ptr = setup_token(ctx, token); - if (make_checksum((char *)ctx->gk5e->cksum_name, ptr, 8, - text, 0, &md5cksum)) - return GSS_S_FAILURE; + if (ctx->gk5e->keyed_cksum) + cksumkey = ctx->cksum; + else + cksumkey = NULL; - if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, - md5cksum.data, md5cksum.len)) + if (make_checksum(ctx, ptr, 8, text, 0, cksumkey, &md5cksum)) return GSS_S_FAILURE; - memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data + md5cksum.len - 8, 8); + memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len); spin_lock(&krb5_seq_lock); seq_send = ctx->seq_send++; diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index 10ee641a39d0..7515bffddf15 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -84,6 +84,7 @@ gss_verify_mic_v1(struct krb5_ctx *ctx, u32 seqnum; unsigned char *ptr = (unsigned char *)read_token->data; int bodysize; + u8 *cksumkey; dprintk("RPC: krb5_read_token\n"); @@ -108,14 +109,16 @@ gss_verify_mic_v1(struct krb5_ctx *ctx, if ((ptr[6] != 0xff) || (ptr[7] != 0xff)) return GSS_S_DEFECTIVE_TOKEN; - if (make_checksum((char *)ctx->gk5e->cksum_name, ptr, 8, - 
message_buffer, 0, &md5cksum)) - return GSS_S_FAILURE; + if (ctx->gk5e->keyed_cksum) + cksumkey = ctx->cksum; + else + cksumkey = NULL; - if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, md5cksum.data, 16)) + if (make_checksum(ctx, ptr, 8, message_buffer, 0, + cksumkey, &md5cksum)) return GSS_S_FAILURE; - if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN, + if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN, ctx->gk5e->cksumlength)) return GSS_S_BAD_SIG; diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 7188891bcc33..2eb3046a84ea 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -167,6 +167,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, int headlen; struct page **tmp_pages; u32 seq_send; + u8 *cksumkey; dprintk("RPC: %s\n", __func__); @@ -205,18 +206,20 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, make_confounder(msg_start, blocksize); + if (kctx->gk5e->keyed_cksum) + cksumkey = kctx->cksum; + else + cksumkey = NULL; + /* XXXJBF: UGH!: */ tmp_pages = buf->pages; buf->pages = pages; - if (make_checksum((char *)kctx->gk5e->cksum_name, ptr, 8, buf, - offset + headlen - blocksize, &md5cksum)) + if (make_checksum(kctx, ptr, 8, buf, offset + headlen - blocksize, + cksumkey, &md5cksum)) return GSS_S_FAILURE; buf->pages = tmp_pages; - if (krb5_encrypt(kctx->seq, NULL, md5cksum.data, - md5cksum.data, md5cksum.len)) - return GSS_S_FAILURE; - memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data + md5cksum.len - 8, 8); + memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len); spin_lock(&krb5_seq_lock); seq_send = kctx->seq_send++; @@ -252,6 +255,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) int data_len; int blocksize; int crypt_offset; + u8 *cksumkey; dprintk("RPC: gss_unwrap_kerberos\n"); @@ -288,15 +292,17 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) if (gss_decrypt_xdr_buf(kctx->enc, buf, crypt_offset)) return GSS_S_DEFECTIVE_TOKEN; - if (make_checksum((char *)kctx->gk5e->cksum_name, ptr, 8, buf, - crypt_offset, &md5cksum)) - return GSS_S_FAILURE; + if (kctx->gk5e->keyed_cksum) + cksumkey = kctx->cksum; + else + cksumkey = NULL; - if (krb5_encrypt(kctx->seq, NULL, md5cksum.data, - md5cksum.data, md5cksum.len)) + if (make_checksum(kctx, ptr, 8, buf, crypt_offset, + cksumkey, &md5cksum)) return GSS_S_FAILURE; - if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN, 8)) + if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN, + kctx->gk5e->cksumlength)) return GSS_S_BAD_SIG; /* it got through unscathed. Make sure the context is unexpired */ -- cgit v1.2.2 From 4891f2d008e4343eedea39ba1fe74864f1d32be0 Mon Sep 17 00:00:00 2001 From: Kevin Coffman Date: Wed, 17 Mar 2010 13:02:53 -0400 Subject: gss_krb5: import functionality to derive keys into the kernel Import the code to derive Kerberos keys from a base key into the kernel. This will allow us to change the format of the context information sent down from gssd to include only a single key. 
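A rough usage sketch of the new helper follows; the wrapper function, the usage number handling and the 0xAA "Ke" suffix are assumptions based on the rfc3961/rfc4121 derivation convention, and real call sites only arrive in later patches of this series:

	/*
	 * Sketch only: deriving an encryption sub-key from a base protocol key
	 * with the krb5_derive_key() helper added here.  The 5-byte constant is
	 * the rfc3961-style input (32-bit key usage number, big-endian, plus a
	 * key-type octet; 0xAA conventionally selects the encryption key Ke).
	 * The caller must size derived_key->data/len to gk5e->keylength, and
	 * gk5e->mk_key must be non-NULL -- the DES entry in this patch leaves
	 * it NULL, so this applies only to enctypes added later in the series.
	 */
	static u32 derive_seal_key(struct gss_krb5_enctype *gk5e,
				   const struct xdr_netobj *base_key,
				   struct xdr_netobj *derived_key, u32 usage)
	{
		u8 constdata[5];
		struct xdr_netobj c = { .len = sizeof(constdata), .data = constdata };

		constdata[0] = (usage >> 24) & 0xff;
		constdata[1] = (usage >> 16) & 0xff;
		constdata[2] = (usage >> 8) & 0xff;
		constdata[3] = usage & 0xff;
		constdata[4] = 0xAA;		/* Ke: encryption sub-key */

		return krb5_derive_key(gk5e, base_key, derived_key, &c);
	}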
Signed-off-by: Kevin Coffman Signed-off-by: Steve Dickson Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/Makefile | 2 +- net/sunrpc/auth_gss/gss_krb5_keys.c | 252 ++++++++++++++++++++++++++++++++++++ net/sunrpc/auth_gss/gss_krb5_mech.c | 1 + 3 files changed, 254 insertions(+), 1 deletion(-) create mode 100644 net/sunrpc/auth_gss/gss_krb5_keys.c (limited to 'net') diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile index 4de8bcf26fa7..74a231735f67 100644 --- a/net/sunrpc/auth_gss/Makefile +++ b/net/sunrpc/auth_gss/Makefile @@ -10,7 +10,7 @@ auth_rpcgss-objs := auth_gss.o gss_generic_token.o \ obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \ - gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o + gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c new file mode 100644 index 000000000000..832ce901bf68 --- /dev/null +++ b/net/sunrpc/auth_gss/gss_krb5_keys.c @@ -0,0 +1,252 @@ +/* + * COPYRIGHT (c) 2008 + * The Regents of the University of Michigan + * ALL RIGHTS RESERVED + * + * Permission is granted to use, copy, create derivative works + * and redistribute this software and such derivative works + * for any purpose, so long as the name of The University of + * Michigan is not used in any advertising or publicity + * pertaining to the use of distribution of this software + * without specific, written prior authorization. If the + * above copyright notice or any other identification of the + * University of Michigan is included in any copy of any + * portion of this software, then the disclaimer below must + * also be included. + * + * THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION + * FROM THE UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY + * PURPOSE, AND WITHOUT WARRANTY BY THE UNIVERSITY OF + * MICHIGAN OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING + * WITHOUT LIMITATION THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE + * REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE + * FOR ANY DAMAGES, INCLUDING SPECIAL, INDIRECT, INCIDENTAL, OR + * CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM ARISING + * OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN + * IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGES. + */ + +/* + * Copyright (C) 1998 by the FundsXpress, INC. + * + * All rights reserved. + * + * Export of this software from the United States of America may require + * a specific license from the United States Government. It is the + * responsibility of any person or organization contemplating export to + * obtain such a license before exporting. + * + * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and + * distribute this software and its documentation for any purpose and + * without fee is hereby granted, provided that the above copyright + * notice appear in all copies and that both that copyright notice and + * this permission notice appear in supporting documentation, and that + * the name of FundsXpress. not be used in advertising or publicity pertaining + * to distribution of the software without specific, written prior + * permission. FundsXpress makes no representations about the suitability of + * this software for any purpose. It is provided "as is" without express + * or implied warranty. 
+ * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +#include +#include +#include +#include +#include + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_AUTH +#endif + +/* + * This is the n-fold function as described in rfc3961, sec 5.1 + * Taken from MIT Kerberos and modified. + */ + +static void krb5_nfold(u32 inbits, const u8 *in, + u32 outbits, u8 *out) +{ + int a, b, c, lcm; + int byte, i, msbit; + + /* the code below is more readable if I make these bytes + instead of bits */ + + inbits >>= 3; + outbits >>= 3; + + /* first compute lcm(n,k) */ + + a = outbits; + b = inbits; + + while (b != 0) { + c = b; + b = a%b; + a = c; + } + + lcm = outbits*inbits/a; + + /* now do the real work */ + + memset(out, 0, outbits); + byte = 0; + + /* this will end up cycling through k lcm(k,n)/k times, which + is correct */ + for (i = lcm-1; i >= 0; i--) { + /* compute the msbit in k which gets added into this byte */ + msbit = ( + /* first, start with the msbit in the first, + * unrotated byte */ + ((inbits << 3) - 1) + /* then, for each byte, shift to the right + * for each repetition */ + + (((inbits << 3) + 13) * (i/inbits)) + /* last, pick out the correct byte within + * that shifted repetition */ + + ((inbits - (i % inbits)) << 3) + ) % (inbits << 3); + + /* pull out the byte value itself */ + byte += (((in[((inbits - 1) - (msbit >> 3)) % inbits] << 8)| + (in[((inbits) - (msbit >> 3)) % inbits])) + >> ((msbit & 7) + 1)) & 0xff; + + /* do the addition */ + byte += out[i % outbits]; + out[i % outbits] = byte & 0xff; + + /* keep around the carry bit, if any */ + byte >>= 8; + + } + + /* if there's a carry bit left over, add it back in */ + if (byte) { + for (i = outbits - 1; i >= 0; i--) { + /* do the addition */ + byte += out[i]; + out[i] = byte & 0xff; + + /* keep around the carry bit, if any */ + byte >>= 8; + } + } +} + +/* + * This is the DK (derive_key) function as described in rfc3961, sec 5.1 + * Taken from MIT Kerberos and modified. 
+ */ + +u32 krb5_derive_key(struct gss_krb5_enctype *gk5e, + const struct xdr_netobj *inkey, + struct xdr_netobj *outkey, + const struct xdr_netobj *in_constant) +{ + size_t blocksize, keybytes, keylength, n; + unsigned char *inblockdata, *outblockdata, *rawkey; + struct xdr_netobj inblock, outblock; + struct crypto_blkcipher *cipher; + u32 ret = EINVAL; + + blocksize = gk5e->blocksize; + keybytes = gk5e->keybytes; + keylength = gk5e->keylength; + + if ((inkey->len != keylength) || (outkey->len != keylength)) + goto err_return; + + cipher = crypto_alloc_blkcipher(gk5e->encrypt_name, 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(cipher)) + goto err_return; + if (crypto_blkcipher_setkey(cipher, inkey->data, inkey->len)) + goto err_return; + + /* allocate and set up buffers */ + + ret = ENOMEM; + inblockdata = kmalloc(blocksize, GFP_KERNEL); + if (inblockdata == NULL) + goto err_free_cipher; + + outblockdata = kmalloc(blocksize, GFP_KERNEL); + if (outblockdata == NULL) + goto err_free_in; + + rawkey = kmalloc(keybytes, GFP_KERNEL); + if (rawkey == NULL) + goto err_free_out; + + inblock.data = (char *) inblockdata; + inblock.len = blocksize; + + outblock.data = (char *) outblockdata; + outblock.len = blocksize; + + /* initialize the input block */ + + if (in_constant->len == inblock.len) { + memcpy(inblock.data, in_constant->data, inblock.len); + } else { + krb5_nfold(in_constant->len * 8, in_constant->data, + inblock.len * 8, inblock.data); + } + + /* loop encrypting the blocks until enough key bytes are generated */ + + n = 0; + while (n < keybytes) { + (*(gk5e->encrypt))(cipher, NULL, inblock.data, + outblock.data, inblock.len); + + if ((keybytes - n) <= outblock.len) { + memcpy(rawkey + n, outblock.data, (keybytes - n)); + break; + } + + memcpy(rawkey + n, outblock.data, outblock.len); + memcpy(inblock.data, outblock.data, outblock.len); + n += outblock.len; + } + + /* postprocess the key */ + + inblock.data = (char *) rawkey; + inblock.len = keybytes; + + BUG_ON(gk5e->mk_key == NULL); + ret = (*(gk5e->mk_key))(gk5e, &inblock, outkey); + if (ret) { + dprintk("%s: got %d from mk_key function for '%s'\n", + __func__, ret, gk5e->encrypt_name); + goto err_free_raw; + } + + /* clean memory, free resources and exit */ + + ret = 0; + +err_free_raw: + memset(rawkey, 0, keybytes); + kfree(rawkey); +err_free_out: + memset(outblockdata, 0, blocksize); + kfree(outblockdata); +err_free_in: + memset(inblockdata, 0, blocksize); + kfree(inblockdata); +err_free_cipher: + crypto_free_blkcipher(cipher); +err_return: + return ret; +} diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 6f93f4752be4..fdf0eb2057ab 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -60,6 +60,7 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = { .cksum_name = "md5", .encrypt = krb5_encrypt, .decrypt = krb5_decrypt, + .mk_key = NULL, .signalg = SGN_ALG_DES_MAC_MD5, .sealalg = SEAL_ALG_DES, .keybytes = 7, -- cgit v1.2.2 From 47d84807762966c3611c38adecec6ea703ddda7a Mon Sep 17 00:00:00 2001 From: Kevin Coffman Date: Wed, 17 Mar 2010 13:02:54 -0400 Subject: gss_krb5: handle new context format from gssd For encryption types other than DES, gssd sends down context information in a new format. This new format includes the information needed to support the new Kerberos GSS-API tokens defined in rfc4121. 
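Concretely, the new context blob is a flat sequence of fixed-size fields followed by the raw Kerberos session key, and the gss_import_v2_context() added below pulls them off in order with simple_get_bytes(). The structure sketched here is illustrative only; the patch never defines such a struct (it deliberately parses field by field rather than overlaying a structure on the buffer), and the field widths are assumed to match the krb5_ctx members they are copied into.

    /* Reader's sketch of the v2 downcall layout written by gssd. */
    struct krb5_v2_downcall_layout {
            u32 flags;       /* KRB5_CTX_FLAG_INITIATOR, KRB5_CTX_FLAG_ACCEPTOR_SUBKEY, ... */
            u32 endtime;     /* context expiry, in seconds */
            u64 seq_send64;  /* initial 64-bit send sequence number */
            u32 enctype;     /* e.g. ENCTYPE_DES3_CBC_RAW or the AES enctypes */
            u8  rawkey[];    /* gk5e->keylength bytes of session key */
    };

Anything left over after the key makes the import fail, and a 64-bit sequence number that does not fit the legacy 32-bit seq_send is rejected as well. Note also that a later patch in this series maps ENCTYPE_DES3_CBC_SHA1 from gssd onto ENCTYPE_DES3_CBC_RAW before the enctype lookup.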
Signed-off-by: Kevin Coffman Signed-off-by: Steve Dickson Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_krb5_keys.c | 2 +- net/sunrpc/auth_gss/gss_krb5_mech.c | 237 +++++++++++++++++++++++++++++++++++- 2 files changed, 237 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c index 832ce901bf68..253b4149584a 100644 --- a/net/sunrpc/auth_gss/gss_krb5_keys.c +++ b/net/sunrpc/auth_gss/gss_krb5_keys.c @@ -147,7 +147,7 @@ static void krb5_nfold(u32 inbits, const u8 *in, * Taken from MIT Kerberos and modified. */ -u32 krb5_derive_key(struct gss_krb5_enctype *gk5e, +u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e, const struct xdr_netobj *inkey, struct xdr_netobj *outkey, const struct xdr_netobj *in_constant) diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index fdf0eb2057ab..8b612e733563 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -48,6 +48,8 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif +static struct gss_api_mech gss_kerberos_mech; /* forward declaration */ + static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = { /* * DES (All DES enctypes are mapped to the same gss functionality) @@ -247,6 +249,237 @@ out_err: return PTR_ERR(p); } +struct crypto_blkcipher * +context_v2_alloc_cipher(struct krb5_ctx *ctx, u8 *key) +{ + struct crypto_blkcipher *cp; + + cp = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, + 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(cp)) { + dprintk("gss_kerberos_mech: unable to initialize " + "crypto algorithm %s\n", ctx->gk5e->encrypt_name); + return NULL; + } + if (crypto_blkcipher_setkey(cp, key, ctx->gk5e->keylength)) { + dprintk("gss_kerberos_mech: error setting key for " + "crypto algorithm %s\n", ctx->gk5e->encrypt_name); + crypto_free_blkcipher(cp); + return NULL; + } + return cp; +} + +static inline void +set_cdata(u8 cdata[GSS_KRB5_K5CLENGTH], u32 usage, u8 seed) +{ + cdata[0] = (usage>>24)&0xff; + cdata[1] = (usage>>16)&0xff; + cdata[2] = (usage>>8)&0xff; + cdata[3] = usage&0xff; + cdata[4] = seed; +} + +static int +context_derive_keys_des3(struct krb5_ctx *ctx, u8 *rawkey, u32 keylen) +{ + struct xdr_netobj c, keyin, keyout; + u8 cdata[GSS_KRB5_K5CLENGTH]; + u32 err; + + c.len = GSS_KRB5_K5CLENGTH; + c.data = cdata; + + keyin.data = rawkey; + keyin.len = keylen; + keyout.len = keylen; + + /* seq uses the raw key */ + ctx->seq = context_v2_alloc_cipher(ctx, rawkey); + if (ctx->seq == NULL) + goto out_err; + + ctx->enc = context_v2_alloc_cipher(ctx, rawkey); + if (ctx->enc == NULL) + goto out_free_seq; + + /* derive cksum */ + set_cdata(cdata, KG_USAGE_SIGN, KEY_USAGE_SEED_CHECKSUM); + keyout.data = ctx->cksum; + err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c); + if (err) { + dprintk("%s: Error %d deriving cksum key\n", + __func__, err); + goto out_free_enc; + } + + return 0; + +out_free_enc: + crypto_free_blkcipher(ctx->enc); +out_free_seq: + crypto_free_blkcipher(ctx->seq); +out_err: + return -EINVAL; +} + +static int +context_derive_keys_new(struct krb5_ctx *ctx, u8 *rawkey, u32 keylen) +{ + struct xdr_netobj c, keyin, keyout; + u8 cdata[GSS_KRB5_K5CLENGTH]; + u32 err; + + c.len = GSS_KRB5_K5CLENGTH; + c.data = cdata; + + keyin.data = rawkey; + keyin.len = keylen; + keyout.len = keylen; + + /* initiator seal encryption */ + set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_ENCRYPTION); + keyout.data = ctx->initiator_seal; + err = krb5_derive_key(ctx->gk5e, 
&keyin, &keyout, &c); + if (err) { + dprintk("%s: Error %d deriving initiator_seal key\n", + __func__, err); + goto out_err; + } + ctx->initiator_enc = context_v2_alloc_cipher(ctx, ctx->initiator_seal); + if (ctx->initiator_enc == NULL) + goto out_err; + + /* acceptor seal encryption */ + set_cdata(cdata, KG_USAGE_ACCEPTOR_SEAL, KEY_USAGE_SEED_ENCRYPTION); + keyout.data = ctx->acceptor_seal; + err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c); + if (err) { + dprintk("%s: Error %d deriving acceptor_seal key\n", + __func__, err); + goto out_free_initiator_enc; + } + ctx->acceptor_enc = context_v2_alloc_cipher(ctx, ctx->acceptor_seal); + if (ctx->acceptor_enc == NULL) + goto out_free_initiator_enc; + + /* initiator sign checksum */ + set_cdata(cdata, KG_USAGE_INITIATOR_SIGN, KEY_USAGE_SEED_CHECKSUM); + keyout.data = ctx->initiator_sign; + err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c); + if (err) { + dprintk("%s: Error %d deriving initiator_sign key\n", + __func__, err); + goto out_free_acceptor_enc; + } + + /* acceptor sign checksum */ + set_cdata(cdata, KG_USAGE_ACCEPTOR_SIGN, KEY_USAGE_SEED_CHECKSUM); + keyout.data = ctx->acceptor_sign; + err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c); + if (err) { + dprintk("%s: Error %d deriving acceptor_sign key\n", + __func__, err); + goto out_free_acceptor_enc; + } + + /* initiator seal integrity */ + set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_INTEGRITY); + keyout.data = ctx->initiator_integ; + err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c); + if (err) { + dprintk("%s: Error %d deriving initiator_integ key\n", + __func__, err); + goto out_free_acceptor_enc; + } + + /* acceptor seal integrity */ + set_cdata(cdata, KG_USAGE_ACCEPTOR_SEAL, KEY_USAGE_SEED_INTEGRITY); + keyout.data = ctx->acceptor_integ; + err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c); + if (err) { + dprintk("%s: Error %d deriving acceptor_integ key\n", + __func__, err); + goto out_free_acceptor_enc; + } + + return 0; + +out_free_acceptor_enc: + crypto_free_blkcipher(ctx->acceptor_enc); +out_free_initiator_enc: + crypto_free_blkcipher(ctx->initiator_enc); +out_err: + return -EINVAL; +} + +static int +gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx) +{ + u8 rawkey[GSS_KRB5_MAX_KEYLEN]; + int keylen; + + p = simple_get_bytes(p, end, &ctx->flags, sizeof(ctx->flags)); + if (IS_ERR(p)) + goto out_err; + ctx->initiate = ctx->flags & KRB5_CTX_FLAG_INITIATOR; + + p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime)); + if (IS_ERR(p)) + goto out_err; + p = simple_get_bytes(p, end, &ctx->seq_send64, sizeof(ctx->seq_send64)); + if (IS_ERR(p)) + goto out_err; + /* set seq_send for use by "older" enctypes */ + ctx->seq_send = ctx->seq_send64; + if (ctx->seq_send64 != ctx->seq_send) { + dprintk("%s: seq_send64 %lx, seq_send %x overflow?\n", __func__, + (long unsigned)ctx->seq_send64, ctx->seq_send); + goto out_err; + } + p = simple_get_bytes(p, end, &ctx->enctype, sizeof(ctx->enctype)); + if (IS_ERR(p)) + goto out_err; + ctx->gk5e = get_gss_krb5_enctype(ctx->enctype); + if (ctx->gk5e == NULL) { + dprintk("gss_kerberos_mech: unsupported krb5 enctype %u\n", + ctx->enctype); + p = ERR_PTR(-EINVAL); + goto out_err; + } + keylen = ctx->gk5e->keylength; + + p = simple_get_bytes(p, end, rawkey, keylen); + if (IS_ERR(p)) + goto out_err; + + if (p != end) { + p = ERR_PTR(-EINVAL); + goto out_err; + } + + ctx->mech_used.data = kmemdup(gss_kerberos_mech.gm_oid.data, + gss_kerberos_mech.gm_oid.len, GFP_KERNEL); + if 
(unlikely(ctx->mech_used.data == NULL)) { + p = ERR_PTR(-ENOMEM); + goto out_err; + } + ctx->mech_used.len = gss_kerberos_mech.gm_oid.len; + + switch (ctx->enctype) { + case ENCTYPE_DES3_CBC_RAW: + return context_derive_keys_des3(ctx, rawkey, keylen); + case ENCTYPE_AES128_CTS_HMAC_SHA1_96: + case ENCTYPE_AES256_CTS_HMAC_SHA1_96: + return context_derive_keys_new(ctx, rawkey, keylen); + default: + return -EINVAL; + } + +out_err: + return PTR_ERR(p); +} + static int gss_import_sec_context_kerberos(const void *p, size_t len, struct gss_ctx *ctx_id) @@ -262,7 +495,7 @@ gss_import_sec_context_kerberos(const void *p, size_t len, if (len == 85) ret = gss_import_v1_context(p, end, ctx); else - ret = -EINVAL; + ret = gss_import_v2_context(p, end, ctx); if (ret == 0) ctx_id->internal_ctx_id = ctx; @@ -279,6 +512,8 @@ gss_delete_sec_context_kerberos(void *internal_ctx) { crypto_free_blkcipher(kctx->seq); crypto_free_blkcipher(kctx->enc); + crypto_free_blkcipher(kctx->acceptor_enc); + crypto_free_blkcipher(kctx->initiator_enc); kfree(kctx->mech_used.data); kfree(kctx); } -- cgit v1.2.2 From 683ac6656cb05b6e83593770ffc049eee4a4d119 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 8 Apr 2010 14:09:58 -0400 Subject: gss_krb5: Add upcall info indicating supported kerberos enctypes The text based upcall now indicates which Kerberos encryption types are supported by the kernel rpcsecgss code. This is used by gssd to determine which encryption types it should attempt to negotiate when creating a context with a server. The server principal's database and keytab encryption types are what limits what it should negotiate. Therefore, its keytab should be created with only the enctypes listed by this file. Currently we support des-cbc-crc, des-cbc-md4 and des-cbc-md5 Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 8 +++++++- net/sunrpc/auth_gss/gss_krb5_mech.c | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index d64a58b8ed33..6654c8534d32 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -377,11 +377,12 @@ static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg) static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, struct rpc_clnt *clnt, int machine_cred) { + struct gss_api_mech *mech = gss_msg->auth->mech; char *p = gss_msg->databuf; int len = 0; gss_msg->msg.len = sprintf(gss_msg->databuf, "mech=%s uid=%d ", - gss_msg->auth->mech->gm_name, + mech->gm_name, gss_msg->uid); p += gss_msg->msg.len; if (clnt->cl_principal) { @@ -398,6 +399,11 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, p += len; gss_msg->msg.len += len; } + if (mech->gm_upcall_enctypes) { + len = sprintf(p, mech->gm_upcall_enctypes); + p += len; + gss_msg->msg.len += len; + } len = sprintf(p, "\n"); gss_msg->msg.len += len; diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 8b612e733563..03f1dcddbd29 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -552,6 +552,7 @@ static struct gss_api_mech gss_kerberos_mech = { .gm_ops = &gss_kerberos_ops, .gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs), .gm_pfs = gss_kerberos_pfs, + .gm_upcall_enctypes = "enctypes=3,1,2 ", }; static int __init init_kerberos_module(void) -- cgit v1.2.2 From 958142e97e04d6c266ae093739bbbbd03afcd497 Mon Sep 17 00:00:00 2001 From: Kevin Coffman Date: Wed, 17 Mar 2010 13:02:55 -0400 Subject: gss_krb5: add 
support for triple-des encryption Add the final pieces to support the triple-des encryption type. Signed-off-by: Kevin Coffman Signed-off-by: Steve Dickson Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_krb5_crypto.c | 3 ++ net/sunrpc/auth_gss/gss_krb5_keys.c | 53 +++++++++++++++++++++++++++++++++++ net/sunrpc/auth_gss/gss_krb5_mech.c | 23 +++++++++++++++ net/sunrpc/auth_gss/gss_krb5_seal.c | 1 + net/sunrpc/auth_gss/gss_krb5_unseal.c | 1 + net/sunrpc/auth_gss/gss_krb5_wrap.c | 2 ++ 6 files changed, 83 insertions(+) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index cae04d7a45a5..bb76873aa019 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -184,6 +184,9 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, checksumdata + checksumlen - kctx->gk5e->cksumlength, kctx->gk5e->cksumlength); break; + case CKSUMTYPE_HMAC_SHA1_DES3: + memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength); + break; default: BUG(); break; diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c index 253b4149584a..d54668790f0c 100644 --- a/net/sunrpc/auth_gss/gss_krb5_keys.c +++ b/net/sunrpc/auth_gss/gss_krb5_keys.c @@ -250,3 +250,56 @@ err_free_cipher: err_return: return ret; } + +#define smask(step) ((1<>step)&smask(step))) +#define parity_char(x) pstep(pstep(pstep((x), 4), 2), 1) + +static void mit_des_fixup_key_parity(u8 key[8]) +{ + int i; + for (i = 0; i < 8; i++) { + key[i] &= 0xfe; + key[i] |= 1^parity_char(key[i]); + } +} + +/* + * This is the des3 key derivation postprocess function + */ +u32 gss_krb5_des3_make_key(const struct gss_krb5_enctype *gk5e, + struct xdr_netobj *randombits, + struct xdr_netobj *key) +{ + int i; + u32 ret = EINVAL; + + if (key->len != 24) { + dprintk("%s: key->len is %d\n", __func__, key->len); + goto err_out; + } + if (randombits->len != 21) { + dprintk("%s: randombits->len is %d\n", + __func__, randombits->len); + goto err_out; + } + + /* take the seven bytes, move them around into the top 7 bits of the + 8 key bytes, then compute the parity bits. Do this three times. 
*/ + + for (i = 0; i < 3; i++) { + memcpy(key->data + i*8, randombits->data + i*7, 7); + key->data[i*8+7] = (((key->data[i*8]&1)<<1) | + ((key->data[i*8+1]&1)<<2) | + ((key->data[i*8+2]&1)<<3) | + ((key->data[i*8+3]&1)<<4) | + ((key->data[i*8+4]&1)<<5) | + ((key->data[i*8+5]&1)<<6) | + ((key->data[i*8+6]&1)<<7)); + + mit_des_fixup_key_parity(key->data + i*8); + } + ret = 0; +err_out: + return ret; +} diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 03f1dcddbd29..7cebdf843266 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -71,6 +71,26 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = { .cksumlength = 8, .keyed_cksum = 0, }, + /* + * 3DES + */ + { + .etype = ENCTYPE_DES3_CBC_RAW, + .ctype = CKSUMTYPE_HMAC_SHA1_DES3, + .name = "des3-hmac-sha1", + .encrypt_name = "cbc(des3_ede)", + .cksum_name = "hmac(sha1)", + .encrypt = krb5_encrypt, + .decrypt = krb5_decrypt, + .mk_key = gss_krb5_des3_make_key, + .signalg = SGN_ALG_HMAC_SHA1_DES3_KD, + .sealalg = SEAL_ALG_DES3KD, + .keybytes = 21, + .keylength = 24, + .blocksize = 8, + .cksumlength = 20, + .keyed_cksum = 1, + }, }; static const int num_supported_enctypes = @@ -440,6 +460,9 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx) p = simple_get_bytes(p, end, &ctx->enctype, sizeof(ctx->enctype)); if (IS_ERR(p)) goto out_err; + /* Map ENCTYPE_DES3_CBC_SHA1 to ENCTYPE_DES3_CBC_RAW */ + if (ctx->enctype == ENCTYPE_DES3_CBC_SHA1) + ctx->enctype = ENCTYPE_DES3_CBC_RAW; ctx->gk5e = get_gss_krb5_enctype(ctx->enctype); if (ctx->gk5e == NULL) { dprintk("gss_kerberos_mech: unsupported krb5 enctype %u\n", diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index cd512719092b..7ede900049a7 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -142,6 +142,7 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, default: BUG(); case ENCTYPE_DES_CBC_RAW: + case ENCTYPE_DES3_CBC_RAW: return gss_get_mic_v1(ctx, text, token); } } diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index 7515bffddf15..3e15bdb5a9eb 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -152,6 +152,7 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx, default: BUG(); case ENCTYPE_DES_CBC_RAW: + case ENCTYPE_DES3_CBC_RAW: return gss_verify_mic_v1(ctx, message_buffer, read_token); } } diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 2eb3046a84ea..1c8ebd3dbd3c 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -350,6 +350,7 @@ gss_wrap_kerberos(struct gss_ctx *gctx, int offset, default: BUG(); case ENCTYPE_DES_CBC_RAW: + case ENCTYPE_DES3_CBC_RAW: return gss_wrap_kerberos_v1(kctx, offset, buf, pages); } } @@ -363,6 +364,7 @@ gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, struct xdr_buf *buf) default: BUG(); case ENCTYPE_DES_CBC_RAW: + case ENCTYPE_DES3_CBC_RAW: return gss_unwrap_kerberos_v1(kctx, offset, buf); } } -- cgit v1.2.2 From 4018bf3eec5ff6bf1234a602a4e72518757a7f55 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 8 Apr 2010 14:21:12 -0400 Subject: gss_krb5: Advertise triple-des enctype support in the rpcsec_gss/krb5 upcall Update the upcall info indicating which Kerberos enctypes the kernel supports. 
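For reference, these are the standard Kerberos encryption type numbers: 16 is des3-cbc-sha1, while 3, 1 and 2 are des-cbc-md5, des-cbc-crc and des-cbc-md4. In the simplest case (no target or service fields, and an illustrative uid of 0), the text upcall line that gssd now reads from the rpc_pipefs pipe looks like:

    mech=krb5 uid=0 enctypes=16,3,1,2 

with a trailing newline appended by gss_encode_v1_msg(). The later AES patch in this series extends the string to "enctypes=18,17,16,3,1,2 ", adding aes256-cts-hmac-sha1-96 (18) and aes128-cts-hmac-sha1-96 (17).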
Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_krb5_mech.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 7cebdf843266..ce80f996758a 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -575,7 +575,7 @@ static struct gss_api_mech gss_kerberos_mech = { .gm_ops = &gss_kerberos_ops, .gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs), .gm_pfs = gss_kerberos_pfs, - .gm_upcall_enctypes = "enctypes=3,1,2 ", + .gm_upcall_enctypes = "enctypes=16,3,1,2 ", }; static int __init init_kerberos_module(void) -- cgit v1.2.2 From c43abaedaff92a7bcbfe04b593164bb5faba3078 Mon Sep 17 00:00:00 2001 From: Kevin Coffman Date: Wed, 17 Mar 2010 13:02:58 -0400 Subject: xdr: Add an export for the helper function write_bytes_to_xdr_buf() Signed-off-by: Kevin Coffman Signed-off-by: Steve Dickson Signed-off-by: Trond Myklebust --- net/sunrpc/xdr.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 2763fde88499..a1f82a87d34d 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -762,6 +762,7 @@ int write_bytes_to_xdr_buf(struct xdr_buf *buf, unsigned int base, void *obj, un __write_bytes_to_xdr_buf(&subbuf, obj, len); return 0; } +EXPORT_SYMBOL_GPL(write_bytes_to_xdr_buf); int xdr_decode_word(struct xdr_buf *buf, unsigned int base, u32 *obj) -- cgit v1.2.2 From de9c17eb4a912c9028f7b470eb80815144883b26 Mon Sep 17 00:00:00 2001 From: Kevin Coffman Date: Wed, 17 Mar 2010 13:02:59 -0400 Subject: gss_krb5: add support for new token formats in rfc4121 This is a step toward support for AES encryption types which are required to use the new token formats defined in rfc4121. Signed-off-by: Kevin Coffman [SteveD: Fixed a typo in gss_verify_mic_v2()] Signed-off-by: Steve Dickson [Trond: Got rid of the TEST_ROTATE/TEST_EXTRA_COUNT crap] Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_krb5_crypto.c | 74 +++++++++++++++ net/sunrpc/auth_gss/gss_krb5_seal.c | 69 ++++++++++++++ net/sunrpc/auth_gss/gss_krb5_unseal.c | 61 ++++++++++++ net/sunrpc/auth_gss/gss_krb5_wrap.c | 174 ++++++++++++++++++++++++++++++++++ 4 files changed, 378 insertions(+) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index bb76873aa019..ca52ac28a537 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -197,6 +197,80 @@ out: return err ? GSS_S_FAILURE : 0; } +/* + * checksum the plaintext data and hdrlen bytes of the token header + * Per rfc4121, sec. 4.2.4, the checksum is performed over the data + * body then over the first 16 octets of the MIC token + * Inclusion of the header data in the calculation of the + * checksum is optional. 
+ */ +u32 +make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen, + struct xdr_buf *body, int body_offset, u8 *cksumkey, + struct xdr_netobj *cksumout) +{ + struct hash_desc desc; + struct scatterlist sg[1]; + int err; + u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN]; + unsigned int checksumlen; + + if (kctx->gk5e->keyed_cksum == 0) { + dprintk("%s: expected keyed hash for %s\n", + __func__, kctx->gk5e->name); + return GSS_S_FAILURE; + } + if (cksumkey == NULL) { + dprintk("%s: no key supplied for %s\n", + __func__, kctx->gk5e->name); + return GSS_S_FAILURE; + } + + desc.tfm = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(desc.tfm)) + return GSS_S_FAILURE; + checksumlen = crypto_hash_digestsize(desc.tfm); + desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; + + err = crypto_hash_setkey(desc.tfm, cksumkey, kctx->gk5e->keylength); + if (err) + goto out; + + err = crypto_hash_init(&desc); + if (err) + goto out; + err = xdr_process_buf(body, body_offset, body->len - body_offset, + checksummer, &desc); + if (err) + goto out; + if (header != NULL) { + sg_init_one(sg, header, hdrlen); + err = crypto_hash_update(&desc, sg, hdrlen); + if (err) + goto out; + } + err = crypto_hash_final(&desc, checksumdata); + if (err) + goto out; + + cksumout->len = kctx->gk5e->cksumlength; + + switch (kctx->gk5e->ctype) { + case CKSUMTYPE_HMAC_SHA1_96_AES128: + case CKSUMTYPE_HMAC_SHA1_96_AES256: + /* note that this truncates the hash */ + memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength); + break; + default: + BUG(); + break; + } +out: + crypto_free_hash(desc.tfm); + return err ? GSS_S_FAILURE : 0; +} + struct encryptor_desc { u8 iv[GSS_KRB5_MAX_BLOCKSIZE]; struct blkcipher_desc desc; diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index 7ede900049a7..477a546d19bb 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -91,6 +91,33 @@ setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token) return (char *)krb5_hdr; } +static void * +setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token) +{ + __be16 *ptr, *krb5_hdr; + u8 *p, flags = 0x00; + + if ((ctx->flags & KRB5_CTX_FLAG_INITIATOR) == 0) + flags |= 0x01; + if (ctx->flags & KRB5_CTX_FLAG_ACCEPTOR_SUBKEY) + flags |= 0x04; + + /* Per rfc 4121, sec 4.2.6.1, there is no header, + * just start the token */ + krb5_hdr = ptr = (__be16 *)token->data; + + *ptr++ = KG2_TOK_MIC; + p = (u8 *)ptr; + *p++ = flags; + *p++ = 0xff; + ptr = (__be16 *)p; + *ptr++ = 0xffff; + *ptr++ = 0xffff; + + token->len = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength; + return krb5_hdr; +} + static u32 gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text, struct xdr_netobj *token) @@ -132,6 +159,45 @@ gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text, return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE; } +u32 +gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text, + struct xdr_netobj *token) +{ + char cksumdata[GSS_KRB5_MAX_CKSUM_LEN]; + struct xdr_netobj cksumobj = { .len = sizeof(cksumdata), + .data = cksumdata}; + void *krb5_hdr; + s32 now; + u64 seq_send; + u8 *cksumkey; + + dprintk("RPC: %s\n", __func__); + + krb5_hdr = setup_token_v2(ctx, token); + + /* Set up the sequence number. 
Now 64-bits in clear + * text and w/o direction indicator */ + spin_lock(&krb5_seq_lock); + seq_send = ctx->seq_send64++; + spin_unlock(&krb5_seq_lock); + *((u64 *)(krb5_hdr + 8)) = cpu_to_be64(seq_send); + + if (ctx->initiate) + cksumkey = ctx->initiator_sign; + else + cksumkey = ctx->acceptor_sign; + + if (make_checksum_v2(ctx, krb5_hdr, GSS_KRB5_TOK_HDR_LEN, + text, 0, cksumkey, &cksumobj)) + return GSS_S_FAILURE; + + memcpy(krb5_hdr + GSS_KRB5_TOK_HDR_LEN, cksumobj.data, cksumobj.len); + + now = get_seconds(); + + return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE; +} + u32 gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, struct xdr_netobj *token) @@ -144,6 +210,9 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, case ENCTYPE_DES_CBC_RAW: case ENCTYPE_DES3_CBC_RAW: return gss_get_mic_v1(ctx, text, token); + case ENCTYPE_AES128_CTS_HMAC_SHA1_96: + case ENCTYPE_AES256_CTS_HMAC_SHA1_96: + return gss_get_mic_v2(ctx, text, token); } } diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index 3e15bdb5a9eb..4ede4cc4391f 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -141,6 +141,64 @@ gss_verify_mic_v1(struct krb5_ctx *ctx, return GSS_S_COMPLETE; } +static u32 +gss_verify_mic_v2(struct krb5_ctx *ctx, + struct xdr_buf *message_buffer, struct xdr_netobj *read_token) +{ + char cksumdata[GSS_KRB5_MAX_CKSUM_LEN]; + struct xdr_netobj cksumobj = {.len = sizeof(cksumdata), + .data = cksumdata}; + s32 now; + u64 seqnum; + u8 *ptr = read_token->data; + u8 *cksumkey; + u8 flags; + int i; + + dprintk("RPC: %s\n", __func__); + + if (be16_to_cpu(*((__be16 *)ptr)) != KG2_TOK_MIC) + return GSS_S_DEFECTIVE_TOKEN; + + flags = ptr[2]; + if ((!ctx->initiate && (flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)) || + (ctx->initiate && !(flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR))) + return GSS_S_BAD_SIG; + + if (flags & KG2_TOKEN_FLAG_SEALED) { + dprintk("%s: token has unexpected sealed flag\n", __func__); + return GSS_S_FAILURE; + } + + for (i = 3; i < 8; i++) + if (ptr[i] != 0xff) + return GSS_S_DEFECTIVE_TOKEN; + + if (ctx->initiate) + cksumkey = ctx->acceptor_sign; + else + cksumkey = ctx->initiator_sign; + + if (make_checksum_v2(ctx, ptr, GSS_KRB5_TOK_HDR_LEN, message_buffer, 0, + cksumkey, &cksumobj)) + return GSS_S_FAILURE; + + if (memcmp(cksumobj.data, ptr + GSS_KRB5_TOK_HDR_LEN, + ctx->gk5e->cksumlength)) + return GSS_S_BAD_SIG; + + /* it got through unscathed. 
Make sure the context is unexpired */ + now = get_seconds(); + if (now > ctx->endtime) + return GSS_S_CONTEXT_EXPIRED; + + /* do sequencing checks */ + + seqnum = be64_to_cpup((__be64 *)ptr + 8); + + return GSS_S_COMPLETE; +} + u32 gss_verify_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *message_buffer, @@ -154,6 +212,9 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx, case ENCTYPE_DES_CBC_RAW: case ENCTYPE_DES3_CBC_RAW: return gss_verify_mic_v1(ctx, message_buffer, read_token); + case ENCTYPE_AES128_CTS_HMAC_SHA1_96: + case ENCTYPE_AES256_CTS_HMAC_SHA1_96: + return gss_verify_mic_v2(ctx, message_buffer, read_token); } } diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 1c8ebd3dbd3c..4aa46b28298c 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -340,6 +340,174 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) return GSS_S_COMPLETE; } +/* + * We cannot currently handle tokens with rotated data. We need a + * generalized routine to rotate the data in place. It is anticipated + * that we won't encounter rotated data in the general case. + */ +static u32 +rotate_left(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, u16 rrc) +{ + unsigned int realrrc = rrc % (buf->len - offset - GSS_KRB5_TOK_HDR_LEN); + + if (realrrc == 0) + return 0; + + dprintk("%s: cannot process token with rotated data: " + "rrc %u, realrrc %u\n", __func__, rrc, realrrc); + return 1; +} + +static u32 +gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset, + struct xdr_buf *buf, struct page **pages) +{ + int blocksize; + u8 *ptr, *plainhdr; + s32 now; + u8 flags = 0x00; + __be16 *be16ptr, ec = 0; + __be64 *be64ptr; + u32 err; + + dprintk("RPC: %s\n", __func__); + + if (kctx->gk5e->encrypt_v2 == NULL) + return GSS_S_FAILURE; + + /* make room for gss token header */ + if (xdr_extend_head(buf, offset, GSS_KRB5_TOK_HDR_LEN)) + return GSS_S_FAILURE; + + /* construct gss token header */ + ptr = plainhdr = buf->head[0].iov_base + offset; + *ptr++ = (unsigned char) ((KG2_TOK_WRAP>>8) & 0xff); + *ptr++ = (unsigned char) (KG2_TOK_WRAP & 0xff); + + if ((kctx->flags & KRB5_CTX_FLAG_INITIATOR) == 0) + flags |= KG2_TOKEN_FLAG_SENTBYACCEPTOR; + if ((kctx->flags & KRB5_CTX_FLAG_ACCEPTOR_SUBKEY) != 0) + flags |= KG2_TOKEN_FLAG_ACCEPTORSUBKEY; + /* We always do confidentiality in wrap tokens */ + flags |= KG2_TOKEN_FLAG_SEALED; + + *ptr++ = flags; + *ptr++ = 0xff; + be16ptr = (__be16 *)ptr; + + blocksize = crypto_blkcipher_blocksize(kctx->acceptor_enc); + *be16ptr++ = cpu_to_be16(ec); + /* "inner" token header always uses 0 for RRC */ + *be16ptr++ = cpu_to_be16(0); + + be64ptr = (__be64 *)be16ptr; + spin_lock(&krb5_seq_lock); + *be64ptr = cpu_to_be64(kctx->seq_send64++); + spin_unlock(&krb5_seq_lock); + + err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, ec, pages); + if (err) + return err; + + now = get_seconds(); + return (kctx->endtime < now) ? 
GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE; +} + +static u32 +gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) +{ + s32 now; + u64 seqnum; + u8 *ptr; + u8 flags = 0x00; + u16 ec, rrc; + int err; + u32 headskip, tailskip; + u8 decrypted_hdr[GSS_KRB5_TOK_HDR_LEN]; + unsigned int movelen; + + + dprintk("RPC: %s\n", __func__); + + if (kctx->gk5e->decrypt_v2 == NULL) + return GSS_S_FAILURE; + + ptr = buf->head[0].iov_base + offset; + + if (be16_to_cpu(*((__be16 *)ptr)) != KG2_TOK_WRAP) + return GSS_S_DEFECTIVE_TOKEN; + + flags = ptr[2]; + if ((!kctx->initiate && (flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)) || + (kctx->initiate && !(flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR))) + return GSS_S_BAD_SIG; + + if ((flags & KG2_TOKEN_FLAG_SEALED) == 0) { + dprintk("%s: token missing expected sealed flag\n", __func__); + return GSS_S_DEFECTIVE_TOKEN; + } + + if (ptr[3] != 0xff) + return GSS_S_DEFECTIVE_TOKEN; + + ec = be16_to_cpup((__be16 *)(ptr + 4)); + rrc = be16_to_cpup((__be16 *)(ptr + 6)); + + seqnum = be64_to_cpup((__be64 *)(ptr + 8)); + + if (rrc != 0) { + err = rotate_left(kctx, offset, buf, rrc); + if (err) + return GSS_S_FAILURE; + } + + err = (*kctx->gk5e->decrypt_v2)(kctx, offset, buf, + &headskip, &tailskip); + if (err) + return GSS_S_FAILURE; + + /* + * Retrieve the decrypted gss token header and verify + * it against the original + */ + err = read_bytes_from_xdr_buf(buf, + buf->len - GSS_KRB5_TOK_HDR_LEN - tailskip, + decrypted_hdr, GSS_KRB5_TOK_HDR_LEN); + if (err) { + dprintk("%s: error %u getting decrypted_hdr\n", __func__, err); + return GSS_S_FAILURE; + } + if (memcmp(ptr, decrypted_hdr, 6) + || memcmp(ptr + 8, decrypted_hdr + 8, 8)) { + dprintk("%s: token hdr, plaintext hdr mismatch!\n", __func__); + return GSS_S_FAILURE; + } + + /* do sequencing checks */ + + /* it got through unscathed. Make sure the context is unexpired */ + now = get_seconds(); + if (now > kctx->endtime) + return GSS_S_CONTEXT_EXPIRED; + + /* + * Move the head data back to the right position in xdr_buf. + * We ignore any "ec" data since it might be in the head or + * the tail, and we really don't need to deal with it. + * Note that buf->head[0].iov_len may indicate the available + * head buffer space rather than that actually occupied. 
+ */ + movelen = min_t(unsigned int, buf->head[0].iov_len, buf->len); + movelen -= offset + GSS_KRB5_TOK_HDR_LEN + headskip; + BUG_ON(offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen > + buf->head[0].iov_len); + memmove(ptr, ptr + GSS_KRB5_TOK_HDR_LEN + headskip, movelen); + buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip; + buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip; + + return GSS_S_COMPLETE; +} + u32 gss_wrap_kerberos(struct gss_ctx *gctx, int offset, struct xdr_buf *buf, struct page **pages) @@ -352,6 +520,9 @@ gss_wrap_kerberos(struct gss_ctx *gctx, int offset, case ENCTYPE_DES_CBC_RAW: case ENCTYPE_DES3_CBC_RAW: return gss_wrap_kerberos_v1(kctx, offset, buf, pages); + case ENCTYPE_AES128_CTS_HMAC_SHA1_96: + case ENCTYPE_AES256_CTS_HMAC_SHA1_96: + return gss_wrap_kerberos_v2(kctx, offset, buf, pages); } } @@ -366,6 +537,9 @@ gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, struct xdr_buf *buf) case ENCTYPE_DES_CBC_RAW: case ENCTYPE_DES3_CBC_RAW: return gss_unwrap_kerberos_v1(kctx, offset, buf); + case ENCTYPE_AES128_CTS_HMAC_SHA1_96: + case ENCTYPE_AES256_CTS_HMAC_SHA1_96: + return gss_unwrap_kerberos_v2(kctx, offset, buf); } } -- cgit v1.2.2 From 934a95aa1c9c6ad77838800b79c306e982437605 Mon Sep 17 00:00:00 2001 From: Kevin Coffman Date: Wed, 17 Mar 2010 13:03:00 -0400 Subject: gss_krb5: add remaining pieces to enable AES encryption support Add the remaining pieces to enable support for Kerberos AES encryption types. Signed-off-by: Kevin Coffman Signed-off-by: Steve Dickson Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_krb5_crypto.c | 248 ++++++++++++++++++++++++++++++++++ net/sunrpc/auth_gss/gss_krb5_keys.c | 30 ++++ net/sunrpc/auth_gss/gss_krb5_mech.c | 86 ++++++++++-- net/sunrpc/auth_gss/gss_krb5_wrap.c | 6 +- 4 files changed, 358 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index ca52ac28a537..967484a914f3 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -478,3 +479,250 @@ xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen) return 0; } + +static u32 +gss_krb5_cts_crypt(struct crypto_blkcipher *cipher, struct xdr_buf *buf, + u32 offset, u8 *iv, struct page **pages, int encrypt) +{ + u32 ret; + struct scatterlist sg[1]; + struct blkcipher_desc desc = { .tfm = cipher, .info = iv }; + u8 data[crypto_blkcipher_blocksize(cipher) * 2]; + struct page **save_pages; + u32 len = buf->len - offset; + + BUG_ON(len > crypto_blkcipher_blocksize(cipher) * 2); + + /* + * For encryption, we want to read from the cleartext + * page cache pages, and write the encrypted data to + * the supplied xdr_buf pages. 
+ */ + save_pages = buf->pages; + if (encrypt) + buf->pages = pages; + + ret = read_bytes_from_xdr_buf(buf, offset, data, len); + buf->pages = save_pages; + if (ret) + goto out; + + sg_init_one(sg, data, len); + + if (encrypt) + ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, len); + else + ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, len); + + if (ret) + goto out; + + ret = write_bytes_to_xdr_buf(buf, offset, data, len); + +out: + return ret; +} + +u32 +gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, + struct xdr_buf *buf, int ec, struct page **pages) +{ + u32 err; + struct xdr_netobj hmac; + u8 *cksumkey; + u8 *ecptr; + struct crypto_blkcipher *cipher, *aux_cipher; + int blocksize; + struct page **save_pages; + int nblocks, nbytes; + struct encryptor_desc desc; + u32 cbcbytes; + + if (kctx->initiate) { + cipher = kctx->initiator_enc; + aux_cipher = kctx->initiator_enc_aux; + cksumkey = kctx->initiator_integ; + } else { + cipher = kctx->acceptor_enc; + aux_cipher = kctx->acceptor_enc_aux; + cksumkey = kctx->acceptor_integ; + } + blocksize = crypto_blkcipher_blocksize(cipher); + + /* hide the gss token header and insert the confounder */ + offset += GSS_KRB5_TOK_HDR_LEN; + if (xdr_extend_head(buf, offset, blocksize)) + return GSS_S_FAILURE; + gss_krb5_make_confounder(buf->head[0].iov_base + offset, blocksize); + offset -= GSS_KRB5_TOK_HDR_LEN; + + if (buf->tail[0].iov_base != NULL) { + ecptr = buf->tail[0].iov_base + buf->tail[0].iov_len; + } else { + buf->tail[0].iov_base = buf->head[0].iov_base + + buf->head[0].iov_len; + buf->tail[0].iov_len = 0; + ecptr = buf->tail[0].iov_base; + } + + memset(ecptr, 'X', ec); + buf->tail[0].iov_len += ec; + buf->len += ec; + + /* copy plaintext gss token header after filler (if any) */ + memcpy(ecptr + ec, buf->head[0].iov_base + offset, + GSS_KRB5_TOK_HDR_LEN); + buf->tail[0].iov_len += GSS_KRB5_TOK_HDR_LEN; + buf->len += GSS_KRB5_TOK_HDR_LEN; + + /* Do the HMAC */ + hmac.len = GSS_KRB5_MAX_CKSUM_LEN; + hmac.data = buf->tail[0].iov_base + buf->tail[0].iov_len; + + /* + * When we are called, pages points to the real page cache + * data -- which we can't go and encrypt! buf->pages points + * to scratch pages which we are going to send off to the + * client/server. Swap in the plaintext pages to calculate + * the hmac. + */ + save_pages = buf->pages; + buf->pages = pages; + + err = make_checksum_v2(kctx, NULL, 0, buf, + offset + GSS_KRB5_TOK_HDR_LEN, cksumkey, &hmac); + buf->pages = save_pages; + if (err) + return GSS_S_FAILURE; + + nbytes = buf->len - offset - GSS_KRB5_TOK_HDR_LEN; + nblocks = (nbytes + blocksize - 1) / blocksize; + cbcbytes = 0; + if (nblocks > 2) + cbcbytes = (nblocks - 2) * blocksize; + + memset(desc.iv, 0, sizeof(desc.iv)); + + if (cbcbytes) { + desc.pos = offset + GSS_KRB5_TOK_HDR_LEN; + desc.fragno = 0; + desc.fraglen = 0; + desc.pages = pages; + desc.outbuf = buf; + desc.desc.info = desc.iv; + desc.desc.flags = 0; + desc.desc.tfm = aux_cipher; + + sg_init_table(desc.infrags, 4); + sg_init_table(desc.outfrags, 4); + + err = xdr_process_buf(buf, offset + GSS_KRB5_TOK_HDR_LEN, + cbcbytes, encryptor, &desc); + if (err) + goto out_err; + } + + /* Make sure IV carries forward from any CBC results. 
*/ + err = gss_krb5_cts_crypt(cipher, buf, + offset + GSS_KRB5_TOK_HDR_LEN + cbcbytes, + desc.iv, pages, 1); + if (err) { + err = GSS_S_FAILURE; + goto out_err; + } + + /* Now update buf to account for HMAC */ + buf->tail[0].iov_len += kctx->gk5e->cksumlength; + buf->len += kctx->gk5e->cksumlength; + +out_err: + if (err) + err = GSS_S_FAILURE; + return err; +} + +u32 +gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, + u32 *headskip, u32 *tailskip) +{ + struct xdr_buf subbuf; + u32 ret = 0; + u8 *cksum_key; + struct crypto_blkcipher *cipher, *aux_cipher; + struct xdr_netobj our_hmac_obj; + u8 our_hmac[GSS_KRB5_MAX_CKSUM_LEN]; + u8 pkt_hmac[GSS_KRB5_MAX_CKSUM_LEN]; + int nblocks, blocksize, cbcbytes; + struct decryptor_desc desc; + + if (kctx->initiate) { + cipher = kctx->acceptor_enc; + aux_cipher = kctx->acceptor_enc_aux; + cksum_key = kctx->acceptor_integ; + } else { + cipher = kctx->initiator_enc; + aux_cipher = kctx->initiator_enc_aux; + cksum_key = kctx->initiator_integ; + } + blocksize = crypto_blkcipher_blocksize(cipher); + + + /* create a segment skipping the header and leaving out the checksum */ + xdr_buf_subsegment(buf, &subbuf, offset + GSS_KRB5_TOK_HDR_LEN, + (buf->len - offset - GSS_KRB5_TOK_HDR_LEN - + kctx->gk5e->cksumlength)); + + nblocks = (subbuf.len + blocksize - 1) / blocksize; + + cbcbytes = 0; + if (nblocks > 2) + cbcbytes = (nblocks - 2) * blocksize; + + memset(desc.iv, 0, sizeof(desc.iv)); + + if (cbcbytes) { + desc.fragno = 0; + desc.fraglen = 0; + desc.desc.info = desc.iv; + desc.desc.flags = 0; + desc.desc.tfm = aux_cipher; + + sg_init_table(desc.frags, 4); + + ret = xdr_process_buf(&subbuf, 0, cbcbytes, decryptor, &desc); + if (ret) + goto out_err; + } + + /* Make sure IV carries forward from any CBC results. 
*/ + ret = gss_krb5_cts_crypt(cipher, &subbuf, cbcbytes, desc.iv, NULL, 0); + if (ret) + goto out_err; + + + /* Calculate our hmac over the plaintext data */ + our_hmac_obj.len = sizeof(our_hmac); + our_hmac_obj.data = our_hmac; + + ret = make_checksum_v2(kctx, NULL, 0, &subbuf, 0, + cksum_key, &our_hmac_obj); + if (ret) + goto out_err; + + /* Get the packet's hmac value */ + ret = read_bytes_from_xdr_buf(buf, buf->len - kctx->gk5e->cksumlength, + pkt_hmac, kctx->gk5e->cksumlength); + if (ret) + goto out_err; + + if (memcmp(pkt_hmac, our_hmac, kctx->gk5e->cksumlength) != 0) { + ret = GSS_S_BAD_SIG; + goto out_err; + } + *headskip = crypto_blkcipher_blocksize(cipher); + *tailskip = kctx->gk5e->cksumlength; +out_err: + if (ret && ret != GSS_S_BAD_SIG) + ret = GSS_S_FAILURE; + return ret; +} diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c index d54668790f0c..33b87f04b30b 100644 --- a/net/sunrpc/auth_gss/gss_krb5_keys.c +++ b/net/sunrpc/auth_gss/gss_krb5_keys.c @@ -303,3 +303,33 @@ u32 gss_krb5_des3_make_key(const struct gss_krb5_enctype *gk5e, err_out: return ret; } + +/* + * This is the aes key derivation postprocess function + */ +u32 gss_krb5_aes_make_key(const struct gss_krb5_enctype *gk5e, + struct xdr_netobj *randombits, + struct xdr_netobj *key) +{ + u32 ret = EINVAL; + + if (key->len != 16 && key->len != 32) { + dprintk("%s: key->len is %d\n", __func__, key->len); + goto err_out; + } + if (randombits->len != 16 && randombits->len != 32) { + dprintk("%s: randombits->len is %d\n", + __func__, randombits->len); + goto err_out; + } + if (randombits->len != key->len) { + dprintk("%s: randombits->len is %d, key->len is %d\n", + __func__, randombits->len, key->len); + goto err_out; + } + memcpy(key->data, randombits->data, key->len); + ret = 0; +err_out: + return ret; +} + diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index ce80f996758a..694ad77c86bf 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -91,6 +91,50 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = { .cksumlength = 20, .keyed_cksum = 1, }, + /* + * AES128 + */ + { + .etype = ENCTYPE_AES128_CTS_HMAC_SHA1_96, + .ctype = CKSUMTYPE_HMAC_SHA1_96_AES128, + .name = "aes128-cts", + .encrypt_name = "cts(cbc(aes))", + .cksum_name = "hmac(sha1)", + .encrypt = krb5_encrypt, + .decrypt = krb5_decrypt, + .mk_key = gss_krb5_aes_make_key, + .encrypt_v2 = gss_krb5_aes_encrypt, + .decrypt_v2 = gss_krb5_aes_decrypt, + .signalg = -1, + .sealalg = -1, + .keybytes = 16, + .keylength = 16, + .blocksize = 16, + .cksumlength = 12, + .keyed_cksum = 1, + }, + /* + * AES256 + */ + { + .etype = ENCTYPE_AES256_CTS_HMAC_SHA1_96, + .ctype = CKSUMTYPE_HMAC_SHA1_96_AES256, + .name = "aes256-cts", + .encrypt_name = "cts(cbc(aes))", + .cksum_name = "hmac(sha1)", + .encrypt = krb5_encrypt, + .decrypt = krb5_decrypt, + .mk_key = gss_krb5_aes_make_key, + .encrypt_v2 = gss_krb5_aes_encrypt, + .decrypt_v2 = gss_krb5_aes_decrypt, + .signalg = -1, + .sealalg = -1, + .keybytes = 32, + .keylength = 32, + .blocksize = 16, + .cksumlength = 12, + .keyed_cksum = 1, + }, }; static const int num_supported_enctypes = @@ -270,20 +314,19 @@ out_err: } struct crypto_blkcipher * -context_v2_alloc_cipher(struct krb5_ctx *ctx, u8 *key) +context_v2_alloc_cipher(struct krb5_ctx *ctx, const char *cname, u8 *key) { struct crypto_blkcipher *cp; - cp = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, - 0, CRYPTO_ALG_ASYNC); + cp = 
crypto_alloc_blkcipher(cname, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(cp)) { dprintk("gss_kerberos_mech: unable to initialize " - "crypto algorithm %s\n", ctx->gk5e->encrypt_name); + "crypto algorithm %s\n", cname); return NULL; } if (crypto_blkcipher_setkey(cp, key, ctx->gk5e->keylength)) { dprintk("gss_kerberos_mech: error setting key for " - "crypto algorithm %s\n", ctx->gk5e->encrypt_name); + "crypto algorithm %s\n", cname); crypto_free_blkcipher(cp); return NULL; } @@ -315,11 +358,13 @@ context_derive_keys_des3(struct krb5_ctx *ctx, u8 *rawkey, u32 keylen) keyout.len = keylen; /* seq uses the raw key */ - ctx->seq = context_v2_alloc_cipher(ctx, rawkey); + ctx->seq = context_v2_alloc_cipher(ctx, ctx->gk5e->encrypt_name, + rawkey); if (ctx->seq == NULL) goto out_err; - ctx->enc = context_v2_alloc_cipher(ctx, rawkey); + ctx->enc = context_v2_alloc_cipher(ctx, ctx->gk5e->encrypt_name, + rawkey); if (ctx->enc == NULL) goto out_free_seq; @@ -366,7 +411,9 @@ context_derive_keys_new(struct krb5_ctx *ctx, u8 *rawkey, u32 keylen) __func__, err); goto out_err; } - ctx->initiator_enc = context_v2_alloc_cipher(ctx, ctx->initiator_seal); + ctx->initiator_enc = context_v2_alloc_cipher(ctx, + ctx->gk5e->encrypt_name, + ctx->initiator_seal); if (ctx->initiator_enc == NULL) goto out_err; @@ -379,7 +426,9 @@ context_derive_keys_new(struct krb5_ctx *ctx, u8 *rawkey, u32 keylen) __func__, err); goto out_free_initiator_enc; } - ctx->acceptor_enc = context_v2_alloc_cipher(ctx, ctx->acceptor_seal); + ctx->acceptor_enc = context_v2_alloc_cipher(ctx, + ctx->gk5e->encrypt_name, + ctx->acceptor_seal); if (ctx->acceptor_enc == NULL) goto out_free_initiator_enc; @@ -423,6 +472,23 @@ context_derive_keys_new(struct krb5_ctx *ctx, u8 *rawkey, u32 keylen) goto out_free_acceptor_enc; } + switch (ctx->enctype) { + case ENCTYPE_AES128_CTS_HMAC_SHA1_96: + case ENCTYPE_AES256_CTS_HMAC_SHA1_96: + ctx->initiator_enc_aux = + context_v2_alloc_cipher(ctx, "cbc(aes)", + ctx->initiator_seal); + if (ctx->initiator_enc_aux == NULL) + goto out_free_acceptor_enc; + ctx->acceptor_enc_aux = + context_v2_alloc_cipher(ctx, "cbc(aes)", + ctx->acceptor_seal); + if (ctx->acceptor_enc_aux == NULL) { + crypto_free_blkcipher(ctx->initiator_enc_aux); + goto out_free_acceptor_enc; + } + } + return 0; out_free_acceptor_enc: @@ -537,6 +603,8 @@ gss_delete_sec_context_kerberos(void *internal_ctx) { crypto_free_blkcipher(kctx->enc); crypto_free_blkcipher(kctx->acceptor_enc); crypto_free_blkcipher(kctx->initiator_enc); + crypto_free_blkcipher(kctx->acceptor_enc_aux); + crypto_free_blkcipher(kctx->initiator_enc_aux); kfree(kctx->mech_used.data); kfree(kctx); } diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 4aa46b28298c..a1a3585fa761 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -113,8 +113,8 @@ out: return 0; } -static void -make_confounder(char *p, u32 conflen) +void +gss_krb5_make_confounder(char *p, u32 conflen) { static u64 i = 0; u64 *q = (u64 *)p; @@ -204,7 +204,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, memset(ptr + 4, 0xff, 4); *(__be16 *)(ptr + 4) = cpu_to_le16(kctx->gk5e->sealalg); - make_confounder(msg_start, blocksize); + gss_krb5_make_confounder(msg_start, blocksize); if (kctx->gk5e->keyed_cksum) cksumkey = kctx->cksum; -- cgit v1.2.2 From bf6d359c508cf83401c942262a9749752598394d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 8 Apr 2010 14:23:06 -0400 Subject: gss_krb5: Advertise AES enctype support in the rpcsec_gss/krb5 
upcall Update upcall info indicating which Kerberos enctypes the kernel supports Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_krb5_mech.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 694ad77c86bf..506a2e7d4fad 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -643,7 +643,7 @@ static struct gss_api_mech gss_kerberos_mech = { .gm_ops = &gss_kerberos_ops, .gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs), .gm_pfs = gss_kerberos_pfs, - .gm_upcall_enctypes = "enctypes=16,3,1,2 ", + .gm_upcall_enctypes = "enctypes=18,17,16,3,1,2 ", }; static int __init init_kerberos_module(void) -- cgit v1.2.2 From 8b23707612cffdba694dcd18aa8a018918aa86dc Mon Sep 17 00:00:00 2001 From: Kevin Coffman Date: Wed, 17 Mar 2010 13:03:02 -0400 Subject: gssd_krb5: arcfour-hmac support For arcfour-hmac support, the make_checksum function needs a usage field to correctly calculate the checksum differently for MIC and WRAP tokens. Signed-off-by: Kevin Coffman Signed-off-by: Steve Dickson Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_krb5_crypto.c | 15 +++++++++++---- net/sunrpc/auth_gss/gss_krb5_seal.c | 13 +++++++++---- net/sunrpc/auth_gss/gss_krb5_unseal.c | 12 ++++++++---- net/sunrpc/auth_gss/gss_krb5_wrap.c | 4 ++-- 4 files changed, 30 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 967484a914f3..33ae7023cf3a 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -132,7 +132,7 @@ checksummer(struct scatterlist *sg, void *data) u32 make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, struct xdr_buf *body, int body_offset, u8 *cksumkey, - struct xdr_netobj *cksumout) + unsigned int usage, struct xdr_netobj *cksumout) { struct hash_desc desc; struct scatterlist sg[1]; @@ -208,7 +208,7 @@ out: u32 make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen, struct xdr_buf *body, int body_offset, u8 *cksumkey, - struct xdr_netobj *cksumout) + unsigned int usage, struct xdr_netobj *cksumout) { struct hash_desc desc; struct scatterlist sg[1]; @@ -537,15 +537,18 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, int nblocks, nbytes; struct encryptor_desc desc; u32 cbcbytes; + unsigned int usage; if (kctx->initiate) { cipher = kctx->initiator_enc; aux_cipher = kctx->initiator_enc_aux; cksumkey = kctx->initiator_integ; + usage = KG_USAGE_INITIATOR_SEAL; } else { cipher = kctx->acceptor_enc; aux_cipher = kctx->acceptor_enc_aux; cksumkey = kctx->acceptor_integ; + usage = KG_USAGE_ACCEPTOR_SEAL; } blocksize = crypto_blkcipher_blocksize(cipher); @@ -590,7 +593,8 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, buf->pages = pages; err = make_checksum_v2(kctx, NULL, 0, buf, - offset + GSS_KRB5_TOK_HDR_LEN, cksumkey, &hmac); + offset + GSS_KRB5_TOK_HDR_LEN, + cksumkey, usage, &hmac); buf->pages = save_pages; if (err) return GSS_S_FAILURE; @@ -654,15 +658,18 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, u8 pkt_hmac[GSS_KRB5_MAX_CKSUM_LEN]; int nblocks, blocksize, cbcbytes; struct decryptor_desc desc; + unsigned int usage; if (kctx->initiate) { cipher = kctx->acceptor_enc; aux_cipher = kctx->acceptor_enc_aux; cksum_key = kctx->acceptor_integ; + usage = KG_USAGE_ACCEPTOR_SEAL; } else { cipher = kctx->initiator_enc; aux_cipher = kctx->initiator_enc_aux; 
cksum_key = kctx->initiator_integ; + usage = KG_USAGE_INITIATOR_SEAL; } blocksize = crypto_blkcipher_blocksize(cipher); @@ -705,7 +712,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, our_hmac_obj.data = our_hmac; ret = make_checksum_v2(kctx, NULL, 0, &subbuf, 0, - cksum_key, &our_hmac_obj); + cksum_key, usage, &our_hmac_obj); if (ret) goto out_err; diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index 477a546d19bb..e22fed3d9a1b 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -142,7 +142,8 @@ gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text, else cksumkey = NULL; - if (make_checksum(ctx, ptr, 8, text, 0, cksumkey, &md5cksum)) + if (make_checksum(ctx, ptr, 8, text, 0, cksumkey, + KG_USAGE_SIGN, &md5cksum)) return GSS_S_FAILURE; memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len); @@ -170,6 +171,7 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text, s32 now; u64 seq_send; u8 *cksumkey; + unsigned int cksum_usage; dprintk("RPC: %s\n", __func__); @@ -182,13 +184,16 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text, spin_unlock(&krb5_seq_lock); *((u64 *)(krb5_hdr + 8)) = cpu_to_be64(seq_send); - if (ctx->initiate) + if (ctx->initiate) { cksumkey = ctx->initiator_sign; - else + cksum_usage = KG_USAGE_INITIATOR_SIGN; + } else { cksumkey = ctx->acceptor_sign; + cksum_usage = KG_USAGE_ACCEPTOR_SIGN; + } if (make_checksum_v2(ctx, krb5_hdr, GSS_KRB5_TOK_HDR_LEN, - text, 0, cksumkey, &cksumobj)) + text, 0, cksumkey, cksum_usage, &cksumobj)) return GSS_S_FAILURE; memcpy(krb5_hdr + GSS_KRB5_TOK_HDR_LEN, cksumobj.data, cksumobj.len); diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index 4ede4cc4391f..ef91366e3dea 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -115,7 +115,7 @@ gss_verify_mic_v1(struct krb5_ctx *ctx, cksumkey = NULL; if (make_checksum(ctx, ptr, 8, message_buffer, 0, - cksumkey, &md5cksum)) + cksumkey, KG_USAGE_SIGN, &md5cksum)) return GSS_S_FAILURE; if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN, @@ -154,6 +154,7 @@ gss_verify_mic_v2(struct krb5_ctx *ctx, u8 *cksumkey; u8 flags; int i; + unsigned int cksum_usage; dprintk("RPC: %s\n", __func__); @@ -174,13 +175,16 @@ gss_verify_mic_v2(struct krb5_ctx *ctx, if (ptr[i] != 0xff) return GSS_S_DEFECTIVE_TOKEN; - if (ctx->initiate) + if (ctx->initiate) { cksumkey = ctx->acceptor_sign; - else + cksum_usage = KG_USAGE_ACCEPTOR_SIGN; + } else { cksumkey = ctx->initiator_sign; + cksum_usage = KG_USAGE_INITIATOR_SIGN; + } if (make_checksum_v2(ctx, ptr, GSS_KRB5_TOK_HDR_LEN, message_buffer, 0, - cksumkey, &cksumobj)) + cksumkey, cksum_usage, &cksumobj)) return GSS_S_FAILURE; if (memcmp(cksumobj.data, ptr + GSS_KRB5_TOK_HDR_LEN, diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index a1a3585fa761..097cc27494cc 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -215,7 +215,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, tmp_pages = buf->pages; buf->pages = pages; if (make_checksum(kctx, ptr, 8, buf, offset + headlen - blocksize, - cksumkey, &md5cksum)) + cksumkey, KG_USAGE_SEAL, &md5cksum)) return GSS_S_FAILURE; buf->pages = tmp_pages; @@ -298,7 +298,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) cksumkey = NULL; if (make_checksum(kctx, ptr, 8, buf, crypt_offset, - 
cksumkey, &md5cksum)) + cksumkey, KG_USAGE_SEAL, &md5cksum)) return GSS_S_FAILURE; if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN, -- cgit v1.2.2 From fc263a917afad3bda7b823a6edc803a40e7f6015 Mon Sep 17 00:00:00 2001 From: Kevin Coffman Date: Wed, 17 Mar 2010 13:03:03 -0400 Subject: gss_krb5: Save the raw session key in the context This is needed for deriving arcfour-hmac keys "on the fly" using the sequence number or checksu Signed-off-by: Kevin Coffman Signed-off-by: Steve Dickson Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_krb5_mech.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 506a2e7d4fad..893fad71e306 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -344,7 +344,7 @@ set_cdata(u8 cdata[GSS_KRB5_K5CLENGTH], u32 usage, u8 seed) } static int -context_derive_keys_des3(struct krb5_ctx *ctx, u8 *rawkey, u32 keylen) +context_derive_keys_des3(struct krb5_ctx *ctx) { struct xdr_netobj c, keyin, keyout; u8 cdata[GSS_KRB5_K5CLENGTH]; @@ -353,18 +353,18 @@ context_derive_keys_des3(struct krb5_ctx *ctx, u8 *rawkey, u32 keylen) c.len = GSS_KRB5_K5CLENGTH; c.data = cdata; - keyin.data = rawkey; - keyin.len = keylen; - keyout.len = keylen; + keyin.data = ctx->Ksess; + keyin.len = ctx->gk5e->keylength; + keyout.len = ctx->gk5e->keylength; /* seq uses the raw key */ ctx->seq = context_v2_alloc_cipher(ctx, ctx->gk5e->encrypt_name, - rawkey); + ctx->Ksess); if (ctx->seq == NULL) goto out_err; ctx->enc = context_v2_alloc_cipher(ctx, ctx->gk5e->encrypt_name, - rawkey); + ctx->Ksess); if (ctx->enc == NULL) goto out_free_seq; @@ -389,7 +389,7 @@ out_err: } static int -context_derive_keys_new(struct krb5_ctx *ctx, u8 *rawkey, u32 keylen) +context_derive_keys_new(struct krb5_ctx *ctx) { struct xdr_netobj c, keyin, keyout; u8 cdata[GSS_KRB5_K5CLENGTH]; @@ -398,9 +398,9 @@ context_derive_keys_new(struct krb5_ctx *ctx, u8 *rawkey, u32 keylen) c.len = GSS_KRB5_K5CLENGTH; c.data = cdata; - keyin.data = rawkey; - keyin.len = keylen; - keyout.len = keylen; + keyin.data = ctx->Ksess; + keyin.len = ctx->gk5e->keylength; + keyout.len = ctx->gk5e->keylength; /* initiator seal encryption */ set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_ENCRYPTION); @@ -502,7 +502,6 @@ out_err: static int gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx) { - u8 rawkey[GSS_KRB5_MAX_KEYLEN]; int keylen; p = simple_get_bytes(p, end, &ctx->flags, sizeof(ctx->flags)); @@ -538,7 +537,7 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx) } keylen = ctx->gk5e->keylength; - p = simple_get_bytes(p, end, rawkey, keylen); + p = simple_get_bytes(p, end, ctx->Ksess, keylen); if (IS_ERR(p)) goto out_err; @@ -557,10 +556,10 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx) switch (ctx->enctype) { case ENCTYPE_DES3_CBC_RAW: - return context_derive_keys_des3(ctx, rawkey, keylen); + return context_derive_keys_des3(ctx); case ENCTYPE_AES128_CTS_HMAC_SHA1_96: case ENCTYPE_AES256_CTS_HMAC_SHA1_96: - return context_derive_keys_new(ctx, rawkey, keylen); + return context_derive_keys_new(ctx); default: return -EINVAL; } -- cgit v1.2.2 From 1dbd9029f3024d058da1cf6c6658c28aac2e4e1c Mon Sep 17 00:00:00 2001 From: Kevin Coffman Date: Wed, 17 Mar 2010 13:03:04 -0400 Subject: gssd_krb5: More arcfour-hmac support For the arcfour-hmac support, the make_seq_num and 
get_seq_num functions need access to the kerberos context structure. This will be used in a later patch. Signed-off-by: Kevin Coffman Signed-off-by: Steve Dickson Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_krb5_seal.c | 5 ++--- net/sunrpc/auth_gss/gss_krb5_seqnum.c | 6 ++++-- net/sunrpc/auth_gss/gss_krb5_unseal.c | 3 ++- net/sunrpc/auth_gss/gss_krb5_wrap.c | 6 +++--- 4 files changed, 11 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index e22fed3d9a1b..36fe487d93d2 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -152,9 +152,8 @@ gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text, seq_send = ctx->seq_send++; spin_unlock(&krb5_seq_lock); - if (krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff, - seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, - ptr + 8)) + if (krb5_make_seq_num(ctx, ctx->seq, ctx->initiate ? 0 : 0xff, + seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8)) return GSS_S_FAILURE; return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE; diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c index 6331cd6866ec..83b593084976 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c +++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c @@ -40,7 +40,8 @@ #endif s32 -krb5_make_seq_num(struct crypto_blkcipher *key, +krb5_make_seq_num(struct krb5_ctx *kctx, + struct crypto_blkcipher *key, int direction, u32 seqnum, unsigned char *cksum, unsigned char *buf) @@ -61,13 +62,14 @@ krb5_make_seq_num(struct crypto_blkcipher *key, } s32 -krb5_get_seq_num(struct crypto_blkcipher *key, +krb5_get_seq_num(struct krb5_ctx *kctx, unsigned char *cksum, unsigned char *buf, int *direction, u32 *seqnum) { s32 code; unsigned char plain[8]; + struct crypto_blkcipher *key = kctx->seq; dprintk("RPC: krb5_get_seq_num:\n"); diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index ef91366e3dea..97eb91b8c70c 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -131,7 +131,8 @@ gss_verify_mic_v1(struct krb5_ctx *ctx, /* do sequencing checks */ - if (krb5_get_seq_num(ctx->seq, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8, &direction, &seqnum)) + if (krb5_get_seq_num(ctx, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8, + &direction, &seqnum)) return GSS_S_FAILURE; if ((ctx->initiate && direction != 0xff) || diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 097cc27494cc..a95e7e0ac0e3 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -227,7 +227,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, /* XXX would probably be more efficient to compute checksum * and encrypt at the same time: */ - if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff, + if ((krb5_make_seq_num(kctx, kctx->seq, kctx->initiate ? 
0 : 0xff, seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8))) return GSS_S_FAILURE; @@ -314,8 +314,8 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) /* do sequencing checks */ - if (krb5_get_seq_num(kctx->seq, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8, - &direction, &seqnum)) + if (krb5_get_seq_num(kctx, ptr + GSS_KRB5_TOK_HDR_LEN, + ptr + 8, &direction, &seqnum)) return GSS_S_BAD_SIG; if ((kctx->initiate && direction != 0xff) || -- cgit v1.2.2 From 5af46547ec451918f3ba51efe59b317d33adf701 Mon Sep 17 00:00:00 2001 From: Kevin Coffman Date: Wed, 17 Mar 2010 13:03:05 -0400 Subject: gss_krb5: Use confounder length in wrap code All encryption types use a confounder at the beginning of the wrap token. In all encryption types except arcfour-hmac, the confounder is the same as the blocksize. arcfour-hmac has a blocksize of one, but uses an eight byte confounder. Add an entry to the crypto framework definitions for the confounder length and change the wrap/unwrap code to use the confounder length rather than assuming it is always the blocksize. Signed-off-by: Kevin Coffman Signed-off-by: Steve Dickson Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_krb5_crypto.c | 6 +++--- net/sunrpc/auth_gss/gss_krb5_mech.c | 4 ++++ net/sunrpc/auth_gss/gss_krb5_wrap.c | 12 +++++++----- 3 files changed, 14 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 33ae7023cf3a..ed4106a3daf2 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -554,9 +554,9 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, /* hide the gss token header and insert the confounder */ offset += GSS_KRB5_TOK_HDR_LEN; - if (xdr_extend_head(buf, offset, blocksize)) + if (xdr_extend_head(buf, offset, kctx->gk5e->conflen)) return GSS_S_FAILURE; - gss_krb5_make_confounder(buf->head[0].iov_base + offset, blocksize); + gss_krb5_make_confounder(buf->head[0].iov_base + offset, kctx->gk5e->conflen); offset -= GSS_KRB5_TOK_HDR_LEN; if (buf->tail[0].iov_base != NULL) { @@ -726,7 +726,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, ret = GSS_S_BAD_SIG; goto out_err; } - *headskip = crypto_blkcipher_blocksize(cipher); + *headskip = kctx->gk5e->conflen; *tailskip = kctx->gk5e->cksumlength; out_err: if (ret && ret != GSS_S_BAD_SIG) diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 893fad71e306..ef6b31349046 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -68,6 +68,7 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = { .keybytes = 7, .keylength = 8, .blocksize = 8, + .conflen = 8, .cksumlength = 8, .keyed_cksum = 0, }, @@ -88,6 +89,7 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = { .keybytes = 21, .keylength = 24, .blocksize = 8, + .conflen = 8, .cksumlength = 20, .keyed_cksum = 1, }, @@ -110,6 +112,7 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = { .keybytes = 16, .keylength = 16, .blocksize = 16, + .conflen = 16, .cksumlength = 12, .keyed_cksum = 1, }, @@ -132,6 +135,7 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = { .keybytes = 32, .keylength = 32, .blocksize = 16, + .conflen = 16, .cksumlength = 12, .keyed_cksum = 1, }, diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index a95e7e0ac0e3..383db891c835 100644 --- 
a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -168,6 +168,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct page **tmp_pages; u32 seq_send; u8 *cksumkey; + u32 conflen = kctx->gk5e->conflen; dprintk("RPC: %s\n", __func__); @@ -176,7 +177,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, blocksize = crypto_blkcipher_blocksize(kctx->enc); gss_krb5_add_padding(buf, offset, blocksize); BUG_ON((buf->len - offset) % blocksize); - plainlen = blocksize + buf->len - offset; + plainlen = conflen + buf->len - offset; headlen = g_token_size(&kctx->mech_used, GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength + plainlen) - @@ -204,7 +205,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, memset(ptr + 4, 0xff, 4); *(__be16 *)(ptr + 4) = cpu_to_le16(kctx->gk5e->sealalg); - gss_krb5_make_confounder(msg_start, blocksize); + gss_krb5_make_confounder(msg_start, conflen); if (kctx->gk5e->keyed_cksum) cksumkey = kctx->cksum; @@ -214,7 +215,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, /* XXXJBF: UGH!: */ tmp_pages = buf->pages; buf->pages = pages; - if (make_checksum(kctx, ptr, 8, buf, offset + headlen - blocksize, + if (make_checksum(kctx, ptr, 8, buf, offset + headlen - conflen, cksumkey, KG_USAGE_SEAL, &md5cksum)) return GSS_S_FAILURE; buf->pages = tmp_pages; @@ -231,7 +232,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8))) return GSS_S_FAILURE; - if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize, + if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - conflen, pages)) return GSS_S_FAILURE; @@ -254,6 +255,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) void *data_start, *orig_start; int data_len; int blocksize; + u32 conflen = kctx->gk5e->conflen; int crypt_offset; u8 *cksumkey; @@ -327,7 +329,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) blocksize = crypto_blkcipher_blocksize(kctx->enc); data_start = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) + - blocksize; + conflen; orig_start = buf->head[0].iov_base + offset; data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start; memmove(orig_start, data_start, data_len); -- cgit v1.2.2 From fffdaef2eb4a7333952e55cf97f1fc0fcc35f981 Mon Sep 17 00:00:00 2001 From: Kevin Coffman Date: Wed, 17 Mar 2010 13:03:06 -0400 Subject: gss_krb5: Add support for rc4-hmac encryption Add necessary changes to add kernel support for the rc4-hmac Kerberos encryption type used by Microsoft and described in rfc4757. 
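For orientation before the hunks below, here is a small standalone C sketch of two token-format details this patch implements: the RFC 4757 usage-to-salt mapping fed into the HMAC-MD5 checksum, and the 8-byte sequence-number plaintext (big-endian sequence number followed by four copies of the direction byte) together with the receive-side consistency check. The constants (15 for a sign token, 13 for a seal token, direction 0x00 for the initiator and 0xff for the acceptor) are taken from the hunks that follow; the helper names are invented for the illustration and are not kernel APIs.

#include <stdio.h>
#include <stdint.h>

/* RFC 4757 salt: the MS usage number (15 for a MIC/sign token, 13 for a
 * wrap/seal token, per the hunks below) stored least-significant byte first. */
static void rc4_usage_salt(int is_sign_token, uint8_t salt[4])
{
	uint32_t ms_usage = is_sign_token ? 15 : 13;

	for (int i = 0; i < 4; i++)
		salt[i] = (ms_usage >> (8 * i)) & 0xff;
}

/* Build the 8-byte plaintext that the RC4 path encrypts for the sequence
 * number field: big-endian 32-bit sequence number, then the direction byte
 * repeated four times (0x00 for the context initiator, 0xff otherwise). */
static void rc4_seq_plaintext(uint32_t seqnum, uint8_t direction, uint8_t out[8])
{
	out[0] = (seqnum >> 24) & 0xff;
	out[1] = (seqnum >> 16) & 0xff;
	out[2] = (seqnum >> 8) & 0xff;
	out[3] = seqnum & 0xff;
	out[4] = out[5] = out[6] = out[7] = direction;
}

/* Receive-side check mirrored from the decrypt path: all four direction
 * bytes must agree or the token is rejected (KG_BAD_SEQ in the patch). */
static int rc4_seq_parse(const uint8_t in[8], uint32_t *seqnum, uint8_t *direction)
{
	if (in[4] != in[5] || in[4] != in[6] || in[4] != in[7])
		return -1;
	*direction = in[4];
	*seqnum = ((uint32_t)in[0] << 24) | (in[1] << 16) | (in[2] << 8) | in[3];
	return 0;
}

int main(void)
{
	uint8_t salt[4], plain[8], dir;
	uint32_t seq;

	rc4_usage_salt(1, salt);
	rc4_seq_plaintext(0x12345678, 0x00, plain);
	if (rc4_seq_parse(plain, &seq, &dir) == 0)
		printf("salt %02x%02x%02x%02x, seq %#x, dir %#x\n",
		       salt[0], salt[1], salt[2], salt[3], seq, dir);
	return 0;
}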
Signed-off-by: Kevin Coffman Signed-off-by: Steve Dickson Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_krb5_crypto.c | 255 ++++++++++++++++++++++++++++++++++ net/sunrpc/auth_gss/gss_krb5_mech.c | 96 +++++++++++++ net/sunrpc/auth_gss/gss_krb5_seal.c | 1 + net/sunrpc/auth_gss/gss_krb5_seqnum.c | 77 ++++++++++ net/sunrpc/auth_gss/gss_krb5_unseal.c | 1 + net/sunrpc/auth_gss/gss_krb5_wrap.c | 66 +++++++-- 6 files changed, 483 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index ed4106a3daf2..75ee993ea057 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -124,6 +124,114 @@ checksummer(struct scatterlist *sg, void *data) return crypto_hash_update(desc, sg, sg->length); } +static int +arcfour_hmac_md5_usage_to_salt(unsigned int usage, u8 salt[4]) +{ + unsigned int ms_usage; + + switch (usage) { + case KG_USAGE_SIGN: + ms_usage = 15; + break; + case KG_USAGE_SEAL: + ms_usage = 13; + break; + default: + return EINVAL;; + } + salt[0] = (ms_usage >> 0) & 0xff; + salt[1] = (ms_usage >> 8) & 0xff; + salt[2] = (ms_usage >> 16) & 0xff; + salt[3] = (ms_usage >> 24) & 0xff; + + return 0; +} + +static u32 +make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen, + struct xdr_buf *body, int body_offset, u8 *cksumkey, + unsigned int usage, struct xdr_netobj *cksumout) +{ + struct hash_desc desc; + struct scatterlist sg[1]; + int err; + u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN]; + u8 rc4salt[4]; + struct crypto_hash *md5; + struct crypto_hash *hmac_md5; + + if (cksumkey == NULL) + return GSS_S_FAILURE; + + if (cksumout->len < kctx->gk5e->cksumlength) { + dprintk("%s: checksum buffer length, %u, too small for %s\n", + __func__, cksumout->len, kctx->gk5e->name); + return GSS_S_FAILURE; + } + + if (arcfour_hmac_md5_usage_to_salt(usage, rc4salt)) { + dprintk("%s: invalid usage value %u\n", __func__, usage); + return GSS_S_FAILURE; + } + + md5 = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(md5)) + return GSS_S_FAILURE; + + hmac_md5 = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(hmac_md5)) { + crypto_free_hash(md5); + return GSS_S_FAILURE; + } + + desc.tfm = md5; + desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; + + err = crypto_hash_init(&desc); + if (err) + goto out; + sg_init_one(sg, rc4salt, 4); + err = crypto_hash_update(&desc, sg, 4); + if (err) + goto out; + + sg_init_one(sg, header, hdrlen); + err = crypto_hash_update(&desc, sg, hdrlen); + if (err) + goto out; + err = xdr_process_buf(body, body_offset, body->len - body_offset, + checksummer, &desc); + if (err) + goto out; + err = crypto_hash_final(&desc, checksumdata); + if (err) + goto out; + + desc.tfm = hmac_md5; + desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; + + err = crypto_hash_init(&desc); + if (err) + goto out; + err = crypto_hash_setkey(hmac_md5, cksumkey, kctx->gk5e->keylength); + if (err) + goto out; + + sg_init_one(sg, checksumdata, crypto_hash_digestsize(md5)); + err = crypto_hash_digest(&desc, sg, crypto_hash_digestsize(md5), + checksumdata); + if (err) + goto out; + + memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength); + cksumout->len = kctx->gk5e->cksumlength; +out: + crypto_free_hash(md5); + crypto_free_hash(hmac_md5); + return err ? 
GSS_S_FAILURE : 0; +} + /* * checksum the plaintext data and hdrlen bytes of the token header * The checksum is performed over the first 8 bytes of the @@ -140,6 +248,11 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN]; unsigned int checksumlen; + if (kctx->gk5e->ctype == CKSUMTYPE_HMAC_MD5_ARCFOUR) + return make_checksum_hmac_md5(kctx, header, hdrlen, + body, body_offset, + cksumkey, usage, cksumout); + if (cksumout->len < kctx->gk5e->cksumlength) { dprintk("%s: checksum buffer length, %u, too small for %s\n", __func__, cksumout->len, kctx->gk5e->name); @@ -733,3 +846,145 @@ out_err: ret = GSS_S_FAILURE; return ret; } + +/* + * Compute Kseq given the initial session key and the checksum. + * Set the key of the given cipher. + */ +int +krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher, + unsigned char *cksum) +{ + struct crypto_hash *hmac; + struct hash_desc desc; + struct scatterlist sg[1]; + u8 Kseq[GSS_KRB5_MAX_KEYLEN]; + u32 zeroconstant = 0; + int err; + + dprintk("%s: entered\n", __func__); + + hmac = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(hmac)) { + dprintk("%s: error %ld, allocating hash '%s'\n", + __func__, PTR_ERR(hmac), kctx->gk5e->cksum_name); + return PTR_ERR(hmac); + } + + desc.tfm = hmac; + desc.flags = 0; + + err = crypto_hash_init(&desc); + if (err) + goto out_err; + + /* Compute intermediate Kseq from session key */ + err = crypto_hash_setkey(hmac, kctx->Ksess, kctx->gk5e->keylength); + if (err) + goto out_err; + + sg_init_table(sg, 1); + sg_set_buf(sg, &zeroconstant, 4); + + err = crypto_hash_digest(&desc, sg, 4, Kseq); + if (err) + goto out_err; + + /* Compute final Kseq from the checksum and intermediate Kseq */ + err = crypto_hash_setkey(hmac, Kseq, kctx->gk5e->keylength); + if (err) + goto out_err; + + sg_set_buf(sg, cksum, 8); + + err = crypto_hash_digest(&desc, sg, 8, Kseq); + if (err) + goto out_err; + + err = crypto_blkcipher_setkey(cipher, Kseq, kctx->gk5e->keylength); + if (err) + goto out_err; + + err = 0; + +out_err: + crypto_free_hash(hmac); + dprintk("%s: returning %d\n", __func__, err); + return err; +} + +/* + * Compute Kcrypt given the initial session key and the plaintext seqnum. + * Set the key of cipher kctx->enc. 
+ */ +int +krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher, + s32 seqnum) +{ + struct crypto_hash *hmac; + struct hash_desc desc; + struct scatterlist sg[1]; + u8 Kcrypt[GSS_KRB5_MAX_KEYLEN]; + u8 zeroconstant[4] = {0}; + u8 seqnumarray[4]; + int err, i; + + dprintk("%s: entered, seqnum %u\n", __func__, seqnum); + + hmac = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(hmac)) { + dprintk("%s: error %ld, allocating hash '%s'\n", + __func__, PTR_ERR(hmac), kctx->gk5e->cksum_name); + return PTR_ERR(hmac); + } + + desc.tfm = hmac; + desc.flags = 0; + + err = crypto_hash_init(&desc); + if (err) + goto out_err; + + /* Compute intermediate Kcrypt from session key */ + for (i = 0; i < kctx->gk5e->keylength; i++) + Kcrypt[i] = kctx->Ksess[i] ^ 0xf0; + + err = crypto_hash_setkey(hmac, Kcrypt, kctx->gk5e->keylength); + if (err) + goto out_err; + + sg_init_table(sg, 1); + sg_set_buf(sg, zeroconstant, 4); + + err = crypto_hash_digest(&desc, sg, 4, Kcrypt); + if (err) + goto out_err; + + /* Compute final Kcrypt from the seqnum and intermediate Kcrypt */ + err = crypto_hash_setkey(hmac, Kcrypt, kctx->gk5e->keylength); + if (err) + goto out_err; + + seqnumarray[0] = (unsigned char) ((seqnum >> 24) & 0xff); + seqnumarray[1] = (unsigned char) ((seqnum >> 16) & 0xff); + seqnumarray[2] = (unsigned char) ((seqnum >> 8) & 0xff); + seqnumarray[3] = (unsigned char) ((seqnum >> 0) & 0xff); + + sg_set_buf(sg, seqnumarray, 4); + + err = crypto_hash_digest(&desc, sg, 4, Kcrypt); + if (err) + goto out_err; + + err = crypto_blkcipher_setkey(cipher, Kcrypt, kctx->gk5e->keylength); + if (err) + goto out_err; + + err = 0; + +out_err: + crypto_free_hash(hmac); + dprintk("%s: returning %d\n", __func__, err); + return err; +} + diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index ef6b31349046..54eda5f0c58b 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -72,6 +72,27 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = { .cksumlength = 8, .keyed_cksum = 0, }, + /* + * RC4-HMAC + */ + { + .etype = ENCTYPE_ARCFOUR_HMAC, + .ctype = CKSUMTYPE_HMAC_MD5_ARCFOUR, + .name = "rc4-hmac", + .encrypt_name = "ecb(arc4)", + .cksum_name = "hmac(md5)", + .encrypt = krb5_encrypt, + .decrypt = krb5_decrypt, + .mk_key = NULL, + .signalg = SGN_ALG_HMAC_MD5, + .sealalg = SEAL_ALG_MICROSOFT_RC4, + .keybytes = 16, + .keylength = 16, + .blocksize = 1, + .conflen = 8, + .cksumlength = 8, + .keyed_cksum = 1, + }, /* * 3DES */ @@ -392,6 +413,79 @@ out_err: return -EINVAL; } +/* + * Note that RC4 depends on deriving keys using the sequence + * number or the checksum of a token. Therefore, the final keys + * cannot be calculated until the token is being constructed! 
+ */ +static int +context_derive_keys_rc4(struct krb5_ctx *ctx) +{ + struct crypto_hash *hmac; + char sigkeyconstant[] = "signaturekey"; + int slen = strlen(sigkeyconstant) + 1; /* include null terminator */ + struct hash_desc desc; + struct scatterlist sg[1]; + int err; + + dprintk("RPC: %s: entered\n", __func__); + /* + * derive cksum (aka Ksign) key + */ + hmac = crypto_alloc_hash(ctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(hmac)) { + dprintk("%s: error %ld allocating hash '%s'\n", + __func__, PTR_ERR(hmac), ctx->gk5e->cksum_name); + err = PTR_ERR(hmac); + goto out_err; + } + + err = crypto_hash_setkey(hmac, ctx->Ksess, ctx->gk5e->keylength); + if (err) + goto out_err_free_hmac; + + sg_init_table(sg, 1); + sg_set_buf(sg, sigkeyconstant, slen); + + desc.tfm = hmac; + desc.flags = 0; + + err = crypto_hash_init(&desc); + if (err) + goto out_err_free_hmac; + + err = crypto_hash_digest(&desc, sg, slen, ctx->cksum); + if (err) + goto out_err_free_hmac; + /* + * allocate hash, and blkciphers for data and seqnum encryption + */ + ctx->enc = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->enc)) { + err = PTR_ERR(ctx->enc); + goto out_err_free_hmac; + } + + ctx->seq = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->seq)) { + crypto_free_blkcipher(ctx->enc); + err = PTR_ERR(ctx->seq); + goto out_err_free_hmac; + } + + dprintk("RPC: %s: returning success\n", __func__); + + err = 0; + +out_err_free_hmac: + crypto_free_hash(hmac); +out_err: + dprintk("RPC: %s: returning %d\n", __func__, err); + return err; +} + static int context_derive_keys_new(struct krb5_ctx *ctx) { @@ -561,6 +655,8 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx) switch (ctx->enctype) { case ENCTYPE_DES3_CBC_RAW: return context_derive_keys_des3(ctx); + case ENCTYPE_ARCFOUR_HMAC: + return context_derive_keys_rc4(ctx); case ENCTYPE_AES128_CTS_HMAC_SHA1_96: case ENCTYPE_AES256_CTS_HMAC_SHA1_96: return context_derive_keys_new(ctx); diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index 36fe487d93d2..d7941eab7796 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -213,6 +213,7 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, BUG(); case ENCTYPE_DES_CBC_RAW: case ENCTYPE_DES3_CBC_RAW: + case ENCTYPE_ARCFOUR_HMAC: return gss_get_mic_v1(ctx, text, token); case ENCTYPE_AES128_CTS_HMAC_SHA1_96: case ENCTYPE_AES256_CTS_HMAC_SHA1_96: diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c index 83b593084976..415c013ba382 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c +++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c @@ -39,6 +39,38 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif +static s32 +krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum, + unsigned char *cksum, unsigned char *buf) +{ + struct crypto_blkcipher *cipher; + unsigned char plain[8]; + s32 code; + + dprintk("RPC: %s:\n", __func__); + cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(cipher)) + return PTR_ERR(cipher); + + plain[0] = (unsigned char) ((seqnum >> 24) & 0xff); + plain[1] = (unsigned char) ((seqnum >> 16) & 0xff); + plain[2] = (unsigned char) ((seqnum >> 8) & 0xff); + plain[3] = (unsigned char) ((seqnum >> 0) & 0xff); + plain[4] = direction; + plain[5] = direction; + plain[6] = direction; + plain[7] = direction; + + code = 
krb5_rc4_setup_seq_key(kctx, cipher, cksum); + if (code) + goto out; + + code = krb5_encrypt(cipher, cksum, plain, buf, 8); +out: + crypto_free_blkcipher(cipher); + return code; +} s32 krb5_make_seq_num(struct krb5_ctx *kctx, struct crypto_blkcipher *key, @@ -48,6 +80,10 @@ krb5_make_seq_num(struct krb5_ctx *kctx, { unsigned char plain[8]; + if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) + return krb5_make_rc4_seq_num(kctx, direction, seqnum, + cksum, buf); + plain[0] = (unsigned char) (seqnum & 0xff); plain[1] = (unsigned char) ((seqnum >> 8) & 0xff); plain[2] = (unsigned char) ((seqnum >> 16) & 0xff); @@ -61,6 +97,43 @@ krb5_make_seq_num(struct krb5_ctx *kctx, return krb5_encrypt(key, cksum, plain, buf, 8); } +static s32 +krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum, + unsigned char *buf, int *direction, s32 *seqnum) +{ + struct crypto_blkcipher *cipher; + unsigned char plain[8]; + s32 code; + + dprintk("RPC: %s:\n", __func__); + cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(cipher)) + return PTR_ERR(cipher); + + code = krb5_rc4_setup_seq_key(kctx, cipher, cksum); + if (code) + goto out; + + code = krb5_decrypt(cipher, cksum, buf, plain, 8); + if (code) + goto out; + + if ((plain[4] != plain[5]) || (plain[4] != plain[6]) + || (plain[4] != plain[7])) { + code = (s32)KG_BAD_SEQ; + goto out; + } + + *direction = plain[4]; + + *seqnum = ((plain[0] << 24) | (plain[1] << 16) | + (plain[2] << 8) | (plain[3])); +out: + crypto_free_blkcipher(cipher); + return code; +} + s32 krb5_get_seq_num(struct krb5_ctx *kctx, unsigned char *cksum, @@ -73,6 +146,10 @@ krb5_get_seq_num(struct krb5_ctx *kctx, dprintk("RPC: krb5_get_seq_num:\n"); + if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) + return krb5_get_rc4_seq_num(kctx, cksum, buf, + direction, seqnum); + if ((code = krb5_decrypt(key, cksum, buf, plain, 8))) return code; diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index 97eb91b8c70c..6cd930f3678f 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -216,6 +216,7 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx, BUG(); case ENCTYPE_DES_CBC_RAW: case ENCTYPE_DES3_CBC_RAW: + case ENCTYPE_ARCFOUR_HMAC: return gss_verify_mic_v1(ctx, message_buffer, read_token); case ENCTYPE_AES128_CTS_HMAC_SHA1_96: case ENCTYPE_AES256_CTS_HMAC_SHA1_96: diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 383db891c835..2763e3e48db4 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -232,9 +232,26 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8))) return GSS_S_FAILURE; - if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - conflen, - pages)) - return GSS_S_FAILURE; + if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) { + struct crypto_blkcipher *cipher; + int err; + cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(cipher)) + return GSS_S_FAILURE; + + krb5_rc4_setup_enc_key(kctx, cipher, seq_send); + + err = gss_encrypt_xdr_buf(cipher, buf, + offset + headlen - conflen, pages); + crypto_free_blkcipher(cipher); + if (err) + return GSS_S_FAILURE; + } else { + if (gss_encrypt_xdr_buf(kctx->enc, buf, + offset + headlen - conflen, pages)) + return GSS_S_FAILURE; + } return (kctx->endtime < now) ? 
GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE; } @@ -291,8 +308,37 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) */ crypt_offset = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) - (unsigned char *)buf->head[0].iov_base; - if (gss_decrypt_xdr_buf(kctx->enc, buf, crypt_offset)) - return GSS_S_DEFECTIVE_TOKEN; + + /* + * Need plaintext seqnum to derive encryption key for arcfour-hmac + */ + if (krb5_get_seq_num(kctx, ptr + GSS_KRB5_TOK_HDR_LEN, + ptr + 8, &direction, &seqnum)) + return GSS_S_BAD_SIG; + + if ((kctx->initiate && direction != 0xff) || + (!kctx->initiate && direction != 0)) + return GSS_S_BAD_SIG; + + if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) { + struct crypto_blkcipher *cipher; + int err; + + cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(cipher)) + return GSS_S_FAILURE; + + krb5_rc4_setup_enc_key(kctx, cipher, seqnum); + + err = gss_decrypt_xdr_buf(cipher, buf, crypt_offset); + crypto_free_blkcipher(cipher); + if (err) + return GSS_S_DEFECTIVE_TOKEN; + } else { + if (gss_decrypt_xdr_buf(kctx->enc, buf, crypt_offset)) + return GSS_S_DEFECTIVE_TOKEN; + } if (kctx->gk5e->keyed_cksum) cksumkey = kctx->cksum; @@ -316,14 +362,6 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) /* do sequencing checks */ - if (krb5_get_seq_num(kctx, ptr + GSS_KRB5_TOK_HDR_LEN, - ptr + 8, &direction, &seqnum)) - return GSS_S_BAD_SIG; - - if ((kctx->initiate && direction != 0xff) || - (!kctx->initiate && direction != 0)) - return GSS_S_BAD_SIG; - /* Copy the data back to the right position. XXX: Would probably be * better to copy and encrypt at the same time. */ @@ -521,6 +559,7 @@ gss_wrap_kerberos(struct gss_ctx *gctx, int offset, BUG(); case ENCTYPE_DES_CBC_RAW: case ENCTYPE_DES3_CBC_RAW: + case ENCTYPE_ARCFOUR_HMAC: return gss_wrap_kerberos_v1(kctx, offset, buf, pages); case ENCTYPE_AES128_CTS_HMAC_SHA1_96: case ENCTYPE_AES256_CTS_HMAC_SHA1_96: @@ -538,6 +577,7 @@ gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, struct xdr_buf *buf) BUG(); case ENCTYPE_DES_CBC_RAW: case ENCTYPE_DES3_CBC_RAW: + case ENCTYPE_ARCFOUR_HMAC: return gss_unwrap_kerberos_v1(kctx, offset, buf); case ENCTYPE_AES128_CTS_HMAC_SHA1_96: case ENCTYPE_AES256_CTS_HMAC_SHA1_96: -- cgit v1.2.2 From fc54a0c65fc8cae6b0355512f0b619c1515e7d7f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 8 Apr 2010 14:25:20 -0400 Subject: gss_krb5: Advertise rc4-hmac enctype support in the rpcsec_gss/krb5 upcall Update the upcall info indicating which Kerberos enctypes the kernel supports Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_krb5_mech.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 54eda5f0c58b..7c249a3f9a03 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -742,7 +742,7 @@ static struct gss_api_mech gss_kerberos_mech = { .gm_ops = &gss_kerberos_ops, .gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs), .gm_pfs = gss_kerberos_pfs, - .gm_upcall_enctypes = "enctypes=18,17,16,3,1,2 ", + .gm_upcall_enctypes = "enctypes=18,17,16,23,3,1,2 ", }; static int __init init_kerberos_module(void) -- cgit v1.2.2 From ee5ebe851ed60206f150d3f189416f9c63245b66 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 16 Apr 2010 16:37:01 -0400 Subject: SUNRPC: Clean up xprt_release() Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 32 
++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 42f09ade0044..18415cc37c01 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -62,7 +62,6 @@ * Local functions */ static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); -static inline void do_xprt_reserve(struct rpc_task *); static void xprt_connect_status(struct rpc_task *task); static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); @@ -935,7 +934,7 @@ void xprt_transmit(struct rpc_task *task) spin_unlock_bh(&xprt->transport_lock); } -static inline void do_xprt_reserve(struct rpc_task *task) +static void xprt_alloc_slot(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; @@ -955,6 +954,16 @@ static inline void do_xprt_reserve(struct rpc_task *task) rpc_sleep_on(&xprt->backlog, task, NULL); } +static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) +{ + memset(req, 0, sizeof(*req)); /* mark unused */ + + spin_lock(&xprt->reserve_lock); + list_add(&req->rq_list, &xprt->free); + rpc_wake_up_next(&xprt->backlog); + spin_unlock(&xprt->reserve_lock); +} + /** * xprt_reserve - allocate an RPC request slot * @task: RPC task requesting a slot allocation @@ -968,7 +977,7 @@ void xprt_reserve(struct rpc_task *task) task->tk_status = -EIO; spin_lock(&xprt->reserve_lock); - do_xprt_reserve(task); + xprt_alloc_slot(task); spin_unlock(&xprt->reserve_lock); } @@ -1006,14 +1015,10 @@ void xprt_release(struct rpc_task *task) { struct rpc_xprt *xprt; struct rpc_rqst *req; - int is_bc_request; if (!(req = task->tk_rqstp)) return; - /* Preallocated backchannel request? */ - is_bc_request = bc_prealloc(req); - xprt = req->rq_xprt; rpc_count_iostats(task); spin_lock_bh(&xprt->transport_lock); @@ -1027,21 +1032,16 @@ void xprt_release(struct rpc_task *task) mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout); spin_unlock_bh(&xprt->transport_lock); - if (!bc_prealloc(req)) + if (req->rq_buffer) xprt->ops->buf_free(req->rq_buffer); task->tk_rqstp = NULL; if (req->rq_release_snd_buf) req->rq_release_snd_buf(req); dprintk("RPC: %5u release request %p\n", task->tk_pid, req); - if (likely(!is_bc_request)) { - memset(req, 0, sizeof(*req)); /* mark unused */ - - spin_lock(&xprt->reserve_lock); - list_add(&req->rq_list, &xprt->free); - rpc_wake_up_next(&xprt->backlog); - spin_unlock(&xprt->reserve_lock); - } else + if (likely(!bc_prealloc(req))) + xprt_free_slot(xprt, req); + else xprt_free_bc_request(req); } -- cgit v1.2.2 From 19445b99b6d66af661c586c052de23110731a502 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 16 Apr 2010 16:41:10 -0400 Subject: SUNRPC: Cleanup - make rpc_new_task() call rpc_release_calldata on failure Also have it return an ERR_PTR(-ENOMEM) instead of a null pointer. 
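The error-pointer convention mentioned here lets rpc_new_task() report why it failed instead of returning a bare NULL (and, per the diff below, lets it release the callback data itself on failure). The following is a simplified userspace rendition of that idiom; the macros approximate, but are not identical to, the kernel's <linux/err.h> definitions, and new_task() is an invented stand-in, not the real function.

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>

#define MAX_ERRNO	4095
#define ERR_PTR(err)	((void *)(long)(err))
#define PTR_ERR(ptr)	((long)(ptr))
#define IS_ERR(ptr)	((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

struct task { int status; };

/* Constructor modelled loosely on the reworked rpc_new_task(): on failure
 * the caller receives an encoded errno rather than a NULL it has to guess at. */
static struct task *new_task(int force_failure)
{
	struct task *t;

	if (force_failure)
		return ERR_PTR(-ENOMEM);
	t = calloc(1, sizeof(*t));
	if (!t)
		return ERR_PTR(-ENOMEM);
	return t;
}

int main(void)
{
	struct task *t = new_task(1);

	if (IS_ERR(t)) {
		printf("new_task failed: %ld\n", PTR_ERR(t));
		return 1;
	}
	free(t);
	return 0;
}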
Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 19 ++++--------------- net/sunrpc/sched.c | 13 ++++++++++--- 2 files changed, 14 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 19c9983d5360..8c7b5433022a 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -556,26 +556,16 @@ static const struct rpc_call_ops rpc_default_ops = { */ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data) { - struct rpc_task *task, *ret; + struct rpc_task *task; task = rpc_new_task(task_setup_data); - if (task == NULL) { - rpc_release_calldata(task_setup_data->callback_ops, - task_setup_data->callback_data); - ret = ERR_PTR(-ENOMEM); + if (IS_ERR(task)) goto out; - } - if (task->tk_status != 0) { - ret = ERR_PTR(task->tk_status); - rpc_put_task(task); - goto out; - } atomic_inc(&task->tk_count); rpc_execute(task); - ret = task; out: - return ret; + return task; } EXPORT_SYMBOL_GPL(rpc_run_task); @@ -657,9 +647,8 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, * Create an rpc_task to send the data */ task = rpc_new_task(&task_setup_data); - if (!task) { + if (IS_ERR(task)) { xprt_free_bc_request(req); - task = ERR_PTR(-ENOMEM); goto out; } task->tk_rqstp = req; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index aae6907fd546..c8979ce5d88a 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -856,16 +856,23 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data) if (task == NULL) { task = rpc_alloc_task(); - if (task == NULL) - goto out; + if (task == NULL) { + rpc_release_calldata(setup_data->callback_ops, + setup_data->callback_data); + return ERR_PTR(-ENOMEM); + } flags = RPC_TASK_DYNAMIC; } rpc_init_task(task, setup_data); + if (task->tk_status < 0) { + int err = task->tk_status; + rpc_put_task(task); + return ERR_PTR(err); + } task->tk_flags |= flags; dprintk("RPC: allocated task %p\n", task); -out: return task; } -- cgit v1.2.2 From 0b9e79431377df452348e78262dd5a3dc359eeef Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 16 Apr 2010 16:41:57 -0400 Subject: SUNRPC: Move the test for XPRT_CONNECTING into xprt_connect() This fixes a bug with setting xprt->stat.connect_start. 
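The underlying issue, visible in the diff below, is that the old code stamped xprt->stat.connect_start in xprt_connect() before the transport's own XPRT_CONNECTING test-and-set, so the timestamp could be overwritten even when the transport then found a connect already in progress and backed out. A rough userspace sketch of the consolidated pattern follows; the names are invented analogues of the xprt fields and this is an illustration of the serialization idea, not the kernel code.

#include <stdio.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <time.h>

struct xprt_like {
	atomic_flag connecting;	/* analogue of the XPRT_CONNECTING bit */
	atomic_bool closing;	/* analogue of XPRT_CLOSING */
	time_t connect_start;	/* analogue of xprt->stat.connect_start */
};

static void do_transport_connect(struct xprt_like *x)
{
	/* transport-specific work would go here */
	printf("connect attempt started at %ld\n", (long)x->connect_start);
}

static void xprt_connect_like(struct xprt_like *x)
{
	if (atomic_load(&x->closing))
		return;				/* wait for shutdown to finish */
	if (atomic_flag_test_and_set(&x->connecting))
		return;				/* an attempt is already in flight */
	x->connect_start = time(NULL);		/* recorded once per real attempt */
	do_transport_connect(x);
}

int main(void)
{
	struct xprt_like x;

	atomic_flag_clear(&x.connecting);
	atomic_init(&x.closing, false);
	x.connect_start = 0;

	xprt_connect_like(&x);	/* starts a connect and stamps connect_start */
	xprt_connect_like(&x);	/* no-op: the first attempt is still pending */
	return 0;
}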
Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 6 +++++- net/sunrpc/xprtrdma/transport.c | 28 +++++++++++++--------------- net/sunrpc/xprtsock.c | 15 +-------------- 3 files changed, 19 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 18415cc37c01..c71d835165e2 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -712,10 +712,14 @@ void xprt_connect(struct rpc_task *task) task->tk_timeout = xprt->connect_timeout; rpc_sleep_on(&xprt->pending, task, xprt_connect_status); + + if (test_bit(XPRT_CLOSING, &xprt->state)) + return; + if (xprt_test_and_set_connecting(xprt)) + return; xprt->stat.connect_start = jiffies; xprt->ops->connect(task); } - return; } static void xprt_connect_status(struct rpc_task *task) diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 187257b1d880..0607b9aaae91 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -449,21 +449,19 @@ xprt_rdma_connect(struct rpc_task *task) struct rpc_xprt *xprt = (struct rpc_xprt *)task->tk_xprt; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - if (!xprt_test_and_set_connecting(xprt)) { - if (r_xprt->rx_ep.rep_connected != 0) { - /* Reconnect */ - schedule_delayed_work(&r_xprt->rdma_connect, - xprt->reestablish_timeout); - xprt->reestablish_timeout <<= 1; - if (xprt->reestablish_timeout > (30 * HZ)) - xprt->reestablish_timeout = (30 * HZ); - else if (xprt->reestablish_timeout < (5 * HZ)) - xprt->reestablish_timeout = (5 * HZ); - } else { - schedule_delayed_work(&r_xprt->rdma_connect, 0); - if (!RPC_IS_ASYNC(task)) - flush_scheduled_work(); - } + if (r_xprt->rx_ep.rep_connected != 0) { + /* Reconnect */ + schedule_delayed_work(&r_xprt->rdma_connect, + xprt->reestablish_timeout); + xprt->reestablish_timeout <<= 1; + if (xprt->reestablish_timeout > (30 * HZ)) + xprt->reestablish_timeout = (30 * HZ); + else if (xprt->reestablish_timeout < (5 * HZ)) + xprt->reestablish_timeout = (5 * HZ); + } else { + schedule_delayed_work(&r_xprt->rdma_connect, 0); + if (!RPC_IS_ASYNC(task)) + flush_scheduled_work(); } } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 9847c30b5001..d138afa3bb35 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2016,9 +2016,6 @@ static void xs_connect(struct rpc_task *task) struct rpc_xprt *xprt = task->tk_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - if (xprt_test_and_set_connecting(xprt)) - return; - if (transport->sock != NULL && !RPC_IS_SOFTCONN(task)) { dprintk("RPC: xs_connect delayed xprt %p for %lu " "seconds\n", @@ -2038,16 +2035,6 @@ static void xs_connect(struct rpc_task *task) } } -static void xs_tcp_connect(struct rpc_task *task) -{ - struct rpc_xprt *xprt = task->tk_xprt; - - /* Exit if we need to wait for socket shutdown to complete */ - if (test_bit(XPRT_CLOSING, &xprt->state)) - return; - xs_connect(task); -} - /** * xs_udp_print_stats - display UDP socket-specifc stats * @xprt: rpc_xprt struct containing statistics @@ -2246,7 +2233,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { .release_xprt = xs_tcp_release_xprt, .rpcbind = rpcb_getport_async, .set_port = xs_set_port, - .connect = xs_tcp_connect, + .connect = xs_connect, .buf_alloc = rpc_malloc, .buf_free = rpc_free, .send_request = xs_tcp_send_request, -- cgit v1.2.2 From a8ce4a8f37fef0a09a1e920c2e09f67a80426c7e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 16 Apr 2010 16:42:12 -0400 Subject: SUNRPC: Fail over 
more quickly on connect errors We should not allow soft tasks to wait for longer than the major timeout period when waiting for a reconnect to occur. Remove the field xprt->connect_timeout since it has been obsoleted by xprt->reestablish_timeout. Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 2 +- net/sunrpc/xprtrdma/transport.c | 1 - net/sunrpc/xprtsock.c | 17 ----------------- 3 files changed, 1 insertion(+), 19 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index c71d835165e2..6c9997ef386a 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -710,7 +710,7 @@ void xprt_connect(struct rpc_task *task) if (task->tk_rqstp) task->tk_rqstp->rq_bytes_sent = 0; - task->tk_timeout = xprt->connect_timeout; + task->tk_timeout = task->tk_rqstp->rq_timeout; rpc_sleep_on(&xprt->pending, task, xprt_connect_status); if (test_bit(XPRT_CLOSING, &xprt->state)) diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 0607b9aaae91..3f3b38c5642f 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -305,7 +305,6 @@ xprt_setup_rdma(struct xprt_create *args) /* 60 second timeout, no retries */ xprt->timeout = &xprt_rdma_default_timeout; xprt->bind_timeout = (60U * HZ); - xprt->connect_timeout = (60U * HZ); xprt->reestablish_timeout = (5U * HZ); xprt->idle_timeout = (5U * 60 * HZ); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index d138afa3bb35..790a8f31b0bb 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -137,20 +137,6 @@ static ctl_table sunrpc_table[] = { #endif -/* - * Time out for an RPC UDP socket connect. UDP socket connects are - * synchronous, but we set a timeout anyway in case of resource - * exhaustion on the local host. - */ -#define XS_UDP_CONN_TO (5U * HZ) - -/* - * Wait duration for an RPC TCP connection to be established. Solaris - * NFS over TCP uses 60 seconds, for example, which is in line with how - * long a server takes to reboot. - */ -#define XS_TCP_CONN_TO (60U * HZ) - /* * Wait duration for a reply from the RPC portmapper. */ @@ -2324,7 +2310,6 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); xprt->bind_timeout = XS_BIND_TO; - xprt->connect_timeout = XS_UDP_CONN_TO; xprt->reestablish_timeout = XS_UDP_REEST_TO; xprt->idle_timeout = XS_IDLE_DISC_TO; @@ -2399,7 +2384,6 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; xprt->bind_timeout = XS_BIND_TO; - xprt->connect_timeout = XS_TCP_CONN_TO; xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; xprt->idle_timeout = XS_IDLE_DISC_TO; @@ -2475,7 +2459,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) /* backchannel */ xprt_set_bound(xprt); xprt->bind_timeout = 0; - xprt->connect_timeout = 0; xprt->reestablish_timeout = 0; xprt->idle_timeout = 0; -- cgit v1.2.2 From bbc72cea58f671665b6362be0d4e391813ac0eee Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 7 May 2010 13:34:27 -0400 Subject: SUNRPC: RPC metrics and RTT estimator should use same RTT value Compute an RPC request's RTT once, and use that value both for reporting RPC metrics, and for adjusting the RTT context used by the RPC client's RTT estimator algorithm. 
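Below is a standalone illustration of the "measure once, consume twice" structure this change establishes: a single RTT sample per completed request feeds both the per-operation metrics and a smoothed-RTT estimator. The estimator shown is the classic Jacobson/Karels update; the SUNRPC rpc_update_rtt() logic differs in detail, so treat the code and numbers as a sketch only.

#include <stdio.h>
#include <stdlib.h>

struct op_metrics { long rtt_total_ms; unsigned long count; };
struct rtt_estimator { long srtt_ms; long rttvar_ms; };

/* Classic smoothed-RTT update: srtt += err/8, rttvar += (|err| - rttvar)/4. */
static void rtt_update(struct rtt_estimator *e, long sample_ms)
{
	long err = sample_ms - e->srtt_ms;

	e->srtt_ms += err / 8;
	e->rttvar_ms += (labs(err) - e->rttvar_ms) / 4;
}

/* One measurement, two consumers: statistics and the timeout estimator. */
static void request_done(struct op_metrics *m, struct rtt_estimator *e,
			 long rtt_ms)
{
	m->rtt_total_ms += rtt_ms;
	m->count++;
	rtt_update(e, rtt_ms);
}

int main(void)
{
	struct op_metrics m = { 0, 0 };
	struct rtt_estimator e = { 100, 20 };
	long samples[] = { 90, 110, 250, 95 };

	for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		request_done(&m, &e, samples[i]);

	printf("avg rtt %ld ms, srtt %ld ms, rttvar %ld ms\n",
	       m.rtt_total_ms / (long)m.count, e.srtt_ms, e.rttvar_ms);
	return 0;
}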
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 13 ++++--------- net/sunrpc/xprtsock.c | 1 - 2 files changed, 4 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 6c9997ef386a..698c62712294 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -774,12 +774,7 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) } EXPORT_SYMBOL_GPL(xprt_lookup_rqst); -/** - * xprt_update_rtt - update an RPC client's RTT state after receiving a reply - * @task: RPC request that recently completed - * - */ -void xprt_update_rtt(struct rpc_task *task) +static void xprt_update_rtt(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_rtt *rtt = task->tk_client->cl_rtt; @@ -787,12 +782,10 @@ void xprt_update_rtt(struct rpc_task *task) if (timer) { if (req->rq_ntrans == 1) - rpc_update_rtt(rtt, timer, - (long)jiffies - req->rq_xtime); + rpc_update_rtt(rtt, timer, task->tk_rtt); rpc_set_timeo(rtt, timer, req->rq_ntrans - 1); } } -EXPORT_SYMBOL_GPL(xprt_update_rtt); /** * xprt_complete_rqst - called when reply processing is complete @@ -811,6 +804,8 @@ void xprt_complete_rqst(struct rpc_task *task, int copied) xprt->stat.recvs++; task->tk_rtt = (long)jiffies - req->rq_xtime; + if (xprt->ops->timer != NULL) + xprt_update_rtt(task); list_del_init(&req->rq_list); req->rq_private_buf.len = copied; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 790a8f31b0bb..3d1dcdf2aef1 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -844,7 +844,6 @@ static void xs_udp_data_ready(struct sock *sk, int len) dst_confirm(skb_dst(skb)); xprt_adjust_cwnd(task, copied); - xprt_update_rtt(task); xprt_complete_rqst(task, copied); out_unlock: -- cgit v1.2.2 From ff8399709e41bf72b4cb145612a0f9a9f7283c83 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 7 May 2010 13:34:47 -0400 Subject: SUNRPC: Replace jiffies-based metrics with ktime-based metrics Currently RPC performance metrics that tabulate elapsed time use jiffies time values. This is problematic on systems that use slow jiffies (for instance 100HZ systems built for paravirtualized environments). It is also a problem for computing precise latency statistics for advanced network transports, such as InfiniBand, that can have round-trip latencies significanly faster than a single clock tick. For the RPC client, adopt the high resolution time stamp mechanism already used by the network layer and blktrace: ktime. We use ktime format time stamps for all internal computations, and convert to milliseconds for presentation. As a result, we need only addition operations in the performance critical paths; multiply/divide is required only for presentation. We could report RTT metrics in microseconds. In fact the mountstats format is versioned to accomodate exactly this kind of interface improvement. For now, however, we'll stay with millisecond precision for presentation to maintain backwards compatibility with the handful of currently deployed user space tools. At a later point, we'll move to an API such as BDI_STATS where a finer timestamp precision can be reported. 
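A userspace analogue of the scheme described above, using clock_gettime(CLOCK_MONOTONIC) in place of ktime_get(): timestamps and accumulators stay in nanoseconds (only addition and subtraction on the hot path) and are divided down to milliseconds solely for presentation. On a 100HZ system a jiffies-based measurement could not resolve anything shorter than 10 ms, which is the motivation stated above. This is a sketch of the measurement pattern, not the kernel code.

#define _POSIX_C_SOURCE 199309L
#include <stdio.h>
#include <stdint.h>
#include <time.h>

/* Monotonic timestamp in nanoseconds, the analogue of ktime_get(). */
static int64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (int64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

int main(void)
{
	int64_t start, rtt_ns, execute_total_ns = 0;
	struct timespec delay = { .tv_sec = 0, .tv_nsec = 2500000 };	/* pretend 2.5 ms of "RPC" */

	start = now_ns();			/* analogue of task->tk_start */
	nanosleep(&delay, NULL);
	rtt_ns = now_ns() - start;		/* analogue of ktime_sub() */
	execute_total_ns += rtt_ns;		/* accumulate; no conversion here */

	/* convert only at presentation time, as ktime_to_ms() does in stats.c */
	printf("rtt = %lld ns (%lld ms for display)\n",
	       (long long)rtt_ns, (long long)(execute_total_ns / 1000000));
	return 0;
}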
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 2 +- net/sunrpc/stats.c | 27 +++++++++------------------ net/sunrpc/xprt.c | 8 +++++--- 3 files changed, 15 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index c8979ce5d88a..aa7b07ef5d55 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -834,7 +834,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta } /* starting timestamp */ - task->tk_start = jiffies; + task->tk_start = ktime_get(); dprintk("RPC: new task initialized, procpid %u\n", task_pid_nr(current)); diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 5785d2037f45..aacd95f0dce5 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -144,7 +144,7 @@ void rpc_count_iostats(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; struct rpc_iostats *stats; struct rpc_iostats *op_metrics; - long rtt, execute, queue; + ktime_t delta; if (!task->tk_client || !task->tk_client->cl_metrics || !req) return; @@ -159,20 +159,13 @@ void rpc_count_iostats(struct rpc_task *task) op_metrics->om_bytes_sent += task->tk_bytes_sent; op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd; - queue = (long)req->rq_xtime - task->tk_start; - if (queue < 0) - queue = -queue; - op_metrics->om_queue += queue; + delta = ktime_sub(req->rq_xtime, task->tk_start); + op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta); - rtt = task->tk_rtt; - if (rtt < 0) - rtt = -rtt; - op_metrics->om_rtt += rtt; + op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, task->tk_rtt); - execute = (long)jiffies - task->tk_start; - if (execute < 0) - execute = -execute; - op_metrics->om_execute += execute; + delta = ktime_sub(ktime_get(), task->tk_start); + op_metrics->om_execute = ktime_add(op_metrics->om_execute, delta); } static void _print_name(struct seq_file *seq, unsigned int op, @@ -186,8 +179,6 @@ static void _print_name(struct seq_file *seq, unsigned int op, seq_printf(seq, "\t%12u: ", op); } -#define MILLISECS_PER_JIFFY (1000 / HZ) - void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt) { struct rpc_iostats *stats = clnt->cl_metrics; @@ -214,9 +205,9 @@ void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt) metrics->om_timeouts, metrics->om_bytes_sent, metrics->om_bytes_recv, - metrics->om_queue * MILLISECS_PER_JIFFY, - metrics->om_rtt * MILLISECS_PER_JIFFY, - metrics->om_execute * MILLISECS_PER_JIFFY); + ktime_to_ms(metrics->om_queue), + ktime_to_ms(metrics->om_rtt), + ktime_to_ms(metrics->om_execute)); } } EXPORT_SYMBOL_GPL(rpc_print_iostats); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 698c62712294..8986b1b82862 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -779,10 +780,11 @@ static void xprt_update_rtt(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; struct rpc_rtt *rtt = task->tk_client->cl_rtt; unsigned timer = task->tk_msg.rpc_proc->p_timer; + long m = usecs_to_jiffies(ktime_to_us(task->tk_rtt)); if (timer) { if (req->rq_ntrans == 1) - rpc_update_rtt(rtt, timer, task->tk_rtt); + rpc_update_rtt(rtt, timer, m); rpc_set_timeo(rtt, timer, req->rq_ntrans - 1); } } @@ -803,7 +805,7 @@ void xprt_complete_rqst(struct rpc_task *task, int copied) task->tk_pid, ntohl(req->rq_xid), copied); xprt->stat.recvs++; - task->tk_rtt = (long)jiffies - req->rq_xtime; + task->tk_rtt = ktime_sub(ktime_get(), req->rq_xtime); if (xprt->ops->timer != 
NULL) xprt_update_rtt(task); @@ -904,7 +906,7 @@ void xprt_transmit(struct rpc_task *task) return; req->rq_connect_cookie = xprt->connect_cookie; - req->rq_xtime = jiffies; + req->rq_xtime = ktime_get(); status = xprt->ops->send_request(task); if (status != 0) { task->tk_status = status; -- cgit v1.2.2 From 712a4338669d7d57f952244abb608e6ac07e39da Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 12 May 2010 17:50:23 -0400 Subject: SUNRPC: Fix xs_setup_bc_tcp() It is a BUG for anybody to call this function without setting args->bc_xprt. Trying to return an error value is just wrong, since the user cannot fix this: it is a programming error, not a user error. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 3d1dcdf2aef1..beefa7a3a90e 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2442,9 +2442,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) struct sock_xprt *transport; struct svc_sock *bc_sock; - if (!args->bc_xprt) - ERR_PTR(-EINVAL); - xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); if (IS_ERR(xprt)) return xprt; -- cgit v1.2.2 From 1f4c86c0be9064ab4eebd9e67c84606c1cfeec4b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 13 May 2010 12:51:02 -0400 Subject: NFS: Don't use GFP_KERNEL in rpcsec_gss downcalls Again, we can deadlock if the memory reclaim triggers a writeback that requires a rpcsec_gss credential lookup. Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 2 +- net/sunrpc/auth_gss/gss_krb5_keys.c | 9 +++++---- net/sunrpc/auth_gss/gss_krb5_mech.c | 34 ++++++++++++++++++---------------- net/sunrpc/auth_gss/gss_mech_switch.c | 7 ++++--- net/sunrpc/auth_gss/gss_spkm3_mech.c | 5 +++-- net/sunrpc/auth_gss/svcauth_gss.c | 2 +- 6 files changed, 32 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 6654c8534d32..48a7939dc9e2 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -229,7 +229,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct p = ERR_PTR(-EFAULT); goto err; } - ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx); + ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, GFP_NOFS); if (ret < 0) { p = ERR_PTR(ret); goto err; diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c index 33b87f04b30b..76e42e6be755 100644 --- a/net/sunrpc/auth_gss/gss_krb5_keys.c +++ b/net/sunrpc/auth_gss/gss_krb5_keys.c @@ -150,7 +150,8 @@ static void krb5_nfold(u32 inbits, const u8 *in, u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e, const struct xdr_netobj *inkey, struct xdr_netobj *outkey, - const struct xdr_netobj *in_constant) + const struct xdr_netobj *in_constant, + gfp_t gfp_mask) { size_t blocksize, keybytes, keylength, n; unsigned char *inblockdata, *outblockdata, *rawkey; @@ -175,15 +176,15 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e, /* allocate and set up buffers */ ret = ENOMEM; - inblockdata = kmalloc(blocksize, GFP_KERNEL); + inblockdata = kmalloc(blocksize, gfp_mask); if (inblockdata == NULL) goto err_free_cipher; - outblockdata = kmalloc(blocksize, GFP_KERNEL); + outblockdata = kmalloc(blocksize, gfp_mask); if (outblockdata == NULL) goto err_free_in; - rawkey = kmalloc(keybytes, GFP_KERNEL); + rawkey = kmalloc(keybytes, gfp_mask); if (rawkey == NULL) 
goto err_free_out; diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 7c249a3f9a03..032644610524 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -369,7 +369,7 @@ set_cdata(u8 cdata[GSS_KRB5_K5CLENGTH], u32 usage, u8 seed) } static int -context_derive_keys_des3(struct krb5_ctx *ctx) +context_derive_keys_des3(struct krb5_ctx *ctx, gfp_t gfp_mask) { struct xdr_netobj c, keyin, keyout; u8 cdata[GSS_KRB5_K5CLENGTH]; @@ -396,7 +396,7 @@ context_derive_keys_des3(struct krb5_ctx *ctx) /* derive cksum */ set_cdata(cdata, KG_USAGE_SIGN, KEY_USAGE_SEED_CHECKSUM); keyout.data = ctx->cksum; - err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c); + err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask); if (err) { dprintk("%s: Error %d deriving cksum key\n", __func__, err); @@ -487,7 +487,7 @@ out_err: } static int -context_derive_keys_new(struct krb5_ctx *ctx) +context_derive_keys_new(struct krb5_ctx *ctx, gfp_t gfp_mask) { struct xdr_netobj c, keyin, keyout; u8 cdata[GSS_KRB5_K5CLENGTH]; @@ -503,7 +503,7 @@ context_derive_keys_new(struct krb5_ctx *ctx) /* initiator seal encryption */ set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_ENCRYPTION); keyout.data = ctx->initiator_seal; - err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c); + err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask); if (err) { dprintk("%s: Error %d deriving initiator_seal key\n", __func__, err); @@ -518,7 +518,7 @@ context_derive_keys_new(struct krb5_ctx *ctx) /* acceptor seal encryption */ set_cdata(cdata, KG_USAGE_ACCEPTOR_SEAL, KEY_USAGE_SEED_ENCRYPTION); keyout.data = ctx->acceptor_seal; - err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c); + err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask); if (err) { dprintk("%s: Error %d deriving acceptor_seal key\n", __func__, err); @@ -533,7 +533,7 @@ context_derive_keys_new(struct krb5_ctx *ctx) /* initiator sign checksum */ set_cdata(cdata, KG_USAGE_INITIATOR_SIGN, KEY_USAGE_SEED_CHECKSUM); keyout.data = ctx->initiator_sign; - err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c); + err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask); if (err) { dprintk("%s: Error %d deriving initiator_sign key\n", __func__, err); @@ -543,7 +543,7 @@ context_derive_keys_new(struct krb5_ctx *ctx) /* acceptor sign checksum */ set_cdata(cdata, KG_USAGE_ACCEPTOR_SIGN, KEY_USAGE_SEED_CHECKSUM); keyout.data = ctx->acceptor_sign; - err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c); + err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask); if (err) { dprintk("%s: Error %d deriving acceptor_sign key\n", __func__, err); @@ -553,7 +553,7 @@ context_derive_keys_new(struct krb5_ctx *ctx) /* initiator seal integrity */ set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_INTEGRITY); keyout.data = ctx->initiator_integ; - err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c); + err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask); if (err) { dprintk("%s: Error %d deriving initiator_integ key\n", __func__, err); @@ -563,7 +563,7 @@ context_derive_keys_new(struct krb5_ctx *ctx) /* acceptor seal integrity */ set_cdata(cdata, KG_USAGE_ACCEPTOR_SEAL, KEY_USAGE_SEED_INTEGRITY); keyout.data = ctx->acceptor_integ; - err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c); + err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask); if (err) { dprintk("%s: Error %d deriving acceptor_integ key\n", __func__, err); @@ -598,7 +598,8 @@ out_err: } static 
int -gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx) +gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx, + gfp_t gfp_mask) { int keylen; @@ -645,7 +646,7 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx) } ctx->mech_used.data = kmemdup(gss_kerberos_mech.gm_oid.data, - gss_kerberos_mech.gm_oid.len, GFP_KERNEL); + gss_kerberos_mech.gm_oid.len, gfp_mask); if (unlikely(ctx->mech_used.data == NULL)) { p = ERR_PTR(-ENOMEM); goto out_err; @@ -654,12 +655,12 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx) switch (ctx->enctype) { case ENCTYPE_DES3_CBC_RAW: - return context_derive_keys_des3(ctx); + return context_derive_keys_des3(ctx, gfp_mask); case ENCTYPE_ARCFOUR_HMAC: return context_derive_keys_rc4(ctx); case ENCTYPE_AES128_CTS_HMAC_SHA1_96: case ENCTYPE_AES256_CTS_HMAC_SHA1_96: - return context_derive_keys_new(ctx); + return context_derive_keys_new(ctx, gfp_mask); default: return -EINVAL; } @@ -670,20 +671,21 @@ out_err: static int gss_import_sec_context_kerberos(const void *p, size_t len, - struct gss_ctx *ctx_id) + struct gss_ctx *ctx_id, + gfp_t gfp_mask) { const void *end = (const void *)((const char *)p + len); struct krb5_ctx *ctx; int ret; - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + ctx = kzalloc(sizeof(*ctx), gfp_mask); if (ctx == NULL) return -ENOMEM; if (len == 85) ret = gss_import_v1_context(p, end, ctx); else - ret = gss_import_v2_context(p, end, ctx); + ret = gss_import_v2_context(p, end, ctx, gfp_mask); if (ret == 0) ctx_id->internal_ctx_id = ctx; diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 28a84ef41d13..2689de39dc78 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -249,14 +249,15 @@ EXPORT_SYMBOL_GPL(gss_mech_put); int gss_import_sec_context(const void *input_token, size_t bufsize, struct gss_api_mech *mech, - struct gss_ctx **ctx_id) + struct gss_ctx **ctx_id, + gfp_t gfp_mask) { - if (!(*ctx_id = kzalloc(sizeof(**ctx_id), GFP_KERNEL))) + if (!(*ctx_id = kzalloc(sizeof(**ctx_id), gfp_mask))) return -ENOMEM; (*ctx_id)->mech_type = gss_mech_get(mech); return mech->gm_ops - ->gss_import_sec_context(input_token, bufsize, *ctx_id); + ->gss_import_sec_context(input_token, bufsize, *ctx_id, gfp_mask); } /* gss_get_mic: compute a mic over message and return mic_token. 
*/ diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index 035e1dd6af1b..dc3f1f5ed865 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -84,13 +84,14 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) static int gss_import_sec_context_spkm3(const void *p, size_t len, - struct gss_ctx *ctx_id) + struct gss_ctx *ctx_id, + gfp_t gfp_mask) { const void *end = (const void *)((const char *)p + len); struct spkm3_ctx *ctx; int version; - if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS))) + if (!(ctx = kzalloc(sizeof(*ctx), gfp_mask))) goto out_err; p = simple_get_bytes(p, end, &version, sizeof(version)); diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 1d9ac4ac818a..cc385b3a59c2 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -494,7 +494,7 @@ static int rsc_parse(struct cache_detail *cd, len = qword_get(&mesg, buf, mlen); if (len < 0) goto out; - status = gss_import_sec_context(buf, len, gm, &rsci.mechctx); + status = gss_import_sec_context(buf, len, gm, &rsci.mechctx, GFP_KERNEL); if (status) goto out; -- cgit v1.2.2 From d300a41ef1c39cc5e6b90fd8834ea7ab16b5c48f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 13 May 2010 12:51:03 -0400 Subject: SUNRPC: Dont run rpcauth_cache_shrinker() when gfp_mask is GFP_NOFS Under some circumstances, put_rpccred() can end up allocating memory, so check the gfp_mask to prevent deadlocks. Signed-off-by: Trond Myklebust --- net/sunrpc/auth.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 95afe79dd9d7..0667a36aee78 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -270,6 +270,8 @@ rpcauth_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) LIST_HEAD(free); int res; + if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) + return (nr_to_scan == 0) ? 0 : -1; if (list_empty(&cred_unused)) return 0; spin_lock(&rpc_credcache_lock); -- cgit v1.2.2 From 93a05e65c090dda9cbd79d0cf57b65c4dbd8da55 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 13 May 2010 12:51:06 -0400 Subject: SUNRPC: Ensure memory shrinker doesn't waste time in rpcauth_prune_expired() The 'cred_unused' list, that is traversed by rpcauth_cache_shrinker is ordered by time. If we hit a credential that is under the 60 second garbage collection moratorium, we should exit because we know at that point that all successive credentials are subject to the same moratorium... Signed-off-by: Trond Myklebust --- net/sunrpc/auth.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 0667a36aee78..c40856f589f0 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -236,10 +236,13 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) { - /* Enforce a 60 second garbage collection moratorium */ + /* + * Enforce a 60 second garbage collection moratorium + * Note that the cred_unused list must be time-ordered. 
+ */ if (time_in_range(cred->cr_expire, expired, jiffies) && test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) - continue; + return 0; list_del_init(&cred->cr_lru); number_cred_unused--; @@ -258,7 +261,7 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) if (nr_to_scan == 0) break; } - return nr_to_scan; + return (number_cred_unused / 100) * sysctl_vfs_cache_pressure; } /* @@ -275,8 +278,7 @@ rpcauth_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) if (list_empty(&cred_unused)) return 0; spin_lock(&rpc_credcache_lock); - nr_to_scan = rpcauth_prune_expired(&free, nr_to_scan); - res = (number_cred_unused / 100) * sysctl_vfs_cache_pressure; + res = rpcauth_prune_expired(&free, nr_to_scan); spin_unlock(&rpc_credcache_lock); rpcauth_destroy_credlist(&free); return res; -- cgit v1.2.2 From 20673406534176ead9b984a84b662928110f77b1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 13 May 2010 12:51:06 -0400 Subject: SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter Signed-off-by: Trond Myklebust --- net/sunrpc/auth.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index c40856f589f0..73affb8624fa 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -236,6 +236,8 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) { + if (nr_to_scan-- == 0) + break; /* * Enforce a 60 second garbage collection moratorium * Note that the cred_unused list must be time-ordered. @@ -255,11 +257,8 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) get_rpccred(cred); list_add_tail(&cred->cr_lru, free); rpcauth_unhash_cred_locked(cred); - nr_to_scan--; } spin_unlock(cache_lock); - if (nr_to_scan == 0) - break; } return (number_cred_unused / 100) * sysctl_vfs_cache_pressure; } -- cgit v1.2.2 From d60dbb20a74c2cfa142be0a34dac3c6547ea086c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 13 May 2010 12:51:49 -0400 Subject: SUNRPC: Move the task->tk_bytes_sent and tk_rtt to struct rpc_rqst It seems strange to maintain stats for bytes_sent in one structure, and bytes received in another. 
Try to assemble all the RPC request-related stats in struct rpc_rqst Signed-off-by: Trond Myklebust --- net/sunrpc/stats.c | 4 ++-- net/sunrpc/xprt.c | 4 ++-- net/sunrpc/xprtrdma/transport.c | 2 +- net/sunrpc/xprtsock.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index aacd95f0dce5..ea1046f3f9a3 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -156,13 +156,13 @@ void rpc_count_iostats(struct rpc_task *task) op_metrics->om_ntrans += req->rq_ntrans; op_metrics->om_timeouts += task->tk_timeouts; - op_metrics->om_bytes_sent += task->tk_bytes_sent; + op_metrics->om_bytes_sent += req->rq_xmit_bytes_sent; op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd; delta = ktime_sub(req->rq_xtime, task->tk_start); op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta); - op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, task->tk_rtt); + op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, req->rq_rtt); delta = ktime_sub(ktime_get(), task->tk_start); op_metrics->om_execute = ktime_add(op_metrics->om_execute, delta); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 8986b1b82862..65fe2e4e7cbf 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -780,7 +780,7 @@ static void xprt_update_rtt(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; struct rpc_rtt *rtt = task->tk_client->cl_rtt; unsigned timer = task->tk_msg.rpc_proc->p_timer; - long m = usecs_to_jiffies(ktime_to_us(task->tk_rtt)); + long m = usecs_to_jiffies(ktime_to_us(req->rq_rtt)); if (timer) { if (req->rq_ntrans == 1) @@ -805,7 +805,7 @@ void xprt_complete_rqst(struct rpc_task *task, int copied) task->tk_pid, ntohl(req->rq_xid), copied); xprt->stat.recvs++; - task->tk_rtt = ktime_sub(ktime_get(), req->rq_xtime); + req->rq_rtt = ktime_sub(ktime_get(), req->rq_xtime); if (xprt->ops->timer != NULL) xprt_update_rtt(task); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 3f3b38c5642f..a85e866a77f7 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -674,7 +674,7 @@ xprt_rdma_send_request(struct rpc_task *task) if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) goto drop_connection; - task->tk_bytes_sent += rqst->rq_snd_buf.len; + rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len; rqst->rq_bytes_sent = 0; return 0; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index beefa7a3a90e..02fc7f04dd17 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -528,7 +528,7 @@ static int xs_udp_send_request(struct rpc_task *task) xdr->len - req->rq_bytes_sent, status); if (status >= 0) { - task->tk_bytes_sent += status; + req->rq_xmit_bytes_sent += status; if (status >= req->rq_slen) return 0; /* Still some bytes left; set up for a retry later. */ @@ -624,7 +624,7 @@ static int xs_tcp_send_request(struct rpc_task *task) /* If we've sent the entire packet, immediately * reset the count of bytes sent. */ req->rq_bytes_sent += status; - task->tk_bytes_sent += status; + req->rq_xmit_bytes_sent += status; if (likely(req->rq_bytes_sent >= req->rq_slen)) { req->rq_bytes_sent = 0; return 0; -- cgit v1.2.2 From d72b6cec8d42eb7c2a249b613abf2c2b7a6eeb47 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 13 May 2010 12:51:50 -0400 Subject: SUNRPC: Remove the 'tk_magic' debugging field It has not triggered in almost a decade. Time to get rid of it... 
Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index aa7b07ef5d55..4a843b883b89 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -25,7 +25,6 @@ #ifdef RPC_DEBUG #define RPCDBG_FACILITY RPCDBG_SCHED -#define RPC_TASK_MAGIC_ID 0xf00baa #endif /* @@ -237,7 +236,6 @@ static void rpc_task_set_debuginfo(struct rpc_task *task) { static atomic_t rpc_pid; - task->tk_magic = RPC_TASK_MAGIC_ID; task->tk_pid = atomic_inc_return(&rpc_pid); } #else @@ -360,9 +358,6 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task dprintk("RPC: %5u __rpc_wake_up_task (now %lu)\n", task->tk_pid, jiffies); -#ifdef RPC_DEBUG - BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID); -#endif /* Has the task been executed yet? If not, we cannot wake it up! */ if (!RPC_IS_ACTIVATED(task)) { printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task); @@ -916,9 +911,6 @@ EXPORT_SYMBOL_GPL(rpc_put_task); static void rpc_release_task(struct rpc_task *task) { -#ifdef RPC_DEBUG - BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID); -#endif dprintk("RPC: %5u release task\n", task->tk_pid); if (!list_empty(&task->tk_task)) { @@ -930,9 +922,6 @@ static void rpc_release_task(struct rpc_task *task) } BUG_ON (RPC_IS_QUEUED(task)); -#ifdef RPC_DEBUG - task->tk_magic = 0; -#endif /* Wake up anyone who is waiting for task completion */ rpc_mark_complete_task(task); -- cgit v1.2.2 From 126e216a8730532dfb685205309275f87e3d133e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 13 May 2010 12:55:38 -0400 Subject: SUNRPC: Don't spam gssd with upcall requests when the kerberos key expired Now that the rpc.gssd daemon can explicitly tell us that the key expired, we should cache that information to avoid spamming gssd. 
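For illustration, the caching rule boils down to remembering the time of the last -EKEYEXPIRED downcall and refusing to issue a new upcall while that result is still fresh. A minimal, self-contained userspace sketch of that rule follows, using plain seconds instead of jiffies and illustrative names rather than the kernel's rpc_cred fields; the real check added below, gss_cred_is_negative_entry(), performs the same test with RPCAUTH_CRED_NEGATIVE, gc_upcall_timestamp and time_in_range_open().

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

/* Illustrative stand-ins for the kernel's credential state. */
struct demo_cred {
	bool	negative;		/* like RPCAUTH_CRED_NEGATIVE */
	time_t	upcall_timestamp;	/* like gc_upcall_timestamp, in seconds */
};

/* mirrors gss_expired_cred_retry_delay (default 5 seconds) */
static const time_t retry_delay = 5;

/* True while the cached "key expired" answer is still fresh, i.e. while
 * another upcall to rpc.gssd would only repeat the same failure. */
static bool cred_is_negative_entry(const struct demo_cred *cred)
{
	time_t now = time(NULL);

	return cred->negative &&
	       now >= cred->upcall_timestamp &&
	       now <  cred->upcall_timestamp + retry_delay;
}

int main(void)
{
	struct demo_cred cred = { .negative = true, .upcall_timestamp = time(NULL) };

	/* Within the moratorium: fail fast with -EKEYEXPIRED, no upcall. */
	printf("skip upcall now: %d\n", cred_is_negative_entry(&cred));

	/* Once the window has passed, a new upcall is allowed again. */
	cred.upcall_timestamp -= retry_delay + 1;
	printf("skip upcall later: %d\n", cred_is_negative_entry(&cred));
	return 0;
}
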
Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 65 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 53 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 48a7939dc9e2..8da2a0e68574 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -57,6 +57,9 @@ static const struct rpc_authops authgss_ops; static const struct rpc_credops gss_credops; static const struct rpc_credops gss_nullops; +#define GSS_RETRY_EXPIRED 5 +static unsigned int gss_expired_cred_retry_delay = GSS_RETRY_EXPIRED; + #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH #endif @@ -349,6 +352,24 @@ gss_unhash_msg(struct gss_upcall_msg *gss_msg) spin_unlock(&inode->i_lock); } +static void +gss_handle_downcall_result(struct gss_cred *gss_cred, struct gss_upcall_msg *gss_msg) +{ + switch (gss_msg->msg.errno) { + case 0: + if (gss_msg->ctx == NULL) + break; + clear_bit(RPCAUTH_CRED_NEGATIVE, &gss_cred->gc_base.cr_flags); + gss_cred_set_ctx(&gss_cred->gc_base, gss_msg->ctx); + break; + case -EKEYEXPIRED: + set_bit(RPCAUTH_CRED_NEGATIVE, &gss_cred->gc_base.cr_flags); + } + gss_cred->gc_upcall_timestamp = jiffies; + gss_cred->gc_upcall = NULL; + rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno); +} + static void gss_upcall_callback(struct rpc_task *task) { @@ -358,13 +379,9 @@ gss_upcall_callback(struct rpc_task *task) struct inode *inode = &gss_msg->inode->vfs_inode; spin_lock(&inode->i_lock); - if (gss_msg->ctx) - gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_msg->ctx); - else - task->tk_status = gss_msg->msg.errno; - gss_cred->gc_upcall = NULL; - rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno); + gss_handle_downcall_result(gss_cred, gss_msg); spin_unlock(&inode->i_lock); + task->tk_status = gss_msg->msg.errno; gss_release_msg(gss_msg); } @@ -513,18 +530,16 @@ gss_refresh_upcall(struct rpc_task *task) spin_lock(&inode->i_lock); if (gss_cred->gc_upcall != NULL) rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL); - else if (gss_msg->ctx != NULL) { - gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_msg->ctx); - gss_cred->gc_upcall = NULL; - rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno); - } else if (gss_msg->msg.errno >= 0) { + else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) { task->tk_timeout = 0; gss_cred->gc_upcall = gss_msg; /* gss_upcall_callback will release the reference to gss_upcall_msg */ atomic_inc(&gss_msg->count); rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback); - } else + } else { + gss_handle_downcall_result(gss_cred, gss_msg); err = gss_msg->msg.errno; + } spin_unlock(&inode->i_lock); gss_release_msg(gss_msg); out: @@ -1123,6 +1138,23 @@ static int gss_renew_cred(struct rpc_task *task) return 0; } +static int gss_cred_is_negative_entry(struct rpc_cred *cred) +{ + if (test_bit(RPCAUTH_CRED_NEGATIVE, &cred->cr_flags)) { + unsigned long now = jiffies; + unsigned long begin, expire; + struct gss_cred *gss_cred; + + gss_cred = container_of(cred, struct gss_cred, gc_base); + begin = gss_cred->gc_upcall_timestamp; + expire = begin + gss_expired_cred_retry_delay * HZ; + + if (time_in_range_open(now, begin, expire)) + return 1; + } + return 0; +} + /* * Refresh credentials. 
XXX - finish */ @@ -1132,6 +1164,9 @@ gss_refresh(struct rpc_task *task) struct rpc_cred *cred = task->tk_msg.rpc_cred; int ret = 0; + if (gss_cred_is_negative_entry(cred)) + return -EKEYEXPIRED; + if (!test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) && !test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags)) { ret = gss_renew_cred(task); @@ -1585,5 +1620,11 @@ static void __exit exit_rpcsec_gss(void) } MODULE_LICENSE("GPL"); +module_param_named(expired_cred_retry_delay, + gss_expired_cred_retry_delay, + uint, 0644); +MODULE_PARM_DESC(expired_cred_retry_delay, "Timeout (in seconds) until " + "the RPC engine retries an expired credential"); + module_init(init_rpcsec_gss) module_exit(exit_rpcsec_gss) -- cgit v1.2.2 From a1d7c1b4b8dfbc5ecadcff9284d64bb6ad4c0196 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 14 May 2010 21:18:17 +0200 Subject: netfilter: nf_ct_sip: handle non-linear skbs Handle non-linear skbs by linearizing them instead of silently failing. Long term the helper should be fixed to either work with non-linear skbs directly by using the string search API or work on a copy of the data. Based on patch by Jason Gunthorpe Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_sip.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index b20f4275893c..53d892210a04 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1393,10 +1393,8 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, nf_ct_refresh(ct, skb, sip_timeout * HZ); - if (skb_is_nonlinear(skb)) { - pr_debug("Copy of skbuff not supported yet.\n"); - return NF_ACCEPT; - } + if (unlikely(skb_linearize(skb))) + return NF_DROP; dptr = skb->data + dataoff; datalen = skb->len - dataoff; @@ -1455,10 +1453,8 @@ static int sip_help_udp(struct sk_buff *skb, unsigned int protoff, nf_ct_refresh(ct, skb, sip_timeout * HZ); - if (skb_is_nonlinear(skb)) { - pr_debug("Copy of skbuff not supported yet.\n"); - return NF_ACCEPT; - } + if (unlikely(skb_linearize(skb))) + return NF_DROP; dptr = skb->data + dataoff; datalen = skb->len - dataoff; -- cgit v1.2.2 From 83827f6a891e20de7468b1181f2ae8a3cc72587b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 14 May 2010 13:52:30 -0700 Subject: netfilter: xt_TEE depends on NF_CONNTRACK Fix xt_TEE build for the case of NF_CONNTRACK=m and NETFILTER_XT_TARGET_TEE=y: xt_TEE.c:(.text+0x6df5c): undefined reference to `nf_conntrack_untracked' 4x Built with all 4 m/y combinations. Signed-off-by: Randy Dunlap Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index e223f47b8bae..8593a77cfea9 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -506,6 +506,7 @@ config NETFILTER_XT_TARGET_TEE tristate '"TEE" - packet cloning to alternate destiantion' depends on NETFILTER_ADVANCED depends on (IPV6 || IPV6=n) + depends on !NF_CONNTRACK || NF_CONNTRACK ---help--- This option adds a "TEE" target with which a packet can be cloned and this clone be rerouted to another nexthop. -- cgit v1.2.2 From cfb478da70f2213520866c84cb0aea0d7c0e1148 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 10 May 2010 09:31:08 +0000 Subject: bridge: netpoll cleanup Move code around so that the ifdef for NETPOLL_CONTROLLER don't have to show up in main code path. 
The control functions should be in helpers that are only compiled if needed. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_device.c | 30 +++++++++++++++++++++++++----- net/bridge/br_if.c | 23 ++--------------------- net/bridge/br_private.h | 14 ++++++++++++-- 3 files changed, 39 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index f15f9c4a0dd2..22b97ba7dc52 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -199,7 +199,7 @@ static int br_set_tx_csum(struct net_device *dev, u32 data) } #ifdef CONFIG_NET_POLL_CONTROLLER -bool br_devices_support_netpoll(struct net_bridge *br) +static bool br_devices_support_netpoll(struct net_bridge *br) { struct net_bridge_port *p; bool ret = true; @@ -225,9 +225,9 @@ static void br_poll_controller(struct net_device *br_dev) netpoll_poll_dev(np->real_dev); } -void br_netpoll_cleanup(struct net_device *br_dev) +void br_netpoll_cleanup(struct net_device *dev) { - struct net_bridge *br = netdev_priv(br_dev); + struct net_bridge *br = netdev_priv(dev); struct net_bridge_port *p, *n; const struct net_device_ops *ops; @@ -243,10 +243,30 @@ void br_netpoll_cleanup(struct net_device *br_dev) } } -#else +void br_netpoll_disable(struct net_bridge *br, + struct net_device *dev) +{ + if (br_devices_support_netpoll(br)) + br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL; + if (dev->netdev_ops->ndo_netpoll_cleanup) + dev->netdev_ops->ndo_netpoll_cleanup(dev); + else + dev->npinfo = NULL; +} -void br_netpoll_cleanup(struct net_device *br_dev) +void br_netpoll_enable(struct net_bridge *br, + struct net_device *dev) { + if (br_devices_support_netpoll(br)) { + br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL; + if (br->dev->npinfo) + dev->npinfo = br->dev->npinfo; + } else if (!(br->dev->priv_flags & IFF_DISABLE_NETPOLL)) { + br->dev->priv_flags |= IFF_DISABLE_NETPOLL; + printk(KERN_INFO "%s:new device %s" + " does not support netpoll (disabling)", + br->dev->name, dev->name); + } } #endif diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 537bdd60d9b9..45f3f8871149 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -154,14 +154,7 @@ static void del_nbp(struct net_bridge_port *p) kobject_uevent(&p->kobj, KOBJ_REMOVE); kobject_del(&p->kobj); -#ifdef CONFIG_NET_POLL_CONTROLLER - if (br_devices_support_netpoll(br)) - br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL; - if (dev->netdev_ops->ndo_netpoll_cleanup) - dev->netdev_ops->ndo_netpoll_cleanup(dev); - else - dev->npinfo = NULL; -#endif + br_netpoll_disable(br, dev); call_rcu(&p->rcu, destroy_nbp_rcu); } @@ -455,19 +448,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) kobject_uevent(&p->kobj, KOBJ_ADD); -#ifdef CONFIG_NET_POLL_CONTROLLER - if (br_devices_support_netpoll(br)) { - br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL; - if (br->dev->npinfo) - dev->npinfo = br->dev->npinfo; - } else if (!(br->dev->priv_flags & IFF_DISABLE_NETPOLL)) { - br->dev->priv_flags |= IFF_DISABLE_NETPOLL; - printk(KERN_INFO "New device %s does not support netpoll\n", - dev->name); - printk(KERN_INFO "Disabling netpoll for %s\n", - br->dev->name); - } -#endif + br_netpoll_enable(br, dev); return 0; err2: diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 3d2d3fe0a97e..8a072661a744 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -253,8 +253,18 @@ static inline int br_is_root_bridge(const struct net_bridge *br) extern void br_dev_setup(struct net_device *dev); extern netdev_tx_t 
br_dev_xmit(struct sk_buff *skb, struct net_device *dev); -extern bool br_devices_support_netpoll(struct net_bridge *br); -extern void br_netpoll_cleanup(struct net_device *br_dev); +#ifdef CONFIG_NET_POLL_CONTROLLER +extern void br_netpoll_cleanup(struct net_device *dev); +extern void br_netpoll_enable(struct net_bridge *br, + struct net_device *dev); +extern void br_netpoll_disable(struct net_bridge *br, + struct net_device *dev); +#else +#define br_netpoll_cleanup(br) +#define br_netpoll_enable(br, dev) +#define br_netpoll_disable(br, dev) + +#endif /* br_fdb.c */ extern int br_fdb_init(void); -- cgit v1.2.2 From 28a16c97963d3bc36a2c192859f6d8025ef2967a Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 10 May 2010 09:31:09 +0000 Subject: bridge: change console message interface Use one set of macro's for all bridge messages. Note: can't use netdev_XXX macro's because bridge is purely virtual and has no device parent. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br.c | 2 +- net/bridge/br_device.c | 5 ++--- net/bridge/br_fdb.c | 9 ++++----- net/bridge/br_ioctl.c | 2 +- net/bridge/br_multicast.c | 32 +++++++++++++------------------- net/bridge/br_netlink.c | 8 +++++--- net/bridge/br_private.h | 15 +++++++++++++++ net/bridge/br_stp.c | 11 +++++------ net/bridge/br_stp_if.c | 16 ++++++---------- net/bridge/br_stp_timer.c | 24 ++++++++++-------------- 10 files changed, 62 insertions(+), 62 deletions(-) (limited to 'net') diff --git a/net/bridge/br.c b/net/bridge/br.c index e1241c76239a..76357b547752 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -38,7 +38,7 @@ static int __init br_init(void) err = stp_proto_register(&br_stp_proto); if (err < 0) { - printk(KERN_ERR "bridge: can't register sap for STP\n"); + pr_err("bridge: can't register sap for STP\n"); return err; } diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 22b97ba7dc52..eedf2c94820e 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -263,9 +263,8 @@ void br_netpoll_enable(struct net_bridge *br, dev->npinfo = br->dev->npinfo; } else if (!(br->dev->priv_flags & IFF_DISABLE_NETPOLL)) { br->dev->priv_flags |= IFF_DISABLE_NETPOLL; - printk(KERN_INFO "%s:new device %s" - " does not support netpoll (disabling)", - br->dev->name, dev->name); + br_info(br,"new device %s does not support netpoll (disabling)", + dev->name); } } diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 9101a4e56201..26637439965b 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -353,8 +353,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, */ if (fdb->is_local) return 0; - - printk(KERN_WARNING "%s adding interface with same address " + br_warn(br, "adding interface %s with same address " "as a received packet\n", source->dev->name); fdb_delete(fdb); @@ -397,9 +396,9 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, /* attempt to update an entry for a local interface */ if (unlikely(fdb->is_local)) { if (net_ratelimit()) - printk(KERN_WARNING "%s: received packet with " - "own address as source address\n", - source->dev->name); + br_warn(br, "received packet on %s with " + "own address as source address\n", + source->dev->name); } else { /* fastpath: update of existing entry */ fdb->dst = source; diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 995afc4b04dc..cb43312b846e 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -412,6 +412,6 @@ int br_dev_ioctl(struct net_device 
*dev, struct ifreq *rq, int cmd) } - pr_debug("Bridge does not support ioctl 0x%x\n", cmd); + br_debug(br, "Bridge does not support ioctl 0x%x\n", cmd); return -EOPNOTSUPP; } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index c8419e240316..9d21d98ae5fa 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -585,10 +585,9 @@ static struct net_bridge_mdb_entry *br_multicast_get_group( if (unlikely(count > br->hash_elasticity && count)) { if (net_ratelimit()) - printk(KERN_INFO "%s: Multicast hash table " - "chain limit reached: %s\n", - br->dev->name, port ? port->dev->name : - br->dev->name); + br_info(br, "Multicast hash table " + "chain limit reached: %s\n", + port ? port->dev->name : br->dev->name); elasticity = br->hash_elasticity; } @@ -596,11 +595,9 @@ static struct net_bridge_mdb_entry *br_multicast_get_group( if (mdb->size >= max) { max *= 2; if (unlikely(max >= br->hash_max)) { - printk(KERN_WARNING "%s: Multicast hash table maximum " - "reached, disabling snooping: %s, %d\n", - br->dev->name, port ? port->dev->name : - br->dev->name, - max); + br_warn(br, "Multicast hash table maximum " + "reached, disabling snooping: %s, %d\n", + port ? port->dev->name : br->dev->name, max); err = -E2BIG; disable: br->multicast_disabled = 1; @@ -611,22 +608,19 @@ disable: if (max > mdb->max || elasticity) { if (mdb->old) { if (net_ratelimit()) - printk(KERN_INFO "%s: Multicast hash table " - "on fire: %s\n", - br->dev->name, port ? port->dev->name : - br->dev->name); + br_info(br, "Multicast hash table " + "on fire: %s\n", + port ? port->dev->name : br->dev->name); err = -EEXIST; goto err; } err = br_mdb_rehash(&br->mdb, max, elasticity); if (err) { - printk(KERN_WARNING "%s: Cannot rehash multicast " - "hash table, disabling snooping: " - "%s, %d, %d\n", - br->dev->name, port ? port->dev->name : - br->dev->name, - mdb->size, err); + br_warn(br, "Cannot rehash multicast " + "hash table, disabling snooping: %s, %d, %d\n", + port ? port->dev->name : br->dev->name, + mdb->size, err); goto disable; } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index aa56ac2c8829..fe0a79018ab2 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -42,8 +42,8 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por struct nlmsghdr *nlh; u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; - pr_debug("br_fill_info event %d port %s master %s\n", - event, dev->name, br->dev->name); + br_debug(br, "br_fill_info event %d port %s master %s\n", + event, dev->name, br->dev->name); nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags); if (nlh == NULL) @@ -87,7 +87,9 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port) struct sk_buff *skb; int err = -ENOBUFS; - pr_debug("bridge notify event=%d\n", event); + br_debug(port->br, "port %u(%s) event %d\n", + (unsigned)port->port_no, port->dev->name, event); + skb = nlmsg_new(br_nlmsg_size(), GFP_ATOMIC); if (skb == NULL) goto errout; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 8a072661a744..79a13d1c2573 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -240,6 +240,21 @@ struct br_input_skb_cb { # define BR_INPUT_SKB_CB_MROUTERS_ONLY(__skb) (0) #endif +#define br_printk(level, br, format, args...) \ + printk(level "%s: " format, (br)->dev->name, ##args) + +#define br_err(__br, format, args...) \ + br_printk(KERN_ERR, __br, format, ##args) +#define br_warn(__br, format, args...) 
\ + br_printk(KERN_WARNING, __br, format, ##args) +#define br_notice(__br, format, args...) \ + br_printk(KERN_NOTICE, __br, format, ##args) +#define br_info(__br, format, args...) \ + br_printk(KERN_INFO, __br, format, ##args) + +#define br_debug(br, format, args...) \ + pr_debug("%s: " format, (br)->dev->name, ##args) + extern struct notifier_block br_device_notifier; extern const u8 br_group_address[ETH_ALEN]; diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index edcf14b560f6..57186d84d2bd 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -31,10 +31,9 @@ static const char *const br_port_state_names[] = { void br_log_state(const struct net_bridge_port *p) { - pr_info("%s: port %d(%s) entering %s state\n", - p->br->dev->name, p->port_no, p->dev->name, + br_info(p->br, "port %u(%s) entering %s state\n", + (unsigned) p->port_no, p->dev->name, br_port_state_names[p->state]); - } /* called under bridge lock */ @@ -300,7 +299,7 @@ void br_topology_change_detection(struct net_bridge *br) if (br->stp_enabled != BR_KERNEL_STP) return; - pr_info("%s: topology change detected, %s\n", br->dev->name, + br_info(br, "topology change detected, %s\n", isroot ? "propagating" : "sending tcn bpdu"); if (isroot) { @@ -469,8 +468,8 @@ void br_received_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *b void br_received_tcn_bpdu(struct net_bridge_port *p) { if (br_is_designated_port(p)) { - pr_info("%s: received tcn bpdu on port %i(%s)\n", - p->br->dev->name, p->port_no, p->dev->name); + br_info(p->br, "port %u(%s) received tcn bpdu\n", + (unsigned) p->port_no, p->dev->name); br_topology_change_detection(p->br); br_topology_change_acknowledge(p); diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index d527119e9f54..1d8826914cbf 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -85,17 +85,16 @@ void br_stp_enable_port(struct net_bridge_port *p) { br_init_port(p); br_port_state_selection(p->br); + br_log_state(p); } /* called under bridge lock */ void br_stp_disable_port(struct net_bridge_port *p) { - struct net_bridge *br; + struct net_bridge *br = p->br; int wasroot; - br = p->br; - printk(KERN_INFO "%s: port %i(%s) entering %s state\n", - br->dev->name, p->port_no, p->dev->name, "disabled"); + br_log_state(p); wasroot = br_is_root_bridge(br); br_become_designated_port(p); @@ -127,11 +126,10 @@ static void br_stp_start(struct net_bridge *br) r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); if (r == 0) { br->stp_enabled = BR_USER_STP; - printk(KERN_INFO "%s: userspace STP started\n", br->dev->name); + br_debug(br, "userspace STP started\n"); } else { br->stp_enabled = BR_KERNEL_STP; - printk(KERN_INFO "%s: starting userspace STP failed, " - "starting kernel STP\n", br->dev->name); + br_debug(br, "using kernel STP\n"); /* To start timers on any ports left in blocking */ spin_lock_bh(&br->lock); @@ -148,9 +146,7 @@ static void br_stp_stop(struct net_bridge *br) if (br->stp_enabled == BR_USER_STP) { r = call_usermodehelper(BR_STP_PROG, argv, envp, 1); - printk(KERN_INFO "%s: userspace STP stopped, return code %d\n", - br->dev->name, r); - + br_info(br, "userspace STP stopped, return code %d\n", r); /* To start timers on any ports left in blocking */ spin_lock_bh(&br->lock); diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index 772a140bfdf0..7b22456023c5 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -35,7 +35,7 @@ static void br_hello_timer_expired(unsigned long arg) { struct net_bridge *br = 
(struct net_bridge *)arg; - pr_debug("%s: hello timer expired\n", br->dev->name); + br_debug(br, "hello timer expired\n"); spin_lock(&br->lock); if (br->dev->flags & IFF_UP) { br_config_bpdu_generation(br); @@ -55,13 +55,9 @@ static void br_message_age_timer_expired(unsigned long arg) if (p->state == BR_STATE_DISABLED) return; - - pr_info("%s: neighbor %.2x%.2x.%.2x:%.2x:%.2x:%.2x:%.2x:%.2x lost on port %d(%s)\n", - br->dev->name, - id->prio[0], id->prio[1], - id->addr[0], id->addr[1], id->addr[2], - id->addr[3], id->addr[4], id->addr[5], - p->port_no, p->dev->name); + br_info(br, "port %u(%s) neighbor %.2x%.2x.%pM lost\n", + (unsigned) p->port_no, p->dev->name, + id->prio[0], id->prio[1], &id->addr); /* * According to the spec, the message age timer cannot be @@ -87,8 +83,8 @@ static void br_forward_delay_timer_expired(unsigned long arg) struct net_bridge_port *p = (struct net_bridge_port *) arg; struct net_bridge *br = p->br; - pr_debug("%s: %d(%s) forward delay timer\n", - br->dev->name, p->port_no, p->dev->name); + br_debug(br, "port %u(%s) forward delay timer\n", + (unsigned) p->port_no, p->dev->name); spin_lock(&br->lock); if (p->state == BR_STATE_LISTENING) { p->state = BR_STATE_LEARNING; @@ -107,7 +103,7 @@ static void br_tcn_timer_expired(unsigned long arg) { struct net_bridge *br = (struct net_bridge *) arg; - pr_debug("%s: tcn timer expired\n", br->dev->name); + br_debug(br, "tcn timer expired\n"); spin_lock(&br->lock); if (br->dev->flags & IFF_UP) { br_transmit_tcn(br); @@ -121,7 +117,7 @@ static void br_topology_change_timer_expired(unsigned long arg) { struct net_bridge *br = (struct net_bridge *) arg; - pr_debug("%s: topo change timer expired\n", br->dev->name); + br_debug(br, "topo change timer expired\n"); spin_lock(&br->lock); br->topology_change_detected = 0; br->topology_change = 0; @@ -132,8 +128,8 @@ static void br_hold_timer_expired(unsigned long arg) { struct net_bridge_port *p = (struct net_bridge_port *) arg; - pr_debug("%s: %d(%s) hold timer expired\n", - p->br->dev->name, p->port_no, p->dev->name); + br_debug(p->br, "port %u(%s) hold timer expired\n", + (unsigned) p->port_no, p->dev->name); spin_lock(&p->br->lock); if (p->config_pending) -- cgit v1.2.2 From e0f43752a942b7be1bc06b9fd74e20ae337c1cca Mon Sep 17 00:00:00 2001 From: Simon Arlott Date: Mon, 10 May 2010 09:31:11 +0000 Subject: bridge: update sysfs link names if port device names have changed Links for each port are created in sysfs using the device name, but this could be changed after being added to the bridge. As well as being unable to remove interfaces after this occurs (because userspace tools don't recognise the new name, and the kernel won't recognise the old name), adding another interface with the old name to the bridge will cause an error trying to create the sysfs link. This fixes the problem by listening for NETDEV_CHANGENAME notifications and renaming the link. https://bugzilla.kernel.org/show_bug.cgi?id=12743 Signed-off-by: Simon Arlott Acked-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/bridge/br_if.c | 2 +- net/bridge/br_notify.c | 7 +++++++ net/bridge/br_private.h | 6 ++++++ net/bridge/br_sysfs_if.c | 32 +++++++++++++++++++++++++++----- 4 files changed, 41 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 45f3f8871149..f25e3c92bd72 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -133,7 +133,7 @@ static void del_nbp(struct net_bridge_port *p) struct net_bridge *br = p->br; struct net_device *dev = p->dev; - sysfs_remove_link(br->ifobj, dev->name); + sysfs_remove_link(br->ifobj, p->sysfs_name); dev_set_promiscuity(dev, -1); diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index 1413b72acc7f..717e1fd6133c 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -34,6 +34,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v struct net_device *dev = ptr; struct net_bridge_port *p = dev->br_port; struct net_bridge *br; + int err; /* not a port of a bridge */ if (p == NULL) @@ -83,6 +84,12 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v br_del_if(br, dev); break; + case NETDEV_CHANGENAME: + err = br_sysfs_renameif(p); + if (err) + return notifier_from_errno(err); + break; + case NETDEV_PRE_TYPE_CHANGE: /* Forbid underlaying device to change its type. */ return NOTIFY_BAD; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 79a13d1c2573..0f4a74bc6a9b 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -139,6 +139,10 @@ struct net_bridge_port struct hlist_head mglist; struct hlist_node rlist; #endif + +#ifdef CONFIG_SYSFS + char sysfs_name[IFNAMSIZ]; +#endif }; struct br_cpu_netstats { @@ -480,6 +484,7 @@ extern void br_ifinfo_notify(int event, struct net_bridge_port *port); /* br_sysfs_if.c */ extern const struct sysfs_ops brport_sysfs_ops; extern int br_sysfs_addif(struct net_bridge_port *p); +extern int br_sysfs_renameif(struct net_bridge_port *p); /* br_sysfs_br.c */ extern int br_sysfs_addbr(struct net_device *dev); @@ -488,6 +493,7 @@ extern void br_sysfs_delbr(struct net_device *dev); #else #define br_sysfs_addif(p) (0) +#define br_sysfs_renameif(p) (0) #define br_sysfs_addbr(dev) (0) #define br_sysfs_delbr(dev) do { } while(0) #endif /* CONFIG_SYSFS */ diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 0b9916489d6b..fd5799c9bc8d 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -246,7 +246,7 @@ const struct sysfs_ops brport_sysfs_ops = { /* * Add sysfs entries to ethernet device added to a bridge. * Creates a brport subdirectory with bridge attributes. - * Puts symlink in bridge's brport subdirectory + * Puts symlink in bridge's brif subdirectory */ int br_sysfs_addif(struct net_bridge_port *p) { @@ -257,15 +257,37 @@ int br_sysfs_addif(struct net_bridge_port *p) err = sysfs_create_link(&p->kobj, &br->dev->dev.kobj, SYSFS_BRIDGE_PORT_LINK); if (err) - goto out2; + return err; for (a = brport_attrs; *a; ++a) { err = sysfs_create_file(&p->kobj, &((*a)->attr)); if (err) - goto out2; + return err; } - err = sysfs_create_link(br->ifobj, &p->kobj, p->dev->name); -out2: + strlcpy(p->sysfs_name, p->dev->name, IFNAMSIZ); + return sysfs_create_link(br->ifobj, &p->kobj, p->sysfs_name); +} + +/* Rename bridge's brif symlink */ +int br_sysfs_renameif(struct net_bridge_port *p) +{ + struct net_bridge *br = p->br; + int err; + + /* If a rename fails, the rollback will cause another + * rename call with the existing name. 
+ */ + if (!strncmp(p->sysfs_name, p->dev->name, IFNAMSIZ)) + return 0; + + err = sysfs_rename_link(br->ifobj, &p->kobj, + p->sysfs_name, p->dev->name); + if (err) + netdev_notice(br->dev, "unable to rename link %s to %s", + p->sysfs_name, p->dev->name); + else + strlcpy(p->sysfs_name, p->dev->name, IFNAMSIZ); + return err; } -- cgit v1.2.2 From e3826f1e946e7d2354943232f1457be1455a29e2 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Wed, 5 May 2010 00:27:06 +0000 Subject: net: reserve ports for applications using fixed port numbers (Dropped the infiniband part, because Tetsuo modified the related code, I will send a separate patch for it once this is accepted.) This patch introduces /proc/sys/net/ipv4/ip_local_reserved_ports which allows users to reserve ports for third-party applications. The reserved ports will not be used by automatic port assignments (e.g. when calling connect() or bind() with port number 0). Explicit port allocation behavior is unchanged. Signed-off-by: Octavian Purdila Signed-off-by: WANG Cong Cc: Neil Horman Cc: Eric Dumazet Cc: Eric W. Biederman Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 8 +++++++- net/ipv4/inet_connection_sock.c | 6 ++++++ net/ipv4/inet_hashtables.c | 2 ++ net/ipv4/sysctl_net_ipv4.c | 17 +++++++++++++++++ net/ipv4/udp.c | 3 ++- net/sctp/socket.c | 2 ++ 6 files changed, 36 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c6c43bcd1c6f..551ce564b035 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1573,9 +1573,13 @@ static int __init inet_init(void) BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)); + sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL); + if (!sysctl_local_reserved_ports) + goto out; + rc = proto_register(&tcp_prot, 1); if (rc) - goto out; + goto out_free_reserved_ports; rc = proto_register(&udp_prot, 1); if (rc) @@ -1674,6 +1678,8 @@ out_unregister_udp_proto: proto_unregister(&udp_prot); out_unregister_tcp_proto: proto_unregister(&tcp_prot); +out_free_reserved_ports: + kfree(sysctl_local_reserved_ports); goto out; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index e0a3e3537b14..70eb3507c406 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -37,6 +37,9 @@ struct local_ports sysctl_local_ports __read_mostly = { .range = { 32768, 61000 }, }; +unsigned long *sysctl_local_reserved_ports; +EXPORT_SYMBOL(sysctl_local_reserved_ports); + void inet_get_local_port_range(int *low, int *high) { unsigned seq; @@ -108,6 +111,8 @@ again: smallest_size = -1; do { + if (inet_is_reserved_local_port(rover)) + goto next_nolock; head = &hashinfo->bhash[inet_bhashfn(net, rover, hashinfo->bhash_size)]; spin_lock(&head->lock); @@ -130,6 +135,7 @@ again: break; next: spin_unlock(&head->lock); + next_nolock: if (++rover > high) rover = low; } while (--remaining > 0); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 2b79377b468d..d3e160a88219 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -456,6 +456,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, local_bh_disable(); for (i = 1; i <= remaining; i++) { port = low + (i + offset) % remaining; + if (inet_is_reserved_local_port(port)) + continue; head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)]; spin_lock(&head->lock); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 1cd5c15174b8..d96c1da4b17c 100644 --- 
a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -299,6 +299,13 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = ipv4_local_port_range, }, + { + .procname = "ip_local_reserved_ports", + .data = NULL, /* initialized in sysctl_ipv4_init */ + .maxlen = 65536, + .mode = 0644, + .proc_handler = proc_do_large_bitmap, + }, #ifdef CONFIG_IP_MULTICAST { .procname = "igmp_max_memberships", @@ -736,6 +743,16 @@ static __net_initdata struct pernet_operations ipv4_sysctl_ops = { static __init int sysctl_ipv4_init(void) { struct ctl_table_header *hdr; + struct ctl_table *i; + + for (i = ipv4_table; i->procname; i++) { + if (strcmp(i->procname, "ip_local_reserved_ports") == 0) { + i->data = sysctl_local_reserved_ports; + break; + } + } + if (!i->procname) + return -EINVAL; hdr = register_sysctl_paths(net_ipv4_ctl_path, ipv4_table); if (hdr == NULL) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f3e00c5cd1ed..9de6a698f91d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -233,7 +233,8 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, */ do { if (low <= snum && snum <= high && - !test_bit(snum >> udptable->log, bitmap)) + !test_bit(snum >> udptable->log, bitmap) && + !inet_is_reserved_local_port(snum)) goto found; snum += rand; } while (snum != first); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ba1add0b13c3..ca44917872d2 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5433,6 +5433,8 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) rover++; if ((rover < low) || (rover > high)) rover = low; + if (inet_is_reserved_local_port(rover)) + continue; index = sctp_phashfn(rover); head = &sctp_port_hashtable[index]; sctp_spin_lock(&head->lock); -- cgit v1.2.2 From a14462f1bd4d3962994f518459102000438665aa Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 6 May 2010 01:33:53 +0000 Subject: net: adjust handle_macvlan to pass port struct to hook Now there's null check here and also again in the hook. Looking at bridge bits which are simmilar, port structure is rcu_dereferenced right away in handle_bridge and passed to hook. Looks nicer. Signed-off-by: Jiri Pirko Acked-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/core/dev.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 3daee30a7c82..5cbba0927a8e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2612,7 +2612,8 @@ static inline struct sk_buff *handle_bridge(struct sk_buff *skb, #endif #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) -struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly; +struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *p, + struct sk_buff *skb) __read_mostly; EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook); static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, @@ -2620,14 +2621,17 @@ static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, int *ret, struct net_device *orig_dev) { - if (skb->dev->macvlan_port == NULL) + struct macvlan_port *port; + + port = rcu_dereference(skb->dev->macvlan_port); + if (!port) return skb; if (*pt_prev) { *ret = deliver_skb(skb, *pt_prev, orig_dev); *pt_prev = NULL; } - return macvlan_handle_frame_hook(skb); + return macvlan_handle_frame_hook(port, skb); } #else #define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb) -- cgit v1.2.2 From a1aa3483041bd3691c7f029272ccef4ce70bd957 Mon Sep 17 00:00:00 2001 From: Timo Teras Date: Sat, 15 May 2010 23:49:26 -0700 Subject: xfrm: fix policy unreferencing on larval drop I mistakenly had the error path to use num_pols to decide how many policies we need to drop (cruft from earlier patch set version which did not handle socket policies right). This is wrong since normally we do not keep explicit references (instead we hold reference to the cache entry which holds references to policies). drop_pols is set to num_pols if we are holding the references, so use that. Otherwise we eventually BUG_ON inside xfrm_policy_destroy due to premature policy deletion. Signed-off-by: Timo Teras Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 31f4ba43b48f..f4ea3a08e5a1 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1805,7 +1805,7 @@ restart: /* EREMOTE tells the caller to generate * a one-shot blackhole route. */ dst_release(dst); - xfrm_pols_put(pols, num_pols); + xfrm_pols_put(pols, drop_pols); XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); return -EREMOTE; } -- cgit v1.2.2 From 3b098e2d7c693796cc4dffb07caa249fc0f70771 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 15 May 2010 23:57:10 -0700 Subject: net: Consistent skb timestamping With RPS inclusion, skb timestamping is not consistent in RX path. If netif_receive_skb() is used, its deferred after RPS dispatch. If netif_rx() is used, its done before RPS dispatch. This can give strange tcpdump timestamps results. I think timestamping should be done as soon as possible in the receive path, to get meaningful values (ie timestamps taken at the time packet was delivered by NIC driver to our stack), even if NAPI already can defer timestamping a bit (RPS can help to reduce the gap) Tom Herbert prefer to sample timestamps after RPS dispatch. In case sampling is expensive (HPET/acpi_pm on x86), this makes sense. Let admins switch from one mode to another, using a new sysctl, /proc/sys/net/core/netdev_tstamp_prequeue Its default value (1), means timestamps are taken as soon as possible, before backlog queueing, giving accurate timestamps. 
Setting a 0 value permits to sample timestamps when processing backlog, after RPS dispatch, to lower the load of the pre-RPS cpu. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 50 ++++++++++++++++++++++++++++------------------ net/core/sysctl_net_core.c | 7 +++++++ 2 files changed, 38 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 5cbba0927a8e..988e42912e72 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1454,7 +1454,7 @@ void net_disable_timestamp(void) } EXPORT_SYMBOL(net_disable_timestamp); -static inline void net_timestamp(struct sk_buff *skb) +static inline void net_timestamp_set(struct sk_buff *skb) { if (atomic_read(&netstamp_needed)) __net_timestamp(skb); @@ -1462,6 +1462,12 @@ static inline void net_timestamp(struct sk_buff *skb) skb->tstamp.tv64 = 0; } +static inline void net_timestamp_check(struct sk_buff *skb) +{ + if (!skb->tstamp.tv64 && atomic_read(&netstamp_needed)) + __net_timestamp(skb); +} + /** * dev_forward_skb - loopback an skb to another netif * @@ -1508,9 +1514,9 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) #ifdef CONFIG_NET_CLS_ACT if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS))) - net_timestamp(skb); + net_timestamp_set(skb); #else - net_timestamp(skb); + net_timestamp_set(skb); #endif rcu_read_lock(); @@ -2201,6 +2207,7 @@ EXPORT_SYMBOL(dev_queue_xmit); =======================================================================*/ int netdev_max_backlog __read_mostly = 1000; +int netdev_tstamp_prequeue __read_mostly = 1; int netdev_budget __read_mostly = 300; int weight_p __read_mostly = 64; /* old backlog weight */ @@ -2465,8 +2472,8 @@ int netif_rx(struct sk_buff *skb) if (netpoll_rx(skb)) return NET_RX_DROP; - if (!skb->tstamp.tv64) - net_timestamp(skb); + if (netdev_tstamp_prequeue) + net_timestamp_check(skb); #ifdef CONFIG_RPS { @@ -2791,8 +2798,8 @@ static int __netif_receive_skb(struct sk_buff *skb) int ret = NET_RX_DROP; __be16 type; - if (!skb->tstamp.tv64) - net_timestamp(skb); + if (!netdev_tstamp_prequeue) + net_timestamp_check(skb); if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) return NET_RX_SUCCESS; @@ -2910,23 +2917,28 @@ out: */ int netif_receive_skb(struct sk_buff *skb) { + if (netdev_tstamp_prequeue) + net_timestamp_check(skb); + #ifdef CONFIG_RPS - struct rps_dev_flow voidflow, *rflow = &voidflow; - int cpu, ret; + { + struct rps_dev_flow voidflow, *rflow = &voidflow; + int cpu, ret; - rcu_read_lock(); + rcu_read_lock(); + + cpu = get_rps_cpu(skb->dev, skb, &rflow); - cpu = get_rps_cpu(skb->dev, skb, &rflow); + if (cpu >= 0) { + ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); + rcu_read_unlock(); + } else { + rcu_read_unlock(); + ret = __netif_receive_skb(skb); + } - if (cpu >= 0) { - ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); - rcu_read_unlock(); - } else { - rcu_read_unlock(); - ret = __netif_receive_skb(skb); + return ret; } - - return ret; #else return __netif_receive_skb(skb); #endif diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index dcc7d25996ab..01eee5d984be 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -121,6 +121,13 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "netdev_tstamp_prequeue", + .data = &netdev_tstamp_prequeue, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { .procname = "message_cost", .data = 
&net_ratelimit_state.interval, -- cgit v1.2.2 From 35790c0421121364883a167bab8a2e37e1f67f78 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 16 May 2010 00:34:04 -0700 Subject: tcp: fix MD5 (RFC2385) support TCP MD5 support uses percpu data for temporary storage. It currently disables preemption so that same storage cannot be reclaimed by another thread on same cpu. We also have to make sure a softirq handler wont try to use also same context. Various bug reports demonstrated corruptions. Fix is to disable preemption and BH. Reported-by: Bhaskar Dutta Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0f8caf64caa3..296150b2a62f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2839,7 +2839,6 @@ static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool * __percpu *pool) if (p->md5_desc.tfm) crypto_free_hash(p->md5_desc.tfm); kfree(p); - p = NULL; } } free_percpu(pool); @@ -2937,25 +2936,40 @@ retry: EXPORT_SYMBOL(tcp_alloc_md5sig_pool); -struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu) + +/** + * tcp_get_md5sig_pool - get md5sig_pool for this user + * + * We use percpu structure, so if we succeed, we exit with preemption + * and BH disabled, to make sure another thread or softirq handling + * wont try to get same context. + */ +struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) { struct tcp_md5sig_pool * __percpu *p; - spin_lock_bh(&tcp_md5sig_pool_lock); + + local_bh_disable(); + + spin_lock(&tcp_md5sig_pool_lock); p = tcp_md5sig_pool; if (p) tcp_md5sig_users++; - spin_unlock_bh(&tcp_md5sig_pool_lock); - return (p ? *per_cpu_ptr(p, cpu) : NULL); -} + spin_unlock(&tcp_md5sig_pool_lock); + + if (p) + return *per_cpu_ptr(p, smp_processor_id()); -EXPORT_SYMBOL(__tcp_get_md5sig_pool); + local_bh_enable(); + return NULL; +} +EXPORT_SYMBOL(tcp_get_md5sig_pool); -void __tcp_put_md5sig_pool(void) +void tcp_put_md5sig_pool(void) { + local_bh_enable(); tcp_free_md5sig_pool(); } - -EXPORT_SYMBOL(__tcp_put_md5sig_pool); +EXPORT_SYMBOL(tcp_put_md5sig_pool); int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, struct tcphdr *th) -- cgit v1.2.2 From a465419b1febb603821f924805529cff89cafeed Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 16 May 2010 00:36:33 -0700 Subject: net: Introduce sk_route_nocaps TCP-MD5 sessions have intermittent failures, when route cache is invalidated. ip_queue_xmit() has to find a new route, calls sk_setup_caps(sk, &rt->u.dst), destroying the sk->sk_route_caps &= ~NETIF_F_GSO_MASK that MD5 desperately try to make all over its way (from tcp_transmit_skb() for example) So we send few bad packets, and everything is fine when tcp_transmit_skb() is called again for this socket. Since ip_queue_xmit() is at a lower level than TCP-MD5, I chose to use a socket field, sk_route_nocaps, containing bits to mask on sk_route_caps. Reported-by: Bhaskar Dutta Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/sock.c | 1 + net/ipv4/tcp_ipv4.c | 6 +++--- net/ipv4/tcp_output.c | 2 +- net/ipv6/tcp_ipv6.c | 4 ++-- 4 files changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 94c4affdda9b..63530a03b8c2 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1231,6 +1231,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) sk->sk_route_caps = dst->dev->features; if (sk->sk_route_caps & NETIF_F_GSO) sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; + sk->sk_route_caps &= ~sk->sk_route_nocaps; if (sk_can_gso(sk)) { if (dst->header_len) { sk->sk_route_caps &= ~NETIF_F_GSO_MASK; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 771f8146a2e5..202cf09c4cd4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -891,7 +891,7 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, kfree(newkey); return -ENOMEM; } - sk->sk_route_caps &= ~NETIF_F_GSO_MASK; + sk_nocaps_add(sk, NETIF_F_GSO_MASK); } if (tcp_alloc_md5sig_pool(sk) == NULL) { kfree(newkey); @@ -1021,7 +1021,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, return -EINVAL; tp->md5sig_info = p; - sk->sk_route_caps &= ~NETIF_F_GSO_MASK; + sk_nocaps_add(sk, NETIF_F_GSO_MASK); } newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation); @@ -1462,7 +1462,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (newkey != NULL) tcp_v4_md5_do_add(newsk, newinet->inet_daddr, newkey, key->keylen); - newsk->sk_route_caps &= ~NETIF_F_GSO_MASK; + sk_nocaps_add(newsk, NETIF_F_GSO_MASK); } #endif diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5db3a2c6cb33..18a3302480cb 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -873,7 +873,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, #ifdef CONFIG_TCP_MD5SIG /* Calculate the MD5 hash, as we have all we need now */ if (md5) { - sk->sk_route_caps &= ~NETIF_F_GSO_MASK; + sk_nocaps_add(sk, NETIF_F_GSO_MASK); tp->af_specific->calc_md5_hash(opts.hash_location, md5, sk, NULL, skb); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6603511e3673..2b7c3a100e2c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -604,7 +604,7 @@ static int tcp_v6_md5_do_add(struct sock *sk, struct in6_addr *peer, kfree(newkey); return -ENOMEM; } - sk->sk_route_caps &= ~NETIF_F_GSO_MASK; + sk_nocaps_add(sk, NETIF_F_GSO_MASK); } if (tcp_alloc_md5sig_pool(sk) == NULL) { kfree(newkey); @@ -741,7 +741,7 @@ static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval, return -ENOMEM; tp->md5sig_info = p; - sk->sk_route_caps &= ~NETIF_F_GSO_MASK; + sk_nocaps_add(sk, NETIF_F_GSO_MASK); } newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); -- cgit v1.2.2 From 2d6c9ffcca7808f42ba6b953da0ba60e19a9cbbd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 May 2010 04:51:02 +0000 Subject: net: congestion notifications are not dropped packets vlan/macvlan start_xmit() can inform caller of congestion with NET_XMIT_CN return value. This doesnt mean packet was dropped. Increment normal stat counters instead of tx_dropped. Signed-off-by: Eric Dumazet Acked-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/8021q/vlan_dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index b5249c5fd4d3..55be90826f5f 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -327,7 +327,7 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, len = skb->len; ret = dev_queue_xmit(skb); - if (likely(ret == NET_XMIT_SUCCESS)) { + if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { txq->tx_packets++; txq->tx_bytes += len; } else @@ -353,7 +353,7 @@ static netdev_tx_t vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, len = skb->len; ret = dev_queue_xmit(skb); - if (likely(ret == NET_XMIT_SUCCESS)) { + if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { txq->tx_packets++; txq->tx_bytes += len; } else -- cgit v1.2.2 From 55fa0cfd7c3ac2ae34cac7dca2e3fbcfe661e6c3 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 9 May 2010 16:56:07 +0000 Subject: sctp: delete active ICMP proto unreachable timer when free transport transport may be free before ICMP proto unreachable timer expire, so we should delete active ICMP proto unreachable timer when transport is going away. Signed-off-by: Wei Yongjun Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/transport.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 4a368038d46f..165d54e07fcd 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -173,6 +173,10 @@ void sctp_transport_free(struct sctp_transport *transport) del_timer(&transport->T3_rtx_timer)) sctp_transport_put(transport); + /* Delete the ICMP proto unreachable timer if it's active. */ + if (timer_pending(&transport->proto_unreach_timer) && + del_timer(&transport->proto_unreach_timer)) + sctp_association_put(transport->asoc); sctp_transport_put(transport); } -- cgit v1.2.2 From c02db8c6290bb992442fec1407643c94cc414375 Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Sun, 16 May 2010 01:05:45 -0700 Subject: rtnetlink: make SR-IOV VF interface symmetric Now we have a set of nested attributes: IFLA_VFINFO_LIST (NESTED) IFLA_VF_INFO (NESTED) IFLA_VF_MAC IFLA_VF_VLAN IFLA_VF_TX_RATE This allows a single set to operate on multiple attributes if desired. Among other things, it means a dump can be replayed to set state. The current interface has yet to be released, so this seems like something to consider for 2.6.34. Signed-off-by: Chris Wright Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 159 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 110 insertions(+), 49 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index fe776c9ddeca..31e85d327aa2 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -602,12 +602,19 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, a->tx_compressed = b->tx_compressed; }; +/* All VF info */ static inline int rtnl_vfinfo_size(const struct net_device *dev) { - if (dev->dev.parent && dev_is_pci(dev->dev.parent)) - return dev_num_vf(dev->dev.parent) * - sizeof(struct ifla_vf_info); - else + if (dev->dev.parent && dev_is_pci(dev->dev.parent)) { + + int num_vfs = dev_num_vf(dev->dev.parent); + size_t size = nlmsg_total_size(sizeof(struct nlattr)); + size += nlmsg_total_size(num_vfs * sizeof(struct nlattr)); + size += num_vfs * (sizeof(struct ifla_vf_mac) + + sizeof(struct ifla_vf_vlan) + + sizeof(struct ifla_vf_tx_rate)); + return size; + } else return 0; } @@ -629,7 +636,7 @@ static inline size_t if_nlmsg_size(const struct net_device *dev) + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ + nla_total_size(4) /* IFLA_NUM_VF */ - + nla_total_size(rtnl_vfinfo_size(dev)) /* IFLA_VFINFO */ + + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */ + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ } @@ -700,14 +707,37 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) { int i; - struct ifla_vf_info ivi; - NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)); - for (i = 0; i < dev_num_vf(dev->dev.parent); i++) { + struct nlattr *vfinfo, *vf; + int num_vfs = dev_num_vf(dev->dev.parent); + + NLA_PUT_U32(skb, IFLA_NUM_VF, num_vfs); + vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST); + if (!vfinfo) + goto nla_put_failure; + for (i = 0; i < num_vfs; i++) { + struct ifla_vf_info ivi; + struct ifla_vf_mac vf_mac; + struct ifla_vf_vlan vf_vlan; + struct ifla_vf_tx_rate vf_tx_rate; if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi)) break; - NLA_PUT(skb, IFLA_VFINFO, sizeof(ivi), &ivi); + vf_mac.vf = vf_vlan.vf = vf_tx_rate.vf = ivi.vf; + memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); + vf_vlan.vlan = ivi.vlan; + vf_vlan.qos = ivi.qos; + vf_tx_rate.rate = ivi.tx_rate; + vf = nla_nest_start(skb, IFLA_VF_INFO); + if (!vf) { + nla_nest_cancel(skb, vfinfo); + goto nla_put_failure; + } + NLA_PUT(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac); + NLA_PUT(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan); + NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate); + nla_nest_end(skb, vf); } + nla_nest_end(skb, vfinfo); } if (dev->rtnl_link_ops) { if (rtnl_link_fill(skb, dev) < 0) @@ -769,12 +799,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_LINKINFO] = { .type = NLA_NESTED }, [IFLA_NET_NS_PID] = { .type = NLA_U32 }, [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, - [IFLA_VF_MAC] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_mac) }, - [IFLA_VF_VLAN] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_vlan) }, - [IFLA_VF_TX_RATE] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_tx_rate) }, + [IFLA_VFINFO_LIST] = {. 
type = NLA_NESTED }, }; EXPORT_SYMBOL(ifla_policy); @@ -783,6 +808,19 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { [IFLA_INFO_DATA] = { .type = NLA_NESTED }, }; +static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = { + [IFLA_VF_INFO] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { + [IFLA_VF_MAC] = { .type = NLA_BINARY, + .len = sizeof(struct ifla_vf_mac) }, + [IFLA_VF_VLAN] = { .type = NLA_BINARY, + .len = sizeof(struct ifla_vf_vlan) }, + [IFLA_VF_TX_RATE] = { .type = NLA_BINARY, + .len = sizeof(struct ifla_vf_tx_rate) }, +}; + struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) { struct net *net; @@ -812,6 +850,52 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) return 0; } +static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) +{ + int rem, err = -EINVAL; + struct nlattr *vf; + const struct net_device_ops *ops = dev->netdev_ops; + + nla_for_each_nested(vf, attr, rem) { + switch (nla_type(vf)) { + case IFLA_VF_MAC: { + struct ifla_vf_mac *ivm; + ivm = nla_data(vf); + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_mac) + err = ops->ndo_set_vf_mac(dev, ivm->vf, + ivm->mac); + break; + } + case IFLA_VF_VLAN: { + struct ifla_vf_vlan *ivv; + ivv = nla_data(vf); + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_vlan) + err = ops->ndo_set_vf_vlan(dev, ivv->vf, + ivv->vlan, + ivv->qos); + break; + } + case IFLA_VF_TX_RATE: { + struct ifla_vf_tx_rate *ivt; + ivt = nla_data(vf); + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_tx_rate) + err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, + ivt->rate); + break; + } + default: + err = -EINVAL; + break; + } + if (err) + break; + } + return err; +} + static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct nlattr **tb, char *ifname, int modified) { @@ -942,40 +1026,17 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, write_unlock_bh(&dev_base_lock); } - if (tb[IFLA_VF_MAC]) { - struct ifla_vf_mac *ivm; - ivm = nla_data(tb[IFLA_VF_MAC]); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_mac) - err = ops->ndo_set_vf_mac(dev, ivm->vf, ivm->mac); - if (err < 0) - goto errout; - modified = 1; - } - - if (tb[IFLA_VF_VLAN]) { - struct ifla_vf_vlan *ivv; - ivv = nla_data(tb[IFLA_VF_VLAN]); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_vlan) - err = ops->ndo_set_vf_vlan(dev, ivv->vf, - ivv->vlan, - ivv->qos); - if (err < 0) - goto errout; - modified = 1; - } - err = 0; - - if (tb[IFLA_VF_TX_RATE]) { - struct ifla_vf_tx_rate *ivt; - ivt = nla_data(tb[IFLA_VF_TX_RATE]); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_tx_rate) - err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, ivt->rate); - if (err < 0) - goto errout; - modified = 1; + if (tb[IFLA_VFINFO_LIST]) { + struct nlattr *attr; + int rem; + nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) { + if (nla_type(attr) != IFLA_VF_INFO) + goto errout; + err = do_setvfinfo(dev, attr); + if (err < 0) + goto errout; + modified = 1; + } } err = 0; -- cgit v1.2.2 From d79b6f4de5db0103ceb4734e42ad101d836d61d9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 30 Mar 2010 07:27:50 +0200 Subject: procfs: Push down the bkl from ioctl Push down the bkl from procfs's ioctl main handler to its users. Only three procfs users implement an ioctl (non unlocked) handler. Turn them into unlocked_ioctl and push down the Devil inside. 
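For readers less used to the two entry points: .ioctl is invoked by the VFS with the BKL already held, while .unlocked_ioctl is invoked without it, so the pushdown is just a thin wrapper that takes the lock itself. A minimal, hypothetical sketch of that pattern (the foo_* names are illustrative, not from the patch):

```c
#include <linux/fs.h>
#include <linux/smp_lock.h>	/* lock_kernel()/unlock_kernel() */

/* Stand-in for an existing handler written under the old .ioctl rules,
 * i.e. one that assumes the BKL is already held when it runs. */
static int foo_ioctl(struct inode *inode, struct file *filp,
		     unsigned int cmd, unsigned long arg)
{
	/* existing handler body, elided */
	return 0;
}

/* New entry point: called without the BKL, so take it here and keep
 * the pushdown local to this code instead of in the VFS. */
static long foo_unlocked_ioctl(struct file *filp, unsigned int cmd,
			       unsigned long arg)
{
	struct inode *inode = filp->f_path.dentry->d_inode;
	long ret;

	lock_kernel();
	ret = foo_ioctl(inode, filp, cmd, arg);
	unlock_kernel();

	return ret;
}

static const struct file_operations foo_fops = {
	.unlocked_ioctl	= foo_unlocked_ioctl,	/* was: .ioctl = foo_ioctl */
};
```

The sunrpc cache conversion in the hunk below follows exactly this shape, keeping the BKL taken around cache_ioctl() inside the new wrapper.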
v2: PDE(inode)->data doesn't need to be under bkl v3: And don't forget to git-add the result v4: Use wrappers to pushdown instead of an invasive and error prone handlers surgery. Signed-off-by: Frederic Weisbecker Acked-by: Arnd Bergmann Cc: Thomas Gleixner Cc: Andrew Morton Cc: Ingo Molnar Cc: John Kacur Cc: KAMEZAWA Hiroyuki Cc: Al Viro Cc: Alexey Dobriyan --- net/sunrpc/cache.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 39bddba53ba1..95690a8a4d80 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1331,12 +1331,18 @@ static unsigned int cache_poll_procfs(struct file *filp, poll_table *wait) return cache_poll(filp, wait, cd); } -static int cache_ioctl_procfs(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) +static long cache_ioctl_procfs(struct file *filp, + unsigned int cmd, unsigned long arg) { + long ret; + struct inode *inode = filp->f_path.dentry->d_inode; struct cache_detail *cd = PDE(inode)->data; - return cache_ioctl(inode, filp, cmd, arg, cd); + lock_kernel(); + ret = cache_ioctl(inode, filp, cmd, arg, cd); + unlock_kernel(); + + return ret; } static int cache_open_procfs(struct inode *inode, struct file *filp) @@ -1359,7 +1365,7 @@ static const struct file_operations cache_file_operations_procfs = { .read = cache_read_procfs, .write = cache_write_procfs, .poll = cache_poll_procfs, - .ioctl = cache_ioctl_procfs, /* for FIONREAD */ + .unlocked_ioctl = cache_ioctl_procfs, /* for FIONREAD */ .open = cache_open_procfs, .release = cache_release_procfs, }; -- cgit v1.2.2 From 99df95a22f7cfcf85405d4edc07c2d953542f0dd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 13 Apr 2010 22:46:36 +0200 Subject: sunrpc: Include missing smp_lock.h Now that cache_ioctl_procfs() calls the bkl explicitly, we need to include the relevant header as well. This fixes the following build error: net/sunrpc/cache.c: In function 'cache_ioctl_procfs': net/sunrpc/cache.c:1355: error: implicit declaration of function 'lock_kernel' net/sunrpc/cache.c:1359: error: implicit declaration of function 'unlock_kernel' Reported-by: Stephen Rothwell Signed-off-by: Frederic Weisbecker --- net/sunrpc/cache.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 95690a8a4d80..77970fe8bff2 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.2 From 0771275b25e84f568699a5d209c5e339ef83fa94 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 7 May 2010 11:31:33 +0000 Subject: ipv6 addrlabel: permit deletion of labels assigned to removed dev as addrlabels with an interface index are left alone when the interface gets removed this results in addrlabels that can no longer be removed. Restrict validation of index to adding new addrlabels. Signed-off-by: Florian Westphal Signed-off-by: David S. 
Miller --- net/ipv6/addrlabel.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index ae404c9a746c..8c4348cb1950 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -422,10 +422,6 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, ifal->ifal_prefixlen > 128) return -EINVAL; - if (ifal->ifal_index && - !__dev_get_by_index(net, ifal->ifal_index)) - return -EINVAL; - if (!tb[IFAL_ADDRESS]) return -EINVAL; @@ -441,6 +437,10 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, switch(nlh->nlmsg_type) { case RTM_NEWADDRLABEL: + if (ifal->ifal_index && + !__dev_get_by_index(net, ifal->ifal_index)) + return -EINVAL; + err = ip6addrlbl_add(net, pfx, ifal->ifal_prefixlen, ifal->ifal_index, label, nlh->nlmsg_flags & NLM_F_REPLACE); -- cgit v1.2.2 From ebda37c27d0c768947e9b058332d7ea798210cf8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 May 2010 23:51:21 +0000 Subject: rps: avoid one atomic in enqueue_to_backlog If CONFIG_SMP=y, then we own a queue spinlock, we can avoid the atomic test_and_set_bit() from napi_schedule_prep(). We now have same number of atomic ops per netif_rx() calls than with pre-RPS kernel. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 988e42912e72..cdcb9cbedf41 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2432,8 +2432,10 @@ enqueue: return NET_RX_SUCCESS; } - /* Schedule NAPI for backlog device */ - if (napi_schedule_prep(&sd->backlog)) { + /* Schedule NAPI for backlog device + * We can use non atomic operation since we own the queue lock + */ + if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) { if (!rps_ipi_queued(sd)) ____napi_schedule(sd, &sd->backlog); } -- cgit v1.2.2 From 7fee226ad2397b635e2fd565a59ca3ae08a164cd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 May 2010 23:19:48 +0000 Subject: net: add a noref bit on skb dst Use low order bit of skb->_skb_dst to tell dst is not refcounted. Change _skb_dst to _skb_refdst to make sure all uses are catched. skb_dst() returns the dst, regardless of noref bit set or not, but with a lockdep check to make sure a noref dst is not given if current user is not rcu protected. New skb_dst_set_noref() helper to set an notrefcounted dst on a skb. (with lockdep check) skb_dst_drop() drops a reference only if skb dst was refcounted. skb_dst_force() helper is used to force a refcount on dst, when skb is queued and not anymore RCU protected. Use skb_dst_force() in __sk_add_backlog(), __dev_xmit_skb() if !IFF_XMIT_DST_RELEASE or skb enqueued on qdisc queue, in sock_queue_rcv_skb(), in __nf_queue(). Use skb_dst_force() in dev_requeue_skb(). Note: dst_use_noref() still dirties dst, we might transform it later to do one dirtying per jiffies. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/dev.c | 3 +++ net/core/skbuff.c | 2 +- net/core/sock.c | 6 ++++++ net/ipv4/icmp.c | 6 +++--- net/ipv4/ip_options.c | 9 +++++---- net/ipv4/netfilter.c | 6 +++--- net/ipv4/route.c | 2 +- net/netfilter/nf_queue.c | 2 ++ net/sched/sch_generic.c | 4 +++- 9 files changed, 27 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index cdcb9cbedf41..6c820650b80f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2052,6 +2052,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, * waiting to be sent out; and the qdisc is not running - * xmit the skb directly. */ + if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) + skb_dst_force(skb); __qdisc_update_bstats(q, skb->len); if (sch_direct_xmit(skb, q, dev, txq, root_lock)) __qdisc_run(q); @@ -2060,6 +2062,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, rc = NET_XMIT_SUCCESS; } else { + skb_dst_force(skb); rc = qdisc_enqueue_root(skb, q); qdisc_run(q); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a9b0e1f77806..c543dd252433 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -520,7 +520,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->transport_header = old->transport_header; new->network_header = old->network_header; new->mac_header = old->mac_header; - skb_dst_set(new, dst_clone(skb_dst(old))); + skb_dst_copy(new, old); new->rxhash = old->rxhash; #ifdef CONFIG_XFRM new->sp = secpath_get(old->sp); diff --git a/net/core/sock.c b/net/core/sock.c index 63530a03b8c2..bf88a167c8f2 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -307,6 +307,11 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) */ skb_len = skb->len; + /* we escape from rcu protected region, make sure we dont leak + * a norefcounted dst + */ + skb_dst_force(skb); + spin_lock_irqsave(&list->lock, flags); skb->dropcount = atomic_read(&sk->sk_drops); __skb_queue_tail(list, skb); @@ -1536,6 +1541,7 @@ static void __release_sock(struct sock *sk) do { struct sk_buff *next = skb->next; + WARN_ON_ONCE(skb_dst_is_noref(skb)); skb->next = NULL; sk_backlog_rcv(sk, skb); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index f3d339f728b0..d65e9215bcd7 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -587,20 +587,20 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) err = __ip_route_output_key(net, &rt2, &fl); else { struct flowi fl2 = {}; - struct dst_entry *odst; + unsigned long orefdst; fl2.fl4_dst = fl.fl4_src; if (ip_route_output_key(net, &rt2, &fl2)) goto relookup_failed; /* Ugh! 
*/ - odst = skb_dst(skb_in); + orefdst = skb_in->_skb_refdst; /* save old refdst */ err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src, RT_TOS(tos), rt2->u.dst.dev); dst_release(&rt2->u.dst); rt2 = skb_rtable(skb_in); - skb_dst_set(skb_in, odst); + skb_in->_skb_refdst = orefdst; /* restore old refdst */ } if (err) diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 4c09a31fd140..3244133c24f6 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -601,6 +601,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) unsigned char *optptr = skb_network_header(skb) + opt->srr; struct rtable *rt = skb_rtable(skb); struct rtable *rt2; + unsigned long orefdst; int err; if (!opt->srr) @@ -624,16 +625,16 @@ int ip_options_rcv_srr(struct sk_buff *skb) } memcpy(&nexthop, &optptr[srrptr-1], 4); - rt = skb_rtable(skb); + orefdst = skb->_skb_refdst; skb_dst_set(skb, NULL); err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev); rt2 = skb_rtable(skb); if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) { - ip_rt_put(rt2); - skb_dst_set(skb, &rt->u.dst); + skb_dst_drop(skb); + skb->_skb_refdst = orefdst; return -EINVAL; } - ip_rt_put(rt); + refdst_drop(orefdst); if (rt2->rt_type != RTN_LOCAL) break; /* Superfast 8) loopback forward */ diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 82fb43c5c59e..07de855e2175 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -17,7 +17,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; struct flowi fl = {}; - struct dst_entry *odst; + unsigned long orefdst; unsigned int hh_len; unsigned int type; @@ -51,14 +51,14 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) if (ip_route_output_key(net, &rt, &fl) != 0) return -1; - odst = skb_dst(skb); + orefdst = skb->_skb_refdst; if (ip_route_input(skb, iph->daddr, iph->saddr, RT_TOS(iph->tos), rt->u.dst.dev) != 0) { dst_release(&rt->u.dst); return -1; } dst_release(&rt->u.dst); - dst_release(odst); + refdst_drop(orefdst); } if (skb_dst(skb)->error) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index dea3f9264250..705eccfb4769 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3033,7 +3033,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; if (rt_is_expired(rt)) continue; - skb_dst_set(skb, dst_clone(&rt->u.dst)); + skb_dst_set_noref(skb, &rt->u.dst); if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1, NLM_F_MULTI) <= 0) { diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 0b1103c0b1f3..78b3cf9c519c 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "nf_internals.h" @@ -170,6 +171,7 @@ static int __nf_queue(struct sk_buff *skb, dev_hold(physoutdev); } #endif + skb_dst_force(skb); afinfo->saveroute(skb, entry); status = qh->outfn(entry, queuenum); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index a969b111bd76..a63029ef3edd 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -26,6 +26,7 @@ #include #include #include +#include /* Main transmission queue. 
*/ @@ -40,6 +41,7 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { + skb_dst_force(skb); q->gso_skb = skb; q->qstats.requeues++; q->q.qlen++; /* it's still part of the queue */ @@ -179,7 +181,7 @@ static inline int qdisc_restart(struct Qdisc *q) skb = dequeue_skb(q); if (unlikely(!skb)) return 0; - + WARN_ON_ONCE(skb_dst_is_noref(skb)); root_lock = qdisc_lock(q); dev = qdisc_dev(q); txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); -- cgit v1.2.2 From 407eadd996dc62a827db85f1d0c286a98fd5d336 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 May 2010 11:32:55 +0000 Subject: net: implements ip_route_input_noref() ip_route_input() is the version returning a refcounted dst, while ip_route_input_noref() returns a non refcounted one. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 705eccfb4769..560acc677ce4 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2277,8 +2277,8 @@ martian_source: goto e_inval; } -int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev) +int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, + u8 tos, struct net_device *dev, bool noref) { struct rtable * rth; unsigned hash; @@ -2304,10 +2304,15 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->fl.mark == skb->mark && net_eq(dev_net(rth->u.dst.dev), net) && !rt_is_expired(rth)) { - dst_use(&rth->u.dst, jiffies); + if (noref) { + dst_use_noref(&rth->u.dst, jiffies); + skb_dst_set_noref(skb, &rth->u.dst); + } else { + dst_use(&rth->u.dst, jiffies); + skb_dst_set(skb, &rth->u.dst); + } RT_CACHE_STAT_INC(in_hit); rcu_read_unlock(); - skb_dst_set(skb, &rth->u.dst); return 0; } RT_CACHE_STAT_INC(in_hlist_search); @@ -2350,6 +2355,7 @@ skip_cache: } return ip_route_input_slow(skb, daddr, saddr, tos, dev); } +EXPORT_SYMBOL(ip_route_input_common); static int __mkroute_output(struct rtable **result, struct fib_result *res, @@ -3361,5 +3367,4 @@ void __init ip_static_sysctl_init(void) #endif EXPORT_SYMBOL(__ip_select_ident); -EXPORT_SYMBOL(ip_route_input); EXPORT_SYMBOL(ip_route_output_key); -- cgit v1.2.2 From 4a94445c9a5cf5461fb41d80040033b9a8e2a85a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 May 2010 11:33:06 +0000 Subject: net: Use ip_route_input_noref() in input path Use ip_route_input_noref() in ip fast path, to avoid two atomic ops per incoming packet. Note: loopback is excluded from this optimization in ip_rcv_finish() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/arp.c | 2 +- net/ipv4/ip_input.c | 4 ++-- net/ipv4/xfrm4_input.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 80769f1f9fab..f094b75810db 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -854,7 +854,7 @@ static int arp_process(struct sk_buff *skb) } if (arp->ar_op == htons(ARPOP_REQUEST) && - ip_route_input(skb, tip, sip, 0, dev) == 0) { + ip_route_input_noref(skb, tip, sip, 0, dev) == 0) { rt = skb_rtable(skb); addr_type = rt->rt_type; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index af76de5f76de..d930dc5e4d85 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -331,8 +331,8 @@ static int ip_rcv_finish(struct sk_buff *skb) * how the packet travels inside Linux networking. 
*/ if (skb_dst(skb) == NULL) { - int err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, - skb->dev); + int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, + iph->tos, skb->dev); if (unlikely(err)) { if (err == -EHOSTUNREACH) IP_INC_STATS_BH(dev_net(skb->dev), diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index abcd7ed65db1..ad8fbb871aa0 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -27,8 +27,8 @@ static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb) if (skb_dst(skb) == NULL) { const struct iphdr *iph = ip_hdr(skb); - if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, - skb->dev)) + if (ip_route_input_noref(skb, iph->daddr, iph->saddr, + iph->tos, skb->dev)) goto drop; } return dst_input(skb); -- cgit v1.2.2 From ab6e3feba1f1bc3b9418b854da6f481408d243de Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 May 2010 11:31:49 +0000 Subject: net: No dst refcounting in ip_queue_xmit() TCP outgoing packets can avoid two atomic ops, and dirtying of previously higly contended cache line using new refdst infrastructure. Note 1: loopback device excluded because of !IFF_XMIT_DST_RELEASE Note 2: UDP packets dsts are built before ip_queue_xmit(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 252897443ef9..9a4a6c96cb0d 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -318,10 +318,12 @@ int ip_queue_xmit(struct sk_buff *skb) struct ip_options *opt = inet->opt; struct rtable *rt; struct iphdr *iph; + int res; /* Skip all of this if the packet is already routed, * f.e. by something like SCTP. */ + rcu_read_lock(); rt = skb_rtable(skb); if (rt != NULL) goto packet_routed; @@ -359,7 +361,7 @@ int ip_queue_xmit(struct sk_buff *skb) } sk_setup_caps(sk, &rt->u.dst); } - skb_dst_set(skb, dst_clone(&rt->u.dst)); + skb_dst_set_noref(skb, &rt->u.dst); packet_routed: if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) @@ -391,9 +393,12 @@ packet_routed: skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - return ip_local_out(skb); + res = ip_local_out(skb); + rcu_read_unlock(); + return res; no_route: + rcu_read_unlock(); IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EHOSTUNREACH; -- cgit v1.2.2 From cb863ffd4a04f9f8619f52c01d472a64ccc716bd Mon Sep 17 00:00:00 2001 From: andrew hendry Date: Sun, 16 May 2010 22:59:41 +0000 Subject: X25: Move qbit flag to bitfield Moves the X25 q bit flag from char into a bitfield to allow BKL cleanup. Signed-off-by: Andrew Hendry Signed-off-by: David S. Miller --- net/x25/af_x25.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 296e65e01064..720534ce1fd9 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -465,7 +465,10 @@ static int x25_setsockopt(struct socket *sock, int level, int optname, if (get_user(opt, (int __user *)optval)) goto out; - x25_sk(sk)->qbitincl = !!opt; + if (opt) + set_bit(X25_Q_BIT_FLAG, &x25_sk(sk)->flags); + else + clear_bit(X25_Q_BIT_FLAG, &x25_sk(sk)->flags); rc = 0; out: unlock_kernel(); @@ -496,7 +499,7 @@ static int x25_getsockopt(struct socket *sock, int level, int optname, if (put_user(len, optlen)) goto out; - val = x25_sk(sk)->qbitincl; + val = test_bit(X25_Q_BIT_FLAG, &x25_sk(sk)->flags); rc = copy_to_user(optval, &val, len) ? 
-EFAULT : 0; out: unlock_kernel(); @@ -632,8 +635,8 @@ static struct sock *x25_make_new(struct sock *osk) x25->t22 = ox25->t22; x25->t23 = ox25->t23; x25->t2 = ox25->t2; + x25->flags = ox25->flags; x25->facilities = ox25->facilities; - x25->qbitincl = ox25->qbitincl; x25->dte_facilities = ox25->dte_facilities; x25->cudmatchlength = ox25->cudmatchlength; x25->accptapprv = ox25->accptapprv; @@ -1186,7 +1189,7 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock, * If the Q BIT Include socket option is in force, the first * byte of the user data is the logical value of the Q Bit. */ - if (x25->qbitincl) { + if (test_bit(X25_Q_BIT_FLAG, &x25->flags)) { qbit = skb->data[0]; skb_pull(skb, 1); } @@ -1242,7 +1245,7 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock, len = rc; if (rc < 0) kfree_skb(skb); - else if (x25->qbitincl) + else if (test_bit(X25_Q_BIT_FLAG, &x25->flags)) len++; } @@ -1307,7 +1310,7 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, /* * No Q bit information on Interrupt data. */ - if (x25->qbitincl) { + if (test_bit(X25_Q_BIT_FLAG, &x25->flags)) { asmptr = skb_push(skb, 1); *asmptr = 0x00; } @@ -1325,7 +1328,7 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, skb_pull(skb, x25->neighbour->extended ? X25_EXT_MIN_LEN : X25_STD_MIN_LEN); - if (x25->qbitincl) { + if (test_bit(X25_Q_BIT_FLAG, &x25->flags)) { asmptr = skb_push(skb, 1); *asmptr = qbit; } -- cgit v1.2.2 From b7792e34cba641c49cd436d42fbfd2a632ff39d3 Mon Sep 17 00:00:00 2001 From: andrew hendry Date: Sun, 16 May 2010 23:00:02 +0000 Subject: X25: Move interrupt flag to bitfield Moves the x25 interrupt flag from char into bitfield. Signed-off-by: Andrew Hendry Signed-off-by: David S. Miller --- net/x25/af_x25.c | 1 + net/x25/x25_in.c | 2 +- net/x25/x25_out.c | 5 +++-- 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 720534ce1fd9..3d97b8caf0b1 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -641,6 +641,7 @@ static struct sock *x25_make_new(struct sock *osk) x25->cudmatchlength = ox25->cudmatchlength; x25->accptapprv = ox25->accptapprv; + clear_bit(X25_INTERRUPT_FLAG, &x25->flags); x25_init_timers(sk); out: return sk; diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index 372ac226e648..63178961efac 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c @@ -273,7 +273,7 @@ static int x25_state3_machine(struct sock *sk, struct sk_buff *skb, int frametyp break; case X25_INTERRUPT_CONFIRMATION: - x25->intflag = 0; + clear_bit(X25_INTERRUPT_FLAG, &x25->flags); break; case X25_INTERRUPT: diff --git a/net/x25/x25_out.c b/net/x25/x25_out.c index 52351a26b6fc..d00649fb251d 100644 --- a/net/x25/x25_out.c +++ b/net/x25/x25_out.c @@ -148,8 +148,9 @@ void x25_kick(struct sock *sk) /* * Transmit interrupt data. */ - if (!x25->intflag && skb_peek(&x25->interrupt_out_queue) != NULL) { - x25->intflag = 1; + if (skb_peek(&x25->interrupt_out_queue) != NULL && + !test_and_set_bit(X25_INTERRUPT_FLAG, &x25->flags)) { + skb = skb_dequeue(&x25->interrupt_out_queue); x25_transmit_link(skb, x25->neighbour); } -- cgit v1.2.2 From 37cda78741ecdbf45dd9b64e4c99dbdb11b47b46 Mon Sep 17 00:00:00 2001 From: andrew hendry Date: Sun, 16 May 2010 23:00:27 +0000 Subject: X25: Move accept approve flag to bitfield Moves the x25 accept approve flag from char into bitfield. Signed-off-by: Andrew Hendry Signed-off-by: David S. 
Miller --- net/x25/af_x25.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 3d97b8caf0b1..e5c1e3298f8a 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -586,7 +586,7 @@ static int x25_create(struct net *net, struct socket *sock, int protocol, x25->t2 = sysctl_x25_ack_holdback_timeout; x25->state = X25_STATE_0; x25->cudmatchlength = 0; - x25->accptapprv = X25_DENY_ACCPT_APPRV; /* normally no cud */ + set_bit(X25_ACCPT_APPRV_FLAG, &x25->flags); /* normally no cud */ /* on call accept */ x25->facilities.winsize_in = X25_DEFAULT_WINDOW_SIZE; @@ -639,7 +639,6 @@ static struct sock *x25_make_new(struct sock *osk) x25->facilities = ox25->facilities; x25->dte_facilities = ox25->dte_facilities; x25->cudmatchlength = ox25->cudmatchlength; - x25->accptapprv = ox25->accptapprv; clear_bit(X25_INTERRUPT_FLAG, &x25->flags); x25_init_timers(sk); @@ -1057,8 +1056,8 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, makex25->vc_facil_mask &= ~X25_MASK_CALLING_AE; makex25->cudmatchlength = x25_sk(sk)->cudmatchlength; - /* Normally all calls are accepted immediatly */ - if(makex25->accptapprv & X25_DENY_ACCPT_APPRV) { + /* Normally all calls are accepted immediately */ + if (test_bit(X25_ACCPT_APPRV_FLAG, &makex25->flags)) { x25_write_internal(make, X25_CALL_ACCEPTED); makex25->state = X25_STATE_3; } @@ -1580,7 +1579,7 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) rc = -EINVAL; if (sk->sk_state != TCP_CLOSE) break; - x25->accptapprv = X25_ALLOW_ACCPT_APPRV; + clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags); rc = 0; break; } @@ -1589,7 +1588,8 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) rc = -EINVAL; if (sk->sk_state != TCP_ESTABLISHED) break; - if (x25->accptapprv) /* must call accptapprv above */ + /* must call accptapprv above */ + if (test_bit(X25_ACCPT_APPRV_FLAG, &x25->flags)) break; x25_write_internal(sk, X25_CALL_ACCEPTED); x25->state = X25_STATE_3; -- cgit v1.2.2 From 935e2a26b85003c0bd52b6c92712c2f77a5f9d33 Mon Sep 17 00:00:00 2001 From: andrew hendry Date: Sun, 16 May 2010 23:00:35 +0000 Subject: X25: Remove bkl in sockopts Removes the BKL in x25 setsock and getsockopts. Signed-off-by: Andrew Hendry Signed-off-by: David S. Miller --- net/x25/af_x25.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index e5c1e3298f8a..5e86d4e97dce 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -453,7 +453,6 @@ static int x25_setsockopt(struct socket *sock, int level, int optname, struct sock *sk = sock->sk; int rc = -ENOPROTOOPT; - lock_kernel(); if (level != SOL_X25 || optname != X25_QBITINCL) goto out; @@ -471,7 +470,6 @@ static int x25_setsockopt(struct socket *sock, int level, int optname, clear_bit(X25_Q_BIT_FLAG, &x25_sk(sk)->flags); rc = 0; out: - unlock_kernel(); return rc; } @@ -481,7 +479,6 @@ static int x25_getsockopt(struct socket *sock, int level, int optname, struct sock *sk = sock->sk; int val, len, rc = -ENOPROTOOPT; - lock_kernel(); if (level != SOL_X25 || optname != X25_QBITINCL) goto out; @@ -502,7 +499,6 @@ static int x25_getsockopt(struct socket *sock, int level, int optname, val = test_bit(X25_Q_BIT_FLAG, &x25_sk(sk)->flags); rc = copy_to_user(optval, &val, len) ? 
-EFAULT : 0; out: - unlock_kernel(); return rc; } -- cgit v1.2.2 From ccbd6a5a4f76e821ed36f69fdaf59817c3a7f18e Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 14 May 2010 10:58:26 +0000 Subject: net: Remove unnecessary semicolons after switch statements Also added an explicit break; to avoid a fallthrough in net/ipv4/tcp_input.c Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/8021q/vlan_core.c | 2 +- net/core/ethtool.c | 4 ++-- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_input.c | 5 +++-- net/rds/tcp_connect.c | 2 +- net/socket.c | 2 +- net/xfrm/xfrm_hash.h | 6 +++--- 7 files changed, 12 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index c584a0af77d3..bd537fc10254 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -61,7 +61,7 @@ int vlan_hwaccel_do_receive(struct sk_buff *skb) dev->dev_addr)) skb->pkt_type = PACKET_HOST; break; - }; + } return 0; } diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 1a7db92037fa..a0f4964033d2 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -522,7 +522,7 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr) p += ETH_GSTRING_LEN; num_strings++; goto unknown_filter; - }; + } /* now the rest of the filters */ switch (fsc->fs.flow_type) { @@ -646,7 +646,7 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr) p += ETH_GSTRING_LEN; num_strings++; break; - }; + } sprintf(p, "\tVLAN: %d, mask: 0x%x\n", fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask); p += ETH_GSTRING_LEN; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3284393d09b4..6596b4feeddc 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2215,7 +2215,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, default: /* fallthru */ break; - }; + } if (optlen < sizeof(int)) return -EINVAL; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e82162c211bf..3e6dafcb1071 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3845,12 +3845,13 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, /* 16-bit multiple */ opt_rx->cookie_plus = opsize; *hvpp = ptr; + break; default: /* ignore option */ break; - }; + } break; - }; + } ptr += opsize-2; length -= opsize; diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index 056256285987..c397524c039c 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -141,7 +141,7 @@ void rds_tcp_conn_shutdown(struct rds_connection *conn) release_sock(sock->sk); sock_release(sock); - }; + } if (tc->t_tinc) { rds_inc_put(&tc->t_tinc->ti_inc); diff --git a/net/socket.c b/net/socket.c index dae8c6b84a09..f9f7d0872cac 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2615,7 +2615,7 @@ static int bond_ioctl(struct net *net, unsigned int cmd, return dev_ioctl(net, cmd, uifr); default: return -EINVAL; - }; + } } static int siocdevprivate_ioctl(struct net *net, unsigned int cmd, diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h index 1396572d2ade..8e69533d2313 100644 --- a/net/xfrm/xfrm_hash.h +++ b/net/xfrm/xfrm_hash.h @@ -55,7 +55,7 @@ static inline unsigned __xfrm_src_hash(xfrm_address_t *daddr, case AF_INET6: h ^= __xfrm6_daddr_saddr_hash(daddr, saddr); break; - }; + } return (h ^ (h >> 16)) & hmask; } @@ -102,7 +102,7 @@ static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short h = __xfrm6_daddr_saddr_hash(daddr, saddr); break; - }; + } h ^= (h >> 16); return h & hmask; } @@ -119,7 +119,7 @@ static inline unsigned int 
__addr_hash(xfrm_address_t *daddr, xfrm_address_t *sa case AF_INET6: h = __xfrm6_daddr_saddr_hash(daddr, saddr); break; - }; + } h ^= (h >> 16); return h & hmask; } -- cgit v1.2.2 From f0cd15081a72075df16c45a2310e873fb9fcd82f Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 14 May 2010 14:38:59 +0000 Subject: tbf: stop wanton destruction of children (v2) Several netem users use TBF for rate control. But every time the parameters of TBF are changed it destroys the child qdisc, requiring reconfigation. Better to just keep child qdisc and just notify it of changed limit. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_tbf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 8fb8107ab188..0991c640cd3e 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -273,7 +273,11 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt) if (max_size < 0) goto done; - if (qopt->limit > 0) { + if (q->qdisc != &noop_qdisc) { + err = fifo_set_limit(q->qdisc, qopt->limit); + if (err) + goto done; + } else if (qopt->limit > 0) { child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit); if (IS_ERR(child)) { err = PTR_ERR(child); -- cgit v1.2.2 From a2f79227138c71e08627af5f8961197364edbc98 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 14 May 2010 08:08:14 +0000 Subject: net_sched: sch_hfsc: fix classification loops When attaching filters to a class pointing to a class higher up in the hierarchy, classification may enter an endless loop. Currently this is prevented for filters that are already resolved, but not for filters resolved at runtime. Only allow filters to point downwards in the hierarchy, similar to what CBQ does. Reported-by: Pawel Staszewski Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_hfsc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index b38b39c60752..a435cf13cc27 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1155,7 +1155,7 @@ static struct hfsc_class * hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) { struct hfsc_sched *q = qdisc_priv(sch); - struct hfsc_class *cl; + struct hfsc_class *head, *cl; struct tcf_result res; struct tcf_proto *tcf; int result; @@ -1166,6 +1166,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) return cl; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; + head = &q->root; tcf = q->root.filter_list; while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) { #ifdef CONFIG_NET_CLS_ACT @@ -1180,6 +1181,8 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) if ((cl = (struct hfsc_class *)res.class) == NULL) { if ((cl = hfsc_find_class(res.classid, sch)) == NULL) break; /* filter selected invalid classid */ + if (cl->level >= head->level) + break; /* filter may only point downwards */ } if (cl->level == 0) @@ -1187,6 +1190,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) /* apply inner filter chain */ tcf = cl->filter_list; + head = cl; } /* classification failed, try default class */ -- cgit v1.2.2 From eedf042a63ffef050ebc015de19b52dc065e830b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 17 May 2010 22:27:12 -0700 Subject: ipv6: fix the bug of address check The duplicate address check code got broken in the conversion to hlist (2.6.35). 
The earlier patch did not fix the case where two addresses match same hash value. Use two exit paths, rather than depending on state of loop variables (from macro). Based on earlier fix by Shan Wei. Signed-off-by: Stephen Hemminger Reviewed-by: Shan Wei Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3984f52181f4..75d3b8c1e856 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1274,7 +1274,7 @@ static int ipv6_count_addresses(struct inet6_dev *idev) int ipv6_chk_addr(struct net *net, struct in6_addr *addr, struct net_device *dev, int strict) { - struct inet6_ifaddr *ifp = NULL; + struct inet6_ifaddr *ifp; struct hlist_node *node; unsigned int hash = ipv6_addr_hash(addr); @@ -1283,15 +1283,16 @@ int ipv6_chk_addr(struct net *net, struct in6_addr *addr, if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr) && - !(ifp->flags&IFA_F_TENTATIVE)) { - if (dev == NULL || ifp->idev->dev == dev || - !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) - break; + !(ifp->flags&IFA_F_TENTATIVE) && + (dev == NULL || ifp->idev->dev == dev || + !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) { + rcu_read_unlock_bh(); + return 1; } } - rcu_read_unlock_bh(); - return ifp != NULL; + rcu_read_unlock_bh(); + return 0; } EXPORT_SYMBOL(ipv6_chk_addr); -- cgit v1.2.2 From de213e5eedecdfb1b1eea7e6be28bc64cac5c078 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 17 May 2010 22:35:36 -0700 Subject: tcp: tcp_synack_options() fix Commit 33ad798c924b4a (tcp: options clean up) introduced a problem if MD5+SACK+timestamps were used in initial SYN message. Some stacks (old linux for example) try to negotiate MD5+SACK+TSTAMP sessions, but since 40 bytes of tcp options space are not enough to store all the bits needed, we chose to disable timestamps in this case. We send a SYN-ACK _without_ timestamp option, but socket has timestamps enabled and all further outgoing messages contain a TS block, all with the initial timestamp of the remote peer. Fix is to really disable timestamps option for the whole session. Reported-by: Bijay Singh Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 18a3302480cb..b4ed957f201a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -668,7 +668,6 @@ static unsigned tcp_synack_options(struct sock *sk, u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ? xvp->cookie_plus : 0; - bool doing_ts = ireq->tstamp_ok; #ifdef CONFIG_TCP_MD5SIG *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); @@ -681,7 +680,7 @@ static unsigned tcp_synack_options(struct sock *sk, * rather than TS in order to fit in better with old, * buggy kernels, but that was deemed to be unnecessary. 
*/ - doing_ts &= !ireq->sack_ok; + ireq->tstamp_ok &= !ireq->sack_ok; } #else *md5 = NULL; @@ -696,7 +695,7 @@ static unsigned tcp_synack_options(struct sock *sk, opts->options |= OPTION_WSCALE; remaining -= TCPOLEN_WSCALE_ALIGNED; } - if (likely(doing_ts)) { + if (likely(ireq->tstamp_ok)) { opts->options |= OPTION_TS; opts->tsval = TCP_SKB_CB(skb)->when; opts->tsecr = req->ts_recent; @@ -704,7 +703,7 @@ static unsigned tcp_synack_options(struct sock *sk, } if (likely(ireq->sack_ok)) { opts->options |= OPTION_SACK_ADVERTISE; - if (unlikely(!doing_ts)) + if (unlikely(!ireq->tstamp_ok)) remaining -= TCPOLEN_SACKPERM_ALIGNED; } @@ -712,7 +711,7 @@ static unsigned tcp_synack_options(struct sock *sk, * If the options fit, the same options should fit now! */ if (*md5 == NULL && - doing_ts && + ireq->tstamp_ok && cookie_plus > TCPOLEN_COOKIE_BASE) { int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */ -- cgit v1.2.2 From d19d56ddc88e7895429ef118db9c83c7bbe3ce6a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 17 May 2010 22:36:55 -0700 Subject: net: Introduce skb_tunnel_rx() helper skb rxhash should be cleared when a skb is handled by a tunnel before being delivered again, so that correct packet steering can take place. There are other cleanups and accounting that we can factorize in a new helper, skb_tunnel_rx() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 9 +-------- net/ipv4/ipip.c | 7 ++----- net/ipv4/ipmr.c | 8 +++----- net/ipv6/ip6_tunnel.c | 8 ++------ net/ipv6/ip6mr.c | 8 +++----- net/ipv6/sit.c | 8 +++----- 6 files changed, 14 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index fe381d12ecdd..498cf69c7977 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -538,7 +538,6 @@ static int ipgre_rcv(struct sk_buff *skb) struct ip_tunnel *tunnel; int offset = 4; __be16 gre_proto; - unsigned int len; if (!pskb_may_pull(skb, 16)) goto drop_nolock; @@ -629,8 +628,6 @@ static int ipgre_rcv(struct sk_buff *skb) tunnel->i_seqno = seqno + 1; } - len = skb->len; - /* Warning: All skb pointers will be invalidated! 
*/ if (tunnel->dev->type == ARPHRD_ETHER) { if (!pskb_may_pull(skb, ETH_HLEN)) { @@ -644,11 +641,7 @@ static int ipgre_rcv(struct sk_buff *skb) skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); } - stats->rx_packets++; - stats->rx_bytes += len; - skb->dev = tunnel->dev; - skb_dst_drop(skb); - nf_reset(skb); + skb_tunnel_rx(skb, tunnel->dev); skb_reset_network_header(skb); ipgre_ecn_decapsulate(iph, skb); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 0b27b14dcc9d..7fd636711037 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -374,11 +374,8 @@ static int ipip_rcv(struct sk_buff *skb) skb->protocol = htons(ETH_P_IP); skb->pkt_type = PACKET_HOST; - tunnel->dev->stats.rx_packets++; - tunnel->dev->stats.rx_bytes += skb->len; - skb->dev = tunnel->dev; - skb_dst_drop(skb); - nf_reset(skb); + skb_tunnel_rx(skb, tunnel->dev); + ipip_ecn_decapsulate(iph, skb); netif_rx(skb); rcu_read_unlock(); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 7a7ee1cc3b5a..217ebe035b34 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1831,14 +1831,12 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, skb->mac_header = skb->network_header; skb_pull(skb, (u8*)encap - skb->data); skb_reset_network_header(skb); - skb->dev = reg_dev; skb->protocol = htons(ETH_P_IP); skb->ip_summed = 0; skb->pkt_type = PACKET_HOST; - skb_dst_drop(skb); - reg_dev->stats.rx_bytes += skb->len; - reg_dev->stats.rx_packets++; - nf_reset(skb); + + skb_tunnel_rx(skb, reg_dev); + netif_rx(skb); dev_put(reg_dev); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 2599870747ec..8f39893d8081 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -723,14 +723,10 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, skb->protocol = htons(protocol); skb->pkt_type = PACKET_HOST; memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); - skb->dev = t->dev; - skb_dst_drop(skb); - nf_reset(skb); - dscp_ecn_decapsulate(t, ipv6h, skb); + skb_tunnel_rx(skb, t->dev); - t->dev->stats.rx_packets++; - t->dev->stats.rx_bytes += skb->len; + dscp_ecn_decapsulate(t, ipv6h, skb); netif_rx(skb); rcu_read_unlock(); return 0; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 163850e22b11..bd9e7d3e9c8e 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -658,14 +658,12 @@ static int pim6_rcv(struct sk_buff *skb) skb->mac_header = skb->network_header; skb_pull(skb, (u8 *)encap - skb->data); skb_reset_network_header(skb); - skb->dev = reg_dev; skb->protocol = htons(ETH_P_IPV6); skb->ip_summed = 0; skb->pkt_type = PACKET_HOST; - skb_dst_drop(skb); - reg_dev->stats.rx_bytes += skb->len; - reg_dev->stats.rx_packets++; - nf_reset(skb); + + skb_tunnel_rx(skb, reg_dev); + netif_rx(skb); dev_put(reg_dev); return 0; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 5abae10cd884..e51e650ea80b 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -566,11 +566,9 @@ static int ipip6_rcv(struct sk_buff *skb) kfree_skb(skb); return 0; } - tunnel->dev->stats.rx_packets++; - tunnel->dev->stats.rx_bytes += skb->len; - skb->dev = tunnel->dev; - skb_dst_drop(skb); - nf_reset(skb); + + skb_tunnel_rx(skb, tunnel->dev); + ipip6_ecn_decapsulate(iph, skb); netif_rx(skb); rcu_read_unlock(); -- cgit v1.2.2 From 57b610805ce92dbd79fc97509f80fa5391b99623 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Mon, 17 May 2010 22:49:55 -0700 Subject: net: Add netlink support for virtual port management (was iovnl) Add new netdev ops ndo_{set|get}_vf_port to allow setting of port-profile on a netdev interface. 
Extends netlink socket RTM_SETLINK/ RTM_GETLINK with two new sub msgs called IFLA_VF_PORTS and IFLA_PORT_SELF (added to end of IFLA_cmd list). These are both nested atrtibutes using this layout: [IFLA_NUM_VF] [IFLA_VF_PORTS] [IFLA_VF_PORT] [IFLA_PORT_*], ... [IFLA_VF_PORT] [IFLA_PORT_*], ... ... [IFLA_PORT_SELF] [IFLA_PORT_*], ... These attributes are design to be set and get symmetrically. VF_PORTS is a list of VF_PORTs, one for each VF, when dealing with an SR-IOV device. PORT_SELF is for the PF of the SR-IOV device, in case it wants to also have a port-profile, or for the case where the VF==PF, like in enic patch 2/2 of this patch set. A port-profile is used to configure/enable the external switch virtual port backing the netdev interface, not to configure the host-facing side of the netdev. A port-profile is an identifier known to the switch. How port- profiles are installed on the switch or how available port-profiles are made know to the host is outside the scope of this patch. There are two types of port-profiles specs in the netlink msg. The first spec is for 802.1Qbg (pre-)standard, VDP protocol. The second spec is for devices that run a similar protocol as VDP but in firmware, thus hiding the protocol details. In either case, the specs have much in common and makes sense to define the netlink msg as the union of the two specs. For example, both specs have a notition of associating/deassociating a port-profile. And both specs require some information from the hypervisor manager, such as client port instance ID. The general flow is the port-profile is applied to a host netdev interface using RTM_SETLINK, the receiver of the RTM_SETLINK msg communicates with the switch, and the switch virtual port backing the host netdev interface is configured/enabled based on the settings defined by the port-profile. What those settings comprise, and how those settings are managed is again outside the scope of this patch, since this patch only deals with the first step in the flow. Signed-off-by: Scott Feldman Signed-off-by: Roopa Prabhu Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 169 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 168 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 66db1201da9b..e4b9870e4706 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -660,6 +660,31 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev) return 0; } +static size_t rtnl_port_size(const struct net_device *dev) +{ + size_t port_size = nla_total_size(4) /* PORT_VF */ + + nla_total_size(PORT_PROFILE_MAX) /* PORT_PROFILE */ + + nla_total_size(sizeof(struct ifla_port_vsi)) + /* PORT_VSI_TYPE */ + + nla_total_size(PORT_UUID_MAX) /* PORT_INSTANCE_UUID */ + + nla_total_size(PORT_UUID_MAX) /* PORT_HOST_UUID */ + + nla_total_size(1) /* PROT_VDP_REQUEST */ + + nla_total_size(2); /* PORT_VDP_RESPONSE */ + size_t vf_ports_size = nla_total_size(sizeof(struct nlattr)); + size_t vf_port_size = nla_total_size(sizeof(struct nlattr)) + + port_size; + size_t port_self_size = nla_total_size(sizeof(struct nlattr)) + + port_size; + + if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent) + return 0; + if (dev_num_vf(dev->dev.parent)) + return port_self_size + vf_ports_size + + vf_port_size * dev_num_vf(dev->dev.parent); + else + return port_self_size; +} + static inline size_t if_nlmsg_size(const struct net_device *dev) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) @@ -680,9 +705,82 @@ static inline size_t if_nlmsg_size(const struct net_device *dev) + nla_total_size(1) /* IFLA_LINKMODE */ + nla_total_size(4) /* IFLA_NUM_VF */ + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */ + + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ } +static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) +{ + struct nlattr *vf_ports; + struct nlattr *vf_port; + int vf; + int err; + + vf_ports = nla_nest_start(skb, IFLA_VF_PORTS); + if (!vf_ports) + return -EMSGSIZE; + + for (vf = 0; vf < dev_num_vf(dev->dev.parent); vf++) { + vf_port = nla_nest_start(skb, IFLA_VF_PORT); + if (!vf_port) { + nla_nest_cancel(skb, vf_ports); + return -EMSGSIZE; + } + NLA_PUT_U32(skb, IFLA_PORT_VF, vf); + err = dev->netdev_ops->ndo_get_vf_port(dev, vf, skb); + if (err) { +nla_put_failure: + nla_nest_cancel(skb, vf_port); + continue; + } + nla_nest_end(skb, vf_port); + } + + nla_nest_end(skb, vf_ports); + + return 0; +} + +static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev) +{ + struct nlattr *port_self; + int err; + + port_self = nla_nest_start(skb, IFLA_PORT_SELF); + if (!port_self) + return -EMSGSIZE; + + err = dev->netdev_ops->ndo_get_vf_port(dev, PORT_SELF_VF, skb); + if (err) { + nla_nest_cancel(skb, port_self); + return err; + } + + nla_nest_end(skb, port_self); + + return 0; +} + +static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev) +{ + int err; + + if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent) + return 0; + + err = rtnl_port_self_fill(skb, dev); + if (err) + return err; + + if (dev_num_vf(dev->dev.parent)) { + err = rtnl_vf_ports_fill(skb, dev); + if (err) + return err; + } + + return 0; +} + static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, unsigned int flags) @@ -754,13 +852,15 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, goto nla_put_failure; copy_rtnl_link_stats64(nla_data(attr), stats); + if (dev->dev.parent) + NLA_PUT_U32(skb, IFLA_NUM_VF, 
dev_num_vf(dev->dev.parent)); + if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) { int i; struct nlattr *vfinfo, *vf; int num_vfs = dev_num_vf(dev->dev.parent); - NLA_PUT_U32(skb, IFLA_NUM_VF, num_vfs); vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST); if (!vfinfo) goto nla_put_failure; @@ -788,6 +888,10 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, } nla_nest_end(skb, vfinfo); } + + if (rtnl_port_fill(skb, dev)) + goto nla_put_failure; + if (dev->rtnl_link_ops) { if (rtnl_link_fill(skb, dev) < 0) goto nla_put_failure; @@ -849,6 +953,8 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_NET_NS_PID] = { .type = NLA_U32 }, [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, [IFLA_VFINFO_LIST] = {. type = NLA_NESTED }, + [IFLA_VF_PORTS] = { .type = NLA_NESTED }, + [IFLA_PORT_SELF] = { .type = NLA_NESTED }, }; EXPORT_SYMBOL(ifla_policy); @@ -870,6 +976,20 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { .len = sizeof(struct ifla_vf_tx_rate) }, }; +static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { + [IFLA_PORT_VF] = { .type = NLA_U32 }, + [IFLA_PORT_PROFILE] = { .type = NLA_STRING, + .len = PORT_PROFILE_MAX }, + [IFLA_PORT_VSI_TYPE] = { .type = NLA_BINARY, + .len = sizeof(struct ifla_port_vsi)}, + [IFLA_PORT_INSTANCE_UUID] = { .type = NLA_BINARY, + .len = PORT_UUID_MAX }, + [IFLA_PORT_HOST_UUID] = { .type = NLA_STRING, + .len = PORT_UUID_MAX }, + [IFLA_PORT_REQUEST] = { .type = NLA_U8, }, + [IFLA_PORT_RESPONSE] = { .type = NLA_U16, }, +}; + struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) { struct net *net; @@ -1089,6 +1209,53 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, } err = 0; + if (tb[IFLA_VF_PORTS]) { + struct nlattr *port[IFLA_PORT_MAX+1]; + struct nlattr *attr; + int vf; + int rem; + + err = -EOPNOTSUPP; + if (!ops->ndo_set_vf_port) + goto errout; + + nla_for_each_nested(attr, tb[IFLA_VF_PORTS], rem) { + if (nla_type(attr) != IFLA_VF_PORT) + continue; + err = nla_parse_nested(port, IFLA_PORT_MAX, + attr, ifla_port_policy); + if (err < 0) + goto errout; + if (!port[IFLA_PORT_VF]) { + err = -EOPNOTSUPP; + goto errout; + } + vf = nla_get_u32(port[IFLA_PORT_VF]); + err = ops->ndo_set_vf_port(dev, vf, port); + if (err < 0) + goto errout; + modified = 1; + } + } + err = 0; + + if (tb[IFLA_PORT_SELF]) { + struct nlattr *port[IFLA_PORT_MAX+1]; + + err = nla_parse_nested(port, IFLA_PORT_MAX, + tb[IFLA_PORT_SELF], ifla_port_policy); + if (err < 0) + goto errout; + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_port) + err = ops->ndo_set_vf_port(dev, PORT_SELF_VF, port); + if (err < 0) + goto errout; + modified = 1; + } + err = 0; + errout: if (err < 0 && modified && net_ratelimit()) printk(KERN_WARNING "A link change request failed with " -- cgit v1.2.2 From 2e3219b5c8a2e44e0b83ae6e04f52f20a82ac0f2 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 17 May 2010 22:51:58 -0700 Subject: sctp: fix append error cause to ERROR chunk correctly commit 5fa782c2f5ef6c2e4f04d3e228412c9b4a4c8809 sctp: Fix skb_over_panic resulting from multiple invalid \ parameter errors (CVE-2010-1173) (v4) cause 'error cause' never be add the the ERROR chunk due to some typo when check valid length in sctp_init_cause_fixed(). Signed-off-by: Wei Yongjun Reviewed-by: Neil Horman Acked-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/sctp/sm_make_chunk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index d8261f3d7715..bd2a50b482ac 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -141,7 +141,7 @@ int sctp_init_cause_fixed(struct sctp_chunk *chunk, __be16 cause_code, len = sizeof(sctp_errhdr_t) + paylen; err.length = htons(len); - if (skb_tailroom(chunk->skb) > len) + if (skb_tailroom(chunk->skb) < len) return -ENOSPC; chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk, sizeof(sctp_errhdr_t), @@ -1415,7 +1415,7 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data) void *sctp_addto_chunk_fixed(struct sctp_chunk *chunk, int len, const void *data) { - if (skb_tailroom(chunk->skb) > len) + if (skb_tailroom(chunk->skb) >= len) return sctp_addto_chunk(chunk, len, data); else return NULL; -- cgit v1.2.2 From 49afa55b5bd792cda4fca15d3238e9b6f370c856 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 13 May 2010 10:03:32 +0000 Subject: net/caif: Use kzalloc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use kzalloc rather than the combination of kmalloc and memset. A simplified version of the semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ expression x,size,flags; statement S; @@ -x = kmalloc(size,flags); +x = kzalloc(size,flags); if (x == NULL) S -memset(x, 0, size); // Signed-off-by: Julia Lawall Acked-by: Sjur Brændeland Signed-off-by: David S. Miller --- net/caif/cfcnfg.c | 3 +-- net/caif/cfctrl.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index 471c62939fad..df43f264d9fb 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -65,12 +65,11 @@ struct cfcnfg *cfcnfg_create(void) struct cfcnfg *this; struct cfctrl_rsp *resp; /* Initiate this layer */ - this = kmalloc(sizeof(struct cfcnfg), GFP_ATOMIC); + this = kzalloc(sizeof(struct cfcnfg), GFP_ATOMIC); if (!this) { pr_warning("CAIF: %s(): Out of memory\n", __func__); return NULL; } - memset(this, 0, sizeof(struct cfcnfg)); this->mux = cfmuxl_create(); if (!this->mux) goto out_of_mem; diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index a521d32cfe56..0ffe1e1ce901 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -284,12 +284,11 @@ int cfctrl_linkup_request(struct cflayer *layer, __func__, param->linktype); return -EINVAL; } - req = kmalloc(sizeof(*req), GFP_KERNEL); + req = kzalloc(sizeof(*req), GFP_KERNEL); if (!req) { pr_warning("CAIF: %s(): Out of memory\n", __func__); return -ENOMEM; } - memset(req, 0, sizeof(*req)); req->client_layer = user_layer; req->cmd = CFCTRL_CMD_LINK_SETUP; req->param = *param; -- cgit v1.2.2 From 6ff9c3644e72bfac20844e0155c2cc8108602820 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 12 May 2010 06:37:05 +0000 Subject: net sched: printk message severity The previous patch encouraged me to go look at all the messages in the network scheduler and fix them. Many messages were missing any severity level. Some serious ones that should never happen were turned into WARN(), and the random noise messages that were handled were changed to pr_debug(). Signed-off-by: Stephen Hemminger Signed-off-by: David S.
Miller --- net/sched/act_api.c | 20 +++++++++++--------- net/sched/act_gact.c | 4 ++-- net/sched/act_ipt.c | 3 ++- net/sched/act_mirred.c | 6 +++--- net/sched/act_pedit.c | 11 ++++++----- net/sched/act_simple.c | 4 ++-- net/sched/cls_u32.c | 10 +++++----- net/sched/ematch.c | 3 ++- 8 files changed, 33 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 019045174fc3..972378f47f3c 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -153,7 +153,7 @@ int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, } else if (type == RTM_GETACTION) { return tcf_dump_walker(skb, cb, a, hinfo); } else { - printk("tcf_generic_walker: unknown action %d\n", type); + WARN(1, "tcf_generic_walker: unknown action %d\n", type); return -EINVAL; } } @@ -403,8 +403,9 @@ void tcf_action_destroy(struct tc_action *act, int bind) module_put(a->ops->owner); act = act->next; kfree(a); - } else { /*FIXME: Remove later - catch insertion bugs*/ - printk("tcf_action_destroy: BUG? destroying NULL ops\n"); + } else { + /*FIXME: Remove later - catch insertion bugs*/ + WARN(1, "tcf_action_destroy: BUG? destroying NULL ops\n"); act = act->next; kfree(a); } @@ -744,7 +745,7 @@ static struct tc_action *create_a(int i) act = kzalloc(sizeof(*act), GFP_KERNEL); if (act == NULL) { - printk("create_a: failed to alloc!\n"); + pr_debug("create_a: failed to alloc!\n"); return NULL; } act->order = i; @@ -766,13 +767,13 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, int err = -ENOMEM; if (a == NULL) { - printk("tca_action_flush: couldnt create tc_action\n"); + pr_debug("tca_action_flush: couldnt create tc_action\n"); return err; } skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) { - printk("tca_action_flush: failed skb alloc\n"); + pr_debug("tca_action_flush: failed skb alloc\n"); kfree(a); return err; } @@ -979,7 +980,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) return ret; if (tca[TCA_ACT_TAB] == NULL) { - printk("tc_ctl_action: received NO action attribs\n"); + pr_notice("tc_ctl_action: received NO action attribs\n"); return -EINVAL; } @@ -1056,7 +1057,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) struct nlattr *kind = find_dump_kind(cb->nlh); if (kind == NULL) { - printk("tc_dump_action: action bad kind\n"); + pr_info("tc_dump_action: action bad kind\n"); return 0; } @@ -1069,7 +1070,8 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) a.ops = a_o; if (a_o->walk == NULL) { - printk("tc_dump_action: %s !capable of dumping table\n", a_o->kind); + WARN(1, "tc_dump_action: %s !capable of dumping table\n", + a_o->kind); goto nla_put_failure; } diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index e7f796aec657..8406c6654990 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -202,9 +202,9 @@ MODULE_LICENSE("GPL"); static int __init gact_init_module(void) { #ifdef CONFIG_GACT_PROB - printk("GACT probability on\n"); + printk(KERN_INFO "GACT probability on\n"); #else - printk("GACT probability NOT on\n"); + printk(KERN_INFO "GACT probability NOT on\n"); #endif return tcf_register_action(&act_gact_ops); } diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 1f9595467c17..c7e59e6ec349 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -235,7 +235,8 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, break; default: if (net_ratelimit()) - printk("Bogus netfilter code %d assume ACCEPT\n", ret); + pr_notice("tc 
filter: Bogus netfilter code" + " %d assume ACCEPT\n", ret); result = TC_POLICE_OK; break; } diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index c046682054eb..c0b6863e3b87 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -164,8 +164,8 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a, dev = m->tcfm_dev; if (!(dev->flags & IFF_UP)) { if (net_ratelimit()) - printk("mirred to Houston: device %s is gone!\n", - dev->name); + pr_notice("tc mirred to Houston: device %s is gone!\n", + dev->name); goto out; } @@ -252,7 +252,7 @@ MODULE_LICENSE("GPL"); static int __init mirred_init_module(void) { - printk("Mirror/redirect action on\n"); + pr_info("Mirror/redirect action on\n"); return tcf_register_action(&act_mirred_ops); } diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index b7dcfedc802e..fdbd0b7bd840 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -158,11 +158,13 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, } if (offset % 4) { - printk("offset must be on 32 bit boundaries\n"); + pr_info("tc filter pedit" + " offset must be on 32 bit boundaries\n"); goto bad; } if (offset > 0 && offset > skb->len) { - printk("offset %d cant exceed pkt length %d\n", + pr_info("tc filter pedit" + " offset %d cant exceed pkt length %d\n", offset, skb->len); goto bad; } @@ -176,9 +178,8 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, if (munged) skb->tc_verd = SET_TC_MUNGED(skb->tc_verd); goto done; - } else { - printk("pedit BUG: index %d\n", p->tcf_index); - } + } else + WARN(1, "pedit BUG: index %d\n", p->tcf_index); bad: p->tcf_qstats.overlimits++; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 622ca809c15c..1b4bc691d7d1 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -49,7 +49,7 @@ static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result * Example if this was the 3rd packet and the string was "hello" * then it would look like "hello_3" (without quotes) **/ - printk("simple: %s_%d\n", + pr_info("simple: %s_%d\n", (char *)d->tcfd_defdata, d->tcf_bstats.packets); spin_unlock(&d->tcf_lock); return d->tcf_action; @@ -205,7 +205,7 @@ static int __init simp_init_module(void) { int ret = tcf_register_action(&act_simp_ops); if (!ret) - printk("Simple TC action Loaded\n"); + pr_info("Simple TC action Loaded\n"); return ret; } diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 593eac056e8d..96275422c619 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -211,7 +211,7 @@ check_terminal: deadloop: if (net_ratelimit()) - printk("cls_u32: dead loop\n"); + printk(KERN_WARNING "cls_u32: dead loop\n"); return -1; } @@ -768,15 +768,15 @@ static struct tcf_proto_ops cls_u32_ops __read_mostly = { static int __init init_u32(void) { - printk("u32 classifier\n"); + pr_info("u32 classifier\n"); #ifdef CONFIG_CLS_U32_PERF - printk(" Performance counters on\n"); + pr_info(" Performance counters on\n"); #endif #ifdef CONFIG_NET_CLS_IND - printk(" input device check on\n"); + pr_info(" input device check on\n"); #endif #ifdef CONFIG_NET_CLS_ACT - printk(" Actions configured\n"); + pr_info(" Actions configured\n"); #endif return register_tcf_proto_ops(&cls_u32_ops); } diff --git a/net/sched/ematch.c b/net/sched/ematch.c index e782bdeedc58..5e37da961f80 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -527,7 +527,8 @@ pop_stack: stack_overflow: if (net_ratelimit()) - printk("Local stack overflow, increase NET_EMATCH_STACK\n"); + 
printk(KERN_WARNING "tc ematch: local stack overflow," + " increase NET_EMATCH_STACK\n"); return -1; } EXPORT_SYMBOL(__tcf_em_tree_match); -- cgit v1.2.2 From 62db5cfd70b1ef53aa21f144a806fe3b78c84fab Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 12 May 2010 06:37:06 +0000 Subject: xfrm: add severity to printk Serious oh sh*t messages converted to WARN(). Add KERN_NOTICE severity to the unknown policy type messages. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index a267fbdda525..ba59983aaffe 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1783,7 +1783,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, } else { // reset the timers here? - printk("Dont know what to do with soft policy expire\n"); + WARN(1, "Dont know what to do with soft policy expire\n"); } km_policy_expired(xp, p->dir, up->hard, current->pid); @@ -1883,7 +1883,7 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; bad_policy: - printk("BAD policy passed\n"); + WARN(1, "BAD policy passed\n"); free_state: kfree(x); nomem: @@ -2385,8 +2385,9 @@ static int xfrm_send_state_notify(struct xfrm_state *x, struct km_event *c) case XFRM_MSG_FLUSHSA: return xfrm_notify_sa_flush(c); default: - printk("xfrm_user: Unknown SA event %d\n", c->event); - break; + printk(KERN_NOTICE "xfrm_user: Unknown SA event %d\n", + c->event); + break; } return 0; @@ -2676,7 +2677,8 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_ev case XFRM_MSG_POLEXPIRE: return xfrm_exp_policy_notify(xp, dir, c); default: - printk("xfrm_user: Unknown Policy event %d\n", c->event); + printk(KERN_NOTICE "xfrm_user: Unknown Policy event %d\n", + c->event); } return 0; -- cgit v1.2.2 From 207024b9477e253859abfc30f1ff314cd6008b24 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 12 May 2010 06:37:07 +0000 Subject: pfkey: add severity to printk Put severity level on pfkey printk messages Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/key/af_key.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index ba9a3fcc2fed..43040e97c474 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -99,7 +99,7 @@ static void pfkey_sock_destruct(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); if (!sock_flag(sk, SOCK_DEAD)) { - printk("Attempt to release alive pfkey socket: %p\n", sk); + pr_err("Attempt to release alive pfkey socket: %p\n", sk); return; } @@ -1402,7 +1402,7 @@ static inline int event2poltype(int event) case XFRM_MSG_POLEXPIRE: // return SADB_X_SPDEXPIRE; default: - printk("pfkey: Unknown policy event %d\n", event); + pr_err("pfkey: Unknown policy event %d\n", event); break; } @@ -1421,7 +1421,7 @@ static inline int event2keytype(int event) case XFRM_MSG_EXPIRE: return SADB_EXPIRE; default: - printk("pfkey: Unknown SA event %d\n", event); + pr_err("pfkey: Unknown SA event %d\n", event); break; } @@ -2969,7 +2969,7 @@ static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c) case XFRM_MSG_NEWAE: /* not yet supported */ break; default: - printk("pfkey: Unknown SA event %d\n", c->event); + pr_err("pfkey: Unknown SA event %d\n", c->event); break; } @@ -2993,7 +2993,7 @@ static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_e break; return key_notify_policy_flush(c); default: - printk("pfkey: Unknown policy event %d\n", c->event); + pr_err("pfkey: Unknown policy event %d\n", c->event); break; } -- cgit v1.2.2 From b60b6592baa69c43a5a0f55d6300a7feaab15338 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 11 May 2010 14:24:12 +0000 Subject: net sched: cleanup and rate limit warning If the user has a bad classification configuration and gets a packet that goes through too many steps, chances are more packets will arrive, and the message spew will overrun syslog because it is not rate limited. And because it is not tagged with an appropriate priority, it cannot be screened. Added the qdisc to the message to try to give some more context when the message does arrive. Signed-off-by: Stephen Hemminger Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/sch_api.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 9839b26674f4..fe35c1f338c2 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1637,9 +1637,12 @@ reclassify: tp = otp; if (verd++ >= MAX_REC_LOOP) { - printk("rule prio %u protocol %02x reclassify loop, " - "packet dropped\n", - tp->prio&0xffff, ntohs(tp->protocol)); + if (net_ratelimit()) + printk(KERN_NOTICE + "%s: packet reclassify loop" + " rule prio %u protocol %02x\n", + tp->q->ops->id, + tp->prio & 0xffff, ntohs(tp->protocol)); return TC_ACT_SHOT; } skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd); -- cgit v1.2.2 From 3fa21e07e6acefa31f974d57fba2b6920a7ebd1a Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 17 May 2010 23:08:21 -0700 Subject: net: Remove unnecessary returns from void function()s This patch removes from net/ (but not any netfilter files) all the unnecessary return; statements that precede the last closing brace of void functions. It does not remove the returns that are immediately preceded by a label as gcc doesn't like that. Done via: $ grep -rP --include=*.[ch] -l "return;\n}" net/ | \ xargs perl -i -e 'local $/ ; while (<>) { s/\n[ \t\n]+return;\n}/\n}/g; print; }' Signed-off-by: Joe Perches Signed-off-by: David S.
Miller --- net/9p/trans_rdma.c | 1 - net/atm/br2684.c | 1 - net/atm/lec.c | 6 ------ net/atm/mpc.c | 32 -------------------------------- net/atm/mpoa_caches.c | 20 -------------------- net/bluetooth/hci_core.c | 2 -- net/bluetooth/l2cap.c | 3 --- net/bluetooth/rfcomm/tty.c | 2 -- net/bluetooth/sco.c | 1 - net/caif/caif_dev.c | 1 - net/can/bcm.c | 2 -- net/decnet/dn_dev.c | 3 --- net/decnet/dn_route.c | 1 - net/ipv4/cipso_ipv4.c | 2 -- net/ipv4/fib_trie.c | 2 -- net/ipv4/ip_gre.c | 1 - net/ipv4/ip_options.c | 1 - net/ipv4/ipmr.c | 1 - net/ipv6/ndisc.c | 2 -- net/ipv6/proc.c | 1 - net/ipv6/route.c | 2 -- net/irda/iriap.c | 2 -- net/irda/irnet/irnet_irda.c | 3 --- net/iucv/af_iucv.c | 1 - net/mac80211/debugfs.h | 1 - net/mac80211/mesh.c | 2 -- net/mac80211/mesh_hwmp.c | 1 - net/netlabel/netlabel_addrlist.h | 2 -- net/netlabel/netlabel_unlabeled.c | 1 - net/sched/cls_flow.c | 1 - net/sched/sch_hfsc.c | 1 - net/sched/sch_ingress.c | 1 - net/sched/sch_mq.c | 1 - net/sched/sch_multiq.c | 1 - net/sched/sch_prio.c | 1 - net/sched/sch_red.c | 1 - net/sctp/associola.c | 2 -- net/sctp/outqueue.c | 2 -- net/sctp/proc.c | 3 --- net/sctp/sm_sideeffect.c | 4 ---- net/sctp/ulpqueue.c | 2 -- net/sunrpc/clnt.c | 1 - net/sunrpc/svcsock.c | 1 - net/sunrpc/xprt.c | 1 - net/sunrpc/xprtsock.c | 4 ---- net/sysctl_net.c | 1 - net/wimax/stack.c | 2 -- net/xfrm/xfrm_policy.c | 1 - 48 files changed, 131 deletions(-) (limited to 'net') diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 041101ab4aa5..0ea20c30466c 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -308,7 +308,6 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma, req, err, status); rdma->state = P9_RDMA_FLUSHING; client->status = Disconnected; - return; } static void diff --git a/net/atm/br2684.c b/net/atm/br2684.c index d6c7ceaf13e9..6719af6a59fa 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -446,7 +446,6 @@ error: net_dev->stats.rx_errors++; free_skb: dev_kfree_skb(skb); - return; } /* diff --git a/net/atm/lec.c b/net/atm/lec.c index feeaf5718472..d98bde1a0ac8 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -161,8 +161,6 @@ static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev) skb_queue_tail(&sk->sk_receive_queue, skb2); sk->sk_data_ready(sk, skb2->len); } - - return; } #endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */ @@ -640,7 +638,6 @@ static void lec_set_multicast_list(struct net_device *dev) * by default, all multicast frames arrive over the bus. 
* eventually support selective multicast service */ - return; } static const struct net_device_ops lec_netdev_ops = { @@ -1199,8 +1196,6 @@ static void __exit lane_module_cleanup(void) dev_lec[i] = NULL; } } - - return; } module_init(lane_module_init); @@ -1334,7 +1329,6 @@ static void lane2_associate_ind(struct net_device *dev, const u8 *mac_addr, priv->lane2_ops->associate_indicator(dev, mac_addr, tlvs, sizeoftlvs); } - return; } /* diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 436f2e177657..622b471e14e0 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -455,7 +455,6 @@ static void lane2_assoc_ind(struct net_device *dev, const u8 *mac_addr, if (end_of_tlvs - tlvs != 0) pr_info("(%s) ignoring %Zd bytes of trailing TLV garbage\n", dev->name, end_of_tlvs - tlvs); - return; } /* @@ -684,8 +683,6 @@ static void mpc_vcc_close(struct atm_vcc *vcc, struct net_device *dev) if (in_entry == NULL && eg_entry == NULL) dprintk("(%s) unused vcc closed\n", dev->name); - - return; } static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb) @@ -783,8 +780,6 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb) memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data)); netif_rx(new_skb); - - return; } static struct atmdev_ops mpc_ops = { /* only send is required */ @@ -873,8 +868,6 @@ static void send_set_mps_ctrl_addr(const char *addr, struct mpoa_client *mpc) mesg.type = SET_MPS_CTRL_ADDR; memcpy(mesg.MPS_ctrl, addr, ATM_ESA_LEN); msg_to_mpoad(&mesg, mpc); - - return; } static void mpoad_close(struct atm_vcc *vcc) @@ -911,8 +904,6 @@ static void mpoad_close(struct atm_vcc *vcc) pr_info("(%s) going down\n", (mpc->dev) ? mpc->dev->name : ""); module_put(THIS_MODULE); - - return; } /* @@ -1122,7 +1113,6 @@ static void MPOA_trigger_rcvd(struct k_message *msg, struct mpoa_client *mpc) pr_info("(%s) entry already in resolving state\n", (mpc->dev) ? 
mpc->dev->name : ""); mpc->in_ops->put(entry); - return; } /* @@ -1166,7 +1156,6 @@ static void check_qos_and_open_shortcut(struct k_message *msg, } else memset(&msg->qos, 0, sizeof(struct atm_qos)); msg_to_mpoad(msg, client); - return; } static void MPOA_res_reply_rcvd(struct k_message *msg, struct mpoa_client *mpc) @@ -1240,8 +1229,6 @@ static void ingress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc) mpc->in_ops->put(entry); entry = mpc->in_ops->get_with_mask(dst_ip, mpc, mask); } while (entry != NULL); - - return; } static void egress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc) @@ -1260,8 +1247,6 @@ static void egress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc) write_unlock_irq(&mpc->egress_lock); mpc->eg_ops->put(entry); - - return; } static void purge_egress_shortcut(struct atm_vcc *vcc, eg_cache_entry *entry) @@ -1295,8 +1280,6 @@ static void purge_egress_shortcut(struct atm_vcc *vcc, eg_cache_entry *entry) skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk, skb->len); dprintk("exiting\n"); - - return; } /* @@ -1325,8 +1308,6 @@ static void mps_death(struct k_message *msg, struct mpoa_client *mpc) mpc->in_ops->destroy_cache(mpc); mpc->eg_ops->destroy_cache(mpc); - - return; } static void MPOA_cache_impos_rcvd(struct k_message *msg, @@ -1353,8 +1334,6 @@ static void MPOA_cache_impos_rcvd(struct k_message *msg, write_unlock_irq(&mpc->egress_lock); mpc->eg_ops->put(entry); - - return; } static void set_mpc_ctrl_addr_rcvd(struct k_message *mesg, @@ -1392,8 +1371,6 @@ static void set_mpc_ctrl_addr_rcvd(struct k_message *mesg, pr_info("(%s) targetless LE_ARP request failed\n", mpc->dev->name); } - - return; } static void set_mps_mac_addr_rcvd(struct k_message *msg, @@ -1409,8 +1386,6 @@ static void set_mps_mac_addr_rcvd(struct k_message *msg, return; } client->number_of_mps_macs = 1; - - return; } /* @@ -1436,7 +1411,6 @@ static void clean_up(struct k_message *msg, struct mpoa_client *mpc, int action) msg->type = action; msg_to_mpoad(msg, mpc); - return; } static void mpc_timer_refresh(void) @@ -1445,8 +1419,6 @@ static void mpc_timer_refresh(void) mpc_timer.data = mpc_timer.expires; mpc_timer.function = mpc_cache_check; add_timer(&mpc_timer); - - return; } static void mpc_cache_check(unsigned long checking_time) @@ -1471,8 +1443,6 @@ static void mpc_cache_check(unsigned long checking_time) mpc = mpc->next; } mpc_timer_refresh(); - - return; } static int atm_mpoa_ioctl(struct socket *sock, unsigned int cmd, @@ -1561,8 +1531,6 @@ static void __exit atm_mpoa_cleanup(void) kfree(qos); qos = nextqos; } - - return; } module_init(atm_mpoa_init); diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c index e773d8336918..d1b2d9a03144 100644 --- a/net/atm/mpoa_caches.c +++ b/net/atm/mpoa_caches.c @@ -182,8 +182,6 @@ static void in_cache_put(in_cache_entry *entry) memset(entry, 0, sizeof(in_cache_entry)); kfree(entry); } - - return; } /* @@ -221,8 +219,6 @@ static void in_cache_remove_entry(in_cache_entry *entry, } vcc_release_async(vcc, -EPIPE); } - - return; } /* Call this every MPC-p2 seconds... Not exactly correct solution, @@ -248,8 +244,6 @@ static void clear_count_and_expired(struct mpoa_client *client) entry = next_entry; } write_unlock_bh(&client->ingress_lock); - - return; } /* Call this every MPC-p4 seconds. 
*/ @@ -334,8 +328,6 @@ static void in_destroy_cache(struct mpoa_client *mpc) while (mpc->in_cache != NULL) mpc->in_ops->remove_entry(mpc->in_cache, mpc); write_unlock_irq(&mpc->ingress_lock); - - return; } static eg_cache_entry *eg_cache_get_by_cache_id(__be32 cache_id, @@ -427,8 +419,6 @@ static void eg_cache_put(eg_cache_entry *entry) memset(entry, 0, sizeof(eg_cache_entry)); kfree(entry); } - - return; } /* @@ -463,8 +453,6 @@ static void eg_cache_remove_entry(eg_cache_entry *entry, } vcc_release_async(vcc, -EPIPE); } - - return; } static eg_cache_entry *eg_cache_add_entry(struct k_message *msg, @@ -509,8 +497,6 @@ static void update_eg_cache_entry(eg_cache_entry *entry, uint16_t holding_time) do_gettimeofday(&(entry->tv)); entry->entry_state = EGRESS_RESOLVED; entry->ctrl_info.holding_time = holding_time; - - return; } static void clear_expired(struct mpoa_client *client) @@ -537,8 +523,6 @@ static void clear_expired(struct mpoa_client *client) entry = next_entry; } write_unlock_irq(&client->egress_lock); - - return; } static void eg_destroy_cache(struct mpoa_client *mpc) @@ -547,8 +531,6 @@ static void eg_destroy_cache(struct mpoa_client *mpc) while (mpc->eg_cache != NULL) mpc->eg_ops->remove_entry(mpc->eg_cache, mpc); write_unlock_irq(&mpc->egress_lock); - - return; } @@ -584,6 +566,4 @@ void atm_mpoa_init_cache(struct mpoa_client *mpc) { mpc->in_ops = &ingress_ops; mpc->eg_ops = &egress_ops; - - return; } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 5e83f8e0877a..2f768de87011 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1316,8 +1316,6 @@ void hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags) } tasklet_schedule(&hdev->tx_task); - - return; } EXPORT_SYMBOL(hci_send_acl); diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 673a36886716..1b682a5aa061 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1322,8 +1322,6 @@ static void l2cap_drop_acked_frames(struct sock *sk) if (!l2cap_pi(sk)->unacked_frames) del_timer(&l2cap_pi(sk)->retrans_timer); - - return; } static inline void l2cap_do_send(struct sock *sk, struct sk_buff *skb) @@ -4667,7 +4665,6 @@ void l2cap_load(void) /* Dummy function to trigger automatic L2CAP module loading by * other modules that use L2CAP sockets but don't use any other * symbols from it. 
*/ - return; } EXPORT_SYMBOL(l2cap_load); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index cab71ea2796d..309b6c261b25 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -1014,8 +1014,6 @@ static void rfcomm_tty_set_termios(struct tty_struct *tty, struct ktermios *old) rfcomm_send_rpn(dev->dlc->session, 1, dev->dlc->dlci, baud, data_bits, stop_bits, parity, RFCOMM_RPN_FLOW_NONE, x_on, x_off, changes); - - return; } static void rfcomm_tty_throttle(struct tty_struct *tty) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 4767928a93d3..d0927d1fdada 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -273,7 +273,6 @@ static inline void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb) drop: kfree_skb(skb); - return; } /* -------- Socket interface ---------- */ diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 024fd5bb2d39..e2b86f1f5a47 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -112,7 +112,6 @@ static void caif_device_destroy(struct net_device *dev) spin_unlock_bh(&caifdevs->lock); kfree(caifd); - return; } static int transmit(struct cflayer *layer, struct cfpkt *pkt) diff --git a/net/can/bcm.c b/net/can/bcm.c index 907dc871fac8..9c65e9deb9c3 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -713,8 +713,6 @@ static void bcm_remove_op(struct bcm_op *op) kfree(op->last_frames); kfree(op); - - return; } static void bcm_rx_unreg(struct net_device *dev, struct bcm_op *op) diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 615dbe3b43f9..4c409b46aa35 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -1220,17 +1220,14 @@ void dn_dev_down(struct net_device *dev) void dn_dev_init_pkt(struct sk_buff *skb) { - return; } void dn_dev_veri_pkt(struct sk_buff *skb) { - return; } void dn_dev_hello(struct sk_buff *skb) { - return; } void dn_dev_devices_off(void) diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index a8432e399545..812e6dff6067 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -264,7 +264,6 @@ static struct dst_entry *dn_dst_negative_advice(struct dst_entry *dst) static void dn_dst_link_failure(struct sk_buff *skb) { - return; } static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index c97cd9ff697e..3a92a76ae41d 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -290,8 +290,6 @@ void cipso_v4_cache_invalidate(void) cipso_v4_cache[iter].size = 0; spin_unlock_bh(&cipso_v4_cache[iter].lock); } - - return; } /** diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index c98f115fb0fd..79d057a939ba 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1022,8 +1022,6 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) rcu_assign_pointer(t->trie, (struct node *)tn); tnode_free_flush(); - - return; } /* only used from updater-side */ diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 498cf69c7977..32618e11076d 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -502,7 +502,6 @@ static void ipgre_err(struct sk_buff *skb, u32 info) t->err_time = jiffies; out: rcu_read_unlock(); - return; } static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 3244133c24f6..ba9836c488ed 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -238,7 +238,6 @@ void ip_options_fragment(struct sk_buff * skb) opt->rr_needaddr = 0; opt->ts_needaddr = 
0; opt->ts_needtime = 0; - return; } /* diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 217ebe035b34..7bcacf627b46 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1606,7 +1606,6 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, out_free: kfree_skb(skb); - return; } static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 3f7c12b70a26..0abdc242ddb7 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -890,8 +890,6 @@ out: in6_ifa_put(ifp); else in6_dev_put(idev); - - return; } static void ndisc_recv_na(struct sk_buff *skb) diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 458eabfbe130..566798d69f37 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -168,7 +168,6 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void __percpu **mib) i & 0x100 ? "Out" : "In", i & 0xff); seq_printf(seq, "%-32s\t%lu\n", name, val); } - return; } static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **mib, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 05ebd7833043..294cbe8b0725 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -316,7 +316,6 @@ static void rt6_probe(struct rt6_info *rt) #else static inline void rt6_probe(struct rt6_info *rt) { - return; } #endif @@ -1553,7 +1552,6 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, out: dst_release(&rt->u.dst); - return; } /* diff --git a/net/irda/iriap.c b/net/irda/iriap.c index 79a1e5a23e10..fce364c6c71a 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -685,8 +685,6 @@ static void iriap_getvaluebyclass_indication(struct iriap_cb *self, /* We have a match; send the value. */ iriap_getvaluebyclass_response(self, obj->id, IAS_SUCCESS, attrib->value); - - return; } /* diff --git a/net/irda/irnet/irnet_irda.c b/net/irda/irnet/irnet_irda.c index df18ab4b6c5e..e98e40d76f4f 100644 --- a/net/irda/irnet/irnet_irda.c +++ b/net/irda/irnet/irnet_irda.c @@ -678,7 +678,6 @@ irda_irnet_destroy(irnet_socket * self) self->stsap_sel = 0; DEXIT(IRDA_SOCK_TRACE, "\n"); - return; } @@ -928,7 +927,6 @@ irnet_disconnect_server(irnet_socket * self, irttp_listen(self->tsap); DEXIT(IRDA_SERV_TRACE, "\n"); - return; } /*------------------------------------------------------------------*/ @@ -1013,7 +1011,6 @@ irnet_destroy_server(void) irda_irnet_destroy(&irnet_server.s); DEXIT(IRDA_SERV_TRACE, "\n"); - return; } diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 8be324fe08b9..c8b4599a752e 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -136,7 +136,6 @@ static void afiucv_pm_complete(struct device *dev) #ifdef CONFIG_PM_DEBUG printk(KERN_WARNING "afiucv_pm_complete\n"); #endif - return; } /** diff --git a/net/mac80211/debugfs.h b/net/mac80211/debugfs.h index 68e6a2050f9a..09cc9be34796 100644 --- a/net/mac80211/debugfs.h +++ b/net/mac80211/debugfs.h @@ -7,7 +7,6 @@ extern int mac80211_open_file_generic(struct inode *inode, struct file *file); #else static inline void debugfs_hw_add(struct ieee80211_local *local) { - return; } #endif diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 7e93524459fc..bde81031727a 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -287,8 +287,6 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) *pos++ |= sdata->u.mesh.accepting_plinks ? 
MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00; *pos++ = 0x00; - - return; } u32 mesh_table_hash(u8 *addr, struct ieee80211_sub_if_data *sdata, struct mesh_table *tbl) diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index d89ed7f2592b..0705018d8d1e 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -624,7 +624,6 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, fail: rcu_read_unlock(); sdata->u.mesh.mshstats.dropped_frames_no_route++; - return; } static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata, diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h index 07ae7fd82be1..1c1c093cf279 100644 --- a/net/netlabel/netlabel_addrlist.h +++ b/net/netlabel/netlabel_addrlist.h @@ -130,7 +130,6 @@ static inline void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf, int src, const char *dev, __be32 addr, __be32 mask) { - return; } #endif @@ -203,7 +202,6 @@ static inline void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf, const struct in6_addr *addr, const struct in6_addr *mask) { - return; } #endif #endif /* IPV6 */ diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index a3d64aabe2f7..e2b0a680dd56 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -670,7 +670,6 @@ static void netlbl_unlhsh_condremove_iface(struct netlbl_unlhsh_iface *iface) unlhsh_condremove_failure: spin_unlock(&netlbl_unlhsh_lock); - return; } /** diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 6ed61b10e002..f73542d2cdd0 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -602,7 +602,6 @@ static unsigned long flow_get(struct tcf_proto *tp, u32 handle) static void flow_put(struct tcf_proto *tp, unsigned long f) { - return; } static int flow_dump(struct tcf_proto *tp, unsigned long fh, diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index a435cf13cc27..abd904be4287 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -617,7 +617,6 @@ rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y) rtsc->y = y; rtsc->dx = dx; rtsc->dy = dy; - return; } static void diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index a9e646bdb605..f10e34a68445 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -44,7 +44,6 @@ static void ingress_put(struct Qdisc *sch, unsigned long cl) static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker) { - return; } static struct tcf_proto **ingress_find_tcf(struct Qdisc *sch, unsigned long cl) diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index b2aba3f5e6fa..fe91e50f9d98 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -174,7 +174,6 @@ static unsigned long mq_get(struct Qdisc *sch, u32 classid) static void mq_put(struct Qdisc *sch, unsigned long cl) { - return; } static int mq_dump_class(struct Qdisc *sch, unsigned long cl, diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index c50876cd8704..6ae251279fc2 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -340,7 +340,6 @@ static unsigned long multiq_bind(struct Qdisc *sch, unsigned long parent, static void multiq_put(struct Qdisc *q, unsigned long cl) { - return; } static int multiq_dump_class(struct Qdisc *sch, unsigned long cl, diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 81672e0c1b25..0748fb1e3a49 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -303,7 +303,6 @@ static unsigned long 
prio_bind(struct Qdisc *sch, unsigned long parent, u32 clas static void prio_put(struct Qdisc *q, unsigned long cl) { - return; } static int prio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 072cdf442f8e..8d42bb3ba540 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -303,7 +303,6 @@ static unsigned long red_get(struct Qdisc *sch, u32 classid) static void red_put(struct Qdisc *sch, unsigned long arg) { - return; } static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 3912420cedcc..e41feff19e43 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -816,8 +816,6 @@ void sctp_assoc_del_nonprimary_peers(struct sctp_association *asoc, if (t != primary) sctp_assoc_rm_peer(asoc, t); } - - return; } /* Engage in transport control operations. diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 5d057178ce0c..c04b2eb59186 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -80,7 +80,6 @@ static inline void sctp_outq_head_data(struct sctp_outq *q, { list_add(&ch->list, &q->out_chunk_list); q->out_qlen += ch->skb->len; - return; } /* Take data from the front of the queue. */ @@ -103,7 +102,6 @@ static inline void sctp_outq_tail_data(struct sctp_outq *q, { list_add_tail(&ch->list, &q->out_chunk_list); q->out_qlen += ch->skb->len; - return; } /* diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 784bcc9a979d..61aacfbbaa92 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -181,7 +181,6 @@ static void * sctp_eps_seq_start(struct seq_file *seq, loff_t *pos) static void sctp_eps_seq_stop(struct seq_file *seq, void *v) { - return; } @@ -286,7 +285,6 @@ static void * sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos) static void sctp_assocs_seq_stop(struct seq_file *seq, void *v) { - return; } @@ -409,7 +407,6 @@ static void *sctp_remaddr_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void sctp_remaddr_seq_stop(struct seq_file *seq, void *v) { - return; } static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 22e670200449..f5e5e27cac5e 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -892,8 +892,6 @@ static void sctp_cmd_process_fwdtsn(struct sctp_ulpq *ulpq, sctp_walk_fwdtsn(skip, chunk) { sctp_ulpq_skip(ulpq, ntohs(skip->stream), ntohs(skip->ssn)); } - - return; } /* Helper function to remove the association non-primary peer @@ -912,8 +910,6 @@ static void sctp_cmd_del_non_primary(struct sctp_association *asoc) sctp_assoc_del_peer(asoc, &t->ipaddr); } } - - return; } /* Helper function to set sk_err on a 1-1 style socket. */ diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 3a448536f0b6..c7f7e49609cb 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -955,7 +955,6 @@ void sctp_ulpq_skip(struct sctp_ulpq *ulpq, __u16 sid, __u16 ssn) * ordering and deliver them if needed. 
*/ sctp_ulpq_reap_ordered(ulpq, sid); - return; } static __u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq, @@ -1064,7 +1063,6 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, } sk_mem_reclaim(asoc->base.sk); - return; } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 19c9983d5360..462462eaf296 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1518,7 +1518,6 @@ call_refreshresult(struct rpc_task *task) task->tk_action = call_refresh; if (status != -ETIMEDOUT) rpc_delay(task, 3*HZ); - return; } static __be32 * diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index ce0d5b35c2ac..76e504bf74d0 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -150,7 +150,6 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh) } break; } - return; } /* diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 699ade68aac1..2e3d502ffe87 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -716,7 +716,6 @@ void xprt_connect(struct rpc_task *task) xprt->stat.connect_start = jiffies; xprt->ops->connect(task); } - return; } static void xprt_connect_status(struct rpc_task *task) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 9847c30b5001..6e0df664b255 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1050,8 +1050,6 @@ static inline void xs_tcp_read_common(struct rpc_xprt *xprt, if (transport->tcp_flags & TCP_RCV_LAST_FRAG) transport->tcp_flags &= ~TCP_RCV_COPY_DATA; } - - return; } /* @@ -2210,7 +2208,6 @@ static int bc_send_request(struct rpc_task *task) static void bc_close(struct rpc_xprt *xprt) { - return; } /* @@ -2220,7 +2217,6 @@ static void bc_close(struct rpc_xprt *xprt) static void bc_destroy(struct rpc_xprt *xprt) { - return; } static struct rpc_xprt_ops xs_udp_ops = { diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 53196009160a..ca84212cfbfe 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -82,7 +82,6 @@ static int __net_init sysctl_net_init(struct net *net) static void __net_exit sysctl_net_exit(struct net *net) { WARN_ON(!list_empty(&net->sysctls.list)); - return; } static struct pernet_operations sysctl_pernet_ops = { diff --git a/net/wimax/stack.c b/net/wimax/stack.c index 62b1a6662209..ee99e7dfcdba 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -320,7 +320,6 @@ void __wimax_state_change(struct wimax_dev *wimax_dev, enum wimax_st new_state) out: d_fnend(3, dev, "(wimax_dev %p new_state %u [old %u]) = void\n", wimax_dev, new_state, old_state); - return; } @@ -362,7 +361,6 @@ void wimax_state_change(struct wimax_dev *wimax_dev, enum wimax_st new_state) if (wimax_dev->state > __WIMAX_ST_NULL) __wimax_state_change(wimax_dev, new_state); mutex_unlock(&wimax_dev->mutex); - return; } EXPORT_SYMBOL_GPL(wimax_state_change); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f4ea3a08e5a1..d965a2bad8d3 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2209,7 +2209,6 @@ EXPORT_SYMBOL(xfrm_dst_ifdown); static void xfrm_link_failure(struct sk_buff *skb) { /* Impossible. Such dst must be popped before reaches point of failure. 
*/ - return; } static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) -- cgit v1.2.2 From b3bcb72edb29c52fb0a065d5b1c7cf40ed9287f4 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 18 May 2010 12:26:27 -0700 Subject: bridge: fix build for CONFIG_SYSFS disabled Fix build when CONFIG_SYSFS is not enabled: net/bridge/br_if.c:136: error: 'struct net_bridge_port' has no member named 'sysfs_name' Note: dev->name == sysfs_name except when change name is in progress, and we are protected from that by RTNL mutex. Signed-off-by: Randy Dunlap Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_if.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index f25e3c92bd72..18b245e2c00e 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -133,7 +133,7 @@ static void del_nbp(struct net_bridge_port *p) struct net_bridge *br = p->br; struct net_device *dev = p->dev; - sysfs_remove_link(br->ifobj, p->sysfs_name); + sysfs_remove_link(br->ifobj, p->dev->name); dev_set_promiscuity(dev, -1); -- cgit v1.2.2 From e9d3e084975869754d16f639378675c353560be9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 18 May 2010 15:36:06 -0700 Subject: ipv6: Replace inet6_ifaddr->dead with state This patch replaces the boolean dead flag on inet6_ifaddr with a state enum. This allows us to roll back changes when deleting an address according to whether DAD has completed or not. This patch only adds the state field and does not change the logic. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 75d3b8c1e856..4e5ad9de1679 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -553,7 +553,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) if (del_timer(&ifp->timer)) pr_notice("Timer is still running, when freeing ifa=%p\n", ifp); - if (!ifp->dead) { + if (ifp->state != INET6_IFADDR_STATE_DEAD) { pr_warning("Freeing alive inet6 address %p\n", ifp); return; } @@ -648,6 +648,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, ipv6_addr_copy(&ifa->addr, addr); spin_lock_init(&ifa->lock); + spin_lock_init(&ifa->state_lock); init_timer(&ifa->timer); INIT_HLIST_NODE(&ifa->addr_lst); ifa->timer.data = (unsigned long) ifa; @@ -720,7 +721,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) hash = ipv6_addr_hash(&ifp->addr); - ifp->dead = 1; + ifp->state = INET6_IFADDR_STATE_DEAD; spin_lock_bh(&addrconf_hash_lock); hlist_del_init_rcu(&ifp->addr_lst); @@ -2665,7 +2666,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) ifa = list_first_entry(&idev->tempaddr_list, struct inet6_ifaddr, tmp_list); list_del(&ifa->tmp_list); - ifa->dead = 1; + ifa->state = INET6_IFADDR_STATE_DEAD; write_unlock_bh(&idev->lock); spin_lock_bh(&ifa->lock); @@ -2707,7 +2708,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) write_unlock_bh(&idev->lock); } else { list_del(&ifa->if_list); - ifa->dead = 1; + ifa->state = INET6_IFADDR_STATE_DEAD; write_unlock_bh(&idev->lock); /* clear hash table */ @@ -2717,7 +2718,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) } __ipv6_ifa_notify(RTM_DELADDR, ifa); - if (ifa->dead) + if (ifa->state == INET6_IFADDR_STATE_DEAD) atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); in6_ifa_put(ifa); @@ -2815,7 +2816,7 @@ static void 
addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags) net_srandom(ifp->addr.s6_addr32[3]); read_lock_bh(&idev->lock); - if (ifp->dead) + if (ifp->state == INET6_IFADDR_STATE_DEAD) goto out; spin_lock(&ifp->lock); @@ -4050,7 +4051,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) addrconf_leave_solict(ifp->idev, &ifp->addr); dst_hold(&ifp->rt->u.dst); - if (ifp->dead && ip6_del_rt(ifp->rt)) + if (ifp->state == INET6_IFADDR_STATE_DEAD && + ip6_del_rt(ifp->rt)) dst_free(&ifp->rt->u.dst); break; } -- cgit v1.2.2 From 4c5ff6a6fe794f102479db998c69054319279e3c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 18 May 2010 15:54:18 -0700 Subject: ipv6: Use state_lock to protect ifa state This patch makes use of the new state_lock to synchronise between updates to the ifa state. This fixes the issue where a remotely triggered address deletion (through DAD failure) coincides with a local administrative address deletion, causing certain actions to be performed twice incorrectly. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4e5ad9de1679..2e42162c9042 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -715,13 +715,20 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) { struct inet6_ifaddr *ifa, *ifn; struct inet6_dev *idev = ifp->idev; + int state; int hash; int deleted = 0, onlink = 0; unsigned long expires = jiffies; hash = ipv6_addr_hash(&ifp->addr); + spin_lock_bh(&ifp->state_lock); + state = ifp->state; ifp->state = INET6_IFADDR_STATE_DEAD; + spin_unlock_bh(&ifp->state_lock); + + if (state == INET6_IFADDR_STATE_DEAD) + goto out; spin_lock_bh(&addrconf_hash_lock); hlist_del_init_rcu(&ifp->addr_lst); @@ -819,6 +826,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) dst_release(&rt->u.dst); } +out: in6_ifa_put(ifp); } @@ -2626,6 +2634,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) struct inet6_dev *idev; struct inet6_ifaddr *ifa; LIST_HEAD(keep_list); + int state; ASSERT_RTNL(); @@ -2666,7 +2675,6 @@ static int addrconf_ifdown(struct net_device *dev, int how) ifa = list_first_entry(&idev->tempaddr_list, struct inet6_ifaddr, tmp_list); list_del(&ifa->tmp_list); - ifa->state = INET6_IFADDR_STATE_DEAD; write_unlock_bh(&idev->lock); spin_lock_bh(&ifa->lock); @@ -2704,23 +2712,34 @@ static int addrconf_ifdown(struct net_device *dev, int how) /* Flag it for later restoration when link comes up */ ifa->flags |= IFA_F_TENTATIVE; - in6_ifa_hold(ifa); + write_unlock_bh(&idev->lock); + + in6_ifa_hold(ifa); } else { list_del(&ifa->if_list); - ifa->state = INET6_IFADDR_STATE_DEAD; - write_unlock_bh(&idev->lock); /* clear hash table */ spin_lock_bh(&addrconf_hash_lock); hlist_del_init_rcu(&ifa->addr_lst); spin_unlock_bh(&addrconf_hash_lock); + + write_unlock_bh(&idev->lock); + spin_lock_bh(&ifa->state_lock); + state = ifa->state; + ifa->state = INET6_IFADDR_STATE_DEAD; + spin_unlock_bh(&ifa->state_lock); + + if (state == INET6_IFADDR_STATE_DEAD) + goto put_ifa; } __ipv6_ifa_notify(RTM_DELADDR, ifa); if (ifa->state == INET6_IFADDR_STATE_DEAD) atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); + +put_ifa: in6_ifa_put(ifa); write_lock_bh(&idev->lock); -- cgit v1.2.2 From f2344a131bccdbfc5338e17fa71a807dee7944fa Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 18 May 2010 15:55:27 -0700 Subject: ipv6: Use POSTDAD state This patch makes use of 
the new POSTDAD state. This prevents a race between DAD completion and failure. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2e42162c9042..7c769fa81d97 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1406,10 +1406,27 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) ipv6_del_addr(ifp); } +static int addrconf_dad_end(struct inet6_ifaddr *ifp) +{ + int err = -ENOENT; + + spin_lock(&ifp->state_lock); + if (ifp->state == INET6_IFADDR_STATE_DAD) { + ifp->state = INET6_IFADDR_STATE_POSTDAD; + err = 0; + } + spin_unlock(&ifp->state_lock); + + return err; +} + void addrconf_dad_failure(struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; + if (addrconf_dad_end(ifp)) + return; + if (net_ratelimit()) printk(KERN_INFO "%s: IPv6 duplicate address %pI6c detected!\n", ifp->idev->dev->name, &ifp->addr); @@ -2712,6 +2729,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) /* Flag it for later restoration when link comes up */ ifa->flags |= IFA_F_TENTATIVE; + ifa->state = INET6_IFADDR_STATE_DAD; write_unlock_bh(&idev->lock); @@ -2883,6 +2901,9 @@ static void addrconf_dad_timer(unsigned long data) struct inet6_dev *idev = ifp->idev; struct in6_addr mcaddr; + if (!ifp->probes && addrconf_dad_end(ifp)) + goto out; + read_lock(&idev->lock); if (idev->dead || !(idev->if_flags & IF_READY)) { read_unlock(&idev->lock); @@ -2956,12 +2977,10 @@ static void addrconf_dad_run(struct inet6_dev *idev) read_lock_bh(&idev->lock); list_for_each_entry(ifp, &idev->addr_list, if_list) { spin_lock(&ifp->lock); - if (!(ifp->flags & IFA_F_TENTATIVE)) { - spin_unlock(&ifp->lock); - continue; - } + if (ifp->flags & IFA_F_TENTATIVE && + ifp->state == INET6_IFADDR_STATE_DAD) + addrconf_dad_kick(ifp); spin_unlock(&ifp->lock); - addrconf_dad_kick(ifp); } read_unlock_bh(&idev->lock); } -- cgit v1.2.2 From 622ccdf107bcb49c4d8fb65512652566d4c8928a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 18 May 2010 15:56:06 -0700 Subject: ipv6: Never schedule DAD timer on dead address This patch ensures that all places that schedule the DAD timer look at the address state in a safe manner before scheduling the timer. This ensures that we don't end up with pending timers after deleting an address. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 7c769fa81d97..e1a698df5706 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2853,10 +2853,10 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags) net_srandom(ifp->addr.s6_addr32[3]); read_lock_bh(&idev->lock); + spin_lock(&ifp->lock); if (ifp->state == INET6_IFADDR_STATE_DEAD) goto out; - spin_lock(&ifp->lock); if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || idev->cnf.accept_dad < 1 || !(ifp->flags&IFA_F_TENTATIVE) || @@ -2890,8 +2890,8 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags) ip6_ins_rt(ifp->rt); addrconf_dad_kick(ifp); - spin_unlock(&ifp->lock); out: + spin_unlock(&ifp->lock); read_unlock_bh(&idev->lock); } @@ -2911,6 +2911,12 @@ static void addrconf_dad_timer(unsigned long data) } spin_lock(&ifp->lock); + if (ifp->state == INET6_IFADDR_STATE_DEAD) { + spin_unlock(&ifp->lock); + read_unlock(&idev->lock); + goto out; + } + if (ifp->probes == 0) { /* * DAD was successful -- cgit v1.2.2 From dc3f5e68f846eec38fb31d78f0b6e83633ad375e Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 13 Apr 2010 16:11:50 +0300 Subject: trans_virtio: use virtqueue_xxx wrappers Switch trans_virtio to new virtqueue_xxx wrappers. Signed-off-by: Michael S. Tsirkin Signed-off-by: Rusty Russell --- net/9p/trans_virtio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 7eb78ecc1618..dcfbe99ff81c 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -137,7 +137,7 @@ static void req_done(struct virtqueue *vq) P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n"); - while ((rc = chan->vq->vq_ops->get_buf(chan->vq, &len)) != NULL) { + while ((rc = virtqueue_get_buf(chan->vq, &len)) != NULL) { P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); req = p9_tag_lookup(chan->client, rc->tag); @@ -209,13 +209,13 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req) req->status = REQ_STATUS_SENT; - if (chan->vq->vq_ops->add_buf(chan->vq, chan->sg, out, in, req->tc) < 0) { + if (virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc) < 0) { P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio rpc add_buf returned failure"); return -EIO; } - chan->vq->vq_ops->kick(chan->vq); + virtqueue_kick(chan->vq); P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n"); return 0; -- cgit v1.2.2 From fc350777c705a39a312728ac5e8a6f164a828f5d Mon Sep 17 00:00:00 2001 From: Joerg Marx Date: Thu, 20 May 2010 15:55:30 +0200 Subject: netfilter: nf_conntrack: fix a race in __nf_conntrack_confirm against nf_ct_get_next_corpse() This race was triggered by a 'conntrack -F' command running in parallel to the insertion of a hash for a new connection. Losing this race led to a dead conntrack entry effectively blocking traffic for a particular connection until timeout or flushing the conntrack hashes again. Now the check for an already dying connection is done inside the lock. 
Signed-off-by: Joerg Marx Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_core.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index b83c530c5e0a..eeeb8bc73982 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -424,6 +424,16 @@ __nf_conntrack_confirm(struct sk_buff *skb) spin_lock_bh(&nf_conntrack_lock); + /* We have to check the DYING flag inside the lock to prevent + a race against nf_ct_get_next_corpse() possibly called from + user context, else we insert an already 'dead' hash, blocking + further use of that particular connection -JM */ + + if (unlikely(nf_ct_is_dying(ct))) { + spin_unlock_bh(&nf_conntrack_lock); + return NF_ACCEPT; + } + /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're not in the hash. If there is, we lost race. */ -- cgit v1.2.2 From 622e0ca1cd4d459f5af4f2c65f4dc0dd823cb4c3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 20 May 2010 23:07:56 -0700 Subject: gro: Fix bogus gso_size on the first fraglist entry When GRO produces fraglist entries, and the resulting skb hits an interface that is incapable of TSO but capable of FRAGLIST, we end up producing a bogus packet with gso_size non-zero. This was reported in the field with older versions of KVM that did not set the TSO bits on tuntap. This patch fixes that. Reported-by: Igor Zhang Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/skbuff.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c543dd252433..4c11000a96aa 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2718,6 +2718,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p); skb_shinfo(nskb)->frag_list = p; skb_shinfo(nskb)->gso_size = pinfo->gso_size; + pinfo->gso_size = 0; skb_header_release(p); nskb->prev = p; -- cgit v1.2.2 From 76cc8b13a6e41b537fd262b600da1571314add62 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 20 May 2010 18:37:59 +0000 Subject: net: fix problem in dequeuing from input_pkt_queue Fix some issues introduced in batch skb dequeuing for input_pkt_queue. The primary issue is that the queue head must be incremented only after a packet has been processed, that is only after __netif_receive_skb has been called. This is needed for the mechanism to prevent OOO packets in RFS. Also when flushing the input_pkt_queue and process_queue, the process queue should be done first to prevent OOO packets. Because the input_pkt_queue has been effectively split into two queues, the calculation of the tail ptr is no longer correct. The correct value would be head+input_pkt_queue->len+process_queue->len. To avoid this calculation we added an explicit input_queue_tail in softnet_data. The tail value is simply incremented when queuing to input_pkt_queue. Signed-off-by: Tom Herbert Acked-by: Eric Dumazet Signed-off-by: David S.
Miller --- net/core/dev.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 6c820650b80f..0aab66d68b19 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2426,10 +2426,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, if (skb_queue_len(&sd->input_pkt_queue)) { enqueue: __skb_queue_tail(&sd->input_pkt_queue, skb); -#ifdef CONFIG_RPS - *qtail = sd->input_queue_head + - skb_queue_len(&sd->input_pkt_queue); -#endif + input_queue_tail_incr_save(sd, qtail); rps_unlock(sd); local_irq_restore(flags); return NET_RX_SUCCESS; @@ -2964,7 +2961,7 @@ static void flush_backlog(void *arg) if (skb->dev == dev) { __skb_unlink(skb, &sd->input_pkt_queue); kfree_skb(skb); - input_queue_head_add(sd, 1); + input_queue_head_incr(sd); } } rps_unlock(sd); @@ -2973,6 +2970,7 @@ static void flush_backlog(void *arg) if (skb->dev == dev) { __skb_unlink(skb, &sd->process_queue); kfree_skb(skb); + input_queue_head_incr(sd); } } } @@ -3328,18 +3326,20 @@ static int process_backlog(struct napi_struct *napi, int quota) while ((skb = __skb_dequeue(&sd->process_queue))) { local_irq_enable(); __netif_receive_skb(skb); - if (++work >= quota) - return work; local_irq_disable(); + input_queue_head_incr(sd); + if (++work >= quota) { + local_irq_enable(); + return work; + } } rps_lock(sd); qlen = skb_queue_len(&sd->input_pkt_queue); - if (qlen) { - input_queue_head_add(sd, qlen); + if (qlen) skb_queue_splice_tail_init(&sd->input_pkt_queue, &sd->process_queue); - } + if (qlen < quota - work) { /* * Inline a custom version of __napi_complete(). @@ -5679,12 +5679,14 @@ static int dev_cpu_callback(struct notifier_block *nfb, local_irq_enable(); /* Process offline CPU's input_pkt_queue */ - while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { + while ((skb = __skb_dequeue(&oldsd->process_queue))) { netif_rx(skb); - input_queue_head_add(oldsd, 1); + input_queue_head_incr(oldsd); } - while ((skb = __skb_dequeue(&oldsd->process_queue))) + while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { netif_rx(skb); + input_queue_head_incr(oldsd); + } return NOTIFY_OK; } -- cgit v1.2.2 From 2c3c8bea608866d8bd9dcf92657d57fdcac011c5 Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Wed, 12 May 2010 18:28:57 -0700 Subject: sysfs: add struct file* to bin_attr callbacks This allows bin_attr->read,write,mmap callbacks to check file specific data (such as inode owner) as part of any privilege validation. Signed-off-by: Chris Wright Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_sysfs_br.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index dd321e39e621..486b8f3861d2 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -659,7 +659,7 @@ static struct attribute_group bridge_group = { * * Returns the number of bytes read. */ -static ssize_t brforward_read(struct kobject *kobj, +static ssize_t brforward_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { -- cgit v1.2.2 From 608b4b9548dedf4185ca47edcaae4bff2ceb62de Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 4 May 2010 17:36:45 -0700 Subject: netns: Teach network device kobjects which namespace they are in. The problem. Network devices show up in sysfs and with the network namespace active multiple devices with the same name can show up in the same directory, ouch! 
To avoid that problem and allow existing applications in network namespaces to see the same interface that is currently presented in sysfs, this patch enables the tagging directory support in sysfs. By using the network namespace pointers as tags to separate out the the sysfs directory entries we ensure that we don't have conflicts in the directories and applications only see a limited set of the network devices. Signed-off-by: Eric W. Biederman Acked-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/Kconfig | 8 ++++++++ net/core/net-sysfs.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index 0d68b40fc0e6..f49532053a98 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -45,6 +45,14 @@ config COMPAT_NETLINK_MESSAGES menu "Networking options" +config NET_NS + bool "Network namespace support" + default n + depends on EXPERIMENTAL && NAMESPACES + help + Allow user space to create what appear to be multiple instances + of the network stack. + source "net/packet/Kconfig" source "net/unix/Kconfig" source "net/xfrm/Kconfig" diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index c57c4b228bb5..b388cdab9316 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -14,7 +14,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -766,6 +768,38 @@ static void rx_queue_remove_kobjects(struct net_device *net) kset_unregister(net->queues_kset); } #endif /* CONFIG_RPS */ + +static const void *net_current_ns(void) +{ + return current->nsproxy->net_ns; +} + +static const void *net_initial_ns(void) +{ + return &init_net; +} + +static const void *net_netlink_ns(struct sock *sk) +{ + return sock_net(sk); +} + +static struct kobj_ns_type_operations net_ns_type_operations = { + .type = KOBJ_NS_TYPE_NET, + .current_ns = net_current_ns, + .netlink_ns = net_netlink_ns, + .initial_ns = net_initial_ns, +}; + +static void net_kobj_ns_exit(struct net *net) +{ + kobj_ns_exit(KOBJ_NS_TYPE_NET, net); +} + +static struct pernet_operations sysfs_net_ops = { + .exit = net_kobj_ns_exit, +}; + #endif /* CONFIG_SYSFS */ #ifdef CONFIG_HOTPLUG @@ -806,6 +840,13 @@ static void netdev_release(struct device *d) kfree((char *)dev - dev->padded); } +static const void *net_namespace(struct device *d) +{ + struct net_device *dev; + dev = container_of(d, struct net_device, dev); + return dev_net(dev); +} + static struct class net_class = { .name = "net", .dev_release = netdev_release, @@ -815,6 +856,8 @@ static struct class net_class = { #ifdef CONFIG_HOTPLUG .dev_uevent = netdev_uevent, #endif + .ns_type = &net_ns_type_operations, + .namespace = net_namespace, }; /* Delete sysfs entries but hold kobject reference until after all @@ -904,5 +947,9 @@ void netdev_initialize_kobject(struct net_device *net) int netdev_kobject_init(void) { + kobj_ns_type_register(&net_ns_type_operations); +#ifdef CONFIG_SYSFS + register_pernet_subsys(&sysfs_net_ops); +#endif return class_register(&net_class); } -- cgit v1.2.2 From d6523ddf2376f39eaa89a4d68a33052d20c138b9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 16 May 2010 21:59:45 -0700 Subject: net/sysfs: Fix the bitrot in network device kobject namespace support I had a couple of stupid bugs in: netns: Teach network device kobjects which namespace they are in. 
- I duplicated the Kconfig for the NET_NS - The build was broken when sysfs was not compiled in The sysfs breakage is because after I moved the operations for the sysfs to the kobject layer, to make things cleaner I forgot to move the ifdefs. Oops. I'm not quite certain how I introduced a second NET_NS Kconfig, but it was probably a 3 way merge somewhere along the way that did not notice that the NET_NS Kconfig option had moved and thought that was a bug. It probably slipped in because the sysfs patches used to be the first patches in my network namespace patches. Some things just don't go like you would expect. Neither of these bugs actually affects anything in the common case but they should be fixed. Thanks to Serge for noticing they were present. Reported-by: Serge E. Hallyn Signed-off-by: Eric W. Biederman Acked-by: David S. Miller --- net/Kconfig | 8 -------- net/core/net-sysfs.c | 8 +++----- 2 files changed, 3 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index f49532053a98..0d68b40fc0e6 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -45,14 +45,6 @@ config COMPAT_NETLINK_MESSAGES menu "Networking options" -config NET_NS - bool "Network namespace support" - default n - depends on EXPERIMENTAL && NAMESPACES - help - Allow user space to create what appear to be multiple instances - of the network stack. - source "net/packet/Kconfig" source "net/unix/Kconfig" source "net/xfrm/Kconfig" diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index b388cdab9316..6881e65944c8 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -469,6 +469,7 @@ static struct attribute_group wireless_group = { .attrs = wireless_attrs, }; #endif +#endif /* CONFIG_SYSFS */ #ifdef CONFIG_RPS /* @@ -796,11 +797,10 @@ static void net_kobj_ns_exit(struct net *net) kobj_ns_exit(KOBJ_NS_TYPE_NET, net); } -static struct pernet_operations sysfs_net_ops = { +static struct pernet_operations kobj_net_ops = { .exit = net_kobj_ns_exit, }; -#endif /* CONFIG_SYSFS */ #ifdef CONFIG_HOTPLUG static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) @@ -948,8 +948,6 @@ void netdev_initialize_kobject(struct net_device *net) int netdev_kobject_init(void) { kobj_ns_type_register(&net_ns_type_operations); -#ifdef CONFIG_SYSFS - register_pernet_subsys(&sysfs_net_ops); -#endif + register_pernet_subsys(&kobj_net_ops); return class_register(&net_class); } -- cgit v1.2.2 From 910a7e905f36e51a17d6e8bb4ad6dcd5ac5f1d53 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 4 May 2010 17:36:46 -0700 Subject: netlink: Implement netlink_broadcast_filtered When netlink sockets are used to convey data that is in a namespace we need a way to select a subset of the listening sockets to deliver the packet to. For the network namespace we have been doing this by only transmitting packets in the correct network namespace. For data belonging to other namespaces netlink_broadcast_filtered provides a mechanism that allows us to examine the destination socket and to decide if we should transmit the specified packet to it. Signed-off-by: Eric W. Biederman Acked-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 6464a1972a69..a2eb965207d3 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -978,6 +978,8 @@ struct netlink_broadcast_data { int delivered; gfp_t allocation; struct sk_buff *skb, *skb2; + int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data); + void *tx_data; }; static inline int do_one_broadcast(struct sock *sk, @@ -1020,6 +1022,9 @@ static inline int do_one_broadcast(struct sock *sk, p->failure = 1; if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR) p->delivery_failure = 1; + } else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) { + kfree_skb(p->skb2); + p->skb2 = NULL; } else if (sk_filter(sk, p->skb2)) { kfree_skb(p->skb2); p->skb2 = NULL; @@ -1038,8 +1043,10 @@ out: return 0; } -int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, - u32 group, gfp_t allocation) +int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid, + u32 group, gfp_t allocation, + int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data), + void *filter_data) { struct net *net = sock_net(ssk); struct netlink_broadcast_data info; @@ -1059,6 +1066,8 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, info.allocation = allocation; info.skb = skb; info.skb2 = NULL; + info.tx_filter = filter; + info.tx_data = filter_data; /* While we sleep in clone, do not allow to change socket list */ @@ -1083,6 +1092,14 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, } return -ESRCH; } +EXPORT_SYMBOL(netlink_broadcast_filtered); + +int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, + u32 group, gfp_t allocation) +{ + return netlink_broadcast_filtered(ssk, skb, pid, group, allocation, + NULL, NULL); +} EXPORT_SYMBOL(netlink_broadcast); struct netlink_set_err_data { -- cgit v1.2.2 From a1b3f594dc5faab91d3a218c7019e9b5edd9fe1a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 4 May 2010 17:36:49 -0700 Subject: net: Expose all network devices in a namespaces in sysfs This reverts commit aaf8cdc34ddba08122f02217d9d684e2f9f5d575. Drivers like the ipw2100 call device_create_group when they are initialized and device_remove_group when they are shutdown. Moving them between namespaces deletes their sysfs groups early. In particular the following call chain results. netdev_unregister_kobject -> device_del -> kobject_del -> sysfs_remove_dir With sysfs_remove_dir recursively deleting all of it's subdirectories, and nothing adding them back. Ouch! Therefore we need to call something that ultimate calls sysfs_mv_dir as that sysfs function can move sysfs directories between namespaces without deleting their subdirectories or their contents. Allowing us to avoid placing extra boiler plate into every driver that does something interesting with sysfs. Currently the function that provides that capability is device_rename. That is the code works without nasty side effects as originally written. So remove the misguided fix for moving devices between namespaces. The bug in the kobject layer that inspired it has now been recognized and fixed. Signed-off-by: Eric W. Biederman Acked-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 28 +++++----------------------- net/core/net-sysfs.c | 16 +--------------- net/core/net-sysfs.h | 1 - 3 files changed, 6 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 6c820650b80f..d273e4e3ecdc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1002,15 +1002,10 @@ int dev_change_name(struct net_device *dev, const char *newname) return err; rollback: - /* For now only devices in the initial network namespace - * are in sysfs. - */ - if (net_eq(net, &init_net)) { - ret = device_rename(&dev->dev, dev->name); - if (ret) { - memcpy(dev->name, oldname, IFNAMSIZ); - return ret; - } + ret = device_rename(&dev->dev, dev->name); + if (ret) { + memcpy(dev->name, oldname, IFNAMSIZ); + return ret; } write_lock_bh(&dev_base_lock); @@ -4994,8 +4989,6 @@ int register_netdevice(struct net_device *dev) if (dev->features & NETIF_F_SG) dev->features |= NETIF_F_GSO; - netdev_initialize_kobject(dev); - ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); ret = notifier_to_errno(ret); if (ret) @@ -5547,15 +5540,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char if (dev->features & NETIF_F_NETNS_LOCAL) goto out; -#ifdef CONFIG_SYSFS - /* Don't allow real devices to be moved when sysfs - * is enabled. - */ - err = -EINVAL; - if (dev->dev.parent) - goto out; -#endif - /* Ensure the device has been registrered */ err = -EINVAL; if (dev->reg_state != NETREG_REGISTERED) @@ -5606,8 +5590,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char dev_uc_flush(dev); dev_mc_flush(dev); - netdev_unregister_kobject(dev); - /* Actually switch the network namespace */ dev_net_set(dev, net); @@ -5620,7 +5602,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char } /* Fixup kobjects */ - err = netdev_register_kobject(dev); + err = device_rename(&dev->dev, dev->name); WARN_ON(err); /* Add the device back in the hashes */ diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 6881e65944c8..99e7052d7323 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -808,9 +808,6 @@ static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) struct net_device *dev = to_net_dev(d); int retval; - if (!net_eq(dev_net(dev), &init_net)) - return 0; - /* pass interface to uevent. 
*/ retval = add_uevent_var(env, "INTERFACE=%s", dev->name); if (retval) @@ -869,9 +866,6 @@ void netdev_unregister_kobject(struct net_device * net) kobject_get(&dev->kobj); - if (!net_eq(dev_net(net), &init_net)) - return; - #ifdef CONFIG_RPS rx_queue_remove_kobjects(net); #endif @@ -886,6 +880,7 @@ int netdev_register_kobject(struct net_device *net) const struct attribute_group **groups = net->sysfs_groups; int error = 0; + device_initialize(dev); dev->class = &net_class; dev->platform_data = net; dev->groups = groups; @@ -908,9 +903,6 @@ int netdev_register_kobject(struct net_device *net) #endif #endif /* CONFIG_SYSFS */ - if (!net_eq(dev_net(net), &init_net)) - return 0; - error = device_add(dev); if (error) return error; @@ -939,12 +931,6 @@ void netdev_class_remove_file(struct class_attribute *class_attr) EXPORT_SYMBOL(netdev_class_create_file); EXPORT_SYMBOL(netdev_class_remove_file); -void netdev_initialize_kobject(struct net_device *net) -{ - struct device *device = &(net->dev); - device_initialize(device); -} - int netdev_kobject_init(void) { kobj_ns_type_register(&net_ns_type_operations); diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h index 14e7524260b3..805555e8b187 100644 --- a/net/core/net-sysfs.h +++ b/net/core/net-sysfs.h @@ -4,5 +4,4 @@ int netdev_kobject_init(void); int netdev_register_kobject(struct net_device *); void netdev_unregister_kobject(struct net_device *); -void netdev_initialize_kobject(struct net_device *); #endif -- cgit v1.2.2 From 22fe88d3d85850267ff4535b465794a5768f868a Mon Sep 17 00:00:00 2001 From: Sujith Date: Thu, 13 May 2010 10:34:08 +0530 Subject: cfg80211: Fix signal_type comparison signal_type is enum cfg80211_signal_type. This fixes the gcc warning: scan.c: In function `cfg80211_inform_bss': scan.c:518:6: warning: comparison between `enum cfg80211_signal_type' and `enum nl80211_bss' scan.c: In function `cfg80211_inform_bss_frame': scan.c:574:6: warning: comparison between `enum cfg80211_signal_type' and `enum nl80211_bss' Signed-off-by: Sujith Signed-off-by: John W. Linville --- net/wireless/scan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index a026c6d56bd3..58401d246bda 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -515,7 +515,7 @@ cfg80211_inform_bss(struct wiphy *wiphy, privsz = wiphy->bss_priv_size; - if (WARN_ON(wiphy->signal_type == NL80211_BSS_SIGNAL_UNSPEC && + if (WARN_ON(wiphy->signal_type == CFG80211_SIGNAL_TYPE_UNSPEC && (signal < 0 || signal > 100))) return NULL; @@ -571,7 +571,7 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, u.probe_resp.variable); size_t privsz = wiphy->bss_priv_size; - if (WARN_ON(wiphy->signal_type == NL80211_BSS_SIGNAL_UNSPEC && + if (WARN_ON(wiphy->signal_type == CFG80211_SIGNAL_TYPE_UNSPEC && (signal < 0 || signal > 100))) return NULL; -- cgit v1.2.2 From 9fbc630c89fd210e15ffe84fd6e968a2d39000b0 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 15 May 2010 15:46:17 +0200 Subject: cfg80211: fix crash in cfg80211_set_freq() Since wdev can be NULL, check it before dereferencing it Signed-off-by: Felix Fietkau Signed-off-by: John W. 
Linville --- net/wireless/chan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/chan.c b/net/wireless/chan.c index d92d088026bf..b01a6f6397d7 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -50,7 +50,7 @@ int cfg80211_set_freq(struct cfg80211_registered_device *rdev, struct ieee80211_channel *chan; int result; - if (wdev->iftype == NL80211_IFTYPE_MONITOR) + if (wdev && wdev->iftype == NL80211_IFTYPE_MONITOR) wdev = NULL; if (wdev) { -- cgit v1.2.2 From 579d7534ca83235794b6d9ef3cd473ffc14e9d42 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 18 May 2010 14:36:34 +0200 Subject: cfg80211: add missing braces Specifying a valid channel type will get goto out rather than continuing, due to missing braces. This affects both remain on channel and action frame TX commands. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index aaa1aad566cd..db71150b8040 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4443,9 +4443,10 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, if (channel_type != NL80211_CHAN_NO_HT && channel_type != NL80211_CHAN_HT20 && channel_type != NL80211_CHAN_HT40PLUS && - channel_type != NL80211_CHAN_HT40MINUS) + channel_type != NL80211_CHAN_HT40MINUS) { err = -EINVAL; goto out; + } } freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); @@ -4717,9 +4718,10 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info) if (channel_type != NL80211_CHAN_NO_HT && channel_type != NL80211_CHAN_HT20 && channel_type != NL80211_CHAN_HT40PLUS && - channel_type != NL80211_CHAN_HT40MINUS) + channel_type != NL80211_CHAN_HT40MINUS) { err = -EINVAL; goto out; + } } freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); -- cgit v1.2.2 From 35f3d14dbbc58447c61e38a162ea10add6b31dc7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 20 May 2010 10:43:18 +0200 Subject: pipe: add support for shrinking and growing pipes This patch adds F_GETPIPE_SZ and F_SETPIPE_SZ fcntl() actions for growing and shrinking the size of a pipe and adjusts pipe.c and splice.c (and relay and network splice) usage to work with these larger (or smaller) pipes. Signed-off-by: Jens Axboe --- net/core/skbuff.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 93c4e060c91e..931981774b1a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1417,12 +1417,13 @@ new_page: /* * Fill page/offset/length into spd, if it can hold more pages. 
*/ -static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page, +static inline int spd_fill_page(struct splice_pipe_desc *spd, + struct pipe_inode_info *pipe, struct page *page, unsigned int *len, unsigned int offset, struct sk_buff *skb, int linear, struct sock *sk) { - if (unlikely(spd->nr_pages == PIPE_BUFFERS)) + if (unlikely(spd->nr_pages == pipe->buffers)) return 1; if (linear) { @@ -1458,7 +1459,8 @@ static inline int __splice_segment(struct page *page, unsigned int poff, unsigned int plen, unsigned int *off, unsigned int *len, struct sk_buff *skb, struct splice_pipe_desc *spd, int linear, - struct sock *sk) + struct sock *sk, + struct pipe_inode_info *pipe) { if (!*len) return 1; @@ -1481,7 +1483,7 @@ static inline int __splice_segment(struct page *page, unsigned int poff, /* the linear region may spread across several pages */ flen = min_t(unsigned int, flen, PAGE_SIZE - poff); - if (spd_fill_page(spd, page, &flen, poff, skb, linear, sk)) + if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk)) return 1; __segment_seek(&page, &poff, &plen, flen); @@ -1496,9 +1498,9 @@ static inline int __splice_segment(struct page *page, unsigned int poff, * Map linear and fragment data from the skb to spd. It reports failure if the * pipe is full or if we already spliced the requested length. */ -static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, - unsigned int *len, struct splice_pipe_desc *spd, - struct sock *sk) +static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, + unsigned int *offset, unsigned int *len, + struct splice_pipe_desc *spd, struct sock *sk) { int seg; @@ -1508,7 +1510,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, if (__splice_segment(virt_to_page(skb->data), (unsigned long) skb->data & (PAGE_SIZE - 1), skb_headlen(skb), - offset, len, skb, spd, 1, sk)) + offset, len, skb, spd, 1, sk, pipe)) return 1; /* @@ -1518,7 +1520,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; if (__splice_segment(f->page, f->page_offset, f->size, - offset, len, skb, spd, 0, sk)) + offset, len, skb, spd, 0, sk, pipe)) return 1; } @@ -1535,8 +1537,8 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, struct pipe_inode_info *pipe, unsigned int tlen, unsigned int flags) { - struct partial_page partial[PIPE_BUFFERS]; - struct page *pages[PIPE_BUFFERS]; + struct partial_page partial[PIPE_DEF_BUFFERS]; + struct page *pages[PIPE_DEF_BUFFERS]; struct splice_pipe_desc spd = { .pages = pages, .partial = partial, @@ -1546,12 +1548,16 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, }; struct sk_buff *frag_iter; struct sock *sk = skb->sk; + int ret = 0; + + if (splice_grow_spd(pipe, &spd)) + return -ENOMEM; /* * __skb_splice_bits() only fails if the output has no room left, * so no point in going over the frag_list for the error case. 
*/ - if (__skb_splice_bits(skb, &offset, &tlen, &spd, sk)) + if (__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk)) goto done; else if (!tlen) goto done; @@ -1562,14 +1568,12 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, skb_walk_frags(skb, frag_iter) { if (!tlen) break; - if (__skb_splice_bits(frag_iter, &offset, &tlen, &spd, sk)) + if (__skb_splice_bits(frag_iter, pipe, &offset, &tlen, &spd, sk)) break; } done: if (spd.nr_pages) { - int ret; - /* * Drop the socket lock, otherwise we have reverse * locking dependencies between sk_lock and i_mutex @@ -1582,10 +1586,10 @@ done: release_sock(sk); ret = splice_to_pipe(pipe, &spd); lock_sock(sk); - return ret; } - return 0; + splice_shrink_spd(pipe, &spd); + return ret; } /** -- cgit v1.2.2 From c56e4acf55c804cbeea0ddb696ef698c73d39826 Mon Sep 17 00:00:00 2001 From: Sripathi Kodi Date: Thu, 25 Mar 2010 12:40:35 +0000 Subject: 9p: VFS switches for 9p2000.L: protocol and client changes Prepare p9pdu_read/write functions to handle multiple protocols. Signed-off-by: Sripathi Kodi Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 3 ++- net/9p/protocol.c | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 0aa79faa9850..e2d314674965 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1321,7 +1321,8 @@ static int p9_client_statsize(struct p9_wstat *wst, int proto_version) if (wst->muid) ret += strlen(wst->muid); - if (proto_version == p9_proto_2000u) { + if ((proto_version == p9_proto_2000u) || + (proto_version == p9_proto_2000L)) { ret += 2+4+4+4; /* extension[s] n_uid[4] n_gid[4] n_muid[4] */ if (wst->extension) ret += strlen(wst->extension); diff --git a/net/9p/protocol.c b/net/9p/protocol.c index e7541d5b0118..77d3aab4036b 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -341,7 +341,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, } break; case '?': - if (proto_version != p9_proto_2000u) + if ((proto_version != p9_proto_2000u) && + (proto_version != p9_proto_2000L)) return 0; break; default: @@ -488,7 +489,8 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, } break; case '?': - if (proto_version != p9_proto_2000u) + if ((proto_version != p9_proto_2000u) && + (proto_version != p9_proto_2000L)) return 0; break; default: -- cgit v1.2.2 From bda8e7752063cdbdd1d308bc1705400a8cec1aeb Mon Sep 17 00:00:00 2001 From: Sripathi Kodi Date: Thu, 25 Mar 2010 12:45:30 +0000 Subject: 9p: add 9P2000.L statfs operation I made a V2 of this patch on top of my patches for VFS switches. The change was adding v9fs_statfs pointer to v9fs_super_ops_dotl instead of v9fs_super_ops. statfs - get file system statistics size[4] Tstatfs tag[2] fid[4] size[4] Rstatfs tag[2] type[4] bsize[4] blocks[8] bfree[8] bavail[8] files[8] ffree[8] fsid[8] namelen[4] The statfs message is used to request file system information returned by the statfs(2) system call, which is used by df(1) to report file system and disk space usage. 
Signed-off-by: Jim Garlick Signed-off-by: Sripathi Kodi Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index e2d314674965..430a1c4a7c6f 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1365,3 +1365,42 @@ error: return err; } EXPORT_SYMBOL(p9_client_wstat); + +int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb) +{ + int err; + struct p9_req_t *req; + struct p9_client *clnt; + + err = 0; + clnt = fid->clnt; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TSTATFS fid %d\n", fid->fid); + + req = p9_client_rpc(clnt, P9_TSTATFS, "d", fid->fid); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + err = p9pdu_readf(req->rc, clnt->proto_version, "ddqqqqqqd", &sb->type, + &sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail, + &sb->files, &sb->ffree, &sb->fsid, &sb->namelen); + if (err) { + p9pdu_dump(1, req->rc); + p9_free_req(clnt, req); + goto error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RSTATFS fid %d type 0x%lx bsize %ld " + "blocks %llu bfree %llu bavail %llu files %llu ffree %llu " + "fsid %llu namelen %ld\n", + fid->fid, (long unsigned int)sb->type, (long int)sb->bsize, + sb->blocks, sb->bfree, sb->bavail, sb->files, sb->ffree, + sb->fsid, (long int)sb->namelen); + + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_statfs); -- cgit v1.2.2 From 4681dbdacb5cdc4d3273c3a97a1858d6e00a5fe7 Mon Sep 17 00:00:00 2001 From: Sripathi Kodi Date: Thu, 25 Mar 2010 12:47:26 +0000 Subject: 9p: add 9P2000.L rename operation I made a V2 of this patch on top of my patches for VFS switches. All the changes were due to change in some offsets. rename - change name of file or directory size[4] Trename tag[2] fid[4] newdirfid[4] name[s] size[4] Rrename tag[2] The rename message is used to change the name of a file, possibly moving it to a new directory. The 9P wstat message can only rename a file within the same directory. Signed-off-by: Jim Garlick Signed-off-by: Sripathi Kodi Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 430a1c4a7c6f..37c8da07a80b 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1404,3 +1404,31 @@ error: return err; } EXPORT_SYMBOL(p9_client_statfs); + +int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, char *name) +{ + int err; + struct p9_req_t *req; + struct p9_client *clnt; + + err = 0; + clnt = fid->clnt; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TRENAME fid %d newdirfid %d name %s\n", + fid->fid, newdirfid->fid, name); + + req = p9_client_rpc(clnt, P9_TRENAME, "dds", fid->fid, + newdirfid->fid, name); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RRENAME fid %d\n", fid->fid); + + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_rename); + -- cgit v1.2.2 From 674b604cdd389252d89a14133b6ebf80165d1d55 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 19 May 2010 15:08:17 +0200 Subject: sunrpc: Pushdown the bkl from ioctl Pushdown the bkl to rpc_pipe_ioctl. Signed-off-by: Frederic Weisbecker Cc: "J. 
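A consumer of this call might look roughly like the minimal sketch below; this is an illustration only, not part of the patch — the actual VFS-side hook (v9fs_statfs) lives outside net/ and is not shown in this diff, and the example_* names are hypothetical. It maps the Rstatfs reply fields, as decoded into struct p9_rstatfs by the client code added below, onto the VFS struct kstatfs.

	static int example_v9fs_statfs(struct dentry *dentry, struct kstatfs *buf)
	{
		struct p9_fid *fid = example_fid_lookup(dentry);	/* hypothetical fid lookup helper */
		struct p9_rstatfs rs;
		int err;

		if (IS_ERR(fid))
			return PTR_ERR(fid);

		err = p9_client_statfs(fid, &rs);	/* issues Tstatfs, fills rs from the Rstatfs reply */
		if (err)
			return err;

		/* map the 9P2000.L reply fields onto the VFS statfs structure */
		buf->f_type    = rs.type;
		buf->f_bsize   = rs.bsize;
		buf->f_blocks  = rs.blocks;
		buf->f_bfree   = rs.bfree;
		buf->f_bavail  = rs.bavail;
		buf->f_files   = rs.files;
		buf->f_ffree   = rs.ffree;
		buf->f_namelen = rs.namelen;
		return 0;
	}
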
Bruce Fields" Cc: Neil Brown Cc: Nfs Cc: Thomas Gleixner Cc: John Kacur Cc: Arnd Bergmann --- net/sunrpc/rpc_pipe.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 20e30c6f8355..95ccbcf45d3e 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -27,6 +27,7 @@ #include #include #include +#include static struct vfsmount *rpc_mount __read_mostly; static int rpc_mount_count; @@ -309,8 +310,7 @@ rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait) } static int -rpc_pipe_ioctl(struct inode *ino, struct file *filp, - unsigned int cmd, unsigned long arg) +rpc_pipe_ioctl_unlocked(struct file *filp, unsigned int cmd, unsigned long arg) { struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode); int len; @@ -331,13 +331,25 @@ rpc_pipe_ioctl(struct inode *ino, struct file *filp, } } +static long +rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + long ret; + + lock_kernel(); + ret = rpc_pipe_ioctl_unlocked(filp, cmd, arg); + unlock_kernel(); + + return ret; +} + static const struct file_operations rpc_pipe_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .read = rpc_pipe_read, .write = rpc_pipe_write, .poll = rpc_pipe_poll, - .ioctl = rpc_pipe_ioctl, + .unlocked_ioctl = rpc_pipe_ioctl, .open = rpc_pipe_open, .release = rpc_pipe_release, }; -- cgit v1.2.2 From 9918ff26b301e9a57f25fb12b44a46ad0c1e8f8f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 19 May 2010 15:08:17 +0200 Subject: sunrpc: Pushdown the bkl from sunrpc cache ioctl Pushdown the bkl to cache_ioctl_pipefs. Signed-off-by: Frederic Weisbecker Cc: "J. Bruce Fields" Cc: Neil Brown Cc: Nfs Cc: Thomas Gleixner Cc: John Kacur Cc: Arnd Bergmann --- net/sunrpc/cache.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 39bddba53ba1..91b1adec0d67 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -33,6 +33,7 @@ #include #include #include +#include #define RPCDBG_FACILITY RPCDBG_CACHE @@ -1525,12 +1526,18 @@ static unsigned int cache_poll_pipefs(struct file *filp, poll_table *wait) return cache_poll(filp, wait, cd); } -static int cache_ioctl_pipefs(struct inode *inode, struct file *filp, +static long cache_ioctl_pipefs(struct file *filp, unsigned int cmd, unsigned long arg) { + struct inode *inode = filp->f_dentry->d_inode; struct cache_detail *cd = RPC_I(inode)->private; + long ret; - return cache_ioctl(inode, filp, cmd, arg, cd); + lock_kernel(); + ret = cache_ioctl(inode, filp, cmd, arg, cd); + unlock_kernel(); + + return ret; } static int cache_open_pipefs(struct inode *inode, struct file *filp) @@ -1553,7 +1560,7 @@ const struct file_operations cache_file_operations_pipefs = { .read = cache_read_pipefs, .write = cache_write_pipefs, .poll = cache_poll_pipefs, - .ioctl = cache_ioctl_pipefs, /* for FIONREAD */ + .unlocked_ioctl = cache_ioctl_pipefs, /* for FIONREAD */ .open = cache_open_pipefs, .release = cache_release_pipefs, }; -- cgit v1.2.2 From a6c0f8217c17d46da22fa56923f3cbd03615cb7c Mon Sep 17 00:00:00 2001 From: Denis Kirjanov Date: Sun, 23 May 2010 05:45:45 +0000 Subject: ieee802154: Fix possible NULL pointer dereference in wpan_phy_alloc Check for NULL pointer after kzalloc Signed-off-by: Denis Kirjanov Signed-off-by: David S. 
Miller --- net/ieee802154/wpan-class.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ieee802154/wpan-class.c b/net/ieee802154/wpan-class.c index 3d803a1b9fb6..1627ef2e8522 100644 --- a/net/ieee802154/wpan-class.c +++ b/net/ieee802154/wpan-class.c @@ -147,13 +147,15 @@ struct wpan_phy *wpan_phy_alloc(size_t priv_size) struct wpan_phy *phy = kzalloc(sizeof(*phy) + priv_size, GFP_KERNEL); + if (!phy) + goto out; mutex_lock(&wpan_phy_mutex); phy->idx = wpan_phy_idx++; if (unlikely(!wpan_phy_idx_valid(phy->idx))) { wpan_phy_idx--; mutex_unlock(&wpan_phy_mutex); kfree(phy); - return NULL; + goto out; } mutex_unlock(&wpan_phy_mutex); @@ -168,6 +170,9 @@ struct wpan_phy *wpan_phy_alloc(size_t priv_size) phy->current_page = 0; /* for compatibility */ return phy; + +out: + return NULL; } EXPORT_SYMBOL(wpan_phy_alloc); -- cgit v1.2.2 From 53b0f08042f04813cd1a7473dacd3edfacb28eb3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 22 May 2010 20:37:44 +0000 Subject: net_sched: Fix qdisc_notify() Ben Pfaff reported a kernel oops and provided a test program to reproduce it. https://kerneltrap.org/mailarchive/linux-netdev/2010/5/21/6277805 tc_fill_qdisc() should not be called for builtin qdisc, or it dereference a NULL pointer to get device ifindex. Fix is to always use tc_qdisc_dump_ignore() before calling tc_fill_qdisc(). Reported-by: Ben Pfaff Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_api.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index fe35c1f338c2..b9e8c3b7d406 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1195,6 +1195,11 @@ nla_put_failure: return -1; } +static bool tc_qdisc_dump_ignore(struct Qdisc *q) +{ + return (q->flags & TCQ_F_BUILTIN) ? true : false; +} + static int qdisc_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, u32 clid, struct Qdisc *old, struct Qdisc *new) @@ -1206,11 +1211,11 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb, if (!skb) return -ENOBUFS; - if (old && old->handle) { + if (old && !tc_qdisc_dump_ignore(old)) { if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0) goto err_out; } - if (new) { + if (new && !tc_qdisc_dump_ignore(new)) { if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) goto err_out; } @@ -1223,11 +1228,6 @@ err_out: return -EINVAL; } -static bool tc_qdisc_dump_ignore(struct Qdisc *q) -{ - return (q->flags & TCQ_F_BUILTIN) ? true : false; -} - static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, struct netlink_callback *cb, int *q_idx_p, int s_q_idx) -- cgit v1.2.2 From eb1669aed950cb5f34622bcceba66bef5980e97a Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Fri, 21 May 2010 10:45:20 +0000 Subject: net-caif: drop redundant Kconfig entries There is already a submenu entry that is always displayed, so there is no need to also show a dedicated CAIF comment. Drop dead commented code while we're here, and change the submenu text to better match the style everyone else is using. Signed-off-by: Mike Frysinger Signed-off-by: David S. 
Miller --- net/caif/Kconfig | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/caif/Kconfig b/net/caif/Kconfig index cd1daf6008bd..ed651786f16b 100644 --- a/net/caif/Kconfig +++ b/net/caif/Kconfig @@ -2,10 +2,8 @@ # CAIF net configurations # -#menu "CAIF Support" -comment "CAIF Support" menuconfig CAIF - tristate "Enable CAIF support" + tristate "CAIF support" select CRC_CCITT default n ---help--- @@ -45,4 +43,3 @@ config CAIF_NETDEV If unsure say Y. endif -#endmenu -- cgit v1.2.2 From 253683bbfb6bc5864417c8c35cb6ef13b5e259e6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 21 May 2010 02:25:27 +0000 Subject: rtnetlink: Fix error handling in do_setlink() Commit c02db8c6290bb992442fec1407643c94cc414375: Author: Chris Wright Date: Sun May 16 01:05:45 2010 -0700 Subject: rtnetlink: make SR-IOV VF interface symmetric adds broken error handling to do_setlink() in net/core/rtnetlink.c. The problem is the following chunk of code: if (tb[IFLA_VFINFO_LIST]) { struct nlattr *attr; int rem; nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) { if (nla_type(attr) != IFLA_VF_INFO) ----> goto errout; err = do_setvfinfo(dev, attr); if (err < 0) goto errout; modified = 1; } } which can get to errout without setting err, resulting in the following error: net/core/rtnetlink.c: In function 'do_setlink': net/core/rtnetlink.c:904: warning: 'err' may be used uninitialized in this function Change the code to return -EINVAL in this case. Note that this might not be the appropriate error though. Signed-off-by: David Howells cc: Chris Wright cc: David S. Miller Acked-by: Chris Wright Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e4b9870e4706..7ab86f3a1ea4 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1199,8 +1199,10 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct nlattr *attr; int rem; nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) { - if (nla_type(attr) != IFLA_VF_INFO) + if (nla_type(attr) != IFLA_VF_INFO) { + err = -EINVAL; goto errout; + } err = do_setvfinfo(dev, attr); if (err < 0) goto errout; -- cgit v1.2.2 From 8ce6cebc2f126f3ecf2d80746ea54245adf18057 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 19 May 2010 10:12:19 +0000 Subject: net-2.6 : V2 - fix dev_get_valid_name the commit: commit d90310243fd750240755e217c5faa13e24f41536 Author: Octavian Purdila Date: Wed Nov 18 02:36:59 2009 +0000 net: device name allocation cleanups introduced a bug when there is a hash collision making impossible to rename a device with eth%d. This bug is very hard to reproduce and appears rarely. The problem is coming from we don't pass a temporary buffer to __dev_alloc_name but 'dev->name' which is modified by the function. A detailed explanation is here: http://marc.info/?l=linux-netdev&m=127417784011987&w=2 Changelog: V2 : replaced strings comparison by pointers comparison Signed-off-by: Daniel Lezcano Reviewed-by: Octavian Purdila Signed-off-by: David S. 
Miller --- net/core/dev.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 0aab66d68b19..07a48e2bf7db 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -954,18 +954,22 @@ int dev_alloc_name(struct net_device *dev, const char *name) } EXPORT_SYMBOL(dev_alloc_name); -static int dev_get_valid_name(struct net *net, const char *name, char *buf, - bool fmt) +static int dev_get_valid_name(struct net_device *dev, const char *name, bool fmt) { + struct net *net; + + BUG_ON(!dev_net(dev)); + net = dev_net(dev); + if (!dev_valid_name(name)) return -EINVAL; if (fmt && strchr(name, '%')) - return __dev_alloc_name(net, name, buf); + return dev_alloc_name(dev, name); else if (__dev_get_by_name(net, name)) return -EEXIST; - else if (buf != name) - strlcpy(buf, name, IFNAMSIZ); + else if (dev->name != name) + strlcpy(dev->name, name, IFNAMSIZ); return 0; } @@ -997,7 +1001,7 @@ int dev_change_name(struct net_device *dev, const char *newname) memcpy(oldname, dev->name, IFNAMSIZ); - err = dev_get_valid_name(net, newname, dev->name, 1); + err = dev_get_valid_name(dev, newname, 1); if (err < 0) return err; @@ -4965,7 +4969,7 @@ int register_netdevice(struct net_device *dev) } } - ret = dev_get_valid_name(net, dev->name, dev->name, 0); + ret = dev_get_valid_name(dev, dev->name, 0); if (ret) goto err_uninit; @@ -5574,7 +5578,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* We get here if we can't use the current device name */ if (!pat) goto out; - if (dev_get_valid_name(net, pat, dev->name, 1)) + if (dev_get_valid_name(dev, pat, 1)) goto out; } -- cgit v1.2.2 From 9e4b816bc31962ebbb8784d602acd5fa25a08ad8 Mon Sep 17 00:00:00 2001 From: Sjur Braendeland Date: Fri, 21 May 2010 02:16:07 +0000 Subject: caif: Bugfix - wait_ev*_timeout returns long. Discovered bug when testing on 64bit architecture. Fixed by using long to store result from wait_event_interruptible_timeout. Signed-off-by: Sjur Braendeland Signed-off-by: David S. Miller --- net/caif/caif_socket.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index c3a70c5c893a..77e99568acda 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -920,17 +920,17 @@ wait_connect: timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); release_sock(sk); - err = wait_event_interruptible_timeout(*sk_sleep(sk), + err = -ERESTARTSYS; + timeo = wait_event_interruptible_timeout(*sk_sleep(sk), sk->sk_state != CAIF_CONNECTING, timeo); lock_sock(sk); - if (err < 0) + if (timeo < 0) goto out; /* -ERESTARTSYS */ - if (err == 0 && sk->sk_state != CAIF_CONNECTED) { - err = -ETIMEDOUT; - goto out; - } + err = -ETIMEDOUT; + if (timeo == 0 && sk->sk_state != CAIF_CONNECTED) + goto out; if (sk->sk_state != CAIF_CONNECTED) { sock->state = SS_UNCONNECTED; err = sock_error(sk); @@ -945,7 +945,6 @@ out: return err; } - /* * caif_release() - Disconnect a CAIF Socket * Copied and modified af_irda.c:irda_release(). -- cgit v1.2.2 From 7aecf4944f2c05aafb73b4820e469c74b4ec8517 Mon Sep 17 00:00:00 2001 From: Sjur Braendeland Date: Fri, 21 May 2010 02:16:08 +0000 Subject: caif: Bugfix - use standard Linux lists Discovered bug when running high number of parallel connect requests. Replace buggy home brewed list with linux/list.h. Signed-off-by: Sjur Braendeland Signed-off-by: David S. 
Miller --- net/caif/cfctrl.c | 92 ++++++++++++++++--------------------------------------- 1 file changed, 26 insertions(+), 66 deletions(-) (limited to 'net') diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index 0ffe1e1ce901..fcfda98a5e6d 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -44,13 +44,14 @@ struct cflayer *cfctrl_create(void) dev_info.id = 0xff; memset(this, 0, sizeof(*this)); cfsrvl_init(&this->serv, 0, &dev_info); - spin_lock_init(&this->info_list_lock); atomic_set(&this->req_seq_no, 1); atomic_set(&this->rsp_seq_no, 1); this->serv.layer.receive = cfctrl_recv; sprintf(this->serv.layer.name, "ctrl"); this->serv.layer.ctrlcmd = cfctrl_ctrlcmd; spin_lock_init(&this->loop_linkid_lock); + spin_lock_init(&this->info_list_lock); + INIT_LIST_HEAD(&this->list); this->loop_linkid = 1; return &this->serv.layer; } @@ -112,20 +113,10 @@ bool cfctrl_req_eq(struct cfctrl_request_info *r1, void cfctrl_insert_req(struct cfctrl *ctrl, struct cfctrl_request_info *req) { - struct cfctrl_request_info *p; spin_lock(&ctrl->info_list_lock); - req->next = NULL; atomic_inc(&ctrl->req_seq_no); req->sequence_no = atomic_read(&ctrl->req_seq_no); - if (ctrl->first_req == NULL) { - ctrl->first_req = req; - spin_unlock(&ctrl->info_list_lock); - return; - } - p = ctrl->first_req; - while (p->next != NULL) - p = p->next; - p->next = req; + list_add_tail(&req->list, &ctrl->list); spin_unlock(&ctrl->info_list_lock); } @@ -133,46 +124,28 @@ void cfctrl_insert_req(struct cfctrl *ctrl, struct cfctrl_request_info *cfctrl_remove_req(struct cfctrl *ctrl, struct cfctrl_request_info *req) { - struct cfctrl_request_info *p; - struct cfctrl_request_info *ret; + struct cfctrl_request_info *p, *tmp, *first; spin_lock(&ctrl->info_list_lock); - if (ctrl->first_req == NULL) { - spin_unlock(&ctrl->info_list_lock); - return NULL; - } - - if (cfctrl_req_eq(req, ctrl->first_req)) { - ret = ctrl->first_req; - caif_assert(ctrl->first_req); - atomic_set(&ctrl->rsp_seq_no, - ctrl->first_req->sequence_no); - ctrl->first_req = ctrl->first_req->next; - spin_unlock(&ctrl->info_list_lock); - return ret; - } + first = list_first_entry(&ctrl->list, struct cfctrl_request_info, list); - p = ctrl->first_req; - - while (p->next != NULL) { - if (cfctrl_req_eq(req, p->next)) { - pr_warning("CAIF: %s(): Requests are not " + list_for_each_entry_safe(p, tmp, &ctrl->list, list) { + if (cfctrl_req_eq(req, p)) { + if (p != first) + pr_warning("CAIF: %s(): Requests are not " "received in order\n", __func__); - ret = p->next; + atomic_set(&ctrl->rsp_seq_no, - p->next->sequence_no); - p->next = p->next->next; - spin_unlock(&ctrl->info_list_lock); - return ret; + p->sequence_no); + list_del(&p->list); + goto out; } - p = p->next; } + p = NULL; +out: spin_unlock(&ctrl->info_list_lock); - - pr_warning("CAIF: %s(): Request does not match\n", - __func__); - return NULL; + return p; } struct cfctrl_rsp *cfctrl_get_respfuncs(struct cflayer *layer) @@ -388,31 +361,18 @@ void cfctrl_getstartreason_req(struct cflayer *layer) void cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer) { - struct cfctrl_request_info *p, *req; + struct cfctrl_request_info *p, *tmp; struct cfctrl *ctrl = container_obj(layr); spin_lock(&ctrl->info_list_lock); - - if (ctrl->first_req == NULL) { - spin_unlock(&ctrl->info_list_lock); - return; - } - - if (ctrl->first_req->client_layer == adap_layer) { - - req = ctrl->first_req; - ctrl->first_req = ctrl->first_req->next; - kfree(req); - } - - p = ctrl->first_req; - while (p != NULL && p->next != NULL) { - if 
(p->next->client_layer == adap_layer) { - - req = p->next; - p->next = p->next->next; - kfree(p->next); + pr_warning("CAIF: %s(): enter\n", __func__); + + list_for_each_entry_safe(p, tmp, &ctrl->list, list) { + if (p->client_layer == adap_layer) { + pr_warning("CAIF: %s(): cancel req :%d\n", __func__, + p->sequence_no); + list_del(&p->list); + kfree(p); } - p = p->next; } spin_unlock(&ctrl->info_list_lock); @@ -634,7 +594,7 @@ static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, case _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND: case CAIF_CTRLCMD_FLOW_OFF_IND: spin_lock(&this->info_list_lock); - if (this->first_req != NULL) { + if (!list_empty(&this->list)) { pr_debug("CAIF: %s(): Received flow off in " "control layer", __func__); } -- cgit v1.2.2 From 638e628a600a5c542d46dfb06771cf9c229ef5f3 Mon Sep 17 00:00:00 2001 From: Sjur Braendeland Date: Fri, 21 May 2010 02:16:09 +0000 Subject: caif: Bugfix - handle mem-allocation failures Discovered bugs when injecting slab allocation failures. Add checks on all memory allocation. Signed-off-by: Sjur Braendeland Signed-off-by: David S. Miller --- net/caif/cfpkt_skbuff.c | 25 +++++++++++++++++-------- net/caif/cfserl.c | 3 ++- net/caif/cfsrvl.c | 6 ++++++ 3 files changed, 25 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c index 83fff2ff6658..a6fdf899741a 100644 --- a/net/caif/cfpkt_skbuff.c +++ b/net/caif/cfpkt_skbuff.c @@ -238,6 +238,7 @@ int cfpkt_add_head(struct cfpkt *pkt, const void *data2, u16 len) struct sk_buff *lastskb; u8 *to; const u8 *data = data2; + int ret; if (unlikely(is_erronous(pkt))) return -EPROTO; if (unlikely(skb_headroom(skb) < len)) { @@ -246,9 +247,10 @@ int cfpkt_add_head(struct cfpkt *pkt, const void *data2, u16 len) } /* Make sure data is writable */ - if (unlikely(skb_cow_data(skb, 0, &lastskb) < 0)) { + ret = skb_cow_data(skb, 0, &lastskb); + if (unlikely(ret < 0)) { PKT_ERROR(pkt, "cfpkt_add_head: cow failed\n"); - return -EPROTO; + return ret; } to = skb_push(skb, len); @@ -316,6 +318,8 @@ EXPORT_SYMBOL(cfpkt_setlen); struct cfpkt *cfpkt_create_uplink(const unsigned char *data, unsigned int len) { struct cfpkt *pkt = cfpkt_create_pfx(len + PKT_POSTFIX, PKT_PREFIX); + if (!pkt) + return NULL; if (unlikely(data != NULL)) cfpkt_add_body(pkt, data, len); return pkt; @@ -344,12 +348,13 @@ struct cfpkt *cfpkt_append(struct cfpkt *dstpkt, if (dst->tail + neededtailspace > dst->end) { /* Create a dumplicate of 'dst' with more tail space */ + struct cfpkt *tmppkt; dstlen = skb_headlen(dst); createlen = dstlen + neededtailspace; - tmp = pkt_to_skb( - cfpkt_create(createlen + PKT_PREFIX + PKT_POSTFIX)); - if (!tmp) + tmppkt = cfpkt_create(createlen + PKT_PREFIX + PKT_POSTFIX); + if (tmppkt == NULL) return NULL; + tmp = pkt_to_skb(tmppkt); skb_set_tail_pointer(tmp, dstlen); tmp->len = dstlen; memcpy(tmp->data, dst->data, dstlen); @@ -368,6 +373,7 @@ struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos) { struct sk_buff *skb2; struct sk_buff *skb = pkt_to_skb(pkt); + struct cfpkt *tmppkt; u8 *split = skb->data + pos; u16 len2nd = skb_tail_pointer(skb) - split; @@ -381,9 +387,12 @@ struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos) } /* Create a new packet for the second part of the data */ - skb2 = pkt_to_skb( - cfpkt_create_pfx(len2nd + PKT_PREFIX + PKT_POSTFIX, - PKT_PREFIX)); + tmppkt = cfpkt_create_pfx(len2nd + PKT_PREFIX + PKT_POSTFIX, + PKT_PREFIX); + if (tmppkt == NULL) + return NULL; + skb2 = pkt_to_skb(tmppkt); + if (skb2 == NULL) return NULL; 
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c index 06029ea2da2f..cb4325a3dc83 100644 --- a/net/caif/cfserl.c +++ b/net/caif/cfserl.c @@ -67,6 +67,8 @@ static int cfserl_receive(struct cflayer *l, struct cfpkt *newpkt) layr->incomplete_frm = cfpkt_append(layr->incomplete_frm, newpkt, expectlen); pkt = layr->incomplete_frm; + if (pkt == NULL) + return -ENOMEM; } else { pkt = newpkt; } @@ -154,7 +156,6 @@ static int cfserl_receive(struct cflayer *l, struct cfpkt *newpkt) if (layr->usestx) { if (tail_pkt != NULL) pkt = cfpkt_append(pkt, tail_pkt, 0); - /* Start search for next STX if frame failed */ continue; } else { diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c index aff31f34528f..6e5b7079a684 100644 --- a/net/caif/cfsrvl.c +++ b/net/caif/cfsrvl.c @@ -123,6 +123,12 @@ static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl) struct caif_payload_info *info; u8 flow_off = SRVL_FLOW_OFF; pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE); + if (!pkt) { + pr_warning("CAIF: %s(): Out of memory\n", + __func__); + return -ENOMEM; + } + if (cfpkt_add_head(pkt, &flow_off, 1) < 0) { pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); -- cgit v1.2.2 From ca6a09f25cde1d2d86f8e821cf69f4f650c30dbf Mon Sep 17 00:00:00 2001 From: Sjur Braendeland Date: Fri, 21 May 2010 02:16:10 +0000 Subject: caif: Bugfix - Poll can't return POLLHUP while connecting. Discovered bug when testing async connect. While connecting poll should not return POLLHUP, but POLLOUT when connected. Also fixed the sysfs flow-control-counters. Signed-off-by: Sjur Braendeland Signed-off-by: David S. Miller --- net/caif/caif_socket.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 77e99568acda..732897d64f41 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -138,7 +138,7 @@ void caif_flow_ctrl(struct sock *sk, int mode) { struct caifsock *cf_sk; cf_sk = container_of(sk, struct caifsock, sk); - if (cf_sk->layer.dn) + if (cf_sk->layer.dn && cf_sk->layer.dn->modemcmd) cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, mode); } @@ -162,9 +162,8 @@ int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) atomic_read(&cf_sk->sk.sk_rmem_alloc), sk_rcvbuf_lowwater(cf_sk)); set_rx_flow_off(cf_sk); - if (cf_sk->layer.dn) - cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, - CAIF_MODEMCMD_FLOW_OFF_REQ); + dbfs_atomic_inc(&cnt.num_rx_flow_off); + caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ); } err = sk_filter(sk, skb); @@ -175,9 +174,8 @@ int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) trace_printk("CAIF: %s():" " sending flow OFF due to rmem_schedule\n", __func__); - if (cf_sk->layer.dn) - cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, - CAIF_MODEMCMD_FLOW_OFF_REQ); + dbfs_atomic_inc(&cnt.num_rx_flow_off); + caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ); } skb->dev = NULL; skb_set_owner_r(skb, sk); @@ -285,16 +283,13 @@ static void caif_check_flow_release(struct sock *sk) { struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); - if (cf_sk->layer.dn == NULL || cf_sk->layer.dn->modemcmd == NULL) - return; if (rx_flow_is_on(cf_sk)) return; if (atomic_read(&sk->sk_rmem_alloc) <= sk_rcvbuf_lowwater(cf_sk)) { dbfs_atomic_inc(&cnt.num_rx_flow_on); set_rx_flow_on(cf_sk); - cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, - CAIF_MODEMCMD_FLOW_ON_REQ); + caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_ON_REQ); } } /* @@ -1018,10 +1013,6 @@ static unsigned int caif_poll(struct file *file, (sk->sk_shutdown & 
RCV_SHUTDOWN)) mask |= POLLIN | POLLRDNORM; - /* Connection-based need to check for termination and startup */ - if (sk->sk_state == CAIF_DISCONNECTED) - mask |= POLLHUP; - /* * we set writable also when the other side has shut down the * connection. This prevents stuck sockets. -- cgit v1.2.2 From a9a8f1070d8733b37418b3a2d58df4e771b61f88 Mon Sep 17 00:00:00 2001 From: Sjur Braendeland Date: Fri, 21 May 2010 02:16:11 +0000 Subject: caif: Bugfix - missing spin_unlock Splint found missing spin_unlock. Corrected this an some other trivial split warnings. Signed-off-by: Sjur Braendeland Signed-off-by: David S. Miller --- net/caif/caif_socket.c | 10 +++++----- net/caif/cfmuxl.c | 3 ++- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 732897d64f41..691a5710974e 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -60,7 +60,7 @@ struct debug_fs_counter { atomic_t num_rx_flow_off; atomic_t num_rx_flow_on; }; -struct debug_fs_counter cnt; +static struct debug_fs_counter cnt; #define dbfs_atomic_inc(v) atomic_inc(v) #define dbfs_atomic_dec(v) atomic_dec(v) #else @@ -128,13 +128,13 @@ static void caif_read_unlock(struct sock *sk) mutex_unlock(&cf_sk->readlock); } -int sk_rcvbuf_lowwater(struct caifsock *cf_sk) +static int sk_rcvbuf_lowwater(struct caifsock *cf_sk) { /* A quarter of full buffer is used a low water mark */ return cf_sk->sk.sk_rcvbuf / 4; } -void caif_flow_ctrl(struct sock *sk, int mode) +static void caif_flow_ctrl(struct sock *sk, int mode) { struct caifsock *cf_sk; cf_sk = container_of(sk, struct caifsock, sk); @@ -146,7 +146,7 @@ void caif_flow_ctrl(struct sock *sk, int mode) * Copied from sock.c:sock_queue_rcv_skb(), but changed so packets are * not dropped, but CAIF is sending flow off instead. */ -int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int err; int skb_len; @@ -1184,7 +1184,7 @@ static struct net_proto_family caif_family_ops = { .owner = THIS_MODULE, }; -int af_caif_init(void) +static int af_caif_init(void) { int err = sock_register(&caif_family_ops); if (!err) diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c index 7372f27f1d32..80c8d332b258 100644 --- a/net/caif/cfmuxl.c +++ b/net/caif/cfmuxl.c @@ -174,10 +174,11 @@ struct cflayer *cfmuxl_remove_uplayer(struct cflayer *layr, u8 id) spin_lock(&muxl->receive_lock); up = get_up(muxl, id); if (up == NULL) - return NULL; + goto out; memset(muxl->up_cache, 0, sizeof(muxl->up_cache)); list_del(&up->node); cfsrvl_put(up); +out: spin_unlock(&muxl->receive_lock); return up; } -- cgit v1.2.2 From dcda138d2f27e32bd0d6250cc42839b0d70bb4b8 Mon Sep 17 00:00:00 2001 From: Sjur Braendeland Date: Fri, 21 May 2010 02:16:12 +0000 Subject: caif: Bugfix - use MSG_TRUNC in receive Fixed handling when skb don't fit in user buffer, instead of returning -EMSGSIZE, the buffer is truncated (just as unix seqpakcet does). Signed-off-by: Sjur Braendeland Signed-off-by: David S. 
Miller --- net/caif/caif_socket.c | 47 ++++++++++++++++++----------------------------- 1 file changed, 18 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 691a5710974e..3d0e09584fae 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -292,53 +292,42 @@ static void caif_check_flow_release(struct sock *sk) caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_ON_REQ); } } + /* - * Copied from sock.c:sock_queue_rcv_skb(), and added check that user buffer - * has sufficient size. + * Copied from unix_dgram_recvmsg, but removed credit checks, + * changed locking, address handling and added MSG_TRUNC. */ - static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t buf_len, int flags) + struct msghdr *m, size_t len, int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; - int ret = 0; - int len; + int ret; + int copylen; - if (unlikely(!buf_len)) - return -EINVAL; + ret = -EOPNOTSUPP; + if (m->msg_flags&MSG_OOB) + goto read_error; skb = skb_recv_datagram(sk, flags, 0 , &ret); if (!skb) goto read_error; - - len = skb->len; - - if (skb && skb->len > buf_len && !(flags & MSG_PEEK)) { - len = buf_len; - /* - * Push skb back on receive queue if buffer too small. - * This has a built-in race where multi-threaded receive - * may get packet in wrong order, but multiple read does - * not really guarantee ordered delivery anyway. - * Let's optimize for speed without taking locks. - */ - - skb_queue_head(&sk->sk_receive_queue, skb); - ret = -EMSGSIZE; - goto read_error; + copylen = skb->len; + if (len < copylen) { + m->msg_flags |= MSG_TRUNC; + copylen = len; } - ret = skb_copy_datagram_iovec(skb, 0, m->msg_iov, len); + ret = skb_copy_datagram_iovec(skb, 0, m->msg_iov, copylen); if (ret) - goto read_error; + goto out_free; + ret = (flags & MSG_TRUNC) ? skb->len : copylen; +out_free: skb_free_datagram(sk, skb); - caif_check_flow_release(sk); - - return len; + return ret; read_error: return ret; -- cgit v1.2.2 From f845172531fb7410c7fb7780b1a6e51ee6df7d52 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 24 May 2010 00:12:34 -0700 Subject: cls_cgroup: Store classid in struct sock Up until now cls_cgroup has relied on fetching the classid out of the current executing thread. This runs into trouble when a packet processing is delayed in which case it may execute out of another thread's context. Furthermore, even when a packet is not delayed we may fail to classify it if soft IRQs have been disabled, because this scenario is indistinguishable from one where a packet unrelated to the current thread is processed by a real soft IRQ. In fact, the current semantics is inherently broken, as a single skb may be constructed out of the writes of two different tasks. A different manifestation of this problem is when the TCP stack transmits in response of an incoming ACK. This is currently unclassified. As we already have a concept of packet ownership for accounting purposes in the skb->sk pointer, this is a natural place to store the classid in a persistent manner. This patch adds the cls_cgroup classid in struct sock, filling up an existing hole on 64-bit :) The value is set at socket creation time. So all sockets created via socket(2) automatically gains the ID of the thread creating it. Whenever another process touches the socket by either reading or writing to it, we will change the socket classid to that of the process if it has a valid (non-zero) classid. 
For sockets created on inbound connections through accept(2), we inherit the classid of the original listening socket through sk_clone, possibly preceding the actual accept(2) call. In order to minimise risks, I have not made this the authoritative classid. For now it is only used as a backup when we execute with soft IRQs disabled. Once we're completely happy with its semantics we can use it as the sole classid. Footnote: I have rearranged the error path on cls_group module creation. If we didn't do this, then there is a window where someone could create a tc rule using cls_group before the cgroup subsystem has been registered. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/sock.c | 18 ++++++++++++++++++ net/sched/cls_cgroup.c | 50 ++++++++++++++++++++++++++++++++++---------------- net/socket.c | 9 +++++++++ 3 files changed, 61 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index bf88a167c8f2..a05ae7f9771e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -123,6 +123,7 @@ #include #include #include +#include #include @@ -217,6 +218,11 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); EXPORT_SYMBOL(sysctl_optmem_max); +#if defined(CONFIG_CGROUPS) && !defined(CONFIG_NET_CLS_CGROUP) +int net_cls_subsys_id = -1; +EXPORT_SYMBOL_GPL(net_cls_subsys_id); +#endif + static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) { struct timeval tv; @@ -1050,6 +1056,16 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) module_put(owner); } +#ifdef CONFIG_CGROUPS +void sock_update_classid(struct sock *sk) +{ + u32 classid = task_cls_classid(current); + + if (classid && classid != sk->sk_classid) + sk->sk_classid = classid; +} +#endif + /** * sk_alloc - All socket objects are allocated here * @net: the applicable net namespace @@ -1073,6 +1089,8 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, sock_lock_init(sk); sock_net_set(sk, get_net(net)); atomic_set(&sk->sk_wmem_alloc, 1); + + sock_update_classid(sk); } return sk; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 221180384fd7..78ef2c5e130b 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -16,14 +16,11 @@ #include #include #include +#include #include #include - -struct cgroup_cls_state -{ - struct cgroup_subsys_state css; - u32 classid; -}; +#include +#include static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, struct cgroup *cgrp); @@ -112,6 +109,10 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp, struct cls_cgroup_head *head = tp->root; u32 classid; + rcu_read_lock(); + classid = task_cls_state(current)->classid; + rcu_read_unlock(); + /* * Due to the nature of the classifier it is required to ignore all * packets originating from softirq context as accessing `current' @@ -122,12 +123,12 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp, * calls by looking at the number of nested bh disable calls because * softirqs always disables bh. */ - if (softirq_count() != SOFTIRQ_OFFSET) - return -1; - - rcu_read_lock(); - classid = task_cls_state(current)->classid; - rcu_read_unlock(); + if (softirq_count() != SOFTIRQ_OFFSET) { + /* If there is an sk_classid we'll use that. 
*/ + if (!skb->sk) + return -1; + classid = skb->sk->sk_classid; + } if (!classid) return -1; @@ -289,18 +290,35 @@ static struct tcf_proto_ops cls_cgroup_ops __read_mostly = { static int __init init_cgroup_cls(void) { - int ret = register_tcf_proto_ops(&cls_cgroup_ops); - if (ret) - return ret; + int ret; + ret = cgroup_load_subsys(&net_cls_subsys); if (ret) - unregister_tcf_proto_ops(&cls_cgroup_ops); + goto out; + +#ifndef CONFIG_NET_CLS_CGROUP + /* We can't use rcu_assign_pointer because this is an int. */ + smp_wmb(); + net_cls_subsys_id = net_cls_subsys.subsys_id; +#endif + + ret = register_tcf_proto_ops(&cls_cgroup_ops); + if (ret) + cgroup_unload_subsys(&net_cls_subsys); + +out: return ret; } static void __exit exit_cgroup_cls(void) { unregister_tcf_proto_ops(&cls_cgroup_ops); + +#ifndef CONFIG_NET_CLS_CGROUP + net_cls_subsys_id = -1; + synchronize_rcu(); +#endif + cgroup_unload_subsys(&net_cls_subsys); } diff --git a/net/socket.c b/net/socket.c index f9f7d0872cac..367d5477d00f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -94,6 +94,7 @@ #include #include +#include #include #include @@ -558,6 +559,8 @@ static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct sock_iocb *si = kiocb_to_siocb(iocb); int err; + sock_update_classid(sock->sk); + si->sock = sock; si->scm = NULL; si->msg = msg; @@ -684,6 +687,8 @@ static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock, { struct sock_iocb *si = kiocb_to_siocb(iocb); + sock_update_classid(sock->sk); + si->sock = sock; si->scm = NULL; si->msg = msg; @@ -777,6 +782,8 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos, if (unlikely(!sock->ops->splice_read)) return -EINVAL; + sock_update_classid(sock->sk); + return sock->ops->splice_read(sock, ppos, pipe, len, flags); } @@ -3069,6 +3076,8 @@ int kernel_setsockopt(struct socket *sock, int level, int optname, int kernel_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) { + sock_update_classid(sock->sk); + if (sock->ops->sendpage) return sock->ops->sendpage(sock, page, offset, size, flags); -- cgit v1.2.2 From 8286274284e15b11b0f531b6ceeef21fbe00a8dd Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 24 May 2010 00:14:10 -0700 Subject: tun: Update classid on packet injection This patch makes tun update its socket classid every time we inject a packet into the network stack. This is so that any updates made by the admin to the process writing packets to tun is effected. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/sock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index a05ae7f9771e..37fe9b6adade 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1064,6 +1064,7 @@ void sock_update_classid(struct sock *sk) if (classid && classid != sk->sk_classid) sk->sk_classid = classid; } +EXPORT_SYMBOL(sock_update_classid); #endif /** -- cgit v1.2.2 From a69eee4988752c7196677958b4ed8f4c2b28499a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 24 May 2010 07:45:43 -0700 Subject: Revert "ath9k: Group Key fix for VAPs" This reverts commit 03ceedea972a82d343fa5c2528b3952fa9e615d5, since it breaks resume from suspend-to-ram on Rafael's Acer Ferrari One. NetworkManager thinks everything is ok, but it can't connect to the AP to get an IP address after the resume. In fact, it even breaks resume for non-ath9k chipsets: reverting it also fixes Rafael's Toshiba Protege R500 with the iwlagn driver. 
As Johannes says: "Indeed, this patch needs to be reverted. That mac80211 change is wrong and completely unnecessary." Reported-and-requested-by: Rafael J. Wysocki Acked-by: Johannes Berg Cc: Daniel Yingqiang Ma Cc: John W. Linville Cc: David Miller Signed-off-by: Linus Torvalds --- net/mac80211/key.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 8d4b41787dcf..e8f6e3b252d8 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -140,7 +140,6 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) struct ieee80211_sub_if_data, u.ap); - key->conf.ap_addr = sdata->dev->dev_addr; ret = drv_set_key(key->local, SET_KEY, sdata, sta, &key->conf); if (!ret) { -- cgit v1.2.2 From 3dc3fc52ea1537f5f37ab301d2b1468a0e79988f Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Mon, 24 May 2010 13:36:37 -0400 Subject: Revert "ath9k: Group Key fix for VAPs" This reverts commit 03ceedea972a82d343fa5c2528b3952fa9e615d5. This patch was reported to cause a regression in which connectivity is lost and cannot be reestablished after a suspend/resume cycle. Signed-off-by: John W. Linville --- net/mac80211/key.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 8d4b41787dcf..e8f6e3b252d8 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -140,7 +140,6 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) struct ieee80211_sub_if_data, u.ap); - key->conf.ap_addr = sdata->dev->dev_addr; ret = drv_set_key(key->local, SET_KEY, sdata, sta, &key->conf); if (!ret) { -- cgit v1.2.2 From a0c9101c05389e69a5382967667ca686a8d8fbd3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 21 May 2010 11:28:41 -0700 Subject: wireless: fix sta_info.h kernel-doc warnings Fix sta_info.h kernel-doc warnings: Warning(net/mac80211/sta_info.h:164): No description found for parameter 'tid_active_rx[STA_TID_NUM]' Warning(net/mac80211/sta_info.h:164): Excess struct/union/enum/typedef member 'tid_state_rx' description in 'sta_ampdu_mlme' Signed-off-by: Randy Dunlap Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/sta_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 48a5e80957f0..df9d45544ca5 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -145,7 +145,7 @@ enum plink_state { /** * struct sta_ampdu_mlme - STA aggregation information. * - * @tid_state_rx: TID's state in Rx session state machine. + * @tid_active_rx: TID's state in Rx session state machine. * @tid_rx: aggregation info for Rx per TID * @tid_state_tx: TID's state in Tx session state machine. * @tid_tx: aggregation info for Tx per TID -- cgit v1.2.2 From d9b52dc6fd1fbb2bad645cbc86a60f984c1cb179 Mon Sep 17 00:00:00 2001 From: Yoichi Yuasa Date: Mon, 24 May 2010 18:37:02 -0700 Subject: net/dccp: expansion of error code size Because MIPS's EDQUOT value is 1133(0x46d). It's larger than u8. Signed-off-by: Yoichi Yuasa Signed-off-by: David S. 
Miller --- net/dccp/input.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/dccp/input.c b/net/dccp/input.c index 58f7bc156850..6beb6a7d6fba 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -124,9 +124,9 @@ static int dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) return queued; } -static u8 dccp_reset_code_convert(const u8 code) +static u16 dccp_reset_code_convert(const u8 code) { - const u8 error_code[] = { + const u16 error_code[] = { [DCCP_RESET_CODE_CLOSED] = 0, /* normal termination */ [DCCP_RESET_CODE_UNSPECIFIED] = 0, /* nothing known */ [DCCP_RESET_CODE_ABORTED] = ECONNRESET, @@ -148,7 +148,7 @@ static u8 dccp_reset_code_convert(const u8 code) static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb) { - u8 err = dccp_reset_code_convert(dccp_hdr_reset(skb)->dccph_reset_code); + u16 err = dccp_reset_code_convert(dccp_hdr_reset(skb)->dccph_reset_code); sk->sk_err = err; -- cgit v1.2.2 From 4be929be34f9bdeffa40d815d32d7d60d2c7f03b Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 24 May 2010 14:33:03 -0700 Subject: kernel-wide: replace USHORT_MAX, SHORT_MAX and SHORT_MIN with USHRT_MAX, SHRT_MAX and SHRT_MIN - C99 knows about USHRT_MAX/SHRT_MAX/SHRT_MIN, not USHORT_MAX/SHORT_MAX/SHORT_MIN. - Make SHRT_MIN of type s16, not int, for consistency. [akpm@linux-foundation.org: fix drivers/dma/timb_dma.c] [akpm@linux-foundation.org: fix security/keys/keyring.c] Signed-off-by: Alexey Dobriyan Acked-by: WANG Cong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/9p/protocol.c | 2 +- net/dccp/options.c | 2 +- net/ipv4/udp.c | 8 ++++---- net/mac80211/sta_info.c | 2 +- net/sunrpc/rpcb_clnt.c | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 77d3aab4036b..149f82160130 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -394,7 +394,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, const char *sptr = va_arg(ap, const char *); int16_t len = 0; if (sptr) - len = MIN(strlen(sptr), USHORT_MAX); + len = MIN(strlen(sptr), USHRT_MAX); errcode = p9pdu_writef(pdu, proto_version, "w", len); diff --git a/net/dccp/options.c b/net/dccp/options.c index 1b08cae9c65b..07395f861d35 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -296,7 +296,7 @@ static inline u8 dccp_ndp_len(const u64 ndp) { if (likely(ndp <= 0xFF)) return 1; - return likely(ndp <= USHORT_MAX) ? 2 : (ndp <= UINT_MAX ? 4 : 6); + return likely(ndp <= USHRT_MAX) ? 2 : (ndp <= UINT_MAX ? 4 : 6); } int dccp_insert_option(struct sock *sk, struct sk_buff *skb, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 9de6a698f91d..baeec29fe0f1 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1686,8 +1686,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, return -ENOPROTOOPT; if (val != 0 && val < 8) /* Illegal coverage: use default (8) */ val = 8; - else if (val > USHORT_MAX) - val = USHORT_MAX; + else if (val > USHRT_MAX) + val = USHRT_MAX; up->pcslen = val; up->pcflag |= UDPLITE_SEND_CC; break; @@ -1700,8 +1700,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, return -ENOPROTOOPT; if (val != 0 && val < 8) /* Avoid silly minimal values. 
*/ val = 8; - else if (val > USHORT_MAX) - val = USHORT_MAX; + else if (val > USHRT_MAX) + val = USHRT_MAX; up->pcrlen = val; up->pcflag |= UDPLITE_RECV_CC; break; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 730197591ab5..ba9360a475b0 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -259,7 +259,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, skb_queue_head_init(&sta->tx_filtered); for (i = 0; i < NUM_RX_DATA_QUEUES; i++) - sta->last_seq_ctrl[i] = cpu_to_le16(USHORT_MAX); + sta->last_seq_ctrl[i] = cpu_to_le16(USHRT_MAX); #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: Allocated STA %pM\n", diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 121105355f60..dac219a56ae1 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -783,7 +783,7 @@ static int rpcb_dec_getport(struct rpc_rqst *req, __be32 *p, port = ntohl(*p); dprintk("RPC: %5u PMAP_%s result: %lu\n", task->tk_pid, task->tk_msg.rpc_proc->p_name, port); - if (unlikely(port > USHORT_MAX)) + if (unlikely(port > USHRT_MAX)) return -EIO; rpcb->r_port = port; -- cgit v1.2.2 From ef7ffe8f06895312aeb08a5f8a1d4c90e34335ea Mon Sep 17 00:00:00 2001 From: Alex Riesen Date: Mon, 24 May 2010 14:33:05 -0700 Subject: sunrpc: use formatting of module name in SUNRPC gcc-4.3.3 produces the warning: "format not a string literal and no format arguments" Signed-off-by: Alex Riesen Cc: Trond Myklebust Cc: Chuck Lever Cc: David S. Miller Acked-by: Tom Talpey Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/sunrpc/xprt.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 3fc325399ee4..dcd0132396ba 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -166,7 +166,6 @@ EXPORT_SYMBOL_GPL(xprt_unregister_transport); int xprt_load_transport(const char *transport_name) { struct xprt_class *t; - char module_name[sizeof t->name + 5]; int result; result = 0; @@ -178,9 +177,7 @@ int xprt_load_transport(const char *transport_name) } } spin_unlock(&xprt_list_lock); - strcpy(module_name, "xprt"); - strncat(module_name, transport_name, sizeof t->name); - result = request_module(module_name); + result = request_module("xprt%s", transport_name); out: return result; } -- cgit v1.2.2 From e513480e28cdfd868755f05c1a654fcfcee58070 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Tue, 25 May 2010 16:08:39 -0700 Subject: Phonet: fix potential use-after-free in pep_sock_close() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sk_common_release() might destroy our last reference to the socket. So an extra temporary reference is needed during cleanup. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. 
Miller --- net/phonet/pep.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/phonet/pep.c b/net/phonet/pep.c index af4d38bc3b22..7b048a35ca58 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -626,6 +626,7 @@ static void pep_sock_close(struct sock *sk, long timeout) struct pep_sock *pn = pep_sk(sk); int ifindex = 0; + sock_hold(sk); /* keep a reference after sk_common_release() */ sk_common_release(sk); lock_sock(sk); @@ -644,6 +645,7 @@ static void pep_sock_close(struct sock *sk, long timeout) if (ifindex) gprs_detach(sk); + sock_put(sk); } static int pep_wait_connreq(struct sock *sk, int noblock) -- cgit v1.2.2 From ed0f160ad674407adb3aba499444f71c83289c63 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 26 May 2010 00:38:56 -0700 Subject: ipmr: off by one in __ipmr_fill_mroute() This fixes a smatch warning: net/ipv4/ipmr.c +1917 __ipmr_fill_mroute(12) error: buffer overflow '(mrt)->vif_table' 32 <= 32 The ipv6 version had the same issue. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 2 +- net/ipv6/ip6mr.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 45889103b3e2..856123fe32f9 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1911,7 +1911,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct rtattr *mp_head; /* If cache is unresolved, don't try to parse IIF and OIF */ - if (c->mfc_parent > MAXVIFS) + if (c->mfc_parent >= MAXVIFS) return -ENOENT; if (VIF_EXISTS(mrt, c->mfc_parent)) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index bd9e7d3e9c8e..073071f2b75b 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2017,7 +2017,7 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, struct rtattr *mp_head; /* If cache is unresolved, don't try to parse IIF and OIF */ - if (c->mf6c_parent > MAXMIFS) + if (c->mf6c_parent >= MAXMIFS) return -ENOENT; if (MIF_EXISTS(mrt, c->mf6c_parent)) -- cgit v1.2.2 From 0a68b0bed08eeb7ec62e0125f17856b1ccb1ea4b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 26 May 2010 08:42:24 -0400 Subject: sunrpc: fix leak on error on socket xprt setup Also collect exit code together while we're at it. Signed-off-by: J. 
Bruce Fields Cc: Trond Myklebust Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index b7cd8cccbe72..2a9675136c68 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2293,6 +2293,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) struct sockaddr *addr = args->dstaddr; struct rpc_xprt *xprt; struct sock_xprt *transport; + struct rpc_xprt *ret; xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries); if (IS_ERR(xprt)) @@ -2330,8 +2331,8 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6); break; default: - kfree(xprt); - return ERR_PTR(-EAFNOSUPPORT); + ret = ERR_PTR(-EAFNOSUPPORT); + goto out_err; } if (xprt_bound(xprt)) @@ -2346,10 +2347,11 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) if (try_module_get(THIS_MODULE)) return xprt; - + ret = ERR_PTR(-EINVAL); +out_err: kfree(xprt->slot); kfree(xprt); - return ERR_PTR(-EINVAL); + return ret; } static const struct rpc_timeout xs_tcp_default_timeout = { @@ -2368,6 +2370,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) struct sockaddr *addr = args->dstaddr; struct rpc_xprt *xprt; struct sock_xprt *transport; + struct rpc_xprt *ret; xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); if (IS_ERR(xprt)) @@ -2403,8 +2406,8 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6); break; default: - kfree(xprt); - return ERR_PTR(-EAFNOSUPPORT); + ret = ERR_PTR(-EAFNOSUPPORT); + goto out_err; } if (xprt_bound(xprt)) @@ -2420,10 +2423,11 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) if (try_module_get(THIS_MODULE)) return xprt; - + ret = ERR_PTR(-EINVAL); +out_err: kfree(xprt->slot); kfree(xprt); - return ERR_PTR(-EINVAL); + return ret; } /** @@ -2437,6 +2441,7 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) struct rpc_xprt *xprt; struct sock_xprt *transport; struct svc_sock *bc_sock; + struct rpc_xprt *ret; xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); if (IS_ERR(xprt)) @@ -2476,8 +2481,8 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) RPCBIND_NETID_TCP6); break; default: - kfree(xprt); - return ERR_PTR(-EAFNOSUPPORT); + ret = ERR_PTR(-EAFNOSUPPORT); + goto out_err; } if (xprt_bound(xprt)) @@ -2499,9 +2504,11 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) if (try_module_get(THIS_MODULE)) return xprt; + ret = ERR_PTR(-EINVAL); +out_err: kfree(xprt->slot); kfree(xprt); - return ERR_PTR(-EINVAL); + return ret; } static struct xprt_class xs_udp_transport = { -- cgit v1.2.2 From a56635a56f2afb3d22d9ce07e8f8d69537416b2d Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 26 May 2010 05:56:48 +0000 Subject: net/iucv: Add missing spin_unlock Add a spin_unlock missing on the error path. There seems like no reason why the lock should continue to be held if the kzalloc fail. The semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression E1; @@ * spin_lock(E1,...); <+... when != E1 if (...) { ... when != E1 * return ...; } ...+> * spin_unlock(E1,...); // Signed-off-by: Julia Lawall Signed-off-by: David S. 
Miller --- net/iucv/af_iucv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index c8b4599a752e..9637e45744fa 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1619,7 +1619,7 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) save_message: save_msg = kzalloc(sizeof(struct sock_msg_q), GFP_ATOMIC | GFP_DMA); if (!save_msg) - return; + goto out_unlock; save_msg->path = path; save_msg->msg = *msg; -- cgit v1.2.2 From 8a74ad60a546b13bd1096b2a61a7a5c6fd9ae17c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 26 May 2010 19:20:18 +0000 Subject: net: fix lock_sock_bh/unlock_sock_bh This new sock lock primitive was introduced to speedup some user context socket manipulation. But it is unsafe to protect two threads, one using regular lock_sock/release_sock, one using lock_sock_bh/unlock_sock_bh This patch changes lock_sock_bh to be careful against 'owned' state. If owned is found to be set, we must take the slow path. lock_sock_bh() now returns a boolean to say if the slow path was taken, and this boolean is used at unlock_sock_bh time to call the appropriate unlock function. After this change, BH are either disabled or enabled during the lock_sock_bh/unlock_sock_bh protected section. This might be misleading, so we rename these functions to lock_sock_fast()/unlock_sock_fast(). Reported-by: Anton Blanchard Signed-off-by: Eric Dumazet Tested-by: Anton Blanchard Signed-off-by: David S. Miller --- net/core/datagram.c | 6 ++++-- net/core/sock.c | 33 +++++++++++++++++++++++++++++++++ net/ipv4/udp.c | 14 ++++++++------ net/ipv6/udp.c | 5 +++-- 4 files changed, 48 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/core/datagram.c b/net/core/datagram.c index e0097531417a..f5b6f43a4c2e 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -229,15 +229,17 @@ EXPORT_SYMBOL(skb_free_datagram); void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) { + bool slow; + if (likely(atomic_read(&skb->users) == 1)) smp_rmb(); else if (likely(!atomic_dec_and_test(&skb->users))) return; - lock_sock_bh(sk); + slow = lock_sock_fast(sk); skb_orphan(skb); sk_mem_reclaim_partial(sk); - unlock_sock_bh(sk); + unlock_sock_fast(sk, slow); /* skb is now orphaned, can be freed outside of locked section */ __kfree_skb(skb); diff --git a/net/core/sock.c b/net/core/sock.c index 37fe9b6adade..2cf7f9f7e775 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2007,6 +2007,39 @@ void release_sock(struct sock *sk) } EXPORT_SYMBOL(release_sock); +/** + * lock_sock_fast - fast version of lock_sock + * @sk: socket + * + * This version should be used for very small section, where process wont block + * return false if fast path is taken + * sk_lock.slock locked, owned = 0, BH disabled + * return true if slow path is taken + * sk_lock.slock unlocked, owned = 1, BH enabled + */ +bool lock_sock_fast(struct sock *sk) +{ + might_sleep(); + spin_lock_bh(&sk->sk_lock.slock); + + if (!sk->sk_lock.owned) + /* + * Note : We must disable BH + */ + return false; + + __lock_sock(sk); + sk->sk_lock.owned = 1; + spin_unlock(&sk->sk_lock.slock); + /* + * The sk_lock has mutex_lock() semantics here: + */ + mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_); + local_bh_enable(); + return true; +} +EXPORT_SYMBOL(lock_sock_fast); + int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) { struct timeval tv; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 9de6a698f91d..b9d0d409516f 
100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1063,10 +1063,11 @@ static unsigned int first_packet_length(struct sock *sk) spin_unlock_bh(&rcvq->lock); if (!skb_queue_empty(&list_kill)) { - lock_sock_bh(sk); + bool slow = lock_sock_fast(sk); + __skb_queue_purge(&list_kill); sk_mem_reclaim_partial(sk); - unlock_sock_bh(sk); + unlock_sock_fast(sk, slow); } return res; } @@ -1123,6 +1124,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, int peeked; int err; int is_udplite = IS_UDPLITE(sk); + bool slow; /* * Check any passed addresses @@ -1197,10 +1199,10 @@ out: return err; csum_copy_err: - lock_sock_bh(sk); + slow = lock_sock_fast(sk); if (!skb_kill_datagram(sk, skb, flags)) UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); - unlock_sock_bh(sk); + unlock_sock_fast(sk, slow); if (noblock) return -EAGAIN; @@ -1625,9 +1627,9 @@ int udp_rcv(struct sk_buff *skb) void udp_destroy_sock(struct sock *sk) { - lock_sock_bh(sk); + bool slow = lock_sock_fast(sk); udp_flush_pending_frames(sk); - unlock_sock_bh(sk); + unlock_sock_fast(sk, slow); } /* diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3d7a2c0b836a..87be58673b55 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -328,6 +328,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, int err; int is_udplite = IS_UDPLITE(sk); int is_udp4; + bool slow; if (addr_len) *addr_len=sizeof(struct sockaddr_in6); @@ -424,7 +425,7 @@ out: return err; csum_copy_err: - lock_sock_bh(sk); + slow = lock_sock_fast(sk); if (!skb_kill_datagram(sk, skb, flags)) { if (is_udp4) UDP_INC_STATS_USER(sock_net(sk), @@ -433,7 +434,7 @@ csum_copy_err: UDP6_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); } - unlock_sock_bh(sk); + unlock_sock_fast(sk, slow); if (flags & MSG_DONTWAIT) return -EAGAIN; -- cgit v1.2.2 From 92e99a98bb44ccce6c902ea077b49cafcfb80025 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 26 May 2010 14:43:33 -0700 Subject: iucv: convert cpu notifier to return encapsulate errno value By the previous modification, the cpu notifier can return encapsulate errno value. This converts the cpu notifiers for iucv. Signed-off-by: Akinobu Mita Cc: Ursula Braun Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/iucv/iucv.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index fd8b28361a64..f28ad2cc8428 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -632,13 +632,14 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, iucv_irq_data[cpu] = kmalloc_node(sizeof(struct iucv_irq_data), GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); if (!iucv_irq_data[cpu]) - return NOTIFY_BAD; + return notifier_from_errno(-ENOMEM); + iucv_param[cpu] = kmalloc_node(sizeof(union iucv_param), GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); if (!iucv_param[cpu]) { kfree(iucv_irq_data[cpu]); iucv_irq_data[cpu] = NULL; - return NOTIFY_BAD; + return notifier_from_errno(-ENOMEM); } iucv_param_irq[cpu] = kmalloc_node(sizeof(union iucv_param), GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); @@ -647,7 +648,7 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, iucv_param[cpu] = NULL; kfree(iucv_irq_data[cpu]); iucv_irq_data[cpu] = NULL; - return NOTIFY_BAD; + return notifier_from_errno(-ENOMEM); } break; case CPU_UP_CANCELED: @@ -677,7 +678,7 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, cpu_clear(cpu, cpumask); if (cpus_empty(cpumask)) /* Can't offline last IUCV enabled cpu. 
*/ - return NOTIFY_BAD; + return notifier_from_errno(-EINVAL); smp_call_function_single(cpu, iucv_retrieve_cpu, NULL, 1); if (cpus_empty(iucv_irq_cpumask)) smp_call_function_single(first_cpu(iucv_buffer_cpumask), -- cgit v1.2.2 From a47311380e094bb201be8a818370c73c3f52122c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 May 2010 16:09:39 -0700 Subject: net: fix __neigh_event_send() commit 7fee226ad23 (net: add a noref bit on skb dst) missed one spot where an skb is enqueued, with a possibly not refcounted dst entry. __neigh_event_send() inserts skb into arp_queue, so we must make sure dst entry is refcounted, or dst entry can be freed by garbage collector after caller exits from rcu protected section. Reported-by: Ingo Molnar Tested-by: Ingo Molnar Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/neighbour.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index bff37908bd55..6ba1c0eece03 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -934,6 +934,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) kfree_skb(buff); NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards); } + skb_dst_force(skb); __skb_queue_tail(&neigh->arp_queue, skb); } rc = 1; -- cgit v1.2.2 From 0aa68271510ae2b221d4b60892103837be63afe4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 27 May 2010 16:14:30 -0700 Subject: ipv6: Add GSO support on forwarding path Currently we disallow GSO packets on the IPv6 forward path. This patch fixes this. Note that I discovered that our existing GSO MTU checks (e.g., IPv4 forwarding) are buggy in that they skip the check altogether, when they really should be checking gso_size + header instead. I have also been lazy here in that I haven't bothered to segment the GSO packet by hand before generating an ICMP message. Someone should add that to be 100% correct. Reported-by: Ralf Baechle Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index cd963f64e27c..89425af0684c 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -507,7 +507,7 @@ int ip6_forward(struct sk_buff *skb) if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - if (skb->len > mtu) { + if (skb->len > mtu && !skb_is_gso(skb)) { /* Again, force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); -- cgit v1.2.2 From 50636af715ac1ceb1872bd29a4bdcc68975c3263 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 May 2010 03:41:17 -0700 Subject: xt_tee: use skb_dst_drop() After commit 7fee226a (net: add a noref bit on skb dst), its wrong to use : dst_release(skb_dst(skb)), since we could decrement a refcount while skb dst was not refcounted. We should use skb_dst_drop(skb) instead. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/netfilter/xt_TEE.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index d7920d9f49e9..859d9fd429c8 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -76,7 +76,7 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info) if (ip_route_output_key(net, &rt, &fl) != 0) return false; - dst_release(skb_dst(skb)); + skb_dst_drop(skb); skb_dst_set(skb, &rt->u.dst); skb->dev = rt->u.dst.dev; skb->protocol = htons(ETH_P_IP); @@ -157,7 +157,7 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info) if (dst == NULL) return false; - dst_release(skb_dst(skb)); + skb_dst_drop(skb); skb_dst_set(skb, dst); skb->dev = dst->dev; skb->protocol = htons(ETH_P_IPV6); -- cgit v1.2.2 From 8ca9418350eccd5dd2659931807c1901224dd638 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Fri, 28 May 2010 03:42:18 -0700 Subject: netlink: bug fix: don't overrun skbs on vf_port dump Noticed by Patrick McHardy: was continuing to fill skb after a nla_put_failure, ignoring the size calculated by upper layer. Now, return -EMSGSIZE on any overruns, but also allow netdev to fail ndo_get_vf_port with error other than -EMSGSIZE, thus unwinding nest. Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 7ab86f3a1ea4..7331bb2f6b9c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -722,14 +722,13 @@ static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) for (vf = 0; vf < dev_num_vf(dev->dev.parent); vf++) { vf_port = nla_nest_start(skb, IFLA_VF_PORT); - if (!vf_port) { - nla_nest_cancel(skb, vf_ports); - return -EMSGSIZE; - } + if (!vf_port) + goto nla_put_failure; NLA_PUT_U32(skb, IFLA_PORT_VF, vf); err = dev->netdev_ops->ndo_get_vf_port(dev, vf, skb); + if (err == -EMSGSIZE) + goto nla_put_failure; if (err) { -nla_put_failure: nla_nest_cancel(skb, vf_port); continue; } @@ -739,6 +738,10 @@ nla_put_failure: nla_nest_end(skb, vf_ports); return 0; + +nla_put_failure: + nla_nest_cancel(skb, vf_ports); + return -EMSGSIZE; } static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev) @@ -753,7 +756,7 @@ static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev) err = dev->netdev_ops->ndo_get_vf_port(dev, PORT_SELF_VF, skb); if (err) { nla_nest_cancel(skb, port_self); - return err; + return (err == -EMSGSIZE) ? err : 0; } nla_nest_end(skb, port_self); -- cgit v1.2.2 From 045de01a174d9f0734f657eb4b3313d89b4fd5ad Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Fri, 28 May 2010 03:42:43 -0700 Subject: netlink: bug fix: wrong size was calculated for vfinfo list blob The wrong size was being calculated for vfinfo. In one case, it was over- calculating using nlmsg_total_size on attrs, in another case, it was under-calculating by assuming ifla_vf_* structs are packed together, but each struct is it's own attr w/ hdr (and padding). Signed-off-by: Scott Feldman Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 7331bb2f6b9c..1a2af24e9e3d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -650,11 +650,12 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev) if (dev->dev.parent && dev_is_pci(dev->dev.parent)) { int num_vfs = dev_num_vf(dev->dev.parent); - size_t size = nlmsg_total_size(sizeof(struct nlattr)); - size += nlmsg_total_size(num_vfs * sizeof(struct nlattr)); - size += num_vfs * (sizeof(struct ifla_vf_mac) + - sizeof(struct ifla_vf_vlan) + - sizeof(struct ifla_vf_tx_rate)); + size_t size = nla_total_size(sizeof(struct nlattr)); + size += nla_total_size(num_vfs * sizeof(struct nlattr)); + size += num_vfs * + (nla_total_size(sizeof(struct ifla_vf_mac)) + + nla_total_size(sizeof(struct ifla_vf_vlan)) + + nla_total_size(sizeof(struct ifla_vf_tx_rate))); return size; } else return 0; -- cgit v1.2.2 From 368a07d26ae99c80678a968946744fd83e7708d0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 28 May 2010 14:26:23 +0200 Subject: mac80211: make a function static sparse correctly complains that __ieee80211_get_channel_mode is not static. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/chan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 5d218c530a4e..32be11e4c4d9 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -5,7 +5,7 @@ #include #include "ieee80211_i.h" -enum ieee80211_chan_mode +static enum ieee80211_chan_mode __ieee80211_get_channel_mode(struct ieee80211_local *local, struct ieee80211_sub_if_data *ignore) { -- cgit v1.2.2 From 6057fd78a8dcce6269f029b967051d5a2e9b0895 Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Fri, 28 May 2010 23:02:35 -0700 Subject: IPv6: fix Mobile IPv6 regression Commit f4f914b5 (net: ipv6 bind to device issue) caused a regression with Mobile IPv6 when it changed the meaning of fl->oif to become a strict requirement of the route lookup. Instead, only force strict mode when sk->sk_bound_dev_if is set on the calling socket, getting the intended behavior and fixing the regression. Tested-by: Arnaud Ebalard Signed-off-by: Brian Haley Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 294cbe8b0725..252d76199c41 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -814,7 +814,7 @@ struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, { int flags = 0; - if (fl->oif || rt6_need_strict(&fl->fl6_dst)) + if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst)) flags |= RT6_LOOKUP_F_IFACE; if (!ipv6_addr_any(&fl->fl6_src)) -- cgit v1.2.2 From 5b0daa3474d52bed906c4d5e92b44e10148c6972 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sat, 29 May 2010 00:12:13 -0700 Subject: skb: make skb_recycle_check() return a bool value Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- net/core/skbuff.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4c11000a96aa..4667d4d7c162 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -482,22 +482,22 @@ EXPORT_SYMBOL(consume_skb); * reference count dropping and cleans up the skbuff as if it * just came from __alloc_skb(). 
*/ -int skb_recycle_check(struct sk_buff *skb, int skb_size) +bool skb_recycle_check(struct sk_buff *skb, int skb_size) { struct skb_shared_info *shinfo; if (irqs_disabled()) - return 0; + return false; if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE) - return 0; + return false; skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD); if (skb_end_pointer(skb) - skb->head < skb_size) - return 0; + return false; if (skb_shared(skb) || skb_cloned(skb)) - return 0; + return false; skb_release_head_state(skb); @@ -509,7 +509,7 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size) skb->data = skb->head + NET_SKB_PAD; skb_reset_tail_pointer(skb); - return 1; + return true; } EXPORT_SYMBOL(skb_recycle_check); -- cgit v1.2.2 From 5daf47bb4e708fde32c1856a0d049e3c3d03c36c Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 26 May 2010 05:54:21 +0000 Subject: net/rds: Add missing mutex_unlock Add a mutex_unlock missing on the error path. In each case, whenever the label out is reached from elsewhere in the function, mutex is not locked. The semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression E1; @@ * mutex_lock(E1); <+... when != E1 if (...) { ... when != E1 * return ...; } ...+> * mutex_unlock(E1); // Signed-off-by: Julia Lawall Reviewed-by: Zach Brown Acked-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/ib_cm.c | 1 + net/rds/iw_cm.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 10ed0d55f759..f68832798db2 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -475,6 +475,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, err = rds_ib_setup_qp(conn); if (err) { rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", err); + mutex_unlock(&conn->c_cm_lock); goto out; } diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index a9d951b4fbae..b5dd6ac39be8 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -452,6 +452,7 @@ int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id, err = rds_iw_setup_qp(conn); if (err) { rds_iw_conn_error(conn, "rds_iw_setup_qp failed (%d)\n", err); + mutex_unlock(&conn->c_cm_lock); goto out; } -- cgit v1.2.2 From 97dc875f90a7b88a9fa476c256345c0d40fcdf6c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 26 May 2010 05:16:48 +0000 Subject: caif: unlock on error path in cfserl_receive() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There was an spin_unlock missing on the error path. The spin_lock was tucked in with the declarations so it was hard to spot. I added a new line. Signed-off-by: Dan Carpenter Acked-by: Sjur Brændeland Signed-off-by: David S. 
Miller --- net/caif/cfserl.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c index cb4325a3dc83..965c5baace40 100644 --- a/net/caif/cfserl.c +++ b/net/caif/cfserl.c @@ -59,16 +59,18 @@ static int cfserl_receive(struct cflayer *l, struct cfpkt *newpkt) u8 stx = CFSERL_STX; int ret; u16 expectlen = 0; + caif_assert(newpkt != NULL); spin_lock(&layr->sync); if (layr->incomplete_frm != NULL) { - layr->incomplete_frm = cfpkt_append(layr->incomplete_frm, newpkt, expectlen); pkt = layr->incomplete_frm; - if (pkt == NULL) + if (pkt == NULL) { + spin_unlock(&layr->sync); return -ENOMEM; + } } else { pkt = newpkt; } -- cgit v1.2.2 From 7dfde179c38056b91d51e60f3d50902387f27c84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Wed, 26 May 2010 00:44:44 +0000 Subject: Phonet: listening socket lock protects the connected socket list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The accept()'d socket need to be unhashed while the (listen()'ing) socket lock is held. This fixes a race condition that could lead to an OOPS. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/pep.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 7b048a35ca58..94d72e85a475 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -1045,12 +1045,12 @@ static void pep_sock_unhash(struct sock *sk) lock_sock(sk); if ((1 << sk->sk_state) & ~(TCPF_CLOSE|TCPF_LISTEN)) { skparent = pn->listener; - sk_del_node_init(sk); release_sock(sk); - sk = skparent; pn = pep_sk(skparent); - lock_sock(sk); + lock_sock(skparent); + sk_del_node_init(sk); + sk = skparent; } /* Unhash a listening sock only when it is closed * and all of its active connected pipes are closed. */ -- cgit v1.2.2 From 2903037400a26e7c0cc93ab75a7d62abfacdf485 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 29 May 2010 00:20:48 -0700 Subject: net: fix sk_forward_alloc corruptions As David found out, sock_queue_err_skb() should be called with socket lock hold, or we risk sk_forward_alloc corruption, since we use non atomic operations to update this field. This patch adds bh_lock_sock()/bh_unlock_sock() pair to three spots. (BH already disabled) 1) skb_tstamp_tx() 2) Before calling ip_icmp_error(), in __udp4_lib_err() 3) Before calling ipv6_icmp_error(), in __udp6_lib_err() Reported-by: Anton Blanchard Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/skbuff.c | 4 ++++ net/ipv4/udp.c | 2 ++ net/ipv6/udp.c | 6 ++++-- 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4667d4d7c162..f2913ae2b52c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2992,7 +2992,11 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, memset(serr, 0, sizeof(*serr)); serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; + + bh_lock_sock(sk); err = sock_queue_err_skb(sk, skb); + bh_unlock_sock(sk); + if (err) kfree_skb(skb); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b9d0d409516f..acdc9be833cc 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -634,7 +634,9 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; } else { + bh_lock_sock(sk); ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1)); + bh_unlock_sock(sk); } sk->sk_err = err; sk->sk_error_report(sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 87be58673b55..3048f906c042 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -466,9 +466,11 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk->sk_state != TCP_ESTABLISHED && !np->recverr) goto out; - if (np->recverr) + if (np->recverr) { + bh_lock_sock(sk); ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1)); - + bh_unlock_sock(sk); + } sk->sk_err = err; sk->sk_error_report(sk); out: -- cgit v1.2.2 From 27f39c73e63833b4c081a0d681d88b4184a0491d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 May 2010 22:07:23 +0000 Subject: net: Use __this_cpu_inc() in fast path This patch saves 224 bytes of text on my machine. __this_cpu_inc() generates a single instruction, using no scratch registers : 65 ff 04 25 a8 30 01 00 incl %gs:0x130a8 instead of : 48 c7 c2 80 30 01 00 mov $0x13080,%rdx 65 48 8b 04 25 88 ea 00 00 mov %gs:0xea88,%rax 83 44 10 28 01 addl $0x1,0x28(%rax,%rdx,1) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 3 +-- net/ipv4/route.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 1845b08c624e..7d76b056aa3d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2822,8 +2822,7 @@ static int __netif_receive_skb(struct sk_buff *skb) skb->dev = master; } - __get_cpu_var(softnet_data).processed++; - + __this_cpu_inc(softnet_data.processed); skb_reset_network_header(skb); skb_reset_transport_header(skb); skb->mac_len = skb->network_header - skb->mac_header; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 560acc677ce4..8495bceec764 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -253,8 +253,7 @@ static unsigned rt_hash_mask __read_mostly; static unsigned int rt_hash_log __read_mostly; static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); -#define RT_CACHE_STAT_INC(field) \ - (__raw_get_cpu_var(rt_cache_stat).field++) +#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field) static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx, int genid) -- cgit v1.2.2 From 15e83ed78864d0625e87a85f09b297c0919a4797 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 May 2010 23:16:03 +0000 Subject: net: remove zap_completion_queue netpoll does an interesting work in zap_completion_queue(), but this was before we did skb orphaning before delivering packets to device. 
It now makes sense to add a test in dev_kfree_skb_irq() to not queue a skb if already orphaned, and to remove netpoll zap_completion_queue() as a bonus. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 4 +++- net/core/netpoll.c | 31 ------------------------------- 2 files changed, 3 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 7d76b056aa3d..983a3c1d65c4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1577,7 +1577,9 @@ EXPORT_SYMBOL(__netif_schedule); void dev_kfree_skb_irq(struct sk_buff *skb) { - if (atomic_dec_and_test(&skb->users)) { + if (!skb->destructor) + dev_kfree_skb(skb); + else if (atomic_dec_and_test(&skb->users)) { struct softnet_data *sd; unsigned long flags; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 94825b109551..e034342c819c 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -49,7 +49,6 @@ static atomic_t trapped; (MAX_UDP_CHUNK + sizeof(struct udphdr) + \ sizeof(struct iphdr) + sizeof(struct ethhdr)) -static void zap_completion_queue(void); static void arp_reply(struct sk_buff *skb); static unsigned int carrier_timeout = 4; @@ -197,7 +196,6 @@ void netpoll_poll_dev(struct net_device *dev) service_arp_queue(dev->npinfo); - zap_completion_queue(); } void netpoll_poll(struct netpoll *np) @@ -221,40 +219,11 @@ static void refill_skbs(void) spin_unlock_irqrestore(&skb_pool.lock, flags); } -static void zap_completion_queue(void) -{ - unsigned long flags; - struct softnet_data *sd = &get_cpu_var(softnet_data); - - if (sd->completion_queue) { - struct sk_buff *clist; - - local_irq_save(flags); - clist = sd->completion_queue; - sd->completion_queue = NULL; - local_irq_restore(flags); - - while (clist != NULL) { - struct sk_buff *skb = clist; - clist = clist->next; - if (skb->destructor) { - atomic_inc(&skb->users); - dev_kfree_skb_any(skb); /* put this one back */ - } else { - __kfree_skb(skb); - } - } - } - - put_cpu_var(softnet_data); -} - static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve) { int count = 0; struct sk_buff *skb; - zap_completion_queue(); refill_skbs(); repeat: -- cgit v1.2.2 From d7fd1b5747fff3bde92777bcaf705d98ae6f8b6f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 17 May 2010 20:40:51 +0000 Subject: tcp: tcp_md5_hash_skb_data() frag_list handling tcp_md5_hash_skb_data() should handle skb->frag_list, and eventually recurse. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6596b4feeddc..49d0d2b8900c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2999,6 +2999,7 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, const unsigned head_data_len = skb_headlen(skb) > header_len ? skb_headlen(skb) - header_len : 0; const struct skb_shared_info *shi = skb_shinfo(skb); + struct sk_buff *frag_iter; sg_init_table(&sg, 1); @@ -3013,6 +3014,10 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, return 1; } + skb_walk_frags(skb, frag_iter) + if (tcp_md5_hash_skb_data(hp, frag_iter, 0)) + return 1; + return 0; } -- cgit v1.2.2 From 16dff91804777efbb0ce18b0a7e54c55e86b7beb Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 21 May 2010 22:18:34 +0000 Subject: net/can: Use memdup_user Use memdup_user when user data is immediately copied into the allocated region. 
The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ expression from,to,size,flag; position p; identifier l1,l2; @@ - to = \(kmalloc@p\|kzalloc@p\)(size,flag); + to = memdup_user(from,size); if ( - to==NULL + IS_ERR(to) || ...) { <+... when != goto l1; - -ENOMEM + PTR_ERR(to) ...+> } - if (copy_from_user(to, from, size) != 0) { - <+... when != goto l2; - -EFAULT - ...+> - } // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- net/can/raw.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/can/raw.c b/net/can/raw.c index da99cf153b33..ccfe633eec8e 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -436,14 +436,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, if (count > 1) { /* filter does not fit into dfilter => alloc space */ - filter = kmalloc(optlen, GFP_KERNEL); - if (!filter) - return -ENOMEM; - - if (copy_from_user(filter, optval, optlen)) { - kfree(filter); - return -EFAULT; - } + filter = memdup_user(optval, optlen); + if (IS_ERR(filter)) + return PTR_ERR(filter); } else if (count == 1) { if (copy_from_user(&sfilter, optval, sizeof(sfilter))) return -EFAULT; -- cgit v1.2.2 From 042604d2a3ee59c84f8293988caf35bac5de9eb3 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 21 May 2010 22:25:19 +0000 Subject: net/dccp: Use memdup_user Use memdup_user when user data is immediately copied into the allocated region. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ expression from,to,size,flag; position p; identifier l1,l2; @@ - to = \(kmalloc@p\|kzalloc@p\)(size,flag); + to = memdup_user(from,size); if ( - to==NULL + IS_ERR(to) || ...) { <+... when != goto l1; - -ENOMEM + PTR_ERR(to) ...+> } - if (copy_from_user(to, from, size) != 0) { - <+... when != goto l2; - -EFAULT - ...+> - } // Signed-off-by: Julia Lawall Acked-by: Gerrit Renker Signed-off-by: David S. Miller --- net/dccp/proto.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/dccp/proto.c b/net/dccp/proto.c index b03ecf6b2bb0..f79bcef5088f 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -473,14 +473,9 @@ static int dccp_setsockopt_ccid(struct sock *sk, int type, if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS) return -EINVAL; - val = kmalloc(optlen, GFP_KERNEL); - if (val == NULL) - return -ENOMEM; - - if (copy_from_user(val, optval, optlen)) { - kfree(val); - return -EFAULT; - } + val = memdup_user(optval, optlen); + if (IS_ERR(val)) + return PTR_ERR(val); lock_sock(sk); if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID) -- cgit v1.2.2 From f5d72af9f3dfea67f014f880d00665f96622f57f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 22 May 2010 10:43:42 +0000 Subject: caif: remove unneeded null check in caif_connect() We already dereferenced uaddr towards the start of the function when we checked that "uaddr->sa_family != AF_CAIF". Both the check here and the earlier check were added in bece7b2398d0: "caif: Rewritten socket implementation". Before that patch, we assumed that we recieved a valid pointer for uaddr, and based on that, I have removed this check. Signed-off-by: Dan Carpenter Acked-by: Sjur Braendeland Signed-off-by: David S. 
Miller --- net/caif/caif_socket.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 3d0e09584fae..791249316ef3 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -874,8 +874,7 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr, sk_stream_kill_queues(&cf_sk->sk); err = -EINVAL; - if (addr_len != sizeof(struct sockaddr_caif) || - !uaddr) + if (addr_len != sizeof(struct sockaddr_caif)) goto out; memcpy(&cf_sk->conn_req.sockaddr, uaddr, -- cgit v1.2.2 From 9bfca3c6e5965a01b7c6d8e2f2ea30799c9d43bc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 24 May 2010 06:29:34 +0000 Subject: caif: cleanup: remove duplicate checks "phyinfo" can never be null here because we assigned it an address, so I removed both the assert and the second check inside the if statement. I removed the "phyinfo->phy_layer != NULL" check as well because that was asserted earlier. Walter Harms suggested I move the "phyinfo->phy_ref_count++;" outside the if condition for readability, so I have done that. Signed-off-by: Dan Carpenter Acked-by: Sjur Braendeland Signed-off-by: David S. Miller --- net/caif/cfcnfg.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index df43f264d9fb..7c81974a45c4 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -308,19 +308,15 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv, caif_assert(cnfg != NULL); caif_assert(phyid != 0); phyinfo = &cnfg->phy_layers[phyid]; - caif_assert(phyinfo != NULL); caif_assert(phyinfo->id == phyid); caif_assert(phyinfo->phy_layer != NULL); caif_assert(phyinfo->phy_layer->id == phyid); - if (phyinfo != NULL && - phyinfo->phy_ref_count++ == 0 && - phyinfo->phy_layer != NULL && + phyinfo->phy_ref_count++; + if (phyinfo->phy_ref_count == 1 && phyinfo->phy_layer->modemcmd != NULL) { - caif_assert(phyinfo->phy_layer->id == phyid); phyinfo->phy_layer->modemcmd(phyinfo->phy_layer, _CAIF_MODEMCMD_PHYIF_USEFULL); - } adapt_layer->id = channel_id; -- cgit v1.2.2 From 06c4648d46d1b757d6b9591a86810be79818b60c Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 26 May 2010 00:09:42 +0000 Subject: arp_notify: allow drivers to explicitly request a notification event. Currently such notifications are only generated when the device comes up or the address changes. However one use case for these notifications is to enable faster network recovery after a virtual machine migration (by causing switches to relearn their MAC tables). A migration appears to the network stack as a temporary loss of carrier and therefore does not trigger either of the current conditions. Rather than adding carrier up as a trigger (which can cause issues when interfaces are flapping) simply add an interface which the driver can use to explicitly trigger the notification. Signed-off-by: Ian Campbell Cc: Stephen Hemminger Cc: Jeremy Fitzhardinge Cc: David S. Miller Cc: netdev@vger.kernel.org Cc: stable@kernel.org Signed-off-by: David S. 
Miller --- net/ipv4/devinet.c | 1 + net/sched/sch_generic.c | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 382bc768ed56..da14c49284f4 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1081,6 +1081,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, } ip_mc_up(in_dev); /* fall through */ + case NETDEV_NOTIFY_PEERS: case NETDEV_CHANGEADDR: /* Send gratuitous ARP to notify of link change */ if (IN_DEV_ARP_NOTIFY(in_dev)) { diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index a63029ef3edd..bd1892fe4b21 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -327,6 +327,24 @@ void netif_carrier_off(struct net_device *dev) } EXPORT_SYMBOL(netif_carrier_off); +/** + * netif_notify_peers - notify network peers about existence of @dev + * @dev: network device + * + * Generate traffic such that interested network peers are aware of + * @dev, such as by generating a gratuitous ARP. This may be used when + * a device wants to inform the rest of the network about some sort of + * reconfiguration such as a failover event or virtual machine + * migration. + */ +void netif_notify_peers(struct net_device *dev) +{ + rtnl_lock(); + call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev); + rtnl_unlock(); +} +EXPORT_SYMBOL(netif_notify_peers); + /* "NOOP" scheduler: the best scheduler, recommended for all interfaces under all circumstances. It is difficult to invent anything faster or cheaper. -- cgit v1.2.2 From c936e8bd1de2fa50c49e3df6fa5036bf07870b67 Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Mon, 31 May 2010 16:41:09 +0200 Subject: netfilter: don't xt_jumpstack_alloc twice in xt_register_table In xt_register_table, xt_jumpstack_alloc is called first, later xt_replace_table is used. But in xt_replace_table, xt_jumpstack_alloc will be used again. Then the memory allocated by previous xt_jumpstack_alloc will be leaked. We can simply remove the previous xt_jumpstack_alloc because there aren't any users of newinfo between xt_jumpstack_alloc and xt_replace_table. Signed-off-by: Xiaotian Feng Cc: Patrick McHardy Cc: "David S. Miller" Cc: Jan Engelhardt Cc: Andrew Morton Cc: Rusty Russell Cc: Alexey Dobriyan Acked-By: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/netfilter/x_tables.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 445de702b8b7..47b1e7917a9c 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -844,10 +844,6 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table_info *private; struct xt_table *t, *table; - ret = xt_jumpstack_alloc(newinfo); - if (ret < 0) - return ERR_PTR(ret); - /* Don't add one object to multiple lists. */ table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL); if (!table) { -- cgit v1.2.2 From 7489aec8eed4f2f1eb3b4d35763bd3ea30b32ef5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 31 May 2010 16:41:35 +0200 Subject: netfilter: xtables: stackptr should be percpu commit f3c5c1bfd4 (netfilter: xtables: make ip_tables reentrant) introduced a performance regression, because stackptr array is shared by all cpus, adding cache line ping pongs. 
(16 cpus share a 64 bytes cache line) Fix this using alloc_percpu() Signed-off-by: Eric Dumazet Acked-By: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ip_tables.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 2 +- net/netfilter/x_tables.c | 13 +++---------- 3 files changed, 5 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 63958f3394a5..4b6c5ca610fc 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -336,7 +336,7 @@ ipt_do_table(struct sk_buff *skb, cpu = smp_processor_id(); table_base = private->entries[cpu]; jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; - stackptr = &private->stackptr[cpu]; + stackptr = per_cpu_ptr(private->stackptr, cpu); origptr = *stackptr; e = get_entry(table_base, private->hook_entry[hook]); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 6f517bd83692..9d2d68f0e605 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -363,7 +363,7 @@ ip6t_do_table(struct sk_buff *skb, cpu = smp_processor_id(); table_base = private->entries[cpu]; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; - stackptr = &private->stackptr[cpu]; + stackptr = per_cpu_ptr(private->stackptr, cpu); origptr = *stackptr; e = get_entry(table_base, private->hook_entry[hook]); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 47b1e7917a9c..e34622fa0003 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -699,10 +699,8 @@ void xt_free_table_info(struct xt_table_info *info) vfree(info->jumpstack); else kfree(info->jumpstack); - if (sizeof(unsigned int) * nr_cpu_ids > PAGE_SIZE) - vfree(info->stackptr); - else - kfree(info->stackptr); + + free_percpu(info->stackptr); kfree(info); } @@ -753,14 +751,9 @@ static int xt_jumpstack_alloc(struct xt_table_info *i) unsigned int size; int cpu; - size = sizeof(unsigned int) * nr_cpu_ids; - if (size > PAGE_SIZE) - i->stackptr = vmalloc(size); - else - i->stackptr = kmalloc(size, GFP_KERNEL); + i->stackptr = alloc_percpu(unsigned int); if (i->stackptr == NULL) return -ENOMEM; - memset(i->stackptr, 0, size); size = sizeof(void **) * nr_cpu_ids; if (size > PAGE_SIZE) -- cgit v1.2.2 From b1faf5666438090a4dc4fceac8502edc7788b7e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 31 May 2010 23:44:05 -0700 Subject: net: sock_queue_err_skb() dont mess with sk_forward_alloc Correct sk_forward_alloc handling for error_queue would need to use a backlog of frames that softirq handler could not deliver because socket is owned by user thread. Or extend backlog processing to be able to process normal and error packets. Another possibility is to not use mem charge for error queue, this is what I implemented in this patch. Note: this reverts commit 29030374 (net: fix sk_forward_alloc corruptions), since we dont need to lock socket anymore. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/skbuff.c | 30 ++++++++++++++++++++++++++++-- net/ipv4/udp.c | 6 ++---- net/ipv6/udp.c | 6 ++---- 3 files changed, 32 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4e7ac09c281a..9f07e749d7b1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2965,6 +2965,34 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) } EXPORT_SYMBOL_GPL(skb_cow_data); +static void sock_rmem_free(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); +} + +/* + * Note: We dont mem charge error packets (no sk_forward_alloc changes) + */ +int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) +{ + if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= + (unsigned)sk->sk_rcvbuf) + return -ENOMEM; + + skb_orphan(skb); + skb->sk = sk; + skb->destructor = sock_rmem_free; + atomic_add(skb->truesize, &sk->sk_rmem_alloc); + + skb_queue_tail(&sk->sk_error_queue, skb); + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk, skb->len); + return 0; +} +EXPORT_SYMBOL(sock_queue_err_skb); + void skb_tstamp_tx(struct sk_buff *orig_skb, struct skb_shared_hwtstamps *hwtstamps) { @@ -2997,9 +3025,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; - bh_lock_sock(sk); err = sock_queue_err_skb(sk, skb); - bh_unlock_sock(sk); if (err) kfree_skb(skb); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 50678f9a2763..eec4ff456e33 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -633,11 +633,9 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) if (!inet->recverr) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; - } else { - bh_lock_sock(sk); + } else ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1)); - bh_unlock_sock(sk); - } + sk->sk_err = err; sk->sk_error_report(sk); out: diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3048f906c042..87be58673b55 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -466,11 +466,9 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk->sk_state != TCP_ESTABLISHED && !np->recverr) goto out; - if (np->recverr) { - bh_lock_sock(sk); + if (np->recverr) ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1)); - bh_unlock_sock(sk); - } + sk->sk_err = err; sk->sk_error_report(sk); out: -- cgit v1.2.2 From 288fcee8b7aa98796d96cd5b1b2e8005639328bf Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 31 May 2010 23:48:19 -0700 Subject: net/ipv4/tcp_input.c: fix compilation breakage when FASTRETRANS_DEBUG > 1 Commit: c720c7e8383aff1cb219bddf474ed89d850336e3 missed these. Signed-off-by: Joe Perches Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3e6dafcb1071..548d575e6cc6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2639,7 +2639,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) if (sk->sk_family == AF_INET) { printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n", msg, - &inet->daddr, ntohs(inet->dport), + &inet->inet_daddr, ntohs(inet->inet_dport), tp->snd_cwnd, tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); @@ -2649,7 +2649,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) struct ipv6_pinfo *np = inet6_sk(sk); printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", msg, - &np->daddr, ntohs(inet->dport), + &np->daddr, ntohs(inet->inet_dport), tp->snd_cwnd, tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); -- cgit v1.2.2 From 3ed37a6fa70a3a63dbb257fc640facb3974bba40 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 31 May 2010 17:23:21 +0000 Subject: net/ipv4/igmp.c: Remove unnecessary kmalloc casts Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 5fff865a4fa7..250cb5e1af48 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1646,8 +1646,7 @@ static int sf_setstate(struct ip_mc_list *pmc) if (dpsf->sf_inaddr == psf->sf_inaddr) break; if (!dpsf) { - dpsf = (struct ip_sf_list *) - kmalloc(sizeof(*dpsf), GFP_ATOMIC); + dpsf = kmalloc(sizeof(*dpsf), GFP_ATOMIC); if (!dpsf) continue; *dpsf = *psf; -- cgit v1.2.2 From 5d55354f147bcf82b7a330bf9617b82a692b7d49 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 31 May 2010 17:23:22 +0000 Subject: net/ipv6/mcast.c: Remove unnecessary kmalloc casts Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/ipv6/mcast.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 59f1881968c7..7b5fb43c227c 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1995,8 +1995,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc) &psf->sf_addr)) break; if (!dpsf) { - dpsf = (struct ip6_sf_list *) - kmalloc(sizeof(*dpsf), GFP_ATOMIC); + dpsf = kmalloc(sizeof(*dpsf), GFP_ATOMIC); if (!dpsf) continue; *dpsf = *psf; -- cgit v1.2.2 From aac4dddc358acfd9d98b20024a42c34dfab31c39 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 1 Jun 2010 00:26:58 -0700 Subject: ipv6: get rid of ipip6_prl_lock As noticed by Julia Lawall, ipip6_tunnel_add_prl() incorrectly calls kzallloc(..., GFP_KERNEL) while a spinlock is held. She provided a patch to use GFP_ATOMIC instead. One possibility would be to convert this spinlock to a mutex, or preallocate the thing before taking the lock. After RCU conversion, it appears we dont need this lock, since caller already holds RTNL Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv6/sit.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index e51e650ea80b..702c532ec21e 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -249,8 +249,6 @@ failed: return NULL; } -static DEFINE_SPINLOCK(ipip6_prl_lock); - #define for_each_prl_rcu(start) \ for (prl = rcu_dereference(start); \ prl; \ @@ -340,7 +338,7 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg) if (a->addr == htonl(INADDR_ANY)) return -EINVAL; - spin_lock(&ipip6_prl_lock); + ASSERT_RTNL(); for (p = t->prl; p; p = p->next) { if (p->addr == a->addr) { @@ -370,7 +368,6 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg) t->prl_count++; rcu_assign_pointer(t->prl, p); out: - spin_unlock(&ipip6_prl_lock); return err; } @@ -397,7 +394,7 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) struct ip_tunnel_prl_entry *x, **p; int err = 0; - spin_lock(&ipip6_prl_lock); + ASSERT_RTNL(); if (a && a->addr != htonl(INADDR_ANY)) { for (p = &t->prl; *p; p = &(*p)->next) { @@ -419,7 +416,6 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) } } out: - spin_unlock(&ipip6_prl_lock); return err; } -- cgit v1.2.2 From a8b563894d6fee9b90b7d6ed76f8ec28ad45bcbe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 1 Jun 2010 11:48:31 +0200 Subject: netfilter: br_netfilter: use skb_set_noref() Avoid dirtying bridge_parent_rtable refcount, using new dst noref infrastructure. Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/bridge/br_netfilter.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 44420992f72f..cbea5af24ce6 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -244,8 +244,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) kfree_skb(skb); return 0; } - dst_hold(&rt->u.dst); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set_noref(skb, &rt->u.dst); skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); @@ -396,8 +395,7 @@ bridged_dnat: kfree_skb(skb); return 0; } - dst_hold(&rt->u.dst); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set_noref(skb, &rt->u.dst); } skb->dev = nf_bridge->physindev; -- cgit v1.2.2 From fabf3a85ab88063c10f367cccba7b3a1e59df996 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 1 Jun 2010 12:00:41 +0200 Subject: netfilter: xt_statistic: remove nth_lock spinlock Use atomic_cmpxchg() to avoid dirtying a shared location. xt_statistic_priv smp aligned to avoid sharing same cache line with other stuff. 
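In outline, the lock-free update is a read/modify/compare-and-swap retry loop. A stand-alone sketch (hypothetical helper name, mirroring the statistic_mt() hunk in the diff below; assumes the kernel's atomic_t primitives):

	/*
	 * Illustrative sketch only: nth_counter_hit() is a hypothetical helper
	 * showing the cmpxchg retry loop; the real logic lives in statistic_mt().
	 */
	static bool nth_counter_hit(atomic_t *count, int every)
	{
		int oval, nval;

		do {
			oval = atomic_read(count);
			nval = (oval == every) ? 0 : oval + 1;
		} while (atomic_cmpxchg(count, oval, nval) != oval);

		return nval == 0;	/* counter wrapped: this is the "Nth" packet */
	}

Concurrent CPUs may occasionally retry the cmpxchg, but they no longer serialize on a shared spinlock, and the ____cacheline_aligned_in_smp annotation keeps the counter off cache lines shared with unrelated data.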
Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/netfilter/xt_statistic.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index 96e62b8fd6b1..42ecb71d445f 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c @@ -18,8 +18,8 @@ #include struct xt_statistic_priv { - uint32_t count; -}; + atomic_t count; +} ____cacheline_aligned_in_smp; MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); @@ -27,13 +27,12 @@ MODULE_DESCRIPTION("Xtables: statistics-based matching (\"Nth\", random)"); MODULE_ALIAS("ipt_statistic"); MODULE_ALIAS("ip6t_statistic"); -static DEFINE_SPINLOCK(nth_lock); - static bool statistic_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_statistic_info *info = par->matchinfo; bool ret = info->flags & XT_STATISTIC_INVERT; + int nval, oval; switch (info->mode) { case XT_STATISTIC_MODE_RANDOM: @@ -41,12 +40,12 @@ statistic_mt(const struct sk_buff *skb, struct xt_action_param *par) ret = !ret; break; case XT_STATISTIC_MODE_NTH: - spin_lock_bh(&nth_lock); - if (info->master->count++ == info->u.nth.every) { - info->master->count = 0; + do { + oval = atomic_read(&info->master->count); + nval = (oval == info->u.nth.every) ? 0 : oval + 1; + } while (atomic_cmpxchg(&info->master->count, oval, nval) != oval); + if (nval == 0) ret = !ret; - } - spin_unlock_bh(&nth_lock); break; } @@ -64,7 +63,7 @@ static int statistic_mt_check(const struct xt_mtchk_param *par) info->master = kzalloc(sizeof(*info->master), GFP_KERNEL); if (info->master == NULL) return -ENOMEM; - info->master->count = info->u.nth.count; + atomic_set(&info->master->count, info->u.nth.count); return 0; } -- cgit v1.2.2 From 8ae5977ff95c03fe6c36a5721c57dcb4bfe4f290 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 30 May 2010 14:52:58 +0200 Subject: mac80211: fix blockack-req processing Daniel reported that the paged RX changes had broken blockack request frame processing due to using data that wasn't really part of the skb data. Fix this using skb_copy_bits() for the needed data. As a side effect, this adds a check on processing too short frames, which previously this code could do. Reported-by: Daniel Halperin Signed-off-by: Johannes Berg Acked-by: Daniel Halperin Signed-off-by: John W. 
Linville --- net/mac80211/rx.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6e2a7bcd8cb8..5e0b65406c44 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1818,17 +1818,26 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) return RX_CONTINUE; if (ieee80211_is_back_req(bar->frame_control)) { + struct { + __le16 control, start_seq_num; + } __packed bar_data; + if (!rx->sta) return RX_DROP_MONITOR; + + if (skb_copy_bits(skb, offsetof(struct ieee80211_bar, control), + &bar_data, sizeof(bar_data))) + return RX_DROP_MONITOR; + spin_lock(&rx->sta->lock); - tid = le16_to_cpu(bar->control) >> 12; + tid = le16_to_cpu(bar_data.control) >> 12; if (!rx->sta->ampdu_mlme.tid_active_rx[tid]) { spin_unlock(&rx->sta->lock); return RX_DROP_MONITOR; } tid_agg_rx = rx->sta->ampdu_mlme.tid_rx[tid]; - start_seq_num = le16_to_cpu(bar->start_seq_num) >> 4; + start_seq_num = le16_to_cpu(bar_data.start_seq_num) >> 4; /* reset session timer */ if (tid_agg_rx->timeout) -- cgit v1.2.2 From 51a0d38de26226f2779912d92f155b93d539da9a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 31 May 2010 12:00:12 +0200 Subject: mac80211: fix dialog token allocator The dialog token allocator has apparently been broken since b83f4e15 ("mac80211: fix deadlock in sta->lock") because it got moved out under the spinlock. Fix it. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-tx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index c163d0a149f4..98258b7341e3 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -332,14 +332,16 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) IEEE80211_QUEUE_STOP_REASON_AGGREGATION); spin_unlock(&local->ampdu_lock); - spin_unlock_bh(&sta->lock); - /* send an addBA request */ + /* prepare tid data */ sta->ampdu_mlme.dialog_token_allocator++; sta->ampdu_mlme.tid_tx[tid]->dialog_token = sta->ampdu_mlme.dialog_token_allocator; sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num; + spin_unlock_bh(&sta->lock); + + /* send AddBA request */ ieee80211_send_addba_request(sdata, pubsta->addr, tid, sta->ampdu_mlme.tid_tx[tid]->dialog_token, sta->ampdu_mlme.tid_tx[tid]->ssn, -- cgit v1.2.2 From fafeeb6c80e3842c6dc19d05de09a23f23eef0d8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 1 Jun 2010 10:04:49 +0000 Subject: xfrm: force a dst reference in __xfrm_route_forward() Packets going through __xfrm_route_forward() have a not refcounted dst entry, since we enabled a noref forwarding path. xfrm_lookup() might incorrectly release this dst entry. It's a bit late to make invasive changes in xfrm_lookup(), so lets force a refcount in this path. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_policy.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d965a2bad8d3..4bf27d901333 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2153,6 +2153,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) return 0; } + skb_dst_force(skb); dst = skb_dst(skb); res = xfrm_lookup(net, &dst, &fl, NULL, 0) == 0; -- cgit v1.2.2 From bc135b23d01acf7ee926aaf75b0020c86d3869f9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Jun 2010 03:23:51 -0700 Subject: net: Define accessors to manipulate QDISC_STATE_RUNNING Define three helpers to manipulate QDISC_STATE_RUNNIG flag, that a second patch will move on another location. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- net/sched/sch_generic.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 983a3c1d65c4..2733226d90b2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2047,7 +2047,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, kfree_skb(skb); rc = NET_XMIT_DROP; } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) && - !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) { + qdisc_run_begin(q)) { /* * This is a work-conserving queue; there are no old skbs * waiting to be sent out; and the qdisc is not running - @@ -2059,7 +2059,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, if (sch_direct_xmit(skb, q, dev, txq, root_lock)) __qdisc_run(q); else - clear_bit(__QDISC_STATE_RUNNING, &q->state); + qdisc_run_end(q); rc = NET_XMIT_SUCCESS; } else { diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index bd1892fe4b21..37b86eab6779 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -205,7 +205,7 @@ void __qdisc_run(struct Qdisc *q) } } - clear_bit(__QDISC_STATE_RUNNING, &q->state); + qdisc_run_end(q); } unsigned long dev_trans_start(struct net_device *dev) @@ -797,7 +797,7 @@ static bool some_qdisc_is_busy(struct net_device *dev) spin_lock_bh(root_lock); - val = (test_bit(__QDISC_STATE_RUNNING, &q->state) || + val = (qdisc_is_running(q) || test_bit(__QDISC_STATE_SCHED, &q->state)); spin_unlock_bh(root_lock); -- cgit v1.2.2 From 194dbcc8a1a97cbac9a619a563e5f6b7f7d5a485 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 12 May 2010 21:31:06 +0000 Subject: net: init_vlan should not copy slave or master flags The vlan device should not copy the slave or master flags from the real device. It is not in the bond until added nor is it a master. Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 55be90826f5f..529842677817 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -708,7 +708,8 @@ static int vlan_dev_init(struct net_device *dev) netif_carrier_off(dev); /* IFF_BROADCAST|IFF_MULTICAST; ??? 
*/ - dev->flags = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI); + dev->flags = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | + IFF_MASTER | IFF_SLAVE); dev->iflink = real_dev->ifindex; dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT))) | -- cgit v1.2.2 From 2df4a0fa1540c460ec69788ab2a901cc72a75644 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 12 May 2010 21:31:11 +0000 Subject: net: fix conflict between null_or_orig and null_or_bond If a skb is received on an inactive bond that does not meet the special cases checked for by skb_bond_should_drop it should only be delivered to exact matches as the comment in netif_receive_skb() says. However because null_or_bond could also be null this is not always true. This patch renames null_or_bond to orig_or_bond and initializes it to orig_dev. This keeps the intent of null_or_bond to pass frames received on VLAN interfaces stacked on bonding interfaces without invalidating the statement for null_or_orig. Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 1845b08c624e..d03470f5260a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2795,7 +2795,7 @@ static int __netif_receive_skb(struct sk_buff *skb) struct net_device *orig_dev; struct net_device *master; struct net_device *null_or_orig; - struct net_device *null_or_bond; + struct net_device *orig_or_bond; int ret = NET_RX_DROP; __be16 type; @@ -2868,10 +2868,10 @@ ncls: * device that may have registered for a specific ptype. The * handler may have to adjust skb->dev and orig_dev. */ - null_or_bond = NULL; + orig_or_bond = orig_dev; if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) && (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) { - null_or_bond = vlan_dev_real_dev(skb->dev); + orig_or_bond = vlan_dev_real_dev(skb->dev); } type = skb->protocol; @@ -2879,7 +2879,7 @@ ncls: &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { if (ptype->type == type && (ptype->dev == null_or_orig || ptype->dev == skb->dev || ptype->dev == orig_dev || - ptype->dev == null_or_bond)) { + ptype->dev == orig_or_bond)) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; -- cgit v1.2.2 From 79640a4ca6955e3ebdb7038508fa7a0cd7fa5527 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Jun 2010 05:09:29 -0700 Subject: net: add additional lock to qdisc to increase throughput When many cpus compete for sending frames on a given qdisc, the qdisc spinlock suffers from very high contention. The cpu owning __QDISC_STATE_RUNNING bit has same priority to acquire the lock, and cannot dequeue packets fast enough, since it must wait for this lock for each dequeued packet. One solution to this problem is to force all cpus spinning on a second lock before trying to get the main lock, when/if they see __QDISC_STATE_RUNNING already set. The owning cpu then compete with at most one other cpu for the main lock, allowing for higher dequeueing rate. Based on a previous patch from Alexander Duyck. I added the heuristic to avoid the atomic in fast path, and put the new lock far away from the cache line used by the dequeue worker. Also try to release the busylock lock as late as possible. Tests with following script gave a boost from ~50.000 pps to ~600.000 pps on a dual quad core machine (E5450 @3.00GHz), tg3 driver. 
(A single netperf flow can reach ~800.000 pps on this platform) for j in `seq 0 3`; do for i in `seq 0 7`; do netperf -H 192.168.0.1 -t UDP_STREAM -l 60 -N -T $i -- -m 6 & done done Signed-off-by: Eric Dumazet Acked-by: Alexander Duyck Signed-off-by: David S. Miller --- net/core/dev.c | 29 +++++++++++++++++++++++++---- net/sched/sch_generic.c | 1 + 2 files changed, 26 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 2733226d90b2..ffca5c1066fa 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2040,8 +2040,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, struct netdev_queue *txq) { spinlock_t *root_lock = qdisc_lock(q); + bool contended = qdisc_is_running(q); int rc; + /* + * Heuristic to force contended enqueues to serialize on a + * separate lock before trying to get qdisc main lock. + * This permits __QDISC_STATE_RUNNING owner to get the lock more often + * and dequeue packets faster. + */ + if (unlikely(contended)) + spin_lock(&q->busylock); + spin_lock(root_lock); if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { kfree_skb(skb); @@ -2056,19 +2066,30 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) skb_dst_force(skb); __qdisc_update_bstats(q, skb->len); - if (sch_direct_xmit(skb, q, dev, txq, root_lock)) + if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { + if (unlikely(contended)) { + spin_unlock(&q->busylock); + contended = false; + } __qdisc_run(q); - else + } else qdisc_run_end(q); rc = NET_XMIT_SUCCESS; } else { skb_dst_force(skb); rc = qdisc_enqueue_root(skb, q); - qdisc_run(q); + if (qdisc_run_begin(q)) { + if (unlikely(contended)) { + spin_unlock(&q->busylock); + contended = false; + } + __qdisc_run(q); + } } spin_unlock(root_lock); - + if (unlikely(contended)) + spin_unlock(&q->busylock); return rc; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 37b86eab6779..d20fcd2a5519 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -561,6 +561,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, INIT_LIST_HEAD(&sch->list); skb_queue_head_init(&sch->q); + spin_lock_init(&sch->busylock); sch->ops = ops; sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; -- cgit v1.2.2 From c2d9ba9bce8d7323ca96f239e1f505c14d6244fb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 1 Jun 2010 06:51:19 +0000 Subject: net: CONFIG_NET_NS reduction Use read_pnet() and write_pnet() to reduce number of ifdef CONFIG_NET_NS Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv6/addrlabel.c | 6 +----- net/netfilter/nf_conntrack_core.c | 8 ++------ 2 files changed, 3 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 8c4348cb1950..f0e774cea386 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -53,11 +53,7 @@ static struct ip6addrlbl_table static inline struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl) { -#ifdef CONFIG_NET_NS - return lbl->lbl_net; -#else - return &init_net; -#endif + return read_pnet(&lbl->lbl_net); } /* diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index eeeb8bc73982..77288980fae0 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -619,9 +619,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone, ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev = NULL; /* Don't set timer yet: wait for confirmation */ setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct); -#ifdef CONFIG_NET_NS - ct->ct_net = net; -#endif + write_pnet(&ct->ct_net, net); #ifdef CONFIG_NF_CONNTRACK_ZONES if (zone) { struct nf_conntrack_zone *nf_ct_zone; @@ -1363,9 +1361,7 @@ static int nf_conntrack_init_init_net(void) goto err_extend; #endif /* Set up fake conntrack: to never be deleted, not in any hashes */ -#ifdef CONFIG_NET_NS - nf_conntrack_untracked.ct_net = &init_net; -#endif + write_pnet(&nf_conntrack_untracked.ct_net, &init_net); atomic_set(&nf_conntrack_untracked.ct_general.use, 1); /* - and look it like as a confirmed connection */ set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status); -- cgit v1.2.2 From 614f60fa9d73a9e8fdff3df83381907fea7c5649 Mon Sep 17 00:00:00 2001 From: Scott McMillan Date: Wed, 2 Jun 2010 05:53:56 -0700 Subject: packet_mmap: expose hw packet timestamps to network packet capture utilities This patch adds a setting, PACKET_TIMESTAMP, to specify the packet timestamp source that is exported to capture utilities like tcpdump by packet_mmap. PACKET_TIMESTAMP accepts the same integer bit field as SO_TIMESTAMPING. However, only the SOF_TIMESTAMPING_SYS_HARDWARE and SOF_TIMESTAMPING_RAW_HARDWARE values are currently recognized by PACKET_TIMESTAMP. SOF_TIMESTAMPING_SYS_HARDWARE takes precedence over SOF_TIMESTAMPING_RAW_HARDWARE if both bits are set. If PACKET_TIMESTAMP is not set, a software timestamp generated inside the networking stack is used (the behavior before this setting was added). Signed-off-by: Scott McMillan Signed-off-by: David S. 
Miller --- net/packet/af_packet.c | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2078a277e06b..9a17f28b1253 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -83,6 +83,7 @@ #include #include #include +#include #ifdef CONFIG_INET #include @@ -202,6 +203,7 @@ struct packet_sock { unsigned int tp_hdrlen; unsigned int tp_reserve; unsigned int tp_loss:1; + unsigned int tp_tstamp; struct packet_type prot_hook ____cacheline_aligned_in_smp; }; @@ -656,6 +658,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct sk_buff *copy_skb = NULL; struct timeval tv; struct timespec ts; + struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); if (skb->pkt_type == PACKET_LOOPBACK) goto drop; @@ -737,7 +740,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h1->tp_snaplen = snaplen; h.h1->tp_mac = macoff; h.h1->tp_net = netoff; - if (skb->tstamp.tv64) + if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE) + && shhwtstamps->syststamp.tv64) + tv = ktime_to_timeval(shhwtstamps->syststamp); + else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE) + && shhwtstamps->hwtstamp.tv64) + tv = ktime_to_timeval(shhwtstamps->hwtstamp); + else if (skb->tstamp.tv64) tv = ktime_to_timeval(skb->tstamp); else do_gettimeofday(&tv); @@ -750,7 +759,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h2->tp_snaplen = snaplen; h.h2->tp_mac = macoff; h.h2->tp_net = netoff; - if (skb->tstamp.tv64) + if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE) + && shhwtstamps->syststamp.tv64) + ts = ktime_to_timespec(shhwtstamps->syststamp); + else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE) + && shhwtstamps->hwtstamp.tv64) + ts = ktime_to_timespec(shhwtstamps->hwtstamp); + else if (skb->tstamp.tv64) ts = ktime_to_timespec(skb->tstamp); else getnstimeofday(&ts); @@ -2027,6 +2042,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv po->has_vnet_hdr = !!val; return 0; } + case PACKET_TIMESTAMP: + { + int val; + + if (optlen != sizeof(val)) + return -EINVAL; + if (copy_from_user(&val, optval, sizeof(val))) + return -EFAULT; + + po->tp_tstamp = val; + return 0; + } default: return -ENOPROTOOPT; } @@ -2119,6 +2146,12 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, val = po->tp_loss; data = &val; break; + case PACKET_TIMESTAMP: + if (len > sizeof(int)) + len = sizeof(int); + val = po->tp_tstamp; + data = &val; + break; default: return -ENOPROTOOPT; } -- cgit v1.2.2 From 33c29dde7d04dc0ec0edb649d20ccf1351c13a06 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sat, 29 May 2010 14:26:59 +0000 Subject: act_nat: fix the wrong checksum when addr isn't in old_addr/mask fix the wrong checksum when addr isn't in old_addr/mask For TCP and UDP packets, when addr isn't in old_addr/mask we don't do SNAT or DNAT, and we should not update layer 4 checksum. Signed-off-by: Changli Gao ---- net/sched/act_nat.c | 4 ++++ 1 file changed, 4 insertions(+) Signed-off-by: David S. 
Miller --- net/sched/act_nat.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index d885ba311564..570949417f38 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -159,6 +159,9 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, iph->daddr = new_addr; csum_replace4(&iph->check, addr, new_addr); + } else if ((iph->frag_off & htons(IP_OFFSET)) || + iph->protocol != IPPROTO_ICMP) { + goto out; } ihl = iph->ihl * 4; @@ -247,6 +250,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, break; } +out: return action; drop: -- cgit v1.2.2 From 130c0f47fdf9c533deb1136cad5136f6cb7020f0 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Sun, 30 May 2010 17:19:53 +0000 Subject: ipconfig: send host-name in DHCP requests Normally dhclient can be configured to send the "host-name" option in DHCP requests to update the client's DNS record. However for an NFSROOT system, dhclient shall never be called (which may change the IP addr and therefore lose your root NFS mount connection). So enable updating the DNS record with kernel parameter ip=::::$HOST_NAME::dhcp Signed-off-by: Wu Fengguang Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index b9d84e800cf4..3a6e1ec5e9ae 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -665,6 +665,13 @@ ic_dhcp_init_options(u8 *options) memcpy(e, ic_req_params, sizeof(ic_req_params)); e += sizeof(ic_req_params); + if (ic_host_name_set) { + *e++ = 12; /* host-name */ + len = strlen(utsname()->nodename); + *e++ = len; + memcpy(e, utsname()->nodename, len); + e += len; + } if (*vendor_class_identifier) { printk(KERN_INFO "DHCP: sending class identifier \"%s\"\n", vendor_class_identifier); -- cgit v1.2.2 From 20c59de2e6b6bc74bbf714dcd4e720afe8d516cf Mon Sep 17 00:00:00 2001 From: Arnaud Ebalard Date: Tue, 1 Jun 2010 21:35:01 +0000 Subject: ipv6: Refactor update of IPv6 flowi destination address for srcrt (RH) option There are more than a dozen occurrences of following code in the IPv6 stack: if (opt && opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; ipv6_addr_copy(&final, &fl.fl6_dst); ipv6_addr_copy(&fl.fl6_dst, rt0->addr); final_p = &final; } Replace those with a helper. Note that the helper overrides final_p in all cases. This is ok as final_p was previously initialized to NULL when declared. Signed-off-by: Arnaud Ebalard Signed-off-by: David S. 
Miller --- net/dccp/ipv6.c | 30 ++++++------------------------ net/ipv6/af_inet6.c | 9 ++------- net/ipv6/datagram.c | 18 ++++-------------- net/ipv6/exthdrs.c | 24 ++++++++++++++++++++++++ net/ipv6/inet6_connection_sock.c | 9 ++------- net/ipv6/raw.c | 10 ++-------- net/ipv6/syncookies.c | 9 ++------- net/ipv6/tcp_ipv6.c | 27 ++++++--------------------- net/ipv6/udp.c | 11 +++-------- 9 files changed, 51 insertions(+), 96 deletions(-) (limited to 'net') diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 091698899594..6e3f32575df7 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -248,7 +248,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; struct ipv6_txoptions *opt = NULL; - struct in6_addr *final_p = NULL, final; + struct in6_addr *final_p, final; struct flowi fl; int err = -1; struct dst_entry *dst; @@ -265,13 +265,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, opt = np->opt; - if (opt != NULL && opt->srcrt != NULL) { - const struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt; - - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, opt, &final); err = ip6_dst_lookup(sk, &dst, &fl); if (err) @@ -545,19 +539,13 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, goto out_overflow; if (dst == NULL) { - struct in6_addr *final_p = NULL, final; + struct in6_addr *final_p, final; struct flowi fl; memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_DCCP; ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); - if (opt != NULL && opt->srcrt != NULL) { - const struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt; - - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, opt, &final); ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet_rsk(req)->rmt_port; @@ -885,7 +873,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct dccp_sock *dp = dccp_sk(sk); - struct in6_addr *saddr = NULL, *final_p = NULL, final; + struct in6_addr *saddr = NULL, *final_p, final; struct flowi fl; struct dst_entry *dst; int addr_type; @@ -988,13 +976,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl.fl_ip_sport = inet->inet_sport; security_sk_classify_flow(sk, &fl); - if (np->opt != NULL && np->opt->srcrt != NULL) { - const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; - - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, np->opt, &final); err = ip6_dst_lookup(sk, &dst, &fl); if (err) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index e733942dafe1..94b1b9c954bf 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -651,7 +651,7 @@ int inet6_sk_rebuild_header(struct sock *sk) if (dst == NULL) { struct inet_sock *inet = inet_sk(sk); - struct in6_addr *final_p = NULL, final; + struct in6_addr *final_p, final; struct flowi fl; memset(&fl, 0, sizeof(fl)); @@ -665,12 +665,7 @@ int inet6_sk_rebuild_header(struct sock *sk) fl.fl_ip_sport = inet->inet_sport; security_sk_classify_flow(sk, &fl); - if (np->opt && np->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, 
rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, np->opt, &final); err = ip6_dst_lookup(sk, &dst, &fl); if (err) { diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 712684687c9a..7d929a22cbc2 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -38,10 +38,11 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct in6_addr *daddr, *final_p = NULL, final; + struct in6_addr *daddr, *final_p, final; struct dst_entry *dst; struct flowi fl; struct ip6_flowlabel *flowlabel = NULL; + struct ipv6_txoptions *opt; int addr_type; int err; @@ -155,19 +156,8 @@ ipv4_connected: security_sk_classify_flow(sk, &fl); - if (flowlabel) { - if (flowlabel->opt && flowlabel->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } - } else if (np->opt && np->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + opt = flowlabel ? flowlabel->opt : np->opt; + final_p = fl6_update_dst(&fl, opt, &final); err = ip6_dst_lookup(sk, &dst, &fl); if (err) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 8a659f92d17a..853a633a94d4 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -874,3 +874,27 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, return opt; } +/** + * fl6_update_dst - update flowi destination address with info given + * by srcrt option, if any. + * + * @fl: flowi for which fl6_dst is to be updated + * @opt: struct ipv6_txoptions in which to look for srcrt opt + * @orig: copy of original fl6_dst address if modified + * + * Returns NULL if no txoptions or no srcrt, otherwise returns orig + * and initial value of fl->fl6_dst set in orig + */ +struct in6_addr *fl6_update_dst(struct flowi *fl, + const struct ipv6_txoptions *opt, + struct in6_addr *orig) +{ + if (!opt || !opt->srcrt) + return NULL; + + ipv6_addr_copy(orig, &fl->fl6_dst); + ipv6_addr_copy(&fl->fl6_dst, ((struct rt0_hdr *)opt->srcrt)->addr); + return orig; +} + +EXPORT_SYMBOL_GPL(fl6_update_dst); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 0c5e3c3b7fd5..8a1628023bd1 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -185,7 +185,7 @@ int inet6_csk_xmit(struct sk_buff *skb) struct ipv6_pinfo *np = inet6_sk(sk); struct flowi fl; struct dst_entry *dst; - struct in6_addr *final_p = NULL, final; + struct in6_addr *final_p, final; memset(&fl, 0, sizeof(fl)); fl.proto = sk->sk_protocol; @@ -199,12 +199,7 @@ int inet6_csk_xmit(struct sk_buff *skb) fl.fl_ip_dport = inet->inet_dport; security_sk_classify_flow(sk, &fl); - if (np->opt && np->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, np->opt, &final); dst = __inet6_csk_dst_check(sk, np->dst_cookie); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 4a4dcbe4f8b2..864eb8e03b1b 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -725,7 +725,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, { struct ipv6_txoptions opt_space; struct sockaddr_in6 * sin6 = 
(struct sockaddr_in6 *) msg->msg_name; - struct in6_addr *daddr, *final_p = NULL, final; + struct in6_addr *daddr, *final_p, final; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct raw6_sock *rp = raw6_sk(sk); @@ -847,13 +847,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) ipv6_addr_copy(&fl.fl6_src, &np->saddr); - /* merge ip6_build_xmit from ip6_output */ - if (opt && opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, opt, &final); if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) fl.oif = np->mcast_oif; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 34d1f0690d7e..12383705dbad 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -240,17 +240,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) * me if there is a preferred way. */ { - struct in6_addr *final_p = NULL, final; + struct in6_addr *final_p, final; struct flowi fl; memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); - if (np->opt && np->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, np->opt, &final); ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); fl.oif = sk->sk_bound_dev_if; fl.mark = sk->sk_mark; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2b7c3a100e2c..e487080d02db 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -129,7 +129,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct inet_connection_sock *icsk = inet_csk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); - struct in6_addr *saddr = NULL, *final_p = NULL, final; + struct in6_addr *saddr = NULL, *final_p, final; struct flowi fl; struct dst_entry *dst; int addr_type; @@ -250,12 +250,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl.fl_ip_dport = usin->sin6_port; fl.fl_ip_sport = inet->inet_sport; - if (np->opt && np->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, np->opt, &final); security_sk_classify_flow(sk, &fl); @@ -477,7 +472,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff * skb; struct ipv6_txoptions *opt = NULL; - struct in6_addr * final_p = NULL, final; + struct in6_addr * final_p, final; struct flowi fl; struct dst_entry *dst; int err = -1; @@ -494,12 +489,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, security_req_classify_flow(req, &fl); opt = np->opt; - if (opt && opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, opt, &final); err = ip6_dst_lookup(sk, &dst, &fl); if (err) @@ -1392,18 +1382,13 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, goto out_overflow; if (dst == NULL) { - struct in6_addr *final_p = NULL, final; + struct in6_addr *final_p, final; struct flowi fl; 
memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); - if (opt && opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, opt, &final); ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); fl.oif = sk->sk_bound_dev_if; fl.mark = sk->sk_mark; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3048f906c042..4aea57dec75f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -929,7 +929,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name; - struct in6_addr *daddr, *final_p = NULL, final; + struct in6_addr *daddr, *final_p, final; struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; struct flowi fl; @@ -1099,14 +1099,9 @@ do_udp_sendmsg: ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.fl_ip_sport = inet->inet_sport; - /* merge ip6_build_xmit from ip6_output */ - if (opt && opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; + final_p = fl6_update_dst(&fl, opt, &final); + if (final_p) connected = 0; - } if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) { fl.oif = np->mcast_oif; -- cgit v1.2.2 From ab95bfe01f9872459c8678572ccadbf646badad0 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 1 Jun 2010 21:52:08 +0000 Subject: net: replace hooks in __netif_receive_skb V5 What this patch does is it removes two receive frame hooks (for bridge and for macvlan) from __netif_receive_skb. These are replaced them with a single hook for both. It only supports one hook per device because it makes no sense to do bridging and macvlan on the same device. Then a network driver (of virtual netdev like macvlan or bridge) can register an rx_handler for needed net device. Signed-off-by: Jiri Pirko Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/bridge/br.c | 2 - net/bridge/br_if.c | 8 ++++ net/bridge/br_input.c | 12 +++-- net/bridge/br_private.h | 3 +- net/core/dev.c | 119 ++++++++++++++++++++++-------------------------- 5 files changed, 73 insertions(+), 71 deletions(-) (limited to 'net') diff --git a/net/bridge/br.c b/net/bridge/br.c index 76357b547752..c8436fa31344 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -63,7 +63,6 @@ static int __init br_init(void) goto err_out4; brioctl_set(br_ioctl_deviceless_stub); - br_handle_frame_hook = br_handle_frame; #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE) br_fdb_test_addr_hook = br_fdb_test_addr; @@ -100,7 +99,6 @@ static void __exit br_deinit(void) br_fdb_test_addr_hook = NULL; #endif - br_handle_frame_hook = NULL; br_fdb_fini(); } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 18b245e2c00e..d9242342837e 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -147,6 +147,7 @@ static void del_nbp(struct net_bridge_port *p) list_del_rcu(&p->list); + netdev_rx_handler_unregister(dev); rcu_assign_pointer(dev->br_port, NULL); br_multicast_del_port(p); @@ -429,6 +430,11 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) goto err2; rcu_assign_pointer(dev->br_port, p); + + err = netdev_rx_handler_register(dev, br_handle_frame); + if (err) + goto err3; + dev_disable_lro(dev); list_add_rcu(&p->list, &br->port_list); @@ -451,6 +457,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) br_netpoll_enable(br, dev); return 0; +err3: + rcu_assign_pointer(dev->br_port, NULL); err2: br_fdb_delete_by_port(br, p, 1); err1: diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index d36e700f7a26..99647d8f95c8 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -131,15 +131,19 @@ static inline int is_link_local(const unsigned char *dest) } /* - * Called via br_handle_frame_hook. 
* Return NULL if skb is handled - * note: already called with rcu_read_lock (preempt_disabled) + * note: already called with rcu_read_lock (preempt_disabled) from + * netif_receive_skb */ -struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) +struct sk_buff *br_handle_frame(struct sk_buff *skb) { + struct net_bridge_port *p; const unsigned char *dest = eth_hdr(skb)->h_dest; int (*rhook)(struct sk_buff *skb); + if (skb->pkt_type == PACKET_LOOPBACK) + return skb; + if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) goto drop; @@ -147,6 +151,8 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) if (!skb) return NULL; + p = rcu_dereference(skb->dev->br_port); + if (unlikely(is_link_local(dest))) { /* Pause frames shouldn't be passed up by driver anyway */ if (skb->protocol == htons(ETH_P_PAUSE)) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 0f4a74bc6a9b..c83519b555bb 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -331,8 +331,7 @@ extern void br_features_recompute(struct net_bridge *br); /* br_input.c */ extern int br_handle_frame_finish(struct sk_buff *skb); -extern struct sk_buff *br_handle_frame(struct net_bridge_port *p, - struct sk_buff *skb); +extern struct sk_buff *br_handle_frame(struct sk_buff *skb); /* br_ioctl.c */ extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); diff --git a/net/core/dev.c b/net/core/dev.c index ffca5c1066fa..ec01a5998d70 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2604,70 +2604,14 @@ static inline int deliver_skb(struct sk_buff *skb, return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } -#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) - -#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE) +#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \ + (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)) /* This hook is defined here for ATM LANE */ int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr) __read_mostly; EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); #endif -/* - * If bridge module is loaded call bridging hook. - * returns NULL if packet was consumed. 
- */ -struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p, - struct sk_buff *skb) __read_mostly; -EXPORT_SYMBOL_GPL(br_handle_frame_hook); - -static inline struct sk_buff *handle_bridge(struct sk_buff *skb, - struct packet_type **pt_prev, int *ret, - struct net_device *orig_dev) -{ - struct net_bridge_port *port; - - if (skb->pkt_type == PACKET_LOOPBACK || - (port = rcu_dereference(skb->dev->br_port)) == NULL) - return skb; - - if (*pt_prev) { - *ret = deliver_skb(skb, *pt_prev, orig_dev); - *pt_prev = NULL; - } - - return br_handle_frame_hook(port, skb); -} -#else -#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb) -#endif - -#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) -struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *p, - struct sk_buff *skb) __read_mostly; -EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook); - -static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, - struct packet_type **pt_prev, - int *ret, - struct net_device *orig_dev) -{ - struct macvlan_port *port; - - port = rcu_dereference(skb->dev->macvlan_port); - if (!port) - return skb; - - if (*pt_prev) { - *ret = deliver_skb(skb, *pt_prev, orig_dev); - *pt_prev = NULL; - } - return macvlan_handle_frame_hook(port, skb); -} -#else -#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb) -#endif - #ifdef CONFIG_NET_CLS_ACT /* TODO: Maybe we should just force sch_ingress to be compiled in * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions @@ -2763,6 +2707,47 @@ void netif_nit_deliver(struct sk_buff *skb) rcu_read_unlock(); } +/** + * netdev_rx_handler_register - register receive handler + * @dev: device to register a handler for + * @rx_handler: receive handler to register + * + * Register a receive hander for a device. This handler will then be + * called from __netif_receive_skb. A negative errno code is returned + * on a failure. + * + * The caller must hold the rtnl_mutex. + */ +int netdev_rx_handler_register(struct net_device *dev, + rx_handler_func_t *rx_handler) +{ + ASSERT_RTNL(); + + if (dev->rx_handler) + return -EBUSY; + + rcu_assign_pointer(dev->rx_handler, rx_handler); + + return 0; +} +EXPORT_SYMBOL_GPL(netdev_rx_handler_register); + +/** + * netdev_rx_handler_unregister - unregister receive handler + * @dev: device to unregister a handler from + * + * Unregister a receive hander from a device. + * + * The caller must hold the rtnl_mutex. 
+ */ +void netdev_rx_handler_unregister(struct net_device *dev) +{ + + ASSERT_RTNL(); + rcu_assign_pointer(dev->rx_handler, NULL); +} +EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); + static inline void skb_bond_set_mac_by_master(struct sk_buff *skb, struct net_device *master) { @@ -2815,6 +2800,7 @@ EXPORT_SYMBOL(__skb_bond_should_drop); static int __netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; + rx_handler_func_t *rx_handler; struct net_device *orig_dev; struct net_device *master; struct net_device *null_or_orig; @@ -2877,12 +2863,17 @@ static int __netif_receive_skb(struct sk_buff *skb) ncls: #endif - skb = handle_bridge(skb, &pt_prev, &ret, orig_dev); - if (!skb) - goto out; - skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev); - if (!skb) - goto out; + /* Handle special case of bridge or macvlan */ + rx_handler = rcu_dereference(skb->dev->rx_handler); + if (rx_handler) { + if (pt_prev) { + ret = deliver_skb(skb, pt_prev, orig_dev); + pt_prev = NULL; + } + skb = rx_handler(skb); + if (!skb) + goto out; + } /* * Make sure frames received on VLAN interfaces stacked on -- cgit v1.2.2 From edafe502404f3669d364b6e96d79b54067b634b4 Mon Sep 17 00:00:00 2001 From: Daniele Lacamera Date: Wed, 2 Jun 2010 02:02:04 +0000 Subject: TCP: tcp_hybla: Fix integer overflow in slow start increment For large values of rtt, 2^rho operation may overflow u32. Clamp down the increment to 2^16. Signed-off-by: Daniele Lacamera Signed-off-by: David S. Miller --- net/ipv4/tcp_hybla.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index c209e054a634..377bc9349371 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -126,8 +126,8 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * calculate 2^fract in a <<7 value. */ is_slowstart = 1; - increment = ((1 << ca->rho) * hybla_fraction(rho_fractions)) - - 128; + increment = ((1 << min(ca->rho, 16U)) * + hybla_fraction(rho_fractions)) - 128; } else { /* * congestion avoidance -- cgit v1.2.2 From fbc2e7d9cf49e0bf89b9e91fd60a06851a855c5d Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Wed, 2 Jun 2010 07:32:42 -0700 Subject: cls_u32: use skb_header_pointer() to dereference data safely use skb_header_pointer() to dereference data safely the original skb->data dereference isn't safe, as there isn't any skb->len or skb_is_nonlinear() check. skb_header_pointer() is used instead in this patch. And when the skb isn't long enough, we terminate the function u32_classify() immediately with -1. Signed-off-by: Changli Gao Signed-off-by: David S. 
Miller --- net/sched/cls_u32.c | 49 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 96275422c619..4f522143811e 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -98,11 +98,11 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re { struct { struct tc_u_knode *knode; - u8 *ptr; + unsigned int off; } stack[TC_U32_MAXDEPTH]; struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root; - u8 *ptr = skb_network_header(skb); + unsigned int off = skb_network_offset(skb); struct tc_u_knode *n; int sdepth = 0; int off2 = 0; @@ -134,8 +134,14 @@ next_knode: #endif for (i = n->sel.nkeys; i>0; i--, key++) { - - if ((*(__be32*)(ptr+key->off+(off2&key->offmask))^key->val)&key->mask) { + unsigned int toff; + __be32 *data, _data; + + toff = off + key->off + (off2 & key->offmask); + data = skb_header_pointer(skb, toff, 4, &_data); + if (!data) + goto out; + if ((*data ^ key->val) & key->mask) { n = n->next; goto next_knode; } @@ -174,29 +180,45 @@ check_terminal: if (sdepth >= TC_U32_MAXDEPTH) goto deadloop; stack[sdepth].knode = n; - stack[sdepth].ptr = ptr; + stack[sdepth].off = off; sdepth++; ht = n->ht_down; sel = 0; - if (ht->divisor) - sel = ht->divisor&u32_hash_fold(*(__be32*)(ptr+n->sel.hoff), &n->sel,n->fshift); - + if (ht->divisor) { + __be32 *data, _data; + + data = skb_header_pointer(skb, off + n->sel.hoff, 4, + &_data); + if (!data) + goto out; + sel = ht->divisor & u32_hash_fold(*data, &n->sel, + n->fshift); + } if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT))) goto next_ht; if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) { off2 = n->sel.off + 3; - if (n->sel.flags&TC_U32_VAROFFSET) - off2 += ntohs(n->sel.offmask & *(__be16*)(ptr+n->sel.offoff)) >>n->sel.offshift; + if (n->sel.flags & TC_U32_VAROFFSET) { + __be16 *data, _data; + + data = skb_header_pointer(skb, + off + n->sel.offoff, + 2, &_data); + if (!data) + goto out; + off2 += ntohs(n->sel.offmask & *data) >> + n->sel.offshift; + } off2 &= ~3; } if (n->sel.flags&TC_U32_EAT) { - ptr += off2; + off += off2; off2 = 0; } - if (ptr < skb_tail_pointer(skb)) + if (off < skb->len) goto next_ht; } @@ -204,9 +226,10 @@ check_terminal: if (sdepth--) { n = stack[sdepth].knode; ht = n->ht_up; - ptr = stack[sdepth].ptr; + off = stack[sdepth].off; goto check_terminal; } +out: return -1; deadloop: -- cgit v1.2.2 From ec8aa669b8393b6789b1954d587c63264af7ff99 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 13 May 2010 16:48:03 +0200 Subject: mac80211: add the minstrel_ht rate control algorithm Signed-off-by: Felix Fietkau Signed-off-by: John W. 
Linville --- net/mac80211/Kconfig | 7 + net/mac80211/Makefile | 4 + net/mac80211/main.c | 7 + net/mac80211/rate.h | 13 + net/mac80211/rc80211_minstrel_ht.c | 824 +++++++++++++++++++++++++++++ net/mac80211/rc80211_minstrel_ht.h | 128 +++++ net/mac80211/rc80211_minstrel_ht_debugfs.c | 120 +++++ 7 files changed, 1103 insertions(+) create mode 100644 net/mac80211/rc80211_minstrel_ht.c create mode 100644 net/mac80211/rc80211_minstrel_ht.h create mode 100644 net/mac80211/rc80211_minstrel_ht_debugfs.c (limited to 'net') diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 8a91f6c0bb18..83eec7a8bd1f 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -33,6 +33,13 @@ config MAC80211_RC_MINSTREL ---help--- This option enables the 'minstrel' TX rate control algorithm +config MAC80211_RC_MINSTREL_HT + bool "Minstrel 802.11n support" if EMBEDDED + depends on MAC80211_RC_MINSTREL + default y + ---help--- + This option enables the 'minstrel_ht' TX rate control algorithm + choice prompt "Default rate control algorithm" depends on MAC80211_HAS_RC diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 84b48ba8a77e..fdb54e61d637 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -51,7 +51,11 @@ rc80211_pid-$(CONFIG_MAC80211_DEBUGFS) += rc80211_pid_debugfs.o rc80211_minstrel-y := rc80211_minstrel.o rc80211_minstrel-$(CONFIG_MAC80211_DEBUGFS) += rc80211_minstrel_debugfs.o +rc80211_minstrel_ht-y := rc80211_minstrel_ht.o +rc80211_minstrel_ht-$(CONFIG_MAC80211_DEBUGFS) += rc80211_minstrel_ht_debugfs.o + mac80211-$(CONFIG_MAC80211_RC_PID) += $(rc80211_pid-y) mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y) +mac80211-$(CONFIG_MAC80211_RC_MINSTREL_HT) += $(rc80211_minstrel_ht-y) ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 22a384dfab65..c8548e61f860 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -704,6 +704,10 @@ static int __init ieee80211_init(void) if (ret) return ret; + ret = rc80211_minstrel_ht_init(); + if (ret) + goto err_minstrel; + ret = rc80211_pid_init(); if (ret) goto err_pid; @@ -716,6 +720,8 @@ static int __init ieee80211_init(void) err_netdev: rc80211_pid_exit(); err_pid: + rc80211_minstrel_ht_exit(); + err_minstrel: rc80211_minstrel_exit(); return ret; @@ -724,6 +730,7 @@ static int __init ieee80211_init(void) static void __exit ieee80211_exit(void) { rc80211_pid_exit(); + rc80211_minstrel_ht_exit(); rc80211_minstrel_exit(); /* diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 065a96190e32..168427b0ffdc 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -147,5 +147,18 @@ static inline void rc80211_minstrel_exit(void) } #endif +#ifdef CONFIG_MAC80211_RC_MINSTREL_HT +extern int rc80211_minstrel_ht_init(void); +extern void rc80211_minstrel_ht_exit(void); +#else +static inline int rc80211_minstrel_ht_init(void) +{ + return 0; +} +static inline void rc80211_minstrel_ht_exit(void) +{ +} +#endif + #endif /* IEEE80211_RATE_H */ diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c new file mode 100644 index 000000000000..c23f08251da4 --- /dev/null +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -0,0 +1,824 @@ +/* + * Copyright (C) 2010 Felix Fietkau + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include "rate.h" +#include "rc80211_minstrel.h" +#include "rc80211_minstrel_ht.h" + +#define AVG_PKT_SIZE 1200 +#define SAMPLE_COLUMNS 10 +#define EWMA_LEVEL 75 + +/* Number of bits for an average sized packet */ +#define MCS_NBITS (AVG_PKT_SIZE << 3) + +/* Number of symbols for a packet with (bps) bits per symbol */ +#define MCS_NSYMS(bps) ((MCS_NBITS + (bps) - 1) / (bps)) + +/* Transmission time for a packet containing (syms) symbols */ +#define MCS_SYMBOL_TIME(sgi, syms) \ + (sgi ? \ + ((syms) * 18 + 4) / 5 : /* syms * 3.6 us */ \ + (syms) << 2 /* syms * 4 us */ \ + ) + +/* Transmit duration for the raw data part of an average sized packet */ +#define MCS_DURATION(streams, sgi, bps) MCS_SYMBOL_TIME(sgi, MCS_NSYMS((streams) * (bps))) + +/* MCS rate information for an MCS group */ +#define MCS_GROUP(_streams, _sgi, _ht40) { \ + .streams = _streams, \ + .flags = \ + (_sgi ? IEEE80211_TX_RC_SHORT_GI : 0) | \ + (_ht40 ? IEEE80211_TX_RC_40_MHZ_WIDTH : 0), \ + .duration = { \ + MCS_DURATION(_streams, _sgi, _ht40 ? 54 : 26), \ + MCS_DURATION(_streams, _sgi, _ht40 ? 108 : 52), \ + MCS_DURATION(_streams, _sgi, _ht40 ? 162 : 78), \ + MCS_DURATION(_streams, _sgi, _ht40 ? 216 : 104), \ + MCS_DURATION(_streams, _sgi, _ht40 ? 324 : 156), \ + MCS_DURATION(_streams, _sgi, _ht40 ? 432 : 208), \ + MCS_DURATION(_streams, _sgi, _ht40 ? 486 : 234), \ + MCS_DURATION(_streams, _sgi, _ht40 ? 540 : 260) \ + } \ +} + +/* + * To enable sufficiently targeted rate sampling, MCS rates are divided into + * groups, based on the number of streams and flags (HT40, SGI) that they + * use. + */ +const struct mcs_group minstrel_mcs_groups[] = { + MCS_GROUP(1, 0, 0), + MCS_GROUP(2, 0, 0), +#if MINSTREL_MAX_STREAMS >= 3 + MCS_GROUP(3, 0, 0), +#endif + + MCS_GROUP(1, 1, 0), + MCS_GROUP(2, 1, 0), +#if MINSTREL_MAX_STREAMS >= 3 + MCS_GROUP(3, 1, 0), +#endif + + MCS_GROUP(1, 0, 1), + MCS_GROUP(2, 0, 1), +#if MINSTREL_MAX_STREAMS >= 3 + MCS_GROUP(3, 0, 1), +#endif + + MCS_GROUP(1, 1, 1), + MCS_GROUP(2, 1, 1), +#if MINSTREL_MAX_STREAMS >= 3 + MCS_GROUP(3, 1, 1), +#endif +}; + +static u8 sample_table[SAMPLE_COLUMNS][MCS_GROUP_RATES]; + +/* + * Perform EWMA (Exponentially Weighted Moving Average) calculation + */ +static int +minstrel_ewma(int old, int new, int weight) +{ + return (new * (100 - weight) + old * weight) / 100; +} + +/* + * Look up an MCS group index based on mac80211 rate information + */ +static int +minstrel_ht_get_group_idx(struct ieee80211_tx_rate *rate) +{ + int streams = (rate->idx / MCS_GROUP_RATES) + 1; + u32 flags = IEEE80211_TX_RC_SHORT_GI | IEEE80211_TX_RC_40_MHZ_WIDTH; + int i; + + for (i = 0; i < ARRAY_SIZE(minstrel_mcs_groups); i++) { + if (minstrel_mcs_groups[i].streams != streams) + continue; + if (minstrel_mcs_groups[i].flags != (rate->flags & flags)) + continue; + + return i; + } + + WARN_ON(1); + return 0; +} + +static inline struct minstrel_rate_stats * +minstrel_get_ratestats(struct minstrel_ht_sta *mi, int index) +{ + return &mi->groups[index / MCS_GROUP_RATES].rates[index % MCS_GROUP_RATES]; +} + + +/* + * Recalculate success probabilities and counters for a rate using EWMA + */ +static void +minstrel_calc_rate_ewma(struct minstrel_priv *mp, struct minstrel_rate_stats *mr) +{ + if (unlikely(mr->attempts > 0)) { + mr->sample_skipped = 0; + mr->cur_prob = MINSTREL_FRAC(mr->success, mr->attempts); + if (!mr->att_hist) + mr->probability = mr->cur_prob; + else + mr->probability = minstrel_ewma(mr->probability, + mr->cur_prob, 
EWMA_LEVEL); + mr->att_hist += mr->attempts; + mr->succ_hist += mr->success; + } else { + mr->sample_skipped++; + } + mr->last_success = mr->success; + mr->last_attempts = mr->attempts; + mr->success = 0; + mr->attempts = 0; +} + +/* + * Calculate throughput based on the average A-MPDU length, taking into account + * the expected number of retransmissions and their expected length + */ +static void +minstrel_ht_calc_tp(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, + int group, int rate) +{ + struct minstrel_rate_stats *mr; + unsigned int usecs; + + mr = &mi->groups[group].rates[rate]; + + if (mr->probability < MINSTREL_FRAC(1, 10)) { + mr->cur_tp = 0; + return; + } + + usecs = mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len); + usecs += minstrel_mcs_groups[group].duration[rate]; + mr->cur_tp = MINSTREL_TRUNC((1000000 / usecs) * mr->probability); +} + +/* + * Update rate statistics and select new primary rates + * + * Rules for rate selection: + * - max_prob_rate must use only one stream, as a tradeoff between delivery + * probability and throughput during strong fluctuations + * - as long as the max prob rate has a probability of more than 3/4, pick + * higher throughput rates, even if the probablity is a bit lower + */ +static void +minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) +{ + struct minstrel_mcs_group_data *mg; + struct minstrel_rate_stats *mr; + int cur_prob, cur_prob_tp, cur_tp, cur_tp2; + int group, i, index; + + if (mi->ampdu_packets > 0) { + mi->avg_ampdu_len = minstrel_ewma(mi->avg_ampdu_len, + MINSTREL_FRAC(mi->ampdu_len, mi->ampdu_packets), EWMA_LEVEL); + mi->ampdu_len = 0; + mi->ampdu_packets = 0; + } + + mi->sample_slow = 0; + mi->sample_count = 0; + mi->max_tp_rate = 0; + mi->max_tp_rate2 = 0; + mi->max_prob_rate = 0; + + for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) { + cur_prob = 0; + cur_prob_tp = 0; + cur_tp = 0; + cur_tp2 = 0; + + mg = &mi->groups[group]; + if (!mg->supported) + continue; + + mg->max_tp_rate = 0; + mg->max_tp_rate2 = 0; + mg->max_prob_rate = 0; + mi->sample_count++; + + for (i = 0; i < MCS_GROUP_RATES; i++) { + if (!(mg->supported & BIT(i))) + continue; + + mr = &mg->rates[i]; + mr->retry_updated = false; + index = MCS_GROUP_RATES * group + i; + minstrel_calc_rate_ewma(mp, mr); + minstrel_ht_calc_tp(mp, mi, group, i); + + if (!mr->cur_tp) + continue; + + /* ignore the lowest rate of each single-stream group */ + if (!i && minstrel_mcs_groups[group].streams == 1) + continue; + + if ((mr->cur_tp > cur_prob_tp && mr->probability > + MINSTREL_FRAC(3, 4)) || mr->probability > cur_prob) { + mg->max_prob_rate = index; + cur_prob = mr->probability; + } + + if (mr->cur_tp > cur_tp) { + swap(index, mg->max_tp_rate); + cur_tp = mr->cur_tp; + mr = minstrel_get_ratestats(mi, index); + } + + if (index >= mg->max_tp_rate) + continue; + + if (mr->cur_tp > cur_tp2) { + mg->max_tp_rate2 = index; + cur_tp2 = mr->cur_tp; + } + } + } + + /* try to sample up to half of the availble rates during each interval */ + mi->sample_count *= 4; + + cur_prob = 0; + cur_prob_tp = 0; + cur_tp = 0; + cur_tp2 = 0; + for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) { + mg = &mi->groups[group]; + if (!mg->supported) + continue; + + mr = minstrel_get_ratestats(mi, mg->max_prob_rate); + if (cur_prob_tp < mr->cur_tp && + minstrel_mcs_groups[group].streams == 1) { + mi->max_prob_rate = mg->max_prob_rate; + cur_prob = mr->cur_prob; + } + + mr = minstrel_get_ratestats(mi, mg->max_tp_rate); + if (cur_tp < mr->cur_tp) 
{ + mi->max_tp_rate = mg->max_tp_rate; + cur_tp = mr->cur_tp; + } + + mr = minstrel_get_ratestats(mi, mg->max_tp_rate2); + if (cur_tp2 < mr->cur_tp) { + mi->max_tp_rate2 = mg->max_tp_rate2; + cur_tp2 = mr->cur_tp; + } + } + + mi->stats_update = jiffies; +} + +static bool +minstrel_ht_txstat_valid(struct ieee80211_tx_rate *rate) +{ + if (!rate->count) + return false; + + if (rate->idx < 0) + return false; + + return !!(rate->flags & IEEE80211_TX_RC_MCS); +} + +static void +minstrel_next_sample_idx(struct minstrel_ht_sta *mi) +{ + struct minstrel_mcs_group_data *mg; + + for (;;) { + mi->sample_group++; + mi->sample_group %= ARRAY_SIZE(minstrel_mcs_groups); + mg = &mi->groups[mi->sample_group]; + + if (!mg->supported) + continue; + + if (++mg->index >= MCS_GROUP_RATES) { + mg->index = 0; + if (++mg->column >= ARRAY_SIZE(sample_table)) + mg->column = 0; + } + break; + } +} + +static void +minstrel_downgrade_rate(struct minstrel_ht_sta *mi, int *idx, bool primary) +{ + int group, orig_group; + + orig_group = group = *idx / MCS_GROUP_RATES; + while (group > 0) { + group--; + + if (!mi->groups[group].supported) + continue; + + if (minstrel_mcs_groups[group].streams > + minstrel_mcs_groups[orig_group].streams) + continue; + + if (primary) + *idx = mi->groups[group].max_tp_rate; + else + *idx = mi->groups[group].max_tp_rate2; + break; + } +} + +static void +minstrel_aggr_check(struct minstrel_priv *mp, struct ieee80211_sta *pubsta, struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct sta_info *sta = container_of(pubsta, struct sta_info, sta); + u16 tid; + + if (unlikely(!ieee80211_is_data_qos(hdr->frame_control))) + return; + + if (unlikely(skb->protocol == cpu_to_be16(ETH_P_PAE))) + return; + + tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; + if (likely(sta->ampdu_mlme.tid_state_tx[tid] != HT_AGG_STATE_IDLE)) + return; + + ieee80211_start_tx_ba_session(pubsta, tid); +} + +static void +minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, + struct sk_buff *skb) +{ + struct minstrel_ht_sta_priv *msp = priv_sta; + struct minstrel_ht_sta *mi = &msp->ht; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_tx_rate *ar = info->status.rates; + struct minstrel_rate_stats *rate, *rate2; + struct minstrel_priv *mp = priv; + bool last = false; + int group; + int i = 0; + + if (!msp->is_ht) + return mac80211_minstrel.tx_status(priv, sband, sta, &msp->legacy, skb); + + /* This packet was aggregated but doesn't carry status info */ + if ((info->flags & IEEE80211_TX_CTL_AMPDU) && + !(info->flags & IEEE80211_TX_STAT_AMPDU)) + return; + + if (!info->status.ampdu_len) { + info->status.ampdu_ack_len = 1; + info->status.ampdu_len = 1; + } + + mi->ampdu_packets++; + mi->ampdu_len += info->status.ampdu_len; + + if (!mi->sample_wait && !mi->sample_tries && mi->sample_count > 0) { + mi->sample_wait = 4 + 2 * MINSTREL_TRUNC(mi->avg_ampdu_len); + mi->sample_tries = 3; + mi->sample_count--; + } + + if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) { + mi->sample_packets += info->status.ampdu_len; + minstrel_next_sample_idx(mi); + } + + for (i = 0; !last; i++) { + last = (i == IEEE80211_TX_MAX_RATES - 1) || + !minstrel_ht_txstat_valid(&ar[i + 1]); + + if (!minstrel_ht_txstat_valid(&ar[i])) + break; + + group = minstrel_ht_get_group_idx(&ar[i]); + rate = &mi->groups[group].rates[ar[i].idx % 8]; + + if (last && (info->flags & IEEE80211_TX_STAT_ACK)) + rate->success += 
info->status.ampdu_ack_len; + + rate->attempts += ar[i].count * info->status.ampdu_len; + } + + /* + * check for sudden death of spatial multiplexing, + * downgrade to a lower number of streams if necessary. + */ + rate = minstrel_get_ratestats(mi, mi->max_tp_rate); + if (rate->attempts > 30 && + MINSTREL_FRAC(rate->success, rate->attempts) < + MINSTREL_FRAC(20, 100)) + minstrel_downgrade_rate(mi, &mi->max_tp_rate, true); + + rate2 = minstrel_get_ratestats(mi, mi->max_tp_rate2); + if (rate->attempts > 30 && + MINSTREL_FRAC(rate->success, rate->attempts) < + MINSTREL_FRAC(20, 100)) + minstrel_downgrade_rate(mi, &mi->max_tp_rate2, false); + + if (time_after(jiffies, mi->stats_update + (mp->update_interval / 2 * HZ) / 1000)) { + minstrel_ht_update_stats(mp, mi); + minstrel_aggr_check(mp, sta, skb); + } +} + +static void +minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, + int index) +{ + struct minstrel_rate_stats *mr; + const struct mcs_group *group; + unsigned int tx_time, tx_time_rtscts, tx_time_data; + unsigned int cw = mp->cw_min; + unsigned int t_slot = 9; /* FIXME */ + unsigned int ampdu_len = MINSTREL_TRUNC(mi->avg_ampdu_len); + + mr = minstrel_get_ratestats(mi, index); + if (mr->probability < MINSTREL_FRAC(1, 10)) { + mr->retry_count = 1; + mr->retry_count_rtscts = 1; + return; + } + + mr->retry_count = 2; + mr->retry_count_rtscts = 2; + mr->retry_updated = true; + + group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; + tx_time_data = group->duration[index % MCS_GROUP_RATES] * ampdu_len; + tx_time = 2 * (t_slot + mi->overhead + tx_time_data); + tx_time_rtscts = 2 * (t_slot + mi->overhead_rtscts + tx_time_data); + do { + cw = (cw << 1) | 1; + cw = min(cw, mp->cw_max); + tx_time += cw + t_slot + mi->overhead; + tx_time_rtscts += cw + t_slot + mi->overhead_rtscts; + if (tx_time_rtscts < mp->segment_size) + mr->retry_count_rtscts++; + } while ((tx_time < mp->segment_size) && + (++mr->retry_count < mp->max_retry)); +} + + +static void +minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, + struct ieee80211_tx_rate *rate, int index, + struct ieee80211_tx_rate_control *txrc, + bool sample, bool rtscts) +{ + const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; + struct minstrel_rate_stats *mr; + + mr = minstrel_get_ratestats(mi, index); + if (!mr->retry_updated) + minstrel_calc_retransmit(mp, mi, index); + + if (mr->probability < MINSTREL_FRAC(20, 100)) + rate->count = 2; + else if (rtscts) + rate->count = mr->retry_count_rtscts; + else + rate->count = mr->retry_count; + + rate->flags = IEEE80211_TX_RC_MCS | group->flags; + if (txrc->short_preamble) + rate->flags |= IEEE80211_TX_RC_USE_SHORT_PREAMBLE; + if (txrc->rts || rtscts) + rate->flags |= IEEE80211_TX_RC_USE_RTS_CTS; + rate->idx = index % MCS_GROUP_RATES + (group->streams - 1) * MCS_GROUP_RATES; +} + +static inline int +minstrel_get_duration(int index) +{ + const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; + return group->duration[index % MCS_GROUP_RATES]; +} + +static int +minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) +{ + struct minstrel_rate_stats *mr; + struct minstrel_mcs_group_data *mg; + int sample_idx = 0; + + if (mi->sample_wait > 0) { + mi->sample_wait--; + return -1; + } + + if (!mi->sample_tries) + return -1; + + mi->sample_tries--; + mg = &mi->groups[mi->sample_group]; + sample_idx = sample_table[mg->column][mg->index]; + mr = &mg->rates[sample_idx]; + sample_idx += mi->sample_group 
* MCS_GROUP_RATES; + + /* + * When not using MRR, do not sample if the probability is already + * higher than 95% to avoid wasting airtime + */ + if (!mp->has_mrr && (mr->probability > MINSTREL_FRAC(95, 100))) + goto next; + + /* + * Make sure that lower rates get sampled only occasionally, + * if the link is working perfectly. + */ + if (minstrel_get_duration(sample_idx) > + minstrel_get_duration(mi->max_tp_rate)) { + if (mr->sample_skipped < 10) + goto next; + + if (mi->sample_slow++ > 2) + goto next; + } + + return sample_idx; + +next: + minstrel_next_sample_idx(mi); + return -1; +} + +static void +minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, + struct ieee80211_tx_rate_control *txrc) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); + struct ieee80211_tx_rate *ar = info->status.rates; + struct minstrel_ht_sta_priv *msp = priv_sta; + struct minstrel_ht_sta *mi = &msp->ht; + struct minstrel_priv *mp = priv; + int sample_idx; + + if (rate_control_send_low(sta, priv_sta, txrc)) + return; + + if (!msp->is_ht) + return mac80211_minstrel.get_rate(priv, sta, &msp->legacy, txrc); + + info->flags |= mi->tx_flags; + sample_idx = minstrel_get_sample_rate(mp, mi); + if (sample_idx >= 0) { + minstrel_ht_set_rate(mp, mi, &ar[0], sample_idx, + txrc, true, false); + minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate, + txrc, false, true); + info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; + } else { + minstrel_ht_set_rate(mp, mi, &ar[0], mi->max_tp_rate, + txrc, false, false); + minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate2, + txrc, false, true); + } + minstrel_ht_set_rate(mp, mi, &ar[2], mi->max_prob_rate, txrc, false, true); + + ar[3].count = 0; + ar[3].idx = -1; + + mi->total_packets++; + + /* wraparound */ + if (mi->total_packets == ~0) { + mi->total_packets = 0; + mi->sample_packets = 0; + } +} + +static void +minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, + enum nl80211_channel_type oper_chan_type) +{ + struct minstrel_priv *mp = priv; + struct minstrel_ht_sta_priv *msp = priv_sta; + struct minstrel_ht_sta *mi = &msp->ht; + struct ieee80211_mcs_info *mcs = &sta->ht_cap.mcs; + struct ieee80211_local *local = hw_to_local(mp->hw); + u16 sta_cap = sta->ht_cap.cap; + int ack_dur; + int stbc; + int i; + + /* fall back to the old minstrel for legacy stations */ + if (sta && !sta->ht_cap.ht_supported) { + msp->is_ht = false; + memset(&msp->legacy, 0, sizeof(msp->legacy)); + msp->legacy.r = msp->ratelist; + msp->legacy.sample_table = msp->sample_table; + return mac80211_minstrel.rate_init(priv, sband, sta, &msp->legacy); + } + + BUILD_BUG_ON(ARRAY_SIZE(minstrel_mcs_groups) != + MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS); + + msp->is_ht = true; + memset(mi, 0, sizeof(*mi)); + mi->stats_update = jiffies; + + ack_dur = ieee80211_frame_duration(local, 10, 60, 1, 1); + mi->overhead = ieee80211_frame_duration(local, 0, 60, 1, 1) + ack_dur; + mi->overhead_rtscts = mi->overhead + 2 * ack_dur; + + mi->avg_ampdu_len = MINSTREL_FRAC(1, 1); + + /* When using MRR, sample more on the first attempt, without delay */ + if (mp->has_mrr) { + mi->sample_count = 16; + mi->sample_wait = 0; + } else { + mi->sample_count = 8; + mi->sample_wait = 8; + } + mi->sample_tries = 4; + + stbc = (sta_cap & IEEE80211_HT_CAP_RX_STBC) >> + IEEE80211_HT_CAP_RX_STBC_SHIFT; + mi->tx_flags |= stbc << IEEE80211_TX_CTL_STBC_SHIFT; + + if (sta_cap & IEEE80211_HT_CAP_LDPC_CODING) + mi->tx_flags |= 
IEEE80211_TX_CTL_LDPC; + + if (oper_chan_type != NL80211_CHAN_HT40MINUS && + oper_chan_type != NL80211_CHAN_HT40PLUS) + sta_cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; + + for (i = 0; i < ARRAY_SIZE(mi->groups); i++) { + u16 req = 0; + + mi->groups[i].supported = 0; + if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_SHORT_GI) { + if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH) + req |= IEEE80211_HT_CAP_SGI_40; + else + req |= IEEE80211_HT_CAP_SGI_20; + } + + if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH) + req |= IEEE80211_HT_CAP_SUP_WIDTH_20_40; + + if ((sta_cap & req) != req) + continue; + + mi->groups[i].supported = + mcs->rx_mask[minstrel_mcs_groups[i].streams - 1]; + } +} + +static void +minstrel_ht_rate_init(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta) +{ + struct minstrel_priv *mp = priv; + + minstrel_ht_update_caps(priv, sband, sta, priv_sta, mp->hw->conf.channel_type); +} + +static void +minstrel_ht_rate_update(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, + u32 changed, enum nl80211_channel_type oper_chan_type) +{ + minstrel_ht_update_caps(priv, sband, sta, priv_sta, oper_chan_type); +} + +static void * +minstrel_ht_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) +{ + struct ieee80211_supported_band *sband; + struct minstrel_ht_sta_priv *msp; + struct minstrel_priv *mp = priv; + struct ieee80211_hw *hw = mp->hw; + int max_rates = 0; + int i; + + for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + sband = hw->wiphy->bands[i]; + if (sband && sband->n_bitrates > max_rates) + max_rates = sband->n_bitrates; + } + + msp = kzalloc(sizeof(struct minstrel_ht_sta), gfp); + if (!msp) + return NULL; + + msp->ratelist = kzalloc(sizeof(struct minstrel_rate) * max_rates, gfp); + if (!msp->ratelist) + goto error; + + msp->sample_table = kmalloc(SAMPLE_COLUMNS * max_rates, gfp); + if (!msp->sample_table) + goto error1; + + return msp; + +error1: + kfree(msp->sample_table); +error: + kfree(msp); + return NULL; +} + +static void +minstrel_ht_free_sta(void *priv, struct ieee80211_sta *sta, void *priv_sta) +{ + struct minstrel_ht_sta_priv *msp = priv_sta; + + kfree(msp->sample_table); + kfree(msp->ratelist); + kfree(msp); +} + +static void * +minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) +{ + return mac80211_minstrel.alloc(hw, debugfsdir); +} + +static void +minstrel_ht_free(void *priv) +{ + mac80211_minstrel.free(priv); +} + +static struct rate_control_ops mac80211_minstrel_ht = { + .name = "minstrel_ht", + .tx_status = minstrel_ht_tx_status, + .get_rate = minstrel_ht_get_rate, + .rate_init = minstrel_ht_rate_init, + .rate_update = minstrel_ht_rate_update, + .alloc_sta = minstrel_ht_alloc_sta, + .free_sta = minstrel_ht_free_sta, + .alloc = minstrel_ht_alloc, + .free = minstrel_ht_free, +#ifdef CONFIG_MAC80211_DEBUGFS + .add_sta_debugfs = minstrel_ht_add_sta_debugfs, + .remove_sta_debugfs = minstrel_ht_remove_sta_debugfs, +#endif +}; + + +static void +init_sample_table(void) +{ + int col, i, new_idx; + u8 rnd[MCS_GROUP_RATES]; + + memset(sample_table, 0xff, sizeof(sample_table)); + for (col = 0; col < SAMPLE_COLUMNS; col++) { + for (i = 0; i < MCS_GROUP_RATES; i++) { + get_random_bytes(rnd, sizeof(rnd)); + new_idx = (i + rnd[i]) % MCS_GROUP_RATES; + + while (sample_table[col][new_idx] != 0xff) + new_idx = (new_idx + 1) % MCS_GROUP_RATES; + + sample_table[col][new_idx] = i; + } + } +} + +int __init +rc80211_minstrel_ht_init(void) +{ + 
init_sample_table(); + return ieee80211_rate_control_register(&mac80211_minstrel_ht); +} + +void +rc80211_minstrel_ht_exit(void) +{ + ieee80211_rate_control_unregister(&mac80211_minstrel_ht); +} diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h new file mode 100644 index 000000000000..696c0fc6e0b7 --- /dev/null +++ b/net/mac80211/rc80211_minstrel_ht.h @@ -0,0 +1,128 @@ +/* + * Copyright (C) 2010 Felix Fietkau + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __RC_MINSTREL_HT_H +#define __RC_MINSTREL_HT_H + +/* + * The number of streams can be changed to 2 to reduce code + * size and memory footprint. + */ +#define MINSTREL_MAX_STREAMS 3 +#define MINSTREL_STREAM_GROUPS 4 + +/* scaled fraction values */ +#define MINSTREL_SCALE 16 +#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / div) +#define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE) + +#define MCS_GROUP_RATES 8 + +struct mcs_group { + u32 flags; + unsigned int streams; + unsigned int duration[MCS_GROUP_RATES]; +}; + +struct minstrel_rate_stats { + /* current / last sampling period attempts/success counters */ + unsigned int attempts, last_attempts; + unsigned int success, last_success; + + /* total attempts/success counters */ + u64 att_hist, succ_hist; + + /* current throughput */ + unsigned int cur_tp; + + /* packet delivery probabilities */ + unsigned int cur_prob, probability; + + /* maximum retry counts */ + unsigned int retry_count; + unsigned int retry_count_rtscts; + + bool retry_updated; + u8 sample_skipped; +}; + +struct minstrel_mcs_group_data { + u8 index; + u8 column; + + /* bitfield of supported MCS rates of this group */ + u8 supported; + + /* selected primary rates */ + unsigned int max_tp_rate; + unsigned int max_tp_rate2; + unsigned int max_prob_rate; + + /* MCS rate statistics */ + struct minstrel_rate_stats rates[MCS_GROUP_RATES]; +}; + +struct minstrel_ht_sta { + /* ampdu length (average, per sampling interval) */ + unsigned int ampdu_len; + unsigned int ampdu_packets; + + /* ampdu length (EWMA) */ + unsigned int avg_ampdu_len; + + /* best throughput rate */ + unsigned int max_tp_rate; + + /* second best throughput rate */ + unsigned int max_tp_rate2; + + /* best probability rate */ + unsigned int max_prob_rate; + + /* time of last status update */ + unsigned long stats_update; + + /* overhead time in usec for each frame */ + unsigned int overhead; + unsigned int overhead_rtscts; + + unsigned int total_packets; + unsigned int sample_packets; + + /* tx flags to add for frames for this sta */ + u32 tx_flags; + + u8 sample_wait; + u8 sample_tries; + u8 sample_count; + u8 sample_slow; + + /* current MCS group to be sampled */ + u8 sample_group; + + /* MCS rate group info and statistics */ + struct minstrel_mcs_group_data groups[MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS]; +}; + +struct minstrel_ht_sta_priv { + union { + struct minstrel_ht_sta ht; + struct minstrel_sta_info legacy; + }; +#ifdef CONFIG_MAC80211_DEBUGFS + struct dentry *dbg_stats; +#endif + void *ratelist; + void *sample_table; + bool is_ht; +}; + +void minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir); +void minstrel_ht_remove_sta_debugfs(void *priv, void *priv_sta); + +#endif diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c new file mode 100644 index 
000000000000..4fb3ccbd8b40 --- /dev/null +++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2010 Felix Fietkau + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include "rc80211_minstrel.h" +#include "rc80211_minstrel_ht.h" + +extern const struct mcs_group minstrel_mcs_groups[]; + +static int +minstrel_ht_stats_open(struct inode *inode, struct file *file) +{ + struct minstrel_ht_sta_priv *msp = inode->i_private; + struct minstrel_ht_sta *mi = &msp->ht; + struct minstrel_debugfs_info *ms; + unsigned int i, j, tp, prob, eprob; + char *p; + int ret; + + if (!msp->is_ht) { + inode->i_private = &msp->legacy; + ret = minstrel_stats_open(inode, file); + inode->i_private = msp; + return ret; + } + + ms = kmalloc(sizeof(*ms) + 8192, GFP_KERNEL); + if (!ms) + return -ENOMEM; + + file->private_data = ms; + p = ms->buf; + p += sprintf(p, "type rate throughput ewma prob this prob " + "this succ/attempt success attempts\n"); + for (i = 0; i < MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS; i++) { + char htmode = '2'; + char gimode = 'L'; + + if (!mi->groups[i].supported) + continue; + + if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH) + htmode = '4'; + if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_SHORT_GI) + gimode = 'S'; + + for (j = 0; j < MCS_GROUP_RATES; j++) { + struct minstrel_rate_stats *mr = &mi->groups[i].rates[j]; + int idx = i * MCS_GROUP_RATES + j; + + if (!(mi->groups[i].supported & BIT(j))) + continue; + + p += sprintf(p, "HT%c0/%cGI ", htmode, gimode); + + *(p++) = (idx == mi->max_tp_rate) ? 'T' : ' '; + *(p++) = (idx == mi->max_tp_rate2) ? 't' : ' '; + *(p++) = (idx == mi->max_prob_rate) ? 
'P' : ' '; + p += sprintf(p, "MCS%-2u", (minstrel_mcs_groups[i].streams - 1) * + MCS_GROUP_RATES + j); + + tp = mr->cur_tp / 10; + prob = MINSTREL_TRUNC(mr->cur_prob * 1000); + eprob = MINSTREL_TRUNC(mr->probability * 1000); + + p += sprintf(p, " %6u.%1u %6u.%1u %6u.%1u " + "%3u(%3u) %8llu %8llu\n", + tp / 10, tp % 10, + eprob / 10, eprob % 10, + prob / 10, prob % 10, + mr->last_success, + mr->last_attempts, + (unsigned long long)mr->succ_hist, + (unsigned long long)mr->att_hist); + } + } + p += sprintf(p, "\nTotal packet count:: ideal %d " + "lookaround %d\n", + max(0, (int) mi->total_packets - (int) mi->sample_packets), + mi->sample_packets); + p += sprintf(p, "Average A-MPDU length: %d.%d\n", + MINSTREL_TRUNC(mi->avg_ampdu_len), + MINSTREL_TRUNC(mi->avg_ampdu_len * 10) % 10); + ms->len = p - ms->buf; + + return 0; +} + +static const struct file_operations minstrel_ht_stat_fops = { + .owner = THIS_MODULE, + .open = minstrel_ht_stats_open, + .read = minstrel_stats_read, + .release = minstrel_stats_release, +}; + +void +minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir) +{ + struct minstrel_ht_sta_priv *msp = priv_sta; + + msp->dbg_stats = debugfs_create_file("rc_stats", S_IRUGO, dir, msp, + &minstrel_ht_stat_fops); +} + +void +minstrel_ht_remove_sta_debugfs(void *priv, void *priv_sta) +{ + struct minstrel_ht_sta_priv *msp = priv_sta; + + debugfs_remove(msp->dbg_stats); +} -- cgit v1.2.2 From aed8e1f9910b216ab3e14cb286c431c870f9b78f Mon Sep 17 00:00:00 2001 From: Helmut Schaa Date: Mon, 17 May 2010 17:30:59 +0200 Subject: cfg80211: don't refuse HT20 channels on devices that don't support HT40 Don't refuse HT20 channels on devices that don't support HT40. Signed-off-by: Helmut Schaa Signed-off-by: John W. Linville --- net/wireless/chan.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/chan.c b/net/wireless/chan.c index b01a6f6397d7..d0c92dddb26b 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -35,8 +35,9 @@ rdev_freq_to_chan(struct cfg80211_registered_device *rdev, if (!ht_cap->ht_supported) return NULL; - if (!(ht_cap->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) || - ht_cap->cap & IEEE80211_HT_CAP_40MHZ_INTOLERANT) + if (channel_type != NL80211_CHAN_HT20 && + (!(ht_cap->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) || + ht_cap->cap & IEEE80211_HT_CAP_40MHZ_INTOLERANT)) return NULL; } -- cgit v1.2.2 From 77c2061d10a408d0220c2b0e7faefe52d9c41008 Mon Sep 17 00:00:00 2001 From: Walter Goldens Date: Tue, 18 May 2010 04:44:54 -0700 Subject: wireless: fix several minor description typos Signed-off-by: Walter Goldens Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 2 +- net/mac80211/status.c | 2 +- net/mac80211/work.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0839c4e8fd2e..31e3386b8d43 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1763,7 +1763,7 @@ static void ieee80211_sta_work(struct work_struct *work) /* * ieee80211_queue_work() should have picked up most cases, - * here we'll pick the the rest. + * here we'll pick the rest. 
*/ if (WARN(local->suspended, "STA MLME work scheduled while " "going to suspend\n")) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 94613af009f3..34da67995d94 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -47,7 +47,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, /* * This skb 'survived' a round-trip through the driver, and * hopefully the driver didn't mangle it too badly. However, - * we can definitely not rely on the the control information + * we can definitely not rely on the control information * being correct. Clear it so we don't get junk there, and * indicate that it needs new processing, but must not be * modified/encrypted again. diff --git a/net/mac80211/work.c b/net/mac80211/work.c index be3d4a698692..4157717ed786 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -840,7 +840,7 @@ static void ieee80211_work_work(struct work_struct *work) /* * ieee80211_queue_work() should have picked up most cases, - * here we'll pick the the rest. + * here we'll pick the rest. */ if (WARN(local->suspended, "work scheduled while going to suspend\n")) return; -- cgit v1.2.2 From 252aa631f88080920a7083ac5a5844ffc5463629 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 19 May 2010 12:17:12 +0200 Subject: cfg80211: make action channel type optional When sending action frames, we want to verify that we do that on the correct channel. However, checking the channel type in addition can get in the way, since the channel type could change on the fly during an association, and it's not useful to have the channel type anyway since it has no effect on the transmission. Therefore, make it optional to specify so that if wanted, it can still be checked, but is not required. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/cfg.c | 4 +++- net/mac80211/ieee80211_i.h | 1 + net/mac80211/mlme.c | 9 ++++++--- net/wireless/core.h | 1 + net/wireless/mlme.c | 3 ++- net/wireless/nl80211.c | 3 +++ 6 files changed, 16 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index c7000a6ca379..f8c49c5ad8aa 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1554,10 +1554,12 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, static int ieee80211_action(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, + bool channel_type_valid, const u8 *buf, size_t len, u64 *cookie) { return ieee80211_mgd_action(IEEE80211_DEV_TO_SUB_IF(dev), chan, - channel_type, buf, len, cookie); + channel_type, channel_type_valid, + buf, len, cookie); } struct cfg80211_ops mac80211_config_ops = { diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 1a9e2da37a93..d4677efd3a36 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -988,6 +988,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, int ieee80211_mgd_action(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, + bool channel_type_valid, const u8 *buf, size_t len, u64 *cookie); ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 31e3386b8d43..29c3a75a7ad0 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2308,6 +2308,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, int ieee80211_mgd_action(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, + bool channel_type_valid, const u8 *buf, size_t len, u64 *cookie) { struct ieee80211_local *local = sdata->local; @@ -2315,9 +2316,11 @@ int ieee80211_mgd_action(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb; /* Check that we are on the requested channel for transmission */ - if ((chan != local->tmp_channel || - channel_type != local->tmp_channel_type) && - (chan != local->oper_channel || + if (chan != local->tmp_channel && + chan != local->oper_channel) + return -EBUSY; + if (channel_type_valid && + (channel_type != local->tmp_channel_type && channel_type != local->_oper_channel_type)) return -EBUSY; diff --git a/net/wireless/core.h b/net/wireless/core.h index ae930acf75e9..63d57ae399c3 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -339,6 +339,7 @@ int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, + bool channel_type_valid, const u8 *buf, size_t len, u64 *cookie); /* SME */ diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 48ead6f0426d..f69ae19f497f 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -827,6 +827,7 @@ int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, + bool channel_type_valid, const u8 *buf, size_t len, u64 *cookie) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -855,7 +856,7 @@ int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, /* Transmit the Action frame as requested by user space */ return rdev->ops->action(&rdev->wiphy, dev, chan, channel_type, - buf, len, cookie); + 
channel_type_valid, buf, len, cookie); } bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index db71150b8040..90ab3c8519be 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4681,6 +4681,7 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info) struct net_device *dev; struct ieee80211_channel *chan; enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; + bool channel_type_valid = false; u32 freq; int err; void *hdr; @@ -4722,6 +4723,7 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info) err = -EINVAL; goto out; } + channel_type_valid = true; } freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); @@ -4745,6 +4747,7 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info) goto free_msg; } err = cfg80211_mlme_action(rdev, dev, chan, channel_type, + channel_type_valid, nla_data(info->attrs[NL80211_ATTR_FRAME]), nla_len(info->attrs[NL80211_ATTR_FRAME]), &cookie); -- cgit v1.2.2 From b5f7e7554753e2cc3ef3bef0271fdb32027df2ba Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Jun 2010 12:05:27 +0000 Subject: ipv4: add LINUX_MIB_IPRPFILTER snmp counter Christoph Lameter mentioned that packets could be dropped in input path because of rp_filter settings, without any SNMP counter being incremented. System administrator can have a hard time to track the problem. This patch introduces a new counter, LINUX_MIB_IPRPFILTER, incremented each time we drop a packet because Reverse Path Filter triggers. (We receive an IPv4 datagram on a given interface, and find the route to send an answer would use another interface) netstat -s | grep IPReversePathFilter IPReversePathFilter: 21714 Reported-by: Christoph Lameter Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/fib_frontend.c | 6 ++++-- net/ipv4/ip_input.c | 3 +++ net/ipv4/proc.c | 1 + net/ipv4/route.c | 31 ++++++++++++++++++------------- 4 files changed, 26 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 4f0ed458c883..e830f7a123bd 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -284,7 +284,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, if (no_addr) goto last_resort; if (rpf == 1) - goto e_inval; + goto e_rpf; fl.oif = dev->ifindex; ret = 0; @@ -299,7 +299,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, last_resort: if (rpf) - goto e_inval; + goto e_rpf; *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); *itag = 0; return 0; @@ -308,6 +308,8 @@ e_inval_res: fib_res_put(&res); e_inval: return -EINVAL; +e_rpf: + return -EXDEV; } static inline __be32 sk_extract_addr(struct sockaddr *addr) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index d930dc5e4d85..d52c9da644cf 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -340,6 +340,9 @@ static int ip_rcv_finish(struct sk_buff *skb) else if (err == -ENETUNREACH) IP_INC_STATS_BH(dev_net(skb->dev), IPSTATS_MIB_INNOROUTES); + else if (err == -EXDEV) + NET_INC_STATS_BH(dev_net(skb->dev), + LINUX_MIB_IPRPFILTER); goto drop; } } diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 3dc9914c1dce..e320ca6b3ef3 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -252,6 +252,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP), SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP), SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP), + SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8495bceec764..d377b45005fc 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1851,6 +1851,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, __be32 spec_dst; struct in_device *in_dev = in_dev_get(dev); u32 itag = 0; + int err; /* Primary sanity checks. 
*/ @@ -1865,10 +1866,12 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (!ipv4_is_local_multicast(daddr)) goto e_inval; spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); - } else if (fib_validate_source(saddr, 0, tos, 0, - dev, &spec_dst, &itag, 0) < 0) - goto e_inval; - + } else { + err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, + &itag, 0); + if (err < 0) + goto e_err; + } rth = dst_alloc(&ipv4_dst_ops); if (!rth) goto e_nobufs; @@ -1920,8 +1923,10 @@ e_nobufs: return -ENOBUFS; e_inval: + err = -EINVAL; +e_err: in_dev_put(in_dev); - return -EINVAL; + return err; } @@ -1985,7 +1990,6 @@ static int __mkroute_input(struct sk_buff *skb, ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, saddr); - err = -EINVAL; goto cleanup; } @@ -2157,13 +2161,12 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, goto brd_input; if (res.type == RTN_LOCAL) { - int result; - result = fib_validate_source(saddr, daddr, tos, + err = fib_validate_source(saddr, daddr, tos, net->loopback_dev->ifindex, dev, &spec_dst, &itag, skb->mark); - if (result < 0) - goto martian_source; - if (result) + if (err < 0) + goto martian_source_keep_err; + if (err) flags |= RTCF_DIRECTSRC; spec_dst = daddr; goto local_input; @@ -2191,7 +2194,7 @@ brd_input: err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, &itag, skb->mark); if (err < 0) - goto martian_source; + goto martian_source_keep_err; if (err) flags |= RTCF_DIRECTSRC; } @@ -2272,8 +2275,10 @@ e_nobufs: goto done; martian_source: + err = -EINVAL; +martian_source_keep_err: ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); - goto e_inval; + goto done; } int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, -- cgit v1.2.2 From 96d362202bfc0e5da78ee59b1645296fbca515f4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Jun 2010 19:21:31 +0000 Subject: ipv4: RCU conversion of ip_route_input_slow/ip_route_input_mc Avoid two atomic ops on struct in_device refcount per incoming packet, if slow path taken, (or route cache disabled) Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/route.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d377b45005fc..1cfe0d199e7c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1843,13 +1843,14 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) rt->rt_type = res->type; } +/* called in rcu_read_lock() section */ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev, int our) { - unsigned hash; + unsigned int hash; struct rtable *rth; __be32 spec_dst; - struct in_device *in_dev = in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); u32 itag = 0; int err; @@ -1914,18 +1915,14 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, #endif RT_CACHE_STAT_INC(in_slow_mc); - in_dev_put(in_dev); hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex); e_nobufs: - in_dev_put(in_dev); return -ENOBUFS; - e_inval: - err = -EINVAL; + return -EINVAL; e_err: - in_dev_put(in_dev); return err; } @@ -2101,7 +2098,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev) { struct fib_result res; - struct in_device *in_dev = in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); struct flowi fl = { .nl_u = { .ip4_u = { .daddr = daddr, .saddr = saddr, @@ -2179,7 +2176,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); done: - in_dev_put(in_dev); if (free_res) fib_res_put(&res); out: return err; @@ -2288,16 +2284,18 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, unsigned hash; int iif = dev->ifindex; struct net *net; + int res; net = dev_net(dev); + rcu_read_lock(); + if (!rt_caching(net)) goto skip_cache; tos &= IPTOS_RT_MASK; hash = rt_hash(daddr, saddr, iif, rt_genid(net)); - rcu_read_lock(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; rth = rcu_dereference(rth->u.dst.rt_next)) { if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | @@ -2321,7 +2319,6 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, } RT_CACHE_STAT_INC(in_hlist_search); } - rcu_read_unlock(); skip_cache: /* Multicast recognition logic is moved from route cache to here. @@ -2336,12 +2333,11 @@ skip_cache: route cache entry is created eventually. 
*/ if (ipv4_is_multicast(daddr)) { - struct in_device *in_dev; + struct in_device *in_dev = __in_dev_get_rcu(dev); - rcu_read_lock(); - if ((in_dev = __in_dev_get_rcu(dev)) != NULL) { + if (in_dev) { int our = ip_check_mc(in_dev, daddr, saddr, - ip_hdr(skb)->protocol); + ip_hdr(skb)->protocol); if (our #ifdef CONFIG_IP_MROUTE || @@ -2349,15 +2345,18 @@ skip_cache: IN_DEV_MFORWARD(in_dev)) #endif ) { + int res = ip_route_input_mc(skb, daddr, saddr, + tos, dev, our); rcu_read_unlock(); - return ip_route_input_mc(skb, daddr, saddr, - tos, dev, our); + return res; } } rcu_read_unlock(); return -EINVAL; } - return ip_route_input_slow(skb, daddr, saddr, tos, dev); + res = ip_route_input_slow(skb, daddr, saddr, tos, dev); + rcu_read_unlock(); + return res; } EXPORT_SYMBOL(ip_route_input_common); -- cgit v1.2.2 From bc10502dba37d3b210efd9f3867212298f13b78e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Jun 2010 03:21:52 -0700 Subject: net: use __packed annotation cleanup patch. Use new __packed annotation in net/ and include/ (except netfilter) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/bluetooth/bnep/bnep.h | 8 ++++---- net/compat.c | 6 +++--- net/iucv/iucv.c | 14 +++++++------- net/mac80211/cfg.c | 2 +- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/rx.c | 2 +- net/sctp/sm_make_chunk.c | 2 +- 7 files changed, 18 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h index 0d9e506f5d5a..70672544db86 100644 --- a/net/bluetooth/bnep/bnep.h +++ b/net/bluetooth/bnep/bnep.h @@ -86,26 +86,26 @@ struct bnep_setup_conn_req { __u8 ctrl; __u8 uuid_size; __u8 service[0]; -} __attribute__((packed)); +} __packed; struct bnep_set_filter_req { __u8 type; __u8 ctrl; __be16 len; __u8 list[0]; -} __attribute__((packed)); +} __packed; struct bnep_control_rsp { __u8 type; __u8 ctrl; __be16 resp; -} __attribute__((packed)); +} __packed; struct bnep_ext_hdr { __u8 type; __u8 len; __u8 data[0]; -} __attribute__((packed)); +} __packed; /* BNEP ioctl defines */ #define BNEPCONNADD _IOW('B', 200, int) diff --git a/net/compat.c b/net/compat.c index ec24d9edb025..1cf75905f132 100644 --- a/net/compat.c +++ b/net/compat.c @@ -531,7 +531,7 @@ struct compat_group_req { __u32 gr_interface; struct __kernel_sockaddr_storage gr_group __attribute__ ((aligned(4))); -} __attribute__ ((packed)); +} __packed; struct compat_group_source_req { __u32 gsr_interface; @@ -539,7 +539,7 @@ struct compat_group_source_req { __attribute__ ((aligned(4))); struct __kernel_sockaddr_storage gsr_source __attribute__ ((aligned(4))); -} __attribute__ ((packed)); +} __packed; struct compat_group_filter { __u32 gf_interface; @@ -549,7 +549,7 @@ struct compat_group_filter { __u32 gf_numsrc; struct __kernel_sockaddr_storage gf_slist[1] __attribute__ ((aligned(4))); -} __attribute__ ((packed)); +} __packed; #define __COMPAT_GF0_SIZE (sizeof(struct compat_group_filter) - \ sizeof(struct __kernel_sockaddr_storage)) diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index f28ad2cc8428..499c045d6910 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -1463,7 +1463,7 @@ struct iucv_path_pending { u32 res3; u8 ippollfg; u8 res4[3]; -} __attribute__ ((packed)); +} __packed; static void iucv_path_pending(struct iucv_irq_data *data) { @@ -1524,7 +1524,7 @@ struct iucv_path_complete { u32 res3; u8 ippollfg; u8 res4[3]; -} __attribute__ ((packed)); +} __packed; static void iucv_path_complete(struct iucv_irq_data *data) { @@ -1554,7 +1554,7 @@ struct iucv_path_severed { u32 
res4; u8 ippollfg; u8 res5[3]; -} __attribute__ ((packed)); +} __packed; static void iucv_path_severed(struct iucv_irq_data *data) { @@ -1590,7 +1590,7 @@ struct iucv_path_quiesced { u32 res4; u8 ippollfg; u8 res5[3]; -} __attribute__ ((packed)); +} __packed; static void iucv_path_quiesced(struct iucv_irq_data *data) { @@ -1618,7 +1618,7 @@ struct iucv_path_resumed { u32 res4; u8 ippollfg; u8 res5[3]; -} __attribute__ ((packed)); +} __packed; static void iucv_path_resumed(struct iucv_irq_data *data) { @@ -1649,7 +1649,7 @@ struct iucv_message_complete { u32 ipbfln2f; u8 ippollfg; u8 res2[3]; -} __attribute__ ((packed)); +} __packed; static void iucv_message_complete(struct iucv_irq_data *data) { @@ -1694,7 +1694,7 @@ struct iucv_message_pending { u32 ipbfln2f; u8 ippollfg; u8 res2[3]; -} __attribute__ ((packed)); +} __packed; static void iucv_message_pending(struct iucv_irq_data *data) { diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index c7000a6ca379..a2ed0f7b5568 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -600,7 +600,7 @@ struct iapp_layer2_update { u8 ssap; /* 0 */ u8 control; u8 xid_info[3]; -} __attribute__ ((packed)); +} __packed; static void ieee80211_send_layer2_update(struct sta_info *sta) { diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 1a9e2da37a93..ec3e5c3e27bd 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1084,7 +1084,7 @@ struct ieee80211_tx_status_rtap_hdr { u8 padding_for_rate; __le16 tx_flags; u8 data_retries; -} __attribute__ ((packed)); +} __packed; /* HT */ diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6e2a7bcd8cb8..2d9a2ee94e17 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2139,7 +2139,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, u8 rate_or_pad; __le16 chan_freq; __le16 chan_flags; - } __attribute__ ((packed)) *rthdr; + } __packed *rthdr; struct sk_buff *skb = rx->skb, *skb2; struct net_device *prev_dev = NULL; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index bd2a50b482ac..246f92924658 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1817,7 +1817,7 @@ malformed: struct __sctp_missing { __be32 num_missing; __be16 type; -} __attribute__((packed)); +} __packed; /* * Report a missing mandatory parameter. -- cgit v1.2.2 From db2c24175d149b55784f7cb2c303622ce962c1ae Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Wed, 2 Jun 2010 04:55:02 +0000 Subject: act_pedit: access skb->data safely access skb->data safely we should use skb_header_pointer() and skb_store_bits() to access skb->data to handle small or non-linear skbs. Signed-off-by: Changli Gao ---- net/sched/act_pedit.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) Signed-off-by: David S. Miller --- net/sched/act_pedit.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index fdbd0b7bd840..50e3d945e1f4 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -125,7 +125,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, { struct tcf_pedit *p = a->priv; int i, munged = 0; - u8 *pptr; + unsigned int off; if (!(skb->tc_verd & TC_OK2MUNGE)) { /* should we set skb->cloned? 
*/ @@ -134,7 +134,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, } } - pptr = skb_network_header(skb); + off = skb_network_offset(skb); spin_lock(&p->tcf_lock); @@ -144,17 +144,17 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tc_pedit_key *tkey = p->tcfp_keys; for (i = p->tcfp_nkeys; i > 0; i--, tkey++) { - u32 *ptr; + u32 *ptr, _data; int offset = tkey->off; if (tkey->offmask) { - if (skb->len > tkey->at) { - char *j = pptr + tkey->at; - offset += ((*j & tkey->offmask) >> - tkey->shift); - } else { + char *d, _d; + + d = skb_header_pointer(skb, off + tkey->at, 1, + &_d); + if (!d) goto bad; - } + offset += (*d & tkey->offmask) >> tkey->shift; } if (offset % 4) { @@ -169,9 +169,13 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, goto bad; } - ptr = (u32 *)(pptr+offset); + ptr = skb_header_pointer(skb, off + offset, 4, &_data); + if (!ptr) + goto bad; /* just do it, baby */ *ptr = ((*ptr & tkey->mask) ^ tkey->val); + if (ptr == &_data) + skb_store_bits(skb, off + offset, ptr, 4); munged++; } -- cgit v1.2.2 From 3a4d4aa2d38e5305b5e93dffdc9dd2f975129328 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 26 May 2010 16:41:40 +0200 Subject: mac80211: remove bogus mod_timer() call The IBSS code has a bogus mod_timer(..., 0) call, we shouldn't ever pass a constant value to the function since any constant value could be in the future or the past. However, invoking the timer here is not necessary at all, since we just finished scanning and just need to have the IBSS code run again from the workqueue later, so factor out the work starting and use that instead. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ibss.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index b2cc1fda6cfd..d7a96ced2c83 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -798,6 +798,15 @@ static void ieee80211_ibss_work(struct work_struct *work) } } +static void ieee80211_queue_ibss_work(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; + struct ieee80211_local *local = sdata->local; + + set_bit(IEEE80211_IBSS_REQ_RUN, &ifibss->request); + ieee80211_queue_work(&local->hw, &ifibss->work); +} + static void ieee80211_ibss_timer(unsigned long data) { struct ieee80211_sub_if_data *sdata = @@ -810,8 +819,7 @@ static void ieee80211_ibss_timer(unsigned long data) return; } - set_bit(IEEE80211_IBSS_REQ_RUN, &ifibss->request); - ieee80211_queue_work(&local->hw, &ifibss->work); + ieee80211_queue_ibss_work(sdata); } #ifdef CONFIG_PM @@ -859,7 +867,7 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local) if (!sdata->u.ibss.ssid_len) continue; sdata->u.ibss.last_scan_completed = jiffies; - mod_timer(&sdata->u.ibss.timer, 0); + ieee80211_queue_ibss_work(sdata); } mutex_unlock(&local->iflist_mtx); } -- cgit v1.2.2 From 2b2c009ecf71f4c66ff8420b63dddbc9737e04e3 Mon Sep 17 00:00:00 2001 From: Juuso Oikarinen Date: Thu, 27 May 2010 15:32:13 +0300 Subject: mac80211: Add support for hardware ARP query filtering Some hardware allow extended filtering of ARP frames not intended for the host. To perform such filtering, the hardware needs to know the current IP address(es) of the host, bound to its interface. Add support for ARP filtering to mac80211 by adding a new op to the driver interface, allowing to configure the current IP addresses. 
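For illustration only (this sketch is not part of the patch): a driver supporting the new configure_arp_filter op could walk the in_ifaddr list that mac80211 hands over and program each IPv4 address into its hardware ARP filter. The struct mydrv_priv type, the mydrv_fw_* helpers and the MYDRV_MAX_ARP_ADDRS limit below are hypothetical placeholders for driver/firmware specifics, not real APIs.

    #include <linux/inetdevice.h>
    #include <net/mac80211.h>

    /* hypothetical firmware helpers -- assumed to exist in the driver */
    struct mydrv_priv;
    int mydrv_fw_set_arp_addr(struct mydrv_priv *priv, int idx, __be32 addr);
    int mydrv_fw_arp_filter_enable(struct mydrv_priv *priv, int n_addrs);
    int mydrv_fw_arp_filter_disable(struct mydrv_priv *priv);

    #define MYDRV_MAX_ARP_ADDRS 2   /* assumed hardware limit */

    static int mydrv_configure_arp_filter(struct ieee80211_hw *hw,
                                          struct ieee80211_vif *vif,
                                          struct in_ifaddr *ifa_list)
    {
            struct mydrv_priv *priv = hw->priv;
            struct in_ifaddr *ifa;
            int n = 0;

            for (ifa = ifa_list; ifa; ifa = ifa->ifa_next) {
                    if (n == MYDRV_MAX_ARP_ADDRS)
                            /* too many addresses: pass all ARP frames up */
                            return mydrv_fw_arp_filter_disable(priv);
                    mydrv_fw_set_arp_addr(priv, n++, ifa->ifa_address);
            }

            /* program the collected addresses; zero addresses disables nothing extra */
            return mydrv_fw_arp_filter_enable(priv, n);
    }
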
This op is called upon association with the currently configured address(es), and when associated whenever the IP address(es) change. This patch adds configuration of IPv4 addresses only, as IPv6 addresses don't need ARP filtering. Signed-off-by: Juuso Oikarinen Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/driver-ops.h | 17 ++++++++++++ net/mac80211/driver-trace.h | 25 ++++++++++++++++++ net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/main.c | 63 ++++++++++++++++++++++++++++++++++++++++++++- net/mac80211/mlme.c | 11 +++++++- 5 files changed, 116 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 4f2271316650..978850ee3a5f 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -83,6 +83,23 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local, trace_drv_bss_info_changed(local, sdata, info, changed); } +struct in_ifaddr; +static inline int drv_configure_arp_filter(struct ieee80211_local *local, + struct ieee80211_vif *vif, + struct in_ifaddr *ifa_list) +{ + int ret = 0; + + might_sleep(); + + if (local->ops->configure_arp_filter) + ret = local->ops->configure_arp_filter(&local->hw, vif, + ifa_list); + + trace_drv_configure_arp_filter(local, vif_to_sdata(vif), ifa_list, ret); + return ret; +} + static inline u64 drv_prepare_multicast(struct ieee80211_local *local, struct netdev_hw_addr_list *mc_list) { diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 6a9b2342a9c2..577460da2ea1 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -219,6 +219,31 @@ TRACE_EVENT(drv_bss_info_changed, ) ); +TRACE_EVENT(drv_configure_arp_filter, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct in_ifaddr *ifa_list, int ret), + + TP_ARGS(local, sdata, ifa_list, ret), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(int, ret) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->ret = ret; + ), + + TP_printk( + VIF_PR_FMT LOCAL_PR_FMT " ret:%d", + VIF_PR_ARG, LOCAL_PR_ARG, __entry->ret + ) +); + TRACE_EVENT(drv_prepare_multicast, TP_PROTO(struct ieee80211_local *local, int mc_count, u64 ret), diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index d4677efd3a36..47d67537f170 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -851,6 +851,7 @@ struct ieee80211_local { struct work_struct dynamic_ps_disable_work; struct timer_list dynamic_ps_timer; struct notifier_block network_latency_notifier; + struct notifier_block ifa_notifier; int user_power_level; /* in dBm */ int power_constr_level; /* in dBm */ @@ -997,6 +998,7 @@ void ieee80211_send_pspoll(struct ieee80211_local *local, void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency); int ieee80211_max_network_latency(struct notifier_block *nb, unsigned long data, void *dummy); +int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel_sw_ie *sw_elem, struct ieee80211_bss *bss, diff --git a/net/mac80211/main.c b/net/mac80211/main.c index c8548e61f860..4051b232c6e6 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -329,6 +329,58 @@ static void ieee80211_recalc_smps_work(struct work_struct *work) mutex_unlock(&local->iflist_mtx); } +int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata) +{ + struct in_device *idev; + int ret = 0; + + 
BUG_ON(!sdata); + ASSERT_RTNL(); + + idev = sdata->dev->ip_ptr; + if (!idev) + return 0; + + ret = drv_configure_arp_filter(sdata->local, &sdata->vif, + idev->ifa_list); + return ret; +} + +static int ieee80211_ifa_changed(struct notifier_block *nb, + unsigned long data, void *arg) +{ + struct in_ifaddr *ifa = arg; + struct ieee80211_local *local = + container_of(nb, struct ieee80211_local, + ifa_notifier); + struct net_device *ndev = ifa->ifa_dev->dev; + struct wireless_dev *wdev = ndev->ieee80211_ptr; + struct ieee80211_sub_if_data *sdata; + struct ieee80211_if_managed *ifmgd; + + /* Make sure it's our interface that got changed */ + if (!wdev) + return NOTIFY_DONE; + + if (wdev->wiphy != local->hw.wiphy) + return NOTIFY_DONE; + + /* We are concerned about IP addresses only when associated */ + sdata = IEEE80211_DEV_TO_SUB_IF(ndev); + + /* ARP filtering is only supported in managed mode */ + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return NOTIFY_DONE; + + ifmgd = &sdata->u.mgd; + mutex_lock(&ifmgd->mtx); + if (ifmgd->associated) + ieee80211_set_arp_filter(sdata); + mutex_unlock(&ifmgd->mtx); + + return NOTIFY_DONE; +} + struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, const struct ieee80211_ops *ops) { @@ -612,14 +664,22 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) ieee80211_max_network_latency; result = pm_qos_add_notifier(PM_QOS_NETWORK_LATENCY, &local->network_latency_notifier); - if (result) { rtnl_lock(); goto fail_pm_qos; } + local->ifa_notifier.notifier_call = ieee80211_ifa_changed; + result = register_inetaddr_notifier(&local->ifa_notifier); + if (result) + goto fail_ifa; + return 0; + fail_ifa: + pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY, + &local->network_latency_notifier); + rtnl_lock(); fail_pm_qos: ieee80211_led_exit(local); ieee80211_remove_interfaces(local); @@ -647,6 +707,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY, &local->network_latency_notifier); + unregister_inetaddr_notifier(&local->ifa_notifier); rtnl_lock(); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 29c3a75a7ad0..7e720133358c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2078,8 +2078,17 @@ static enum work_done_result ieee80211_assoc_done(struct ieee80211_work *wk, cfg80211_send_assoc_timeout(wk->sdata->dev, wk->filter_ta); return WORK_DONE_DESTROY; + } else { + mutex_unlock(&wk->sdata->u.mgd.mtx); + + /* + * configure ARP filter IP addresses to the driver, + * intentionally outside the mgd mutex. + */ + rtnl_lock(); + ieee80211_set_arp_filter(wk->sdata); + rtnl_unlock(); } - mutex_unlock(&wk->sdata->u.mgd.mtx); } cfg80211_send_rx_assoc(wk->sdata->dev, skb->data, skb->len); -- cgit v1.2.2 From 6a8579d0e62c0eac428184ce45e86bc46677724a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 27 May 2010 14:41:07 +0200 Subject: mac80211: clean up ieee80211_stop_tx_ba_session There's no sense in letting anything but internal mac80211 functions set the initiator to anything but WLAN_BACK_INITIATOR, since WLAN_BACK_RECIPIENT is only valid when we have received a frame from the peer, which we react to directly in mac80211. The debugfs code I recently added got this wrong as well. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/agg-tx.c | 7 +++---- net/mac80211/debugfs_sta.c | 3 +-- net/mac80211/driver-trace.h | 10 ++++------ 3 files changed, 8 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index c163d0a149f4..feb15c4a1fad 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -538,14 +538,13 @@ int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, return ret; } -int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, - enum ieee80211_back_parties initiator) +int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; - trace_api_stop_tx_ba_session(pubsta, tid, initiator); + trace_api_stop_tx_ba_session(pubsta, tid); if (!local->ops->ampdu_action) return -EINVAL; @@ -553,7 +552,7 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, if (tid >= STA_TID_NUM) return -EINVAL; - return __ieee80211_stop_tx_ba_session(sta, tid, initiator); + return __ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR); } EXPORT_SYMBOL(ieee80211_stop_tx_ba_session); diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index e763f1529ddb..9f140612224a 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -210,8 +210,7 @@ static ssize_t sta_agg_status_write(struct file *file, const char __user *userbu if (start) ret = ieee80211_start_tx_ba_session(&sta->sta, tid); else - ret = ieee80211_stop_tx_ba_session(&sta->sta, tid, - WLAN_BACK_RECIPIENT); + ret = ieee80211_stop_tx_ba_session(&sta->sta, tid); } else { __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT, 3); ret = 0; diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 577460da2ea1..6b90630151ab 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -876,25 +876,23 @@ TRACE_EVENT(api_start_tx_ba_cb, ); TRACE_EVENT(api_stop_tx_ba_session, - TP_PROTO(struct ieee80211_sta *sta, u16 tid, u16 initiator), + TP_PROTO(struct ieee80211_sta *sta, u16 tid), - TP_ARGS(sta, tid, initiator), + TP_ARGS(sta, tid), TP_STRUCT__entry( STA_ENTRY __field(u16, tid) - __field(u16, initiator) ), TP_fast_assign( STA_ASSIGN; __entry->tid = tid; - __entry->initiator = initiator; ), TP_printk( - STA_PR_FMT " tid:%d initiator:%d", - STA_PR_ARG, __entry->tid, __entry->initiator + STA_PR_FMT " tid:%d", + STA_PR_ARG, __entry->tid ) ); -- cgit v1.2.2 From 761ab470364b550c9b1a5e1e31be51d415aaf42b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 28 May 2010 14:24:19 +0200 Subject: mac80211: move WEP weak IV check I suspect the compiler will do this optimisation anyway, but it seems cleaner to move this into the WEP switch case. Also make rx_h_decrypt use a local variable for the frame_control so that we don't need to reload the hdr variable for this after linearizing. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/rx.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6e2a7bcd8cb8..0ade26793918 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -825,6 +825,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) ieee80211_rx_result result = RX_DROP_UNUSABLE; struct ieee80211_key *stakey = NULL; int mmie_keyidx = -1; + __le16 fc; /* * Key selection 101 @@ -866,13 +867,15 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) if (rx->sta) stakey = rcu_dereference(rx->sta->key); - if (!ieee80211_has_protected(hdr->frame_control)) + fc = hdr->frame_control; + + if (!ieee80211_has_protected(fc)) mmie_keyidx = ieee80211_get_mmie_keyidx(rx->skb); if (!is_multicast_ether_addr(hdr->addr1) && stakey) { rx->key = stakey; /* Skip decryption if the frame is not protected. */ - if (!ieee80211_has_protected(hdr->frame_control)) + if (!ieee80211_has_protected(fc)) return RX_CONTINUE; } else if (mmie_keyidx >= 0) { /* Broadcast/multicast robust management frame / BIP */ @@ -884,7 +887,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) return RX_DROP_MONITOR; /* unexpected BIP keyidx */ rx->key = rcu_dereference(rx->sdata->keys[mmie_keyidx]); - } else if (!ieee80211_has_protected(hdr->frame_control)) { + } else if (!ieee80211_has_protected(fc)) { /* * The frame was not protected, so skip decryption. However, we * need to set rx->key if there is a key that could have been @@ -892,7 +895,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) * have been expected. */ struct ieee80211_key *key = NULL; - if (ieee80211_is_mgmt(hdr->frame_control) && + if (ieee80211_is_mgmt(fc) && is_multicast_ether_addr(hdr->addr1) && (key = rcu_dereference(rx->sdata->default_mgmt_key))) rx->key = key; @@ -914,7 +917,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) (status->flag & RX_FLAG_IV_STRIPPED)) return RX_CONTINUE; - hdrlen = ieee80211_hdrlen(hdr->frame_control); + hdrlen = ieee80211_hdrlen(fc); if (rx->skb->len < 8 + hdrlen) return RX_DROP_UNUSABLE; /* TODO: count this? */ @@ -947,19 +950,17 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) if (skb_linearize(rx->skb)) return RX_DROP_UNUSABLE; - - hdr = (struct ieee80211_hdr *)rx->skb->data; - - /* Check for weak IVs if possible */ - if (rx->sta && rx->key->conf.alg == ALG_WEP && - ieee80211_is_data(hdr->frame_control) && - (!(status->flag & RX_FLAG_IV_STRIPPED) || - !(status->flag & RX_FLAG_DECRYPTED)) && - ieee80211_wep_is_weak_iv(rx->skb, rx->key)) - rx->sta->wep_weak_iv_count++; + /* the hdr variable is invalid now! */ switch (rx->key->conf.alg) { case ALG_WEP: + /* Check for weak IVs if possible */ + if (rx->sta && ieee80211_is_data(fc) && + (!(status->flag & RX_FLAG_IV_STRIPPED) || + !(status->flag & RX_FLAG_DECRYPTED)) && + ieee80211_wep_is_weak_iv(rx->skb, rx->key)) + rx->sta->wep_weak_iv_count++; + result = ieee80211_crypto_wep_decrypt(rx); break; case ALG_TKIP: -- cgit v1.2.2 From e0961f112cd88176acc6d1af6ca6352f85cdf993 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 28 May 2010 14:48:52 +0200 Subject: mac80211: remove useless work starting Ever since we use only cfg80211 for configuration, there is no configuration that could be pending at this point, cfg80211 will have the configuration that is pending and apply it afterwards. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/iface.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 50deb017fd6e..3d3a094d3987 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -321,15 +321,6 @@ static int ieee80211_open(struct net_device *dev) ieee80211_recalc_ps(local, -1); - /* - * ieee80211_sta_work is disabled while network interface - * is down. Therefore, some configuration changes may not - * yet be effective. Trigger execution of ieee80211_sta_work - * to fix this. - */ - if (sdata->vif.type == NL80211_IFTYPE_STATION) - ieee80211_queue_work(&local->hw, &sdata->u.mgd.work); - netif_tx_start_all_queues(dev); return 0; -- cgit v1.2.2 From 08daecaead42b85b69b33d7d8429a93dfbf75b58 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 30 May 2010 14:53:43 +0200 Subject: mac80211: drop control frames after processing After ieee80211_rx_h_ctrl() processing we only want to process management (including action) frames, so there's no point in letting control frames continue. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/rx.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 0ade26793918..0b9898ac4d87 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1844,7 +1844,12 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) return RX_QUEUED; } - return RX_CONTINUE; + /* + * After this point, we only want management frames, + * so we can drop all remaining control frames to + * cooked monitor interfaces. + */ + return RX_DROP_MONITOR; } static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, -- cgit v1.2.2 From fcea60070fe8fa48df579f155ec7bc20a868f7dc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 31 May 2010 11:40:23 +0200 Subject: mac80211: move plink state For some odd reason, the plink_state enum is declared in the middle between aggregation related structures. Move it down to make the file easier to read. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/sta_info.h | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index df9d45544ca5..813da34db733 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -119,6 +119,28 @@ struct tid_ampdu_rx { u8 dialog_token; }; +/** + * struct sta_ampdu_mlme - STA aggregation information. + * + * @tid_active_rx: TID's state in Rx session state machine. + * @tid_rx: aggregation info for Rx per TID + * @tid_state_tx: TID's state in Tx session state machine. + * @tid_tx: aggregation info for Tx per TID + * @addba_req_num: number of times addBA request has been sent. + * @dialog_token_allocator: dialog token enumerator for each new session; + */ +struct sta_ampdu_mlme { + /* rx */ + bool tid_active_rx[STA_TID_NUM]; + struct tid_ampdu_rx *tid_rx[STA_TID_NUM]; + /* tx */ + u8 tid_state_tx[STA_TID_NUM]; + struct tid_ampdu_tx *tid_tx[STA_TID_NUM]; + u8 addba_req_num[STA_TID_NUM]; + u8 dialog_token_allocator; +}; + + /** * enum plink_state - state of a mesh peer link finite state machine * @@ -142,28 +164,6 @@ enum plink_state { PLINK_BLOCKED }; -/** - * struct sta_ampdu_mlme - STA aggregation information. - * - * @tid_active_rx: TID's state in Rx session state machine. 
- * @tid_rx: aggregation info for Rx per TID - * @tid_state_tx: TID's state in Tx session state machine. - * @tid_tx: aggregation info for Tx per TID - * @addba_req_num: number of times addBA request has been sent. - * @dialog_token_allocator: dialog token enumerator for each new session; - */ -struct sta_ampdu_mlme { - /* rx */ - bool tid_active_rx[STA_TID_NUM]; - struct tid_ampdu_rx *tid_rx[STA_TID_NUM]; - /* tx */ - u8 tid_state_tx[STA_TID_NUM]; - struct tid_ampdu_tx *tid_tx[STA_TID_NUM]; - u8 addba_req_num[STA_TID_NUM]; - u8 dialog_token_allocator; -}; - - /** * struct sta_info - STA information * -- cgit v1.2.2 From ad0e2b5a00dbec303e4682b403bb6703d11dcdb2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 1 Jun 2010 10:19:19 +0200 Subject: mac80211: simplify key locking Since I recently made station management able to sleep, I can now rework key management as well; since it will no longer need a spinlock and can also use a mutex instead, a bunch of code to allow drivers' set_key to sleep while key management is protected by a spinlock can now be removed. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 18 ++- net/mac80211/ieee80211_i.h | 4 +- net/mac80211/iface.c | 5 +- net/mac80211/key.c | 288 +++++++++++---------------------------------- net/mac80211/key.h | 22 +--- net/mac80211/main.c | 2 +- net/mac80211/sta_info.c | 8 -- 7 files changed, 80 insertions(+), 267 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index f8c49c5ad8aa..952845e7072a 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -120,6 +120,9 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_key *key; int err; + if (!netif_running(dev)) + return -ENETDOWN; + sdata = IEEE80211_DEV_TO_SUB_IF(dev); switch (params->cipher) { @@ -145,7 +148,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, if (!key) return -ENOMEM; - rcu_read_lock(); + mutex_lock(&sdata->local->sta_mtx); if (mac_addr) { sta = sta_info_get_bss(sdata, mac_addr); @@ -160,7 +163,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, err = 0; out_unlock: - rcu_read_unlock(); + mutex_unlock(&sdata->local->sta_mtx); return err; } @@ -174,7 +177,7 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - rcu_read_lock(); + mutex_lock(&sdata->local->sta_mtx); if (mac_addr) { ret = -ENOENT; @@ -202,7 +205,7 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, ret = 0; out_unlock: - rcu_read_unlock(); + mutex_unlock(&sdata->local->sta_mtx); return ret; } @@ -305,15 +308,10 @@ static int ieee80211_config_default_key(struct wiphy *wiphy, struct net_device *dev, u8 key_idx) { - struct ieee80211_sub_if_data *sdata; - - rcu_read_lock(); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - sdata = IEEE80211_DEV_TO_SUB_IF(dev); ieee80211_set_default_key(sdata, key_idx); - rcu_read_unlock(); - return 0; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 47d67537f170..4d3883e20fc1 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -746,10 +746,10 @@ struct ieee80211_local { struct mutex iflist_mtx; /* - * Key lock, protects sdata's key_list and sta_info's + * Key mutex, protects sdata's key_list and sta_info's * key pointers (write access, they're RCU.) 
*/ - spinlock_t key_lock; + struct mutex key_mtx; /* Scanning and BSS list */ diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 3d3a094d3987..1afa9ec81fe8 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -268,7 +268,6 @@ static int ieee80211_open(struct net_device *dev) changed |= ieee80211_reset_erp_info(sdata); ieee80211_bss_info_change_notify(sdata, changed); - ieee80211_enable_keys(sdata); if (sdata->vif.type == NL80211_IFTYPE_STATION) netif_carrier_off(dev); @@ -522,8 +521,8 @@ static int ieee80211_stop(struct net_device *dev) BSS_CHANGED_BEACON_ENABLED); } - /* disable all keys for as long as this netdev is down */ - ieee80211_disable_keys(sdata); + /* free all remaining keys, there shouldn't be any */ + ieee80211_free_keys(sdata); drv_remove_interface(local, &sdata->vif); } diff --git a/net/mac80211/key.c b/net/mac80211/key.c index e8f6e3b252d8..d0d9001a4a6a 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -36,80 +36,20 @@ * There is currently no way of knowing this except by looking into * debugfs. * - * All key operations are protected internally so you can call them at - * any time. + * All key operations are protected internally. * * Within mac80211, key references are, just as STA structure references, * protected by RCU. Note, however, that some things are unprotected, * namely the key->sta dereferences within the hardware acceleration - * functions. This means that sta_info_destroy() must flush the key todo - * list. - * - * All the direct key list manipulation functions must not sleep because - * they can operate on STA info structs that are protected by RCU. + * functions. This means that sta_info_destroy() must remove the key + * which waits for an RCU grace period. */ static const u8 bcast_addr[ETH_ALEN] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; -/* key mutex: used to synchronise todo runners */ -static DEFINE_MUTEX(key_mutex); -static DEFINE_SPINLOCK(todo_lock); -static LIST_HEAD(todo_list); - -static void key_todo(struct work_struct *work) +static void assert_key_lock(struct ieee80211_local *local) { - ieee80211_key_todo(); -} - -static DECLARE_WORK(todo_work, key_todo); - -/** - * add_todo - add todo item for a key - * - * @key: key to add to do item for - * @flag: todo flag(s) - * - * Must be called with IRQs or softirqs disabled. - */ -static void add_todo(struct ieee80211_key *key, u32 flag) -{ - if (!key) - return; - - spin_lock(&todo_lock); - key->flags |= flag; - /* - * Remove again if already on the list so that we move it to the end. - */ - if (!list_empty(&key->todo)) - list_del(&key->todo); - list_add_tail(&key->todo, &todo_list); - schedule_work(&todo_work); - spin_unlock(&todo_lock); -} - -/** - * ieee80211_key_lock - lock the mac80211 key operation lock - * - * This locks the (global) mac80211 key operation lock, all - * key operations must be done under this lock. 
- */ -static void ieee80211_key_lock(void) -{ - mutex_lock(&key_mutex); -} - -/** - * ieee80211_key_unlock - unlock the mac80211 key operation lock - */ -static void ieee80211_key_unlock(void) -{ - mutex_unlock(&key_mutex); -} - -static void assert_key_lock(void) -{ - WARN_ON(!mutex_is_locked(&key_mutex)); + WARN_ON(!mutex_is_locked(&local->key_mtx)); } static struct ieee80211_sta *get_sta_for_key(struct ieee80211_key *key) @@ -126,12 +66,13 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) struct ieee80211_sta *sta; int ret; - assert_key_lock(); might_sleep(); if (!key->local->ops->set_key) return; + assert_key_lock(key->local); + sta = get_sta_for_key(key); sdata = key->sdata; @@ -142,11 +83,8 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) ret = drv_set_key(key->local, SET_KEY, sdata, sta, &key->conf); - if (!ret) { - spin_lock_bh(&todo_lock); + if (!ret) key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; - spin_unlock_bh(&todo_lock); - } if (ret && ret != -ENOSPC && ret != -EOPNOTSUPP) printk(KERN_ERR "mac80211-%s: failed to set key " @@ -161,18 +99,15 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) struct ieee80211_sta *sta; int ret; - assert_key_lock(); might_sleep(); if (!key || !key->local->ops->set_key) return; - spin_lock_bh(&todo_lock); - if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) { - spin_unlock_bh(&todo_lock); + assert_key_lock(key->local); + + if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) return; - } - spin_unlock_bh(&todo_lock); sta = get_sta_for_key(key); sdata = key->sdata; @@ -191,9 +126,7 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) wiphy_name(key->local->hw.wiphy), key->conf.keyidx, sta ? sta->addr : bcast_addr, ret); - spin_lock_bh(&todo_lock); key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; - spin_unlock_bh(&todo_lock); } static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, @@ -201,22 +134,24 @@ static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, { struct ieee80211_key *key = NULL; + assert_key_lock(sdata->local); + if (idx >= 0 && idx < NUM_DEFAULT_KEYS) key = sdata->keys[idx]; rcu_assign_pointer(sdata->default_key, key); - if (key) - add_todo(key, KEY_FLAG_TODO_DEFKEY); + if (key) { + ieee80211_debugfs_key_remove_default(key->sdata); + ieee80211_debugfs_key_add_default(key->sdata); + } } void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx) { - unsigned long flags; - - spin_lock_irqsave(&sdata->local->key_lock, flags); + mutex_lock(&sdata->local->key_mtx); __ieee80211_set_default_key(sdata, idx); - spin_unlock_irqrestore(&sdata->local->key_lock, flags); + mutex_unlock(&sdata->local->key_mtx); } static void @@ -224,24 +159,26 @@ __ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, int idx) { struct ieee80211_key *key = NULL; + assert_key_lock(sdata->local); + if (idx >= NUM_DEFAULT_KEYS && idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) key = sdata->keys[idx]; rcu_assign_pointer(sdata->default_mgmt_key, key); - if (key) - add_todo(key, KEY_FLAG_TODO_DEFMGMTKEY); + if (key) { + ieee80211_debugfs_key_remove_mgmt_default(key->sdata); + ieee80211_debugfs_key_add_mgmt_default(key->sdata); + } } void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, int idx) { - unsigned long flags; - - spin_lock_irqsave(&sdata->local->key_lock, flags); + mutex_lock(&sdata->local->key_mtx); __ieee80211_set_default_mgmt_key(sdata, idx); - 
spin_unlock_irqrestore(&sdata->local->key_lock, flags); + mutex_unlock(&sdata->local->key_mtx); } @@ -352,7 +289,6 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, } memcpy(key->conf.key, key_data, key_len); INIT_LIST_HEAD(&key->list); - INIT_LIST_HEAD(&key->todo); if (alg == ALG_CCMP) { /* @@ -382,12 +318,27 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, return key; } +static void __ieee80211_key_destroy(struct ieee80211_key *key) +{ + if (!key) + return; + + ieee80211_key_disable_hw_accel(key); + + if (key->conf.alg == ALG_CCMP) + ieee80211_aes_key_free(key->u.ccmp.tfm); + if (key->conf.alg == ALG_AES_CMAC) + ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); + ieee80211_debugfs_key_remove(key); + + kfree(key); +} + void ieee80211_key_link(struct ieee80211_key *key, struct ieee80211_sub_if_data *sdata, struct sta_info *sta) { struct ieee80211_key *old_key; - unsigned long flags; int idx; BUG_ON(!sdata); @@ -431,7 +382,7 @@ void ieee80211_key_link(struct ieee80211_key *key, } } - spin_lock_irqsave(&sdata->local->key_lock, flags); + mutex_lock(&sdata->local->key_mtx); if (sta) old_key = sta->key; @@ -439,15 +390,13 @@ void ieee80211_key_link(struct ieee80211_key *key, old_key = sdata->keys[idx]; __ieee80211_key_replace(sdata, sta, old_key, key); + __ieee80211_key_destroy(old_key); - /* free old key later */ - add_todo(old_key, KEY_FLAG_TODO_DELETE); + ieee80211_debugfs_key_add(key); - add_todo(key, KEY_FLAG_TODO_ADD_DEBUGFS); - if (ieee80211_sdata_running(sdata)) - add_todo(key, KEY_FLAG_TODO_HWACCEL_ADD); + ieee80211_key_enable_hw_accel(key); - spin_unlock_irqrestore(&sdata->local->key_lock, flags); + mutex_unlock(&sdata->local->key_mtx); } static void __ieee80211_key_free(struct ieee80211_key *key) @@ -458,170 +407,65 @@ static void __ieee80211_key_free(struct ieee80211_key *key) if (key->sdata) __ieee80211_key_replace(key->sdata, key->sta, key, NULL); - - add_todo(key, KEY_FLAG_TODO_DELETE); + __ieee80211_key_destroy(key); } void ieee80211_key_free(struct ieee80211_key *key) { - unsigned long flags; + struct ieee80211_local *local; if (!key) return; - if (!key->sdata) { - /* The key has not been linked yet, simply free it - * and don't Oops */ - if (key->conf.alg == ALG_CCMP) - ieee80211_aes_key_free(key->u.ccmp.tfm); - kfree(key); - return; - } + local = key->sdata->local; - spin_lock_irqsave(&key->sdata->local->key_lock, flags); + mutex_lock(&local->key_mtx); __ieee80211_key_free(key); - spin_unlock_irqrestore(&key->sdata->local->key_lock, flags); + mutex_unlock(&local->key_mtx); } -/* - * To be safe against concurrent manipulations of the list (which shouldn't - * actually happen) we need to hold the spinlock. But under the spinlock we - * can't actually do much, so we defer processing to the todo list. Then run - * the todo list to be sure the operation and possibly previously pending - * operations are completed. 
- */ -static void ieee80211_todo_for_each_key(struct ieee80211_sub_if_data *sdata, - u32 todo_flags) +void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata) { struct ieee80211_key *key; - unsigned long flags; - - might_sleep(); - - spin_lock_irqsave(&sdata->local->key_lock, flags); - list_for_each_entry(key, &sdata->key_list, list) - add_todo(key, todo_flags); - spin_unlock_irqrestore(&sdata->local->key_lock, flags); - - ieee80211_key_todo(); -} -void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata) -{ ASSERT_RTNL(); if (WARN_ON(!ieee80211_sdata_running(sdata))) return; - ieee80211_todo_for_each_key(sdata, KEY_FLAG_TODO_HWACCEL_ADD); -} - -void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata) -{ - ASSERT_RTNL(); - - ieee80211_todo_for_each_key(sdata, KEY_FLAG_TODO_HWACCEL_REMOVE); -} - -static void __ieee80211_key_destroy(struct ieee80211_key *key) -{ - if (!key) - return; - - ieee80211_key_disable_hw_accel(key); + mutex_lock(&sdata->local->key_mtx); - if (key->conf.alg == ALG_CCMP) - ieee80211_aes_key_free(key->u.ccmp.tfm); - if (key->conf.alg == ALG_AES_CMAC) - ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); - ieee80211_debugfs_key_remove(key); + list_for_each_entry(key, &sdata->key_list, list) + ieee80211_key_enable_hw_accel(key); - kfree(key); + mutex_unlock(&sdata->local->key_mtx); } -static void __ieee80211_key_todo(void) +void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata) { struct ieee80211_key *key; - bool work_done; - u32 todoflags; - /* - * NB: sta_info_destroy relies on this! - */ - synchronize_rcu(); - - spin_lock_bh(&todo_lock); - while (!list_empty(&todo_list)) { - key = list_first_entry(&todo_list, struct ieee80211_key, todo); - list_del_init(&key->todo); - todoflags = key->flags & (KEY_FLAG_TODO_ADD_DEBUGFS | - KEY_FLAG_TODO_DEFKEY | - KEY_FLAG_TODO_DEFMGMTKEY | - KEY_FLAG_TODO_HWACCEL_ADD | - KEY_FLAG_TODO_HWACCEL_REMOVE | - KEY_FLAG_TODO_DELETE); - key->flags &= ~todoflags; - spin_unlock_bh(&todo_lock); - - work_done = false; - - if (todoflags & KEY_FLAG_TODO_ADD_DEBUGFS) { - ieee80211_debugfs_key_add(key); - work_done = true; - } - if (todoflags & KEY_FLAG_TODO_DEFKEY) { - ieee80211_debugfs_key_remove_default(key->sdata); - ieee80211_debugfs_key_add_default(key->sdata); - work_done = true; - } - if (todoflags & KEY_FLAG_TODO_DEFMGMTKEY) { - ieee80211_debugfs_key_remove_mgmt_default(key->sdata); - ieee80211_debugfs_key_add_mgmt_default(key->sdata); - work_done = true; - } - if (todoflags & KEY_FLAG_TODO_HWACCEL_ADD) { - ieee80211_key_enable_hw_accel(key); - work_done = true; - } - if (todoflags & KEY_FLAG_TODO_HWACCEL_REMOVE) { - ieee80211_key_disable_hw_accel(key); - work_done = true; - } - if (todoflags & KEY_FLAG_TODO_DELETE) { - __ieee80211_key_destroy(key); - work_done = true; - } + ASSERT_RTNL(); - WARN_ON(!work_done); + mutex_lock(&sdata->local->key_mtx); - spin_lock_bh(&todo_lock); - } - spin_unlock_bh(&todo_lock); -} + list_for_each_entry(key, &sdata->key_list, list) + ieee80211_key_disable_hw_accel(key); -void ieee80211_key_todo(void) -{ - ieee80211_key_lock(); - __ieee80211_key_todo(); - ieee80211_key_unlock(); + mutex_unlock(&sdata->local->key_mtx); } void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata) { struct ieee80211_key *key, *tmp; - unsigned long flags; - ieee80211_key_lock(); + mutex_lock(&sdata->local->key_mtx); ieee80211_debugfs_key_remove_default(sdata); ieee80211_debugfs_key_remove_mgmt_default(sdata); - spin_lock_irqsave(&sdata->local->key_lock, flags); list_for_each_entry_safe(key, tmp, 
&sdata->key_list, list) __ieee80211_key_free(key); - spin_unlock_irqrestore(&sdata->local->key_lock, flags); - - __ieee80211_key_todo(); - ieee80211_key_unlock(); + mutex_unlock(&sdata->local->key_mtx); } diff --git a/net/mac80211/key.h b/net/mac80211/key.h index bdc2968c2bbe..9996e3be6e63 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -38,25 +38,9 @@ struct sta_info; * * @KEY_FLAG_UPLOADED_TO_HARDWARE: Indicates that this key is present * in the hardware for TX crypto hardware acceleration. - * @KEY_FLAG_TODO_DELETE: Key is marked for deletion and will, after an - * RCU grace period, no longer be reachable other than from the - * todo list. - * @KEY_FLAG_TODO_HWACCEL_ADD: Key needs to be added to hardware acceleration. - * @KEY_FLAG_TODO_HWACCEL_REMOVE: Key needs to be removed from hardware - * acceleration. - * @KEY_FLAG_TODO_DEFKEY: Key is default key and debugfs needs to be updated. - * @KEY_FLAG_TODO_ADD_DEBUGFS: Key needs to be added to debugfs. - * @KEY_FLAG_TODO_DEFMGMTKEY: Key is default management key and debugfs needs - * to be updated. */ enum ieee80211_internal_key_flags { KEY_FLAG_UPLOADED_TO_HARDWARE = BIT(0), - KEY_FLAG_TODO_DELETE = BIT(1), - KEY_FLAG_TODO_HWACCEL_ADD = BIT(2), - KEY_FLAG_TODO_HWACCEL_REMOVE = BIT(3), - KEY_FLAG_TODO_DEFKEY = BIT(4), - KEY_FLAG_TODO_ADD_DEBUGFS = BIT(5), - KEY_FLAG_TODO_DEFMGMTKEY = BIT(6), }; enum ieee80211_internal_tkip_state { @@ -79,10 +63,8 @@ struct ieee80211_key { /* for sdata list */ struct list_head list; - /* for todo list */ - struct list_head todo; - /* protected by todo lock! */ + /* protected by key mutex */ unsigned int flags; union { @@ -155,6 +137,4 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata); void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata); void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata); -void ieee80211_key_todo(void); - #endif /* IEEE80211_KEY_H */ diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 4051b232c6e6..045ead9507aa 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -448,7 +448,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, mutex_init(&local->iflist_mtx); mutex_init(&local->scan_mtx); - spin_lock_init(&local->key_lock); + mutex_init(&local->key_mtx); spin_lock_init(&local->filter_lock); spin_lock_init(&local->queue_stop_reason_lock); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 730197591ab5..c426c572d984 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -648,14 +648,6 @@ static int __must_check __sta_info_destroy(struct sta_info *sta) if (sta->key) { ieee80211_key_free(sta->key); - /* - * We have only unlinked the key, and actually destroying it - * may mean it is removed from hardware which requires that - * the key->sta pointer is still valid, so flush the key todo - * list here. - */ - ieee80211_key_todo(); - WARN_ON(sta->key); } -- cgit v1.2.2 From 2826bcd844e05dcbef9b9284bddb7fe88e8d314f Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 2 Jun 2010 02:57:34 +0200 Subject: mac80211: reduce debugfs code size This patch reduces the binary size by around 25k (measured on MIPS, with CONFIG_MAC80211_DEBUG_COUNTERS enabled). Signed-off-by: Felix Fietkau Signed-off-by: John W. 
Linville --- net/mac80211/debugfs.c | 154 +++++++++++++++++---------------------------- net/mac80211/debugfs_sta.c | 45 ++++++------- 2 files changed, 77 insertions(+), 122 deletions(-) (limited to 'net') diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 637929b65ccc..a694c593ff6a 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -307,9 +307,6 @@ static const struct file_operations queues_ops = { /* statistics stuff */ -#define DEBUGFS_STATS_FILE(name, buflen, fmt, value...) \ - DEBUGFS_READONLY_FILE(stats_ ##name, buflen, fmt, ##value) - static ssize_t format_devstat_counter(struct ieee80211_local *local, char __user *userbuf, size_t count, loff_t *ppos, @@ -351,75 +348,16 @@ static const struct file_operations stats_ ##name## _ops = { \ .open = mac80211_open_file_generic, \ }; -#define DEBUGFS_STATS_ADD(name) \ +#define DEBUGFS_STATS_ADD(name, field) \ + debugfs_create_u32(#name, 0400, statsd, (u32 *) &field); +#define DEBUGFS_DEVSTATS_ADD(name) \ debugfs_create_file(#name, 0400, statsd, local, &stats_ ##name## _ops); -DEBUGFS_STATS_FILE(transmitted_fragment_count, 20, "%u", - local->dot11TransmittedFragmentCount); -DEBUGFS_STATS_FILE(multicast_transmitted_frame_count, 20, "%u", - local->dot11MulticastTransmittedFrameCount); -DEBUGFS_STATS_FILE(failed_count, 20, "%u", - local->dot11FailedCount); -DEBUGFS_STATS_FILE(retry_count, 20, "%u", - local->dot11RetryCount); -DEBUGFS_STATS_FILE(multiple_retry_count, 20, "%u", - local->dot11MultipleRetryCount); -DEBUGFS_STATS_FILE(frame_duplicate_count, 20, "%u", - local->dot11FrameDuplicateCount); -DEBUGFS_STATS_FILE(received_fragment_count, 20, "%u", - local->dot11ReceivedFragmentCount); -DEBUGFS_STATS_FILE(multicast_received_frame_count, 20, "%u", - local->dot11MulticastReceivedFrameCount); -DEBUGFS_STATS_FILE(transmitted_frame_count, 20, "%u", - local->dot11TransmittedFrameCount); -#ifdef CONFIG_MAC80211_DEBUG_COUNTERS -DEBUGFS_STATS_FILE(tx_handlers_drop, 20, "%u", - local->tx_handlers_drop); -DEBUGFS_STATS_FILE(tx_handlers_queued, 20, "%u", - local->tx_handlers_queued); -DEBUGFS_STATS_FILE(tx_handlers_drop_unencrypted, 20, "%u", - local->tx_handlers_drop_unencrypted); -DEBUGFS_STATS_FILE(tx_handlers_drop_fragment, 20, "%u", - local->tx_handlers_drop_fragment); -DEBUGFS_STATS_FILE(tx_handlers_drop_wep, 20, "%u", - local->tx_handlers_drop_wep); -DEBUGFS_STATS_FILE(tx_handlers_drop_not_assoc, 20, "%u", - local->tx_handlers_drop_not_assoc); -DEBUGFS_STATS_FILE(tx_handlers_drop_unauth_port, 20, "%u", - local->tx_handlers_drop_unauth_port); -DEBUGFS_STATS_FILE(rx_handlers_drop, 20, "%u", - local->rx_handlers_drop); -DEBUGFS_STATS_FILE(rx_handlers_queued, 20, "%u", - local->rx_handlers_queued); -DEBUGFS_STATS_FILE(rx_handlers_drop_nullfunc, 20, "%u", - local->rx_handlers_drop_nullfunc); -DEBUGFS_STATS_FILE(rx_handlers_drop_defrag, 20, "%u", - local->rx_handlers_drop_defrag); -DEBUGFS_STATS_FILE(rx_handlers_drop_short, 20, "%u", - local->rx_handlers_drop_short); -DEBUGFS_STATS_FILE(rx_handlers_drop_passive_scan, 20, "%u", - local->rx_handlers_drop_passive_scan); -DEBUGFS_STATS_FILE(tx_expand_skb_head, 20, "%u", - local->tx_expand_skb_head); -DEBUGFS_STATS_FILE(tx_expand_skb_head_cloned, 20, "%u", - local->tx_expand_skb_head_cloned); -DEBUGFS_STATS_FILE(rx_expand_skb_head, 20, "%u", - local->rx_expand_skb_head); -DEBUGFS_STATS_FILE(rx_expand_skb_head2, 20, "%u", - local->rx_expand_skb_head2); -DEBUGFS_STATS_FILE(rx_handlers_fragments, 20, "%u", - local->rx_handlers_fragments); -DEBUGFS_STATS_FILE(tx_status_drop, 20, 
"%u", - local->tx_status_drop); - -#endif - DEBUGFS_DEVSTATS_FILE(dot11ACKFailureCount); DEBUGFS_DEVSTATS_FILE(dot11RTSFailureCount); DEBUGFS_DEVSTATS_FILE(dot11FCSErrorCount); DEBUGFS_DEVSTATS_FILE(dot11RTSSuccessCount); - void debugfs_hw_add(struct ieee80211_local *local) { struct dentry *phyd = local->hw.wiphy->debugfsdir; @@ -448,38 +386,60 @@ void debugfs_hw_add(struct ieee80211_local *local) if (!statsd) return; - DEBUGFS_STATS_ADD(transmitted_fragment_count); - DEBUGFS_STATS_ADD(multicast_transmitted_frame_count); - DEBUGFS_STATS_ADD(failed_count); - DEBUGFS_STATS_ADD(retry_count); - DEBUGFS_STATS_ADD(multiple_retry_count); - DEBUGFS_STATS_ADD(frame_duplicate_count); - DEBUGFS_STATS_ADD(received_fragment_count); - DEBUGFS_STATS_ADD(multicast_received_frame_count); - DEBUGFS_STATS_ADD(transmitted_frame_count); + DEBUGFS_STATS_ADD(transmitted_fragment_count, + local->dot11TransmittedFragmentCount); + DEBUGFS_STATS_ADD(multicast_transmitted_frame_count, + local->dot11MulticastTransmittedFrameCount); + DEBUGFS_STATS_ADD(failed_count, local->dot11FailedCount); + DEBUGFS_STATS_ADD(retry_count, local->dot11RetryCount); + DEBUGFS_STATS_ADD(multiple_retry_count, + local->dot11MultipleRetryCount); + DEBUGFS_STATS_ADD(frame_duplicate_count, + local->dot11FrameDuplicateCount); + DEBUGFS_STATS_ADD(received_fragment_count, + local->dot11ReceivedFragmentCount); + DEBUGFS_STATS_ADD(multicast_received_frame_count, + local->dot11MulticastReceivedFrameCount); + DEBUGFS_STATS_ADD(transmitted_frame_count, + local->dot11TransmittedFrameCount); #ifdef CONFIG_MAC80211_DEBUG_COUNTERS - DEBUGFS_STATS_ADD(tx_handlers_drop); - DEBUGFS_STATS_ADD(tx_handlers_queued); - DEBUGFS_STATS_ADD(tx_handlers_drop_unencrypted); - DEBUGFS_STATS_ADD(tx_handlers_drop_fragment); - DEBUGFS_STATS_ADD(tx_handlers_drop_wep); - DEBUGFS_STATS_ADD(tx_handlers_drop_not_assoc); - DEBUGFS_STATS_ADD(tx_handlers_drop_unauth_port); - DEBUGFS_STATS_ADD(rx_handlers_drop); - DEBUGFS_STATS_ADD(rx_handlers_queued); - DEBUGFS_STATS_ADD(rx_handlers_drop_nullfunc); - DEBUGFS_STATS_ADD(rx_handlers_drop_defrag); - DEBUGFS_STATS_ADD(rx_handlers_drop_short); - DEBUGFS_STATS_ADD(rx_handlers_drop_passive_scan); - DEBUGFS_STATS_ADD(tx_expand_skb_head); - DEBUGFS_STATS_ADD(tx_expand_skb_head_cloned); - DEBUGFS_STATS_ADD(rx_expand_skb_head); - DEBUGFS_STATS_ADD(rx_expand_skb_head2); - DEBUGFS_STATS_ADD(rx_handlers_fragments); - DEBUGFS_STATS_ADD(tx_status_drop); + DEBUGFS_STATS_ADD(tx_handlers_drop, local->tx_handlers_drop); + DEBUGFS_STATS_ADD(tx_handlers_queued, local->tx_handlers_queued); + DEBUGFS_STATS_ADD(tx_handlers_drop_unencrypted, + local->tx_handlers_drop_unencrypted); + DEBUGFS_STATS_ADD(tx_handlers_drop_fragment, + local->tx_handlers_drop_fragment); + DEBUGFS_STATS_ADD(tx_handlers_drop_wep, + local->tx_handlers_drop_wep); + DEBUGFS_STATS_ADD(tx_handlers_drop_not_assoc, + local->tx_handlers_drop_not_assoc); + DEBUGFS_STATS_ADD(tx_handlers_drop_unauth_port, + local->tx_handlers_drop_unauth_port); + DEBUGFS_STATS_ADD(rx_handlers_drop, local->rx_handlers_drop); + DEBUGFS_STATS_ADD(rx_handlers_queued, local->rx_handlers_queued); + DEBUGFS_STATS_ADD(rx_handlers_drop_nullfunc, + local->rx_handlers_drop_nullfunc); + DEBUGFS_STATS_ADD(rx_handlers_drop_defrag, + local->rx_handlers_drop_defrag); + DEBUGFS_STATS_ADD(rx_handlers_drop_short, + local->rx_handlers_drop_short); + DEBUGFS_STATS_ADD(rx_handlers_drop_passive_scan, + local->rx_handlers_drop_passive_scan); + DEBUGFS_STATS_ADD(tx_expand_skb_head, + local->tx_expand_skb_head); + 
DEBUGFS_STATS_ADD(tx_expand_skb_head_cloned, + local->tx_expand_skb_head_cloned); + DEBUGFS_STATS_ADD(rx_expand_skb_head, + local->rx_expand_skb_head); + DEBUGFS_STATS_ADD(rx_expand_skb_head2, + local->rx_expand_skb_head2); + DEBUGFS_STATS_ADD(rx_handlers_fragments, + local->rx_handlers_fragments); + DEBUGFS_STATS_ADD(tx_status_drop, + local->tx_status_drop); #endif - DEBUGFS_STATS_ADD(dot11ACKFailureCount); - DEBUGFS_STATS_ADD(dot11RTSFailureCount); - DEBUGFS_STATS_ADD(dot11FCSErrorCount); - DEBUGFS_STATS_ADD(dot11RTSSuccessCount); + DEBUGFS_DEVSTATS_ADD(dot11ACKFailureCount); + DEBUGFS_DEVSTATS_ADD(dot11RTSFailureCount); + DEBUGFS_DEVSTATS_ADD(dot11FCSErrorCount); + DEBUGFS_DEVSTATS_ADD(dot11RTSSuccessCount); } diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 9f140612224a..576e024715e3 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -30,7 +30,6 @@ static ssize_t sta_ ##name## _read(struct file *file, \ } #define STA_READ_D(name, field) STA_READ(name, 20, field, "%d\n") #define STA_READ_U(name, field) STA_READ(name, 20, field, "%u\n") -#define STA_READ_LU(name, field) STA_READ(name, 20, field, "%lu\n") #define STA_READ_S(name, field) STA_READ(name, 20, field, "%s\n") #define STA_OPS(name) \ @@ -52,19 +51,7 @@ static const struct file_operations sta_ ##name## _ops = { \ STA_FILE(aid, sta.aid, D); STA_FILE(dev, sdata->name, S); -STA_FILE(rx_packets, rx_packets, LU); -STA_FILE(tx_packets, tx_packets, LU); -STA_FILE(rx_bytes, rx_bytes, LU); -STA_FILE(tx_bytes, tx_bytes, LU); -STA_FILE(rx_duplicates, num_duplicates, LU); -STA_FILE(rx_fragments, rx_fragments, LU); -STA_FILE(rx_dropped, rx_dropped, LU); -STA_FILE(tx_fragments, tx_fragments, LU); -STA_FILE(tx_filtered, tx_filtered_count, LU); -STA_FILE(tx_retry_failed, tx_retry_failed, LU); -STA_FILE(tx_retry_count, tx_retry_count, LU); STA_FILE(last_signal, last_signal, D); -STA_FILE(wep_weak_iv_count, wep_weak_iv_count, LU); static ssize_t sta_flags_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) @@ -306,6 +293,13 @@ STA_OPS(ht_capa); debugfs_create_file(#name, 0400, \ sta->debugfs.dir, sta, &sta_ ##name## _ops); +#define DEBUGFS_ADD_COUNTER(name, field) \ + if (sizeof(sta->field) == sizeof(u32)) \ + debugfs_create_u32(#name, 0400, sta->debugfs.dir, \ + (u32 *) &sta->field); \ + else \ + debugfs_create_u64(#name, 0400, sta->debugfs.dir, \ + (u64 *) &sta->field); void ieee80211_sta_debugfs_add(struct sta_info *sta) { @@ -338,20 +332,21 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DEBUGFS_ADD(last_seq_ctrl); DEBUGFS_ADD(agg_status); DEBUGFS_ADD(dev); - DEBUGFS_ADD(rx_packets); - DEBUGFS_ADD(tx_packets); - DEBUGFS_ADD(rx_bytes); - DEBUGFS_ADD(tx_bytes); - DEBUGFS_ADD(rx_duplicates); - DEBUGFS_ADD(rx_fragments); - DEBUGFS_ADD(rx_dropped); - DEBUGFS_ADD(tx_fragments); - DEBUGFS_ADD(tx_filtered); - DEBUGFS_ADD(tx_retry_failed); - DEBUGFS_ADD(tx_retry_count); DEBUGFS_ADD(last_signal); - DEBUGFS_ADD(wep_weak_iv_count); DEBUGFS_ADD(ht_capa); + + DEBUGFS_ADD_COUNTER(rx_packets, rx_packets); + DEBUGFS_ADD_COUNTER(tx_packets, tx_packets); + DEBUGFS_ADD_COUNTER(rx_bytes, rx_bytes); + DEBUGFS_ADD_COUNTER(tx_bytes, tx_bytes); + DEBUGFS_ADD_COUNTER(rx_duplicates, num_duplicates); + DEBUGFS_ADD_COUNTER(rx_fragments, rx_fragments); + DEBUGFS_ADD_COUNTER(rx_dropped, rx_dropped); + DEBUGFS_ADD_COUNTER(tx_fragments, tx_fragments); + DEBUGFS_ADD_COUNTER(tx_filtered, tx_filtered_count); + DEBUGFS_ADD_COUNTER(tx_retry_failed, tx_retry_failed); + 
DEBUGFS_ADD_COUNTER(tx_retry_count, tx_retry_count); + DEBUGFS_ADD_COUNTER(wep_weak_iv_count, wep_weak_iv_count); } void ieee80211_sta_debugfs_remove(struct sta_info *sta) -- cgit v1.2.2 From 4736022844fe694c4ee971fa2b6c1cb38dadbc78 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Jun 2010 04:13:21 +0000 Subject: ipv4: RCU changes in __mkroute_input() Avoid two atomic ops on output device refcount Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 1cfe0d199e7c..7b8eacd5ac26 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1957,22 +1957,22 @@ static void ip_handle_martian_source(struct net_device *dev, #endif } +/* called in rcu_read_lock() section */ static int __mkroute_input(struct sk_buff *skb, struct fib_result *res, struct in_device *in_dev, __be32 daddr, __be32 saddr, u32 tos, struct rtable **result) { - struct rtable *rth; int err; struct in_device *out_dev; - unsigned flags = 0; + unsigned int flags = 0; __be32 spec_dst; u32 itag; /* get a working reference to the output device */ - out_dev = in_dev_get(FIB_RES_DEV(*res)); + out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res)); if (out_dev == NULL) { if (net_ratelimit()) printk(KERN_CRIT "Bug in ip_route_input" \ @@ -2053,8 +2053,6 @@ static int __mkroute_input(struct sk_buff *skb, *result = rth; err = 0; cleanup: - /* release the working reference to the output device */ - in_dev_put(out_dev); return err; } -- cgit v1.2.2 From faa9dcf793beba05f7178b63a59eaa3ca5175b6a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Jun 2010 04:09:10 +0000 Subject: arp: RCU changes Avoid two atomic ops in arp_fwd_proxy() Avoid two atomic ops in arp_process() Valid optims since arp_rcv() is run under rcu_read_lock() Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/arp.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index f094b75810db..917d2d66162e 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -545,10 +545,10 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, /* place to check for proxy_arp for routes */ - if ((out_dev = in_dev_get(rt->u.dst.dev)) != NULL) { + out_dev = __in_dev_get_rcu(rt->u.dst.dev); + if (out_dev) omi = IN_DEV_MEDIUM_ID(out_dev); - in_dev_put(out_dev); - } + return (omi != imi && omi != -1); } @@ -741,7 +741,7 @@ void arp_send(int type, int ptype, __be32 dest_ip, static int arp_process(struct sk_buff *skb) { struct net_device *dev = skb->dev; - struct in_device *in_dev = in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); struct arphdr *arp; unsigned char *arp_ptr; struct rtable *rt; @@ -890,7 +890,6 @@ static int arp_process(struct sk_buff *skb) arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); } else { pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb); - in_dev_put(in_dev); return 0; } goto out; @@ -936,8 +935,6 @@ static int arp_process(struct sk_buff *skb) } out: - if (in_dev) - in_dev_put(in_dev); consume_skb(skb); return 0; } -- cgit v1.2.2 From c6d409cfd0fd41e7a0847875e4338ad648c9b96b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Jun 2010 20:03:40 -0700 Subject: From abbffa2aa9bd6f8df16d0d0a102af677510d8b9a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Jun 2010 04:29:41 +0000 Subject: [PATCH 2/3] net: net/socket.c and net/compat.c cleanups cleanup patch, to match modern coding style. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/compat.c | 47 ++++++++--------- net/socket.c | 165 ++++++++++++++++++++++++++++------------------------------ 2 files changed, 102 insertions(+), 110 deletions(-) diff --git a/net/compat.c b/net/compat.c index 1cf7590..63d260e 100644 --- a/net/compat.c +++ b/net/compat.c @@ -81,7 +81,7 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov, int tot_len; if (kern_msg->msg_namelen) { - if (mode==VERIFY_READ) { + if (mode == VERIFY_READ) { int err = move_addr_to_kernel(kern_msg->msg_name, kern_msg->msg_namelen, kern_address); @@ -354,7 +354,7 @@ static int do_set_attach_filter(struct socket *sock, int level, int optname, static int do_set_sock_timeout(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) { - struct compat_timeval __user *up = (struct compat_timeval __user *) optval; + struct compat_timeval __user *up = (struct compat_timeval __user *)optval; struct timeval ktime; mm_segment_t old_fs; int err; @@ -367,7 +367,7 @@ static int do_set_sock_timeout(struct socket *sock, int level, return -EFAULT; old_fs = get_fs(); set_fs(KERNEL_DS); - err = sock_setsockopt(sock, level, optname, (char *) &ktime, sizeof(ktime)); + err = sock_setsockopt(sock, level, optname, (char *)&ktime, sizeof(ktime)); set_fs(old_fs); return err; @@ -389,11 +389,10 @@ asmlinkage long compat_sys_setsockopt(int fd, int level, int optname, char __user *optval, unsigned int optlen) { int err; - struct socket *sock; + struct socket *sock = sockfd_lookup(fd, &err); - if ((sock = sockfd_lookup(fd, &err))!=NULL) - { - err = security_socket_setsockopt(sock,level,optname); + if (sock) { + err = security_socket_setsockopt(sock, level, optname); if (err) { sockfd_put(sock); return err; @@ -453,7 +452,7 @@ static int compat_sock_getsockopt(struct socket *sock, int level, int optname, int 
--- net/compat.c | 47 ++++++++--------- net/socket.c | 164 ++++++++++++++++++++++++++++------------------------- 2 files changed, 101 insertions(+), 110 deletions(-) (limited to 'net') diff --git a/net/compat.c b/net/compat.c index 1cf75905f132..63d260e81472 100644 --- a/net/compat.c +++ b/net/compat.c @@ -81,7 +81,7 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov, int tot_len; if (kern_msg->msg_namelen) { - if (mode==VERIFY_READ) { + if (mode == VERIFY_READ) { int err = move_addr_to_kernel(kern_msg->msg_name, kern_msg->msg_namelen, kern_address); @@ -354,7 +354,7 @@ static int do_set_attach_filter(struct socket *sock, int level, int optname, static int do_set_sock_timeout(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) { - struct compat_timeval __user *up = (struct compat_timeval __user *) optval; + struct compat_timeval __user *up = (struct compat_timeval __user *)optval; struct timeval ktime; mm_segment_t old_fs; int err; @@ -367,7 +367,7 @@ static int do_set_sock_timeout(struct socket *sock, int level, return -EFAULT; old_fs = get_fs(); set_fs(KERNEL_DS); - err = sock_setsockopt(sock, level, optname, (char *) &ktime, sizeof(ktime)); + err = sock_setsockopt(sock, level, optname, (char *)&ktime, sizeof(ktime)); set_fs(old_fs); return err; @@ -389,11 +389,10 @@ asmlinkage long compat_sys_setsockopt(int fd, int level, int optname, char
__user *optval, unsigned int optlen) { int err; - struct socket *sock; + struct socket *sock = sockfd_lookup(fd, &err); - if ((sock = sockfd_lookup(fd, &err))!=NULL) - { - err = security_socket_setsockopt(sock,level,optname); + if (sock) { + err = security_socket_setsockopt(sock, level, optname); if (err) { sockfd_put(sock); return err; @@ -453,7 +452,7 @@ static int compat_sock_getsockopt(struct socket *sock, int level, int optname, int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) { struct compat_timeval __user *ctv = - (struct compat_timeval __user*) userstamp; + (struct compat_timeval __user *) userstamp; int err = -ENOENT; struct timeval tv; @@ -477,7 +476,7 @@ EXPORT_SYMBOL(compat_sock_get_timestamp); int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp) { struct compat_timespec __user *ctv = - (struct compat_timespec __user*) userstamp; + (struct compat_timespec __user *) userstamp; int err = -ENOENT; struct timespec ts; @@ -502,12 +501,10 @@ asmlinkage long compat_sys_getsockopt(int fd, int level, int optname, char __user *optval, int __user *optlen) { int err; - struct socket *sock; + struct socket *sock = sockfd_lookup(fd, &err); - if ((sock = sockfd_lookup(fd, &err))!=NULL) - { - err = security_socket_getsockopt(sock, level, - optname); + if (sock) { + err = security_socket_getsockopt(sock, level, optname); if (err) { sockfd_put(sock); return err; @@ -557,7 +554,7 @@ struct compat_group_filter { int compat_mc_setsockopt(struct sock *sock, int level, int optname, char __user *optval, unsigned int optlen, - int (*setsockopt)(struct sock *,int,int,char __user *,unsigned int)) + int (*setsockopt)(struct sock *, int, int, char __user *, unsigned int)) { char __user *koptval = optval; int koptlen = optlen; @@ -640,12 +637,11 @@ int compat_mc_setsockopt(struct sock *sock, int level, int optname, } return setsockopt(sock, level, optname, koptval, koptlen); } - EXPORT_SYMBOL(compat_mc_setsockopt); int compat_mc_getsockopt(struct sock *sock, int level, int optname, char __user *optval, int __user *optlen, - int (*getsockopt)(struct sock *,int,int,char __user *,int __user *)) + int (*getsockopt)(struct sock *, int, int, char __user *, int __user *)) { struct compat_group_filter __user *gf32 = (void *)optval; struct group_filter __user *kgf; @@ -681,7 +677,7 @@ int compat_mc_getsockopt(struct sock *sock, int level, int optname, __put_user(interface, &kgf->gf_interface) || __put_user(fmode, &kgf->gf_fmode) || __put_user(numsrc, &kgf->gf_numsrc) || - copy_in_user(&kgf->gf_group,&gf32->gf_group,sizeof(kgf->gf_group))) + copy_in_user(&kgf->gf_group, &gf32->gf_group, sizeof(kgf->gf_group))) return -EFAULT; err = getsockopt(sock, level, optname, (char __user *)kgf, koptlen); @@ -714,21 +710,22 @@ int compat_mc_getsockopt(struct sock *sock, int level, int optname, copylen = numsrc * sizeof(gf32->gf_slist[0]); if (copylen > klen) copylen = klen; - if (copy_in_user(gf32->gf_slist, kgf->gf_slist, copylen)) + if (copy_in_user(gf32->gf_slist, kgf->gf_slist, copylen)) return -EFAULT; } return err; } - EXPORT_SYMBOL(compat_mc_getsockopt); /* Argument list sizes for compat_sys_socketcall */ #define AL(x) ((x) * sizeof(u32)) -static unsigned char nas[20]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), - AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), - AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), - AL(4),AL(5)}; +static unsigned char nas[20] = { + AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), + AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), + AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), 
+ AL(4), AL(5) +}; #undef AL asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags) @@ -827,7 +824,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) compat_ptr(a[4]), compat_ptr(a[5])); break; case SYS_SHUTDOWN: - ret = sys_shutdown(a0,a1); + ret = sys_shutdown(a0, a1); break; case SYS_SETSOCKOPT: ret = compat_sys_setsockopt(a0, a1, a[2], diff --git a/net/socket.c b/net/socket.c index 367d5477d00f..acfa1738663d 100644 --- a/net/socket.c +++ b/net/socket.c @@ -124,7 +124,7 @@ static int sock_fasync(int fd, struct file *filp, int on); static ssize_t sock_sendpage(struct file *file, struct page *page, int offset, size_t size, loff_t *ppos, int more); static ssize_t sock_splice_read(struct file *file, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, + struct pipe_inode_info *pipe, size_t len, unsigned int flags); /* @@ -162,7 +162,7 @@ static const struct net_proto_family *net_families[NPROTO] __read_mostly; * Statistics counters of the socket lists */ -static DEFINE_PER_CPU(int, sockets_in_use) = 0; +static DEFINE_PER_CPU(int, sockets_in_use); /* * Support routines. @@ -309,9 +309,9 @@ static int init_inodecache(void) } static const struct super_operations sockfs_ops = { - .alloc_inode = sock_alloc_inode, - .destroy_inode =sock_destroy_inode, - .statfs = simple_statfs, + .alloc_inode = sock_alloc_inode, + .destroy_inode = sock_destroy_inode, + .statfs = simple_statfs, }; static int sockfs_get_sb(struct file_system_type *fs_type, @@ -411,6 +411,7 @@ int sock_map_fd(struct socket *sock, int flags) return fd; } +EXPORT_SYMBOL(sock_map_fd); static struct socket *sock_from_file(struct file *file, int *err) { @@ -422,7 +423,7 @@ static struct socket *sock_from_file(struct file *file, int *err) } /** - * sockfd_lookup - Go from a file number to its socket slot + * sockfd_lookup - Go from a file number to its socket slot * @fd: file handle * @err: pointer to an error code return * @@ -450,6 +451,7 @@ struct socket *sockfd_lookup(int fd, int *err) fput(file); return sock; } +EXPORT_SYMBOL(sockfd_lookup); static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) { @@ -540,6 +542,7 @@ void sock_release(struct socket *sock) } sock->file = NULL; } +EXPORT_SYMBOL(sock_release); int sock_tx_timestamp(struct msghdr *msg, struct sock *sk, union skb_shared_tx *shtx) @@ -586,6 +589,7 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) ret = wait_on_sync_kiocb(&iocb); return ret; } +EXPORT_SYMBOL(sock_sendmsg); int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size) @@ -604,6 +608,7 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, set_fs(oldfs); return result; } +EXPORT_SYMBOL(kernel_sendmsg); static int ktime2ts(ktime_t kt, struct timespec *ts) { @@ -664,7 +669,6 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING, sizeof(ts), &ts); } - EXPORT_SYMBOL_GPL(__sock_recv_timestamp); inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) @@ -720,6 +724,7 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, ret = wait_on_sync_kiocb(&iocb); return ret; } +EXPORT_SYMBOL(sock_recvmsg); static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size, int flags) @@ -752,6 +757,7 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg, set_fs(oldfs); return result; } +EXPORT_SYMBOL(kernel_recvmsg); static void sock_aio_dtor(struct kiocb 
*iocb) { @@ -774,7 +780,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page, } static ssize_t sock_splice_read(struct file *file, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, + struct pipe_inode_info *pipe, size_t len, unsigned int flags) { struct socket *sock = file->private_data; @@ -887,7 +893,7 @@ static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, */ static DEFINE_MUTEX(br_ioctl_mutex); -static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL; +static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg); void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) { @@ -895,7 +901,6 @@ void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) br_ioctl_hook = hook; mutex_unlock(&br_ioctl_mutex); } - EXPORT_SYMBOL(brioctl_set); static DEFINE_MUTEX(vlan_ioctl_mutex); @@ -907,7 +912,6 @@ void vlan_ioctl_set(int (*hook) (struct net *, void __user *)) vlan_ioctl_hook = hook; mutex_unlock(&vlan_ioctl_mutex); } - EXPORT_SYMBOL(vlan_ioctl_set); static DEFINE_MUTEX(dlci_ioctl_mutex); @@ -919,7 +923,6 @@ void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) dlci_ioctl_hook = hook; mutex_unlock(&dlci_ioctl_mutex); } - EXPORT_SYMBOL(dlci_ioctl_set); static long sock_do_ioctl(struct net *net, struct socket *sock, @@ -1047,6 +1050,7 @@ out_release: sock = NULL; goto out; } +EXPORT_SYMBOL(sock_create_lite); /* No kernel lock held - perfect */ static unsigned int sock_poll(struct file *file, poll_table *wait) @@ -1147,6 +1151,7 @@ call_kill: rcu_read_unlock(); return 0; } +EXPORT_SYMBOL(sock_wake_async); static int __sock_create(struct net *net, int family, int type, int protocol, struct socket **res, int kern) @@ -1265,11 +1270,13 @@ int sock_create(int family, int type, int protocol, struct socket **res) { return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); } +EXPORT_SYMBOL(sock_create); int sock_create_kern(int family, int type, int protocol, struct socket **res) { return __sock_create(&init_net, family, type, protocol, res, 1); } +EXPORT_SYMBOL(sock_create_kern); SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) { @@ -1474,7 +1481,8 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, goto out; err = -ENFILE; - if (!(newsock = sock_alloc())) + newsock = sock_alloc(); + if (!newsock) goto out_put; newsock->type = sock->type; @@ -1861,8 +1869,7 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) if (MSG_CMSG_COMPAT & flags) { if (get_compat_msghdr(&msg_sys, msg_compat)) return -EFAULT; - } - else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) + } else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) return -EFAULT; sock = sockfd_lookup_light(fd, &err, &fput_needed); @@ -1964,8 +1971,7 @@ static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, if (MSG_CMSG_COMPAT & flags) { if (get_compat_msghdr(msg_sys, msg_compat)) return -EFAULT; - } - else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) + } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) return -EFAULT; err = -EMSGSIZE; @@ -2191,10 +2197,10 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, /* Argument list sizes for sys_socketcall */ #define AL(x) ((x) * sizeof(unsigned long)) static const unsigned char nargs[20] = { - AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), - AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), - AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), - 
AL(4),AL(5) + AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), + AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), + AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), + AL(4), AL(5) }; #undef AL @@ -2340,6 +2346,7 @@ int sock_register(const struct net_proto_family *ops) printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); return err; } +EXPORT_SYMBOL(sock_register); /** * sock_unregister - remove a protocol handler @@ -2366,6 +2373,7 @@ void sock_unregister(int family) printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); } +EXPORT_SYMBOL(sock_unregister); static int __init sock_init(void) { @@ -2490,13 +2498,13 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) ifc.ifc_req = NULL; uifc = compat_alloc_user_space(sizeof(struct ifconf)); } else { - size_t len =((ifc32.ifc_len / sizeof (struct compat_ifreq)) + 1) * - sizeof (struct ifreq); + size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) * + sizeof(struct ifreq); uifc = compat_alloc_user_space(sizeof(struct ifconf) + len); ifc.ifc_len = len; ifr = ifc.ifc_req = (void __user *)(uifc + 1); ifr32 = compat_ptr(ifc32.ifcbuf); - for (i = 0; i < ifc32.ifc_len; i += sizeof (struct compat_ifreq)) { + for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) { if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq))) return -EFAULT; ifr++; @@ -2516,9 +2524,9 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) ifr = ifc.ifc_req; ifr32 = compat_ptr(ifc32.ifcbuf); for (i = 0, j = 0; - i + sizeof (struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len; - i += sizeof (struct compat_ifreq), j += sizeof (struct ifreq)) { - if (copy_in_user(ifr32, ifr, sizeof (struct compat_ifreq))) + i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len; + i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) { + if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq))) return -EFAULT; ifr32++; ifr++; @@ -2567,7 +2575,7 @@ static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32 compat_uptr_t uptr32; struct ifreq __user *uifr; - uifr = compat_alloc_user_space(sizeof (*uifr)); + uifr = compat_alloc_user_space(sizeof(*uifr)); if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) return -EFAULT; @@ -2601,9 +2609,9 @@ static int bond_ioctl(struct net *net, unsigned int cmd, return -EFAULT; old_fs = get_fs(); - set_fs (KERNEL_DS); + set_fs(KERNEL_DS); err = dev_ioctl(net, cmd, &kifr); - set_fs (old_fs); + set_fs(old_fs); return err; case SIOCBONDSLAVEINFOQUERY: @@ -2710,9 +2718,9 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd, return -EFAULT; old_fs = get_fs(); - set_fs (KERNEL_DS); + set_fs(KERNEL_DS); err = dev_ioctl(net, cmd, (void __user *)&ifr); - set_fs (old_fs); + set_fs(old_fs); if (cmd == SIOCGIFMAP && !err) { err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); @@ -2734,7 +2742,7 @@ static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uif compat_uptr_t uptr32; struct ifreq __user *uifr; - uifr = compat_alloc_user_space(sizeof (*uifr)); + uifr = compat_alloc_user_space(sizeof(*uifr)); if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) return -EFAULT; @@ -2750,20 +2758,20 @@ static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uif } struct rtentry32 { - u32 rt_pad1; + u32 rt_pad1; struct sockaddr rt_dst; /* target address */ struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */ struct sockaddr rt_genmask; /* target network mask (IP) */ - 
unsigned short rt_flags; - short rt_pad2; - u32 rt_pad3; - unsigned char rt_tos; - unsigned char rt_class; - short rt_pad4; - short rt_metric; /* +1 for binary compatibility! */ + unsigned short rt_flags; + short rt_pad2; + u32 rt_pad3; + unsigned char rt_tos; + unsigned char rt_class; + short rt_pad4; + short rt_metric; /* +1 for binary compatibility! */ /* char * */ u32 rt_dev; /* forcing the device at add */ - u32 rt_mtu; /* per route MTU/Window */ - u32 rt_window; /* Window clamping */ + u32 rt_mtu; /* per route MTU/Window */ + u32 rt_window; /* Window clamping */ unsigned short rt_irtt; /* Initial RTT */ }; @@ -2793,29 +2801,29 @@ static int routing_ioctl(struct net *net, struct socket *sock, if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */ struct in6_rtmsg32 __user *ur6 = argp; - ret = copy_from_user (&r6.rtmsg_dst, &(ur6->rtmsg_dst), + ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst), 3 * sizeof(struct in6_addr)); - ret |= __get_user (r6.rtmsg_type, &(ur6->rtmsg_type)); - ret |= __get_user (r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); - ret |= __get_user (r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); - ret |= __get_user (r6.rtmsg_metric, &(ur6->rtmsg_metric)); - ret |= __get_user (r6.rtmsg_info, &(ur6->rtmsg_info)); - ret |= __get_user (r6.rtmsg_flags, &(ur6->rtmsg_flags)); - ret |= __get_user (r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); + ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type)); + ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); + ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); + ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric)); + ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info)); + ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags)); + ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); r = (void *) &r6; } else { /* ipv4 */ struct rtentry32 __user *ur4 = argp; - ret = copy_from_user (&r4.rt_dst, &(ur4->rt_dst), + ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst), 3 * sizeof(struct sockaddr)); - ret |= __get_user (r4.rt_flags, &(ur4->rt_flags)); - ret |= __get_user (r4.rt_metric, &(ur4->rt_metric)); - ret |= __get_user (r4.rt_mtu, &(ur4->rt_mtu)); - ret |= __get_user (r4.rt_window, &(ur4->rt_window)); - ret |= __get_user (r4.rt_irtt, &(ur4->rt_irtt)); - ret |= __get_user (rtdev, &(ur4->rt_dev)); + ret |= __get_user(r4.rt_flags, &(ur4->rt_flags)); + ret |= __get_user(r4.rt_metric, &(ur4->rt_metric)); + ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu)); + ret |= __get_user(r4.rt_window, &(ur4->rt_window)); + ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt)); + ret |= __get_user(rtdev, &(ur4->rt_dev)); if (rtdev) { - ret |= copy_from_user (devname, compat_ptr(rtdev), 15); + ret |= copy_from_user(devname, compat_ptr(rtdev), 15); r4.rt_dev = devname; devname[15] = 0; } else r4.rt_dev = NULL; @@ -2828,9 +2836,9 @@ static int routing_ioctl(struct net *net, struct socket *sock, goto out; } - set_fs (KERNEL_DS); + set_fs(KERNEL_DS); ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r); - set_fs (old_fs); + set_fs(old_fs); out: return ret; @@ -2993,11 +3001,13 @@ int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) { return sock->ops->bind(sock, addr, addrlen); } +EXPORT_SYMBOL(kernel_bind); int kernel_listen(struct socket *sock, int backlog) { return sock->ops->listen(sock, backlog); } +EXPORT_SYMBOL(kernel_listen); int kernel_accept(struct socket *sock, struct socket **newsock, int flags) { @@ -3022,24 +3032,28 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags) done: return 
err; } +EXPORT_SYMBOL(kernel_accept); int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, int flags) { return sock->ops->connect(sock, addr, addrlen, flags); } +EXPORT_SYMBOL(kernel_connect); int kernel_getsockname(struct socket *sock, struct sockaddr *addr, int *addrlen) { return sock->ops->getname(sock, addr, addrlen, 0); } +EXPORT_SYMBOL(kernel_getsockname); int kernel_getpeername(struct socket *sock, struct sockaddr *addr, int *addrlen) { return sock->ops->getname(sock, addr, addrlen, 1); } +EXPORT_SYMBOL(kernel_getpeername); int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval, int *optlen) @@ -3056,6 +3070,7 @@ int kernel_getsockopt(struct socket *sock, int level, int optname, set_fs(oldfs); return err; } +EXPORT_SYMBOL(kernel_getsockopt); int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval, unsigned int optlen) @@ -3072,6 +3087,7 @@ int kernel_setsockopt(struct socket *sock, int level, int optname, set_fs(oldfs); return err; } +EXPORT_SYMBOL(kernel_setsockopt); int kernel_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) @@ -3083,6 +3099,7 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset, return sock_no_sendpage(sock, page, offset, size, flags); } +EXPORT_SYMBOL(kernel_sendpage); int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) { @@ -3095,33 +3112,10 @@ int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) return err; } +EXPORT_SYMBOL(kernel_sock_ioctl); int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) { return sock->ops->shutdown(sock, how); } - -EXPORT_SYMBOL(sock_create); -EXPORT_SYMBOL(sock_create_kern); -EXPORT_SYMBOL(sock_create_lite); -EXPORT_SYMBOL(sock_map_fd); -EXPORT_SYMBOL(sock_recvmsg); -EXPORT_SYMBOL(sock_register); -EXPORT_SYMBOL(sock_release); -EXPORT_SYMBOL(sock_sendmsg); -EXPORT_SYMBOL(sock_unregister); -EXPORT_SYMBOL(sock_wake_async); -EXPORT_SYMBOL(sockfd_lookup); -EXPORT_SYMBOL(kernel_sendmsg); -EXPORT_SYMBOL(kernel_recvmsg); -EXPORT_SYMBOL(kernel_bind); -EXPORT_SYMBOL(kernel_listen); -EXPORT_SYMBOL(kernel_accept); -EXPORT_SYMBOL(kernel_connect); -EXPORT_SYMBOL(kernel_getsockname); -EXPORT_SYMBOL(kernel_getpeername); -EXPORT_SYMBOL(kernel_getsockopt); -EXPORT_SYMBOL(kernel_setsockopt); -EXPORT_SYMBOL(kernel_sendpage); -EXPORT_SYMBOL(kernel_sock_ioctl); EXPORT_SYMBOL(kernel_sock_shutdown); -- cgit v1.2.2 From e12f8e29a8526172b7715881503bae636d60bdd8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 4 Jun 2010 13:31:29 +0200 Subject: netfilter: vmalloc_node cleanup Using vmalloc_node(size, numa_node_id()) for temporary storage is not needed. vmalloc(size) is more respectful of user NUMA policy. Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/arp_tables.c | 7 +++---- net/ipv4/netfilter/ip_tables.c | 4 ++-- net/ipv6/netfilter/ip6_tables.c | 7 +++---- 3 files changed, 8 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 1ac01b128621..16c0ba0a2728 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -758,7 +758,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) * about). 
*/ countersize = sizeof(struct xt_counters) * private->number; - counters = vmalloc_node(countersize, numa_node_id()); + counters = vmalloc(countersize); if (counters == NULL) return ERR_PTR(-ENOMEM); @@ -1005,8 +1005,7 @@ static int __do_replace(struct net *net, const char *name, struct arpt_entry *iter; ret = 0; - counters = vmalloc_node(num_counters * sizeof(struct xt_counters), - numa_node_id()); + counters = vmalloc(num_counters * sizeof(struct xt_counters)); if (!counters) { ret = -ENOMEM; goto out; @@ -1159,7 +1158,7 @@ static int do_add_counters(struct net *net, const void __user *user, if (len != size + num_counters * sizeof(struct xt_counters)) return -EINVAL; - paddc = vmalloc_node(len - size, numa_node_id()); + paddc = vmalloc(len - size); if (!paddc) return -ENOMEM; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 63958f3394a5..7c0b8ad61f9d 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -928,7 +928,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) (other than comefrom, which userspace doesn't care about). */ countersize = sizeof(struct xt_counters) * private->number; - counters = vmalloc_node(countersize, numa_node_id()); + counters = vmalloc(countersize); if (counters == NULL) return ERR_PTR(-ENOMEM); @@ -1352,7 +1352,7 @@ do_add_counters(struct net *net, const void __user *user, if (len != size + num_counters * sizeof(struct xt_counters)) return -EINVAL; - paddc = vmalloc_node(len - size, numa_node_id()); + paddc = vmalloc(len - size); if (!paddc) return -ENOMEM; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 6f517bd83692..82945ef6c9fc 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -943,7 +943,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) (other than comefrom, which userspace doesn't care about). */ countersize = sizeof(struct xt_counters) * private->number; - counters = vmalloc_node(countersize, numa_node_id()); + counters = vmalloc(countersize); if (counters == NULL) return ERR_PTR(-ENOMEM); @@ -1213,8 +1213,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct ip6t_entry *iter; ret = 0; - counters = vmalloc_node(num_counters * sizeof(struct xt_counters), - numa_node_id()); + counters = vmalloc(num_counters * sizeof(struct xt_counters)); if (!counters) { ret = -ENOMEM; goto out; @@ -1368,7 +1367,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, if (len != size + num_counters * sizeof(struct xt_counters)) return -EINVAL; - paddc = vmalloc_node(len - size, numa_node_id()); + paddc = vmalloc(len - size); if (!paddc) return -ENOMEM; -- cgit v1.2.2 From 26b36cfefaf2be98b225e3c1a399edb0daf52ddd Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Fri, 4 Jun 2010 14:25:44 -0400 Subject: mac80211: make ARP filtering depend on CONFIG_INET Signed-off-by: John W. 
Linville --- net/mac80211/main.c | 6 ++++++ net/mac80211/mlme.c | 2 ++ 2 files changed, 8 insertions(+) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 045ead9507aa..5706156d1ecf 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -329,6 +329,7 @@ static void ieee80211_recalc_smps_work(struct work_struct *work) mutex_unlock(&local->iflist_mtx); } +#ifdef CONFIG_INET int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata) { struct in_device *idev; @@ -380,6 +381,7 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, return NOTIFY_DONE; } +#endif struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, const struct ieee80211_ops *ops) @@ -669,10 +671,12 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) goto fail_pm_qos; } +#ifdef CONFIG_INET local->ifa_notifier.notifier_call = ieee80211_ifa_changed; result = register_inetaddr_notifier(&local->ifa_notifier); if (result) goto fail_ifa; +#endif return 0; @@ -707,7 +711,9 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY, &local->network_latency_notifier); +#ifdef CONFIG_INET unregister_inetaddr_notifier(&local->ifa_notifier); +#endif rtnl_lock(); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 7e720133358c..3623bb7c4bf4 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2078,6 +2078,7 @@ static enum work_done_result ieee80211_assoc_done(struct ieee80211_work *wk, cfg80211_send_assoc_timeout(wk->sdata->dev, wk->filter_ta); return WORK_DONE_DESTROY; +#ifdef CONFIG_INET } else { mutex_unlock(&wk->sdata->u.mgd.mtx); @@ -2088,6 +2089,7 @@ static enum work_done_result ieee80211_assoc_done(struct ieee80211_work *wk, rtnl_lock(); ieee80211_set_arp_filter(wk->sdata); rtnl_unlock(); +#endif } } -- cgit v1.2.2 From 38a6cc7538d3c44b76f9dcea607a171adcc0208e Mon Sep 17 00:00:00 2001 From: Sujith Date: Wed, 19 May 2010 11:32:30 +0530 Subject: mac80211: Remove deprecated sta_notify commands STA_NOTIFY_ADD and STA_NOTIFY_REMOVE have no users anymore, and station addition/removal are indicated to drivers using sta_add() and sta_remove(), which can sleep. Signed-off-by: Sujith Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/driver-ops.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 978850ee3a5f..d1139e4f88a9 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -269,9 +269,6 @@ static inline int drv_sta_add(struct ieee80211_local *local, if (local->ops->sta_add) ret = local->ops->sta_add(&local->hw, &sdata->vif, sta); - else if (local->ops->sta_notify) - local->ops->sta_notify(&local->hw, &sdata->vif, - STA_NOTIFY_ADD, sta); trace_drv_sta_add(local, sdata, sta, ret); @@ -286,9 +283,6 @@ static inline void drv_sta_remove(struct ieee80211_local *local, if (local->ops->sta_remove) local->ops->sta_remove(&local->hw, &sdata->vif, sta); - else if (local->ops->sta_notify) - local->ops->sta_notify(&local->hw, &sdata->vif, - STA_NOTIFY_REMOVE, sta); trace_drv_sta_remove(local, sdata, sta); } -- cgit v1.2.2 From 8b9a4e6e442756f670ef507f09bbc6c11dc0fca6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 28 May 2010 15:22:58 +0200 Subject: mac80211: process station blockack action frames from work Processing an association response could take a bit of time while we set up the hardware etc. During that time, the AP might already send a blockack request. 
If this happens very quickly on a fairly slow machine, we can end up processing the blockack request before the association processing has finished. Since the blockack processing cannot sleep right now, we also cannot make it wait in the driver. As a result, sometimes on slow machines the iwlagn driver gets totally confused, and no traffic can pass when the aggregation setup was done before the assoc setup completed. I'm working on a proper fix for this, which involves queuing all blockack category action frames from a work struct, and also allowing the ampdu_action driver callback to sleep, which will generally clean up the code and make things easier. However, this is a very involved and complex change. To fix the problem at hand in a way that can also be backported to stable, I've come up with this patch. Here, I simply process all aggregation action frames from the managed interface skb queue, which means their processing will be serialized with processing the association response, thereby fixing the problem. Signed-off-by: Johannes Berg Cc: stable@kernel.org Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 52 +++++++++++++++++++++++++++++++++++++++++++++------- net/mac80211/rx.c | 3 +++ 2 files changed, 48 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0839c4e8fd2e..3310e70aa52f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1692,14 +1692,52 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, rma = ieee80211_rx_mgmt_disassoc(sdata, mgmt, skb->len); break; case IEEE80211_STYPE_ACTION: - if (mgmt->u.action.category != WLAN_CATEGORY_SPECTRUM_MGMT) + switch (mgmt->u.action.category) { + case WLAN_CATEGORY_BACK: { + struct ieee80211_local *local = sdata->local; + int len = skb->len; + struct sta_info *sta; + + rcu_read_lock(); + sta = sta_info_get(sdata, mgmt->sa); + if (!sta) { + rcu_read_unlock(); + break; + } + + local_bh_disable(); + + switch (mgmt->u.action.u.addba_req.action_code) { + case WLAN_ACTION_ADDBA_REQ: + if (len < (IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.addba_req))) + break; + ieee80211_process_addba_request(local, sta, mgmt, len); + break; + case WLAN_ACTION_ADDBA_RESP: + if (len < (IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.addba_resp))) + break; + ieee80211_process_addba_resp(local, sta, mgmt, len); + break; + case WLAN_ACTION_DELBA: + if (len < (IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.delba))) + break; + ieee80211_process_delba(sdata, sta, mgmt, len); + break; + } + local_bh_enable(); + rcu_read_unlock(); break; - - ieee80211_sta_process_chanswitch(sdata, - &mgmt->u.action.u.chan_switch.sw_elem, - (void *)ifmgd->associated->priv, - rx_status->mactime); - break; + } + case WLAN_CATEGORY_SPECTRUM_MGMT: + ieee80211_sta_process_chanswitch(sdata, + &mgmt->u.action.u.chan_switch.sw_elem, + (void *)ifmgd->associated->priv, + rx_status->mactime); + break; + } } mutex_unlock(&ifmgd->mtx); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 5e0b65406c44..be9abc2e6348 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1944,6 +1944,9 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) if (len < IEEE80211_MIN_ACTION_SIZE + 1) break; + if (sdata->vif.type == NL80211_IFTYPE_STATION) + return ieee80211_sta_rx_mgmt(sdata, rx->skb); + switch (mgmt->u.action.u.addba_req.action_code) { case WLAN_ACTION_ADDBA_REQ: if (len < (IEEE80211_MIN_ACTION_SIZE + -- cgit v1.2.2 From 8764ab2ca7ab5055e1ca80f9cfa4970c34acb804 Mon Sep 
17 00:00:00 2001 From: Steffen Klassert Date: Fri, 4 Jun 2010 01:57:38 +0000 Subject: net: check for refcount if pop a stacked dst_entry xfrm triggers a warning if dst_pop() drops a refcount on a noref dst. This patch changes dst_pop() to skb_dst_pop(). skb_dst_pop() drops the refcnt only on a refcounted dst. Also we don't clone the child dst_entry, so it is not refcounted and we can use skb_dst_set_noref() in xfrm_output_one(). Signed-off-by: Steffen Klassert Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/xfrm/xfrm_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 6a329158bdfa..a3cca0a94346 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -95,13 +95,13 @@ resume: goto error_nolock; } - dst = dst_pop(dst); + dst = skb_dst_pop(skb); if (!dst) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); err = -EHOSTUNREACH; goto error_nolock; } - skb_dst_set(skb, dst); + skb_dst_set_noref(skb, dst); x = dst->xfrm; } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); -- cgit v1.2.2 From 57f1553ee5d9f093660cc49098f494e17ed11668 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 3 Jun 2010 00:42:30 +0000 Subject: syncookies: remove Kconfig text line about disabled-by-default syncookies default to on since e994b7c901ded7200b525a707c6da71f2cf6d4bb (tcp: Don't make syn cookies initial setting depend on CONFIG_SYSCTL). Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/Kconfig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 8e3a1fd938ab..7c3a7d191249 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -303,7 +303,7 @@ config ARPD If unsure, say N. config SYN_COOKIES - bool "IP: TCP syncookie support (disabled per default)" + bool "IP: TCP syncookie support" ---help--- Normal TCP/IP networking is open to an attack known as "SYN flooding". This denial-of-service attack prevents legitimate remote @@ -328,13 +328,13 @@ config SYN_COOKIES server is really overloaded. If this happens frequently better turn them off. - If you say Y here, note that SYN cookies aren't enabled by default; - you can enable them by saying Y to "/proc file system support" and + If you say Y here, you can disable SYN cookies at run time by + saying Y to "/proc file system support" and "Sysctl support" below and executing the command - echo 1 >/proc/sys/net/ipv4/tcp_syncookies + echo 0 > /proc/sys/net/ipv4/tcp_syncookies - at boot time after the /proc file system has been mounted. + after the /proc file system has been mounted. If unsure, say N. -- cgit v1.2.2 From ca55158c6ecb7832a6ad80ac44a14d23bab8cdfc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Jun 2010 09:03:58 +0000 Subject: rps: tcp: fix rps_sock_flow_table table updates I believe a moderate SYN flood attack can corrupt RFS flow table (rps_sock_flow_table), making RPS/RFS much less effective. Even in a normal situation, server handling short lived sessions suffer from bad steering for the first data packet of a session, if another SYN packet is received for another session. We do following action in tcp_v4_rcv() : sock_rps_save_rxhash(sk, skb->rxhash); We should _not_ do this if sk is a LISTEN socket, as about each packet received on a LISTEN socket has a different rxhash than previous one. -> RPS_NO_CPU markers are spread all over rps_sock_flow_table. 
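[Illustrative sketch, not the literal hunk: in tcp_v4_do_rcv(), which runs with the socket lock held, the change below amounts to recording the flow hash only for non-listening sockets,

	/* sketch of the idea behind the patch: skip listeners entirely */
	if (sk->sk_state != TCP_LISTEN)
		sock_rps_save_rxhash(sk, skb->rxhash);

so a SYN flood against a listener can no longer scribble RPS_NO_CPU entries all over rps_sock_flow_table.]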
Also, it makes sense to protect sk->rxhash field changes with socket lock (We currently can change it even if user thread owns the lock and might use rxhash) This patch moves sock_rps_save_rxhash() to a sock locked section, and only for non LISTEN sockets. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 202cf09c4cd4..fe193e53af44 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1555,6 +1555,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) #endif if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ + sock_rps_save_rxhash(sk, skb->rxhash); TCP_CHECK_TIMER(sk); if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; @@ -1579,7 +1580,9 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) } return 0; } - } + } else + sock_rps_save_rxhash(sk, skb->rxhash); + TCP_CHECK_TIMER(sk); if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { @@ -1672,8 +1675,6 @@ process: skb->dev = NULL; - sock_rps_save_rxhash(sk, skb->rxhash); - bh_lock_sock_nested(sk); ret = 0; if (!sock_owned_by_user(sk)) { -- cgit v1.2.2 From c44649216522cd607a4027d2ebf4a8147d3fa94c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Jun 2010 05:45:47 +0000 Subject: tcp: use correct net ns in cookie_v4_check() Its better to make a route lookup in appropriate namespace. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/syncookies.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 5c24db4a3c91..9f6b22206c52 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -347,7 +347,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, { .sport = th->dest, .dport = th->source } } }; security_req_classify_flow(req, &fl); - if (ip_route_output_key(&init_net, &rt, &fl)) { + if (ip_route_output_key(sock_net(sk), &rt, &fl)) { reqsk_free(req); goto out; } -- cgit v1.2.2 From 2a1d4bd46047efff513600d7ff422bc344f540a6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 3 Jun 2010 00:43:12 +0000 Subject: syncookies: make v4/v6 synflood warning behaviour the same both syn_flood_warning functions print a message, but ipv4 version only prints a warning if CONFIG_SYN_COOKIES=y. Make the v4 one behave like the v6 one. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 202cf09c4cd4..a13f881e5037 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -793,19 +793,20 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) kfree(inet_rsk(req)->opt); } -#ifdef CONFIG_SYN_COOKIES -static void syn_flood_warning(struct sk_buff *skb) +static void syn_flood_warning(const struct sk_buff *skb) { - static unsigned long warntime; + const char *msg; - if (time_after(jiffies, (warntime + HZ * 60))) { - warntime = jiffies; - printk(KERN_INFO - "possible SYN flooding on port %d. Sending cookies.\n", - ntohs(tcp_hdr(skb)->dest)); - } -} +#ifdef CONFIG_SYN_COOKIES + if (sysctl_tcp_syncookies) + msg = "Sending cookies"; + else #endif + msg = "Dropping request"; + + pr_info("TCP: Possible SYN flooding on port %d. 
%s.\n", + ntohs(tcp_hdr(skb)->dest), msg); +} /* * Save and compile IPv4 options into the request_sock if needed. @@ -1243,6 +1244,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * evidently real one. */ if (inet_csk_reqsk_queue_is_full(sk) && !isn) { + if (net_ratelimit()) + syn_flood_warning(skb); #ifdef CONFIG_SYN_COOKIES if (sysctl_tcp_syncookies) { want_cookie = 1; @@ -1328,7 +1331,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (want_cookie) { #ifdef CONFIG_SYN_COOKIES - syn_flood_warning(skb); req->cookie_ts = tmp_opt.tstamp_ok; #endif isn = cookie_v4_init_sequence(sk, skb, &req->mss); -- cgit v1.2.2 From af9b4738574b46025de7ccbe75c7b24fd8914379 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 3 Jun 2010 00:43:44 +0000 Subject: syncookies: avoid unneeded tcp header flag double check caller: if (!th->rst && !th->syn && th->ack) callee: if (!th->ack) make the caller only check for !syn (common for 3whs), and move the !rst / ack test to the callee. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/syncookies.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/syncookies.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 5c24db4a3c91..c9dac8615958 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -266,7 +266,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct rtable *rt; __u8 rcv_wscale; - if (!sysctl_tcp_syncookies || !th->ack) + if (!sysctl_tcp_syncookies || !th->ack || th->rst) goto out; if (tcp_synq_no_recent_overflow(sk) || diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a13f881e5037..6558dfd899da 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1506,7 +1506,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) } #ifdef CONFIG_SYN_COOKIES - if (!th->rst && !th->syn && th->ack) + if (!th->syn) sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt)); #endif return sk; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 12383705dbad..9fcb3ec7c838 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -174,7 +174,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) struct dst_entry *dst; __u8 rcv_wscale; - if (!sysctl_tcp_syncookies || !th->ack) + if (!sysctl_tcp_syncookies || !th->ack || th->rst) goto out; if (tcp_synq_no_recent_overflow(sk) || diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e487080d02db..5887141ad641 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1157,7 +1157,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) } #ifdef CONFIG_SYN_COOKIES - if (!th->rst && !th->syn && th->ack) + if (!th->syn) sk = cookie_v6_check(sk, skb); #endif return sk; -- cgit v1.2.2 From 5918e2fb9035b35164002c3a268feaf70b7c04d5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 3 Jun 2010 00:43:57 +0000 Subject: syncookies: update mss tables - ipv6 msstab: account for ipv6 header size - ipv4 msstab: add mss for Jumbograms. Signed-off-by: Florian Westphal Signed-off-by: David S. 
Miller --- net/ipv4/syncookies.c | 38 +++++++++++++++++++------------------- net/ipv6/syncookies.c | 39 +++++++++++++++------------------------ 2 files changed, 34 insertions(+), 43 deletions(-) (limited to 'net') diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index c9dac8615958..a7cbcc4b726b 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -138,23 +138,23 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, __be32 saddr, __be32 daddr, } /* - * This table has to be sorted and terminated with (__u16)-1. - * XXX generate a better table. - * Unresolved Issues: HIPPI with a 64k MSS is not well supported. + * MSS Values are taken from the 2009 paper + * 'Measuring TCP Maximum Segment Size' by S. Alcock and R. Nelson: + * - values 1440 to 1460 accounted for 80% of observed mss values + * - values outside the 536-1460 range are rare (<0.2%). + * + * Table must be sorted. */ static __u16 const msstab[] = { - 64 - 1, - 256 - 1, - 512 - 1, - 536 - 1, - 1024 - 1, - 1440 - 1, - 1460 - 1, - 4312 - 1, - (__u16)-1 + 64, + 512, + 536, + 1024, + 1440, + 1460, + 4312, + 8960, }; -/* The number doesn't include the -1 terminator */ -#define NUM_MSS (ARRAY_SIZE(msstab) - 1) /* * Generate a syncookie. mssp points to the mss, which is returned @@ -169,10 +169,10 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) tcp_synq_overflow(sk); - /* XXX sort msstab[] by probability? Binary search? */ - for (mssind = 0; mss > msstab[mssind + 1]; mssind++) - ; - *mssp = msstab[mssind] + 1; + for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--) + if (mss >= msstab[mssind]) + break; + *mssp = msstab[mssind]; NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); @@ -202,7 +202,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie) jiffies / (HZ * 60), COUNTER_TRIES); - return mssind < NUM_MSS ? msstab[mssind] + 1 : 0; + return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0; } static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 9fcb3ec7c838..70d330f8c990 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -27,28 +27,17 @@ extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS]; #define COOKIEBITS 24 /* Upper bits store count */ #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) -/* - * This table has to be sorted and terminated with (__u16)-1. - * XXX generate a better table. - * Unresolved Issues: HIPPI with a 64k MSS is not well supported. - * - * Taken directly from ipv4 implementation. - * Should this list be modified for ipv6 use or is it close enough? - * rfc 2460 8.3 suggests mss values 20 bytes less than ipv4 counterpart - */ +/* Table must be sorted. */ static __u16 const msstab[] = { - 64 - 1, - 256 - 1, - 512 - 1, - 536 - 1, - 1024 - 1, - 1440 - 1, - 1460 - 1, - 4312 - 1, - (__u16)-1 + 64, + 512, + 536, + 1280 - 60, + 1480 - 60, + 1500 - 60, + 4460 - 60, + 9000 - 60, }; -/* The number doesn't include the -1 terminator */ -#define NUM_MSS (ARRAY_SIZE(msstab) - 1) /* * This (misnamed) value is the age of syncookie which is permitted. 
@@ -134,9 +123,11 @@ __u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) tcp_synq_overflow(sk); - for (mssind = 0; mss > msstab[mssind + 1]; mssind++) - ; - *mssp = msstab[mssind] + 1; + for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--) + if (mss >= msstab[mssind]) + break; + + *mssp = msstab[mssind]; NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); @@ -154,7 +145,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie) th->source, th->dest, seq, jiffies / (HZ * 60), COUNTER_TRIES); - return mssind < NUM_MSS ? msstab[mssind] + 1 : 0; + return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0; } struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) -- cgit v1.2.2 From b78462ebc6a4ef9074aa80abebcdd470dc5f0ce0 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 2 Jun 2010 12:24:37 +0000 Subject: skbuff: add check for non-linear to warn_if_lro and needs_linearize We can avoid an unecessary cache miss by checking if the skb is non-linear before accessing gso_size/gso_type in skb_warn_if_lro, the same can also be done to avoid a cache miss on nr_frags if data_len is 0. Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- net/core/dev.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index ec01a5998d70..3abb3a6058be 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2103,9 +2103,10 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, static inline int skb_needs_linearize(struct sk_buff *skb, struct net_device *dev) { - return (skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) || - (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || - illegal_highdma(dev, skb))); + return skb_is_nonlinear(skb) && + ((skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) || + (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || + illegal_highdma(dev, skb)))); } /** -- cgit v1.2.2 From 72e09ad107e78d69ff4d3b97a69f0aad2b77280f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 5 Jun 2010 03:03:30 -0700 Subject: ipv6: avoid high order allocations With mtu=9000, mld_newpack() use order-2 GFP_ATOMIC allocations, that are very unreliable, on machines where PAGE_SIZE=4K Limit allocated skbs to be at most one page. (order-0 allocations) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/mcast.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 59f1881968c7..ab1622d7d409 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1356,7 +1356,10 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) IPV6_TLV_PADN, 0 }; /* we assume size > sizeof(ra) here */ - skb = sock_alloc_send_skb(sk, size + LL_ALLOCATED_SPACE(dev), 1, &err); + size += LL_ALLOCATED_SPACE(dev); + /* limit our allocations to order-0 page */ + size = min_t(int, size, SKB_MAX_ORDER(0, 0)); + skb = sock_alloc_send_skb(sk, size, 1, &err); if (!skb) return NULL; -- cgit v1.2.2 From 8ffb335e8d696affc04f963bf73ce2196f80edb9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 6 Jun 2010 15:34:40 -0700 Subject: ip6mr: fix a typo in ip6mr_for_each_table() Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv6/ip6mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 073071f2b75b..89c0b077c7aa 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -120,7 +120,7 @@ static void mroute_clean_tables(struct mr6_table *mrt); static void ipmr_expire_process(unsigned long arg); #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES -#define ip6mr_for_each_table(mrt, met) \ +#define ip6mr_for_each_table(mrt, net) \ list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list) static struct mr6_table *ip6mr_get_table(struct net *net, u32 id) -- cgit v1.2.2 From a8b690f98baf9fb1902b8eeab801351ea603fa3a Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 7 Jun 2010 00:43:42 -0700 Subject: tcp: Fix slowness in read /proc/net/tcp This patch address a serious performance issue in reading the TCP sockets table (/proc/net/tcp). Reading the full table is done by a number of sequential read operations. At each read operation, a seek is done to find the last socket that was previously read. This seek operation requires that the sockets in the table need to be counted up to the current file position, and to count each of these requires taking a lock for each non-empty bucket. The whole algorithm is O(n^2). The fix is to cache the last bucket value, offset within the bucket, and the file position returned by the last read operation. On the next sequential read, the bucket and offset are used to find the last read socket immediately without needing ot scan the previous buckets the table. This algorithm t read the whole table is O(n). The improvement offered by this patch is easily show by performing cat'ing /proc/net/tcp on a machine with a lot of connections. With about 182K connections in the table, I see the following: - Without patch time cat /proc/net/tcp > /dev/null real 1m56.729s user 0m0.214s sys 1m56.344s - With patch time cat /proc/net/tcp > /dev/null real 0m0.894s user 0m0.290s sys 0m0.594s Signed-off-by: Tom Herbert Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 84 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index acdc4c989853..7f976af27bf0 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1980,6 +1980,11 @@ static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; } +/* + * Get next listener socket follow cur. If cur is NULL, get first socket + * starting from bucket given in st->bucket; when st->bucket is zero the + * very first socket in the hash table is returned. 
+ */ static void *listening_get_next(struct seq_file *seq, void *cur) { struct inet_connection_sock *icsk; @@ -1990,14 +1995,15 @@ static void *listening_get_next(struct seq_file *seq, void *cur) struct net *net = seq_file_net(seq); if (!sk) { - st->bucket = 0; - ilb = &tcp_hashinfo.listening_hash[0]; + ilb = &tcp_hashinfo.listening_hash[st->bucket]; spin_lock_bh(&ilb->lock); sk = sk_nulls_head(&ilb->head); + st->offset = 0; goto get_sk; } ilb = &tcp_hashinfo.listening_hash[st->bucket]; ++st->num; + ++st->offset; if (st->state == TCP_SEQ_STATE_OPENREQ) { struct request_sock *req = cur; @@ -2012,6 +2018,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) } req = req->dl_next; } + st->offset = 0; if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) break; get_req: @@ -2047,6 +2054,7 @@ start_req: read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } spin_unlock_bh(&ilb->lock); + st->offset = 0; if (++st->bucket < INET_LHTABLE_SIZE) { ilb = &tcp_hashinfo.listening_hash[st->bucket]; spin_lock_bh(&ilb->lock); @@ -2060,7 +2068,12 @@ out: static void *listening_get_idx(struct seq_file *seq, loff_t *pos) { - void *rc = listening_get_next(seq, NULL); + struct tcp_iter_state *st = seq->private; + void *rc; + + st->bucket = 0; + st->offset = 0; + rc = listening_get_next(seq, NULL); while (rc && *pos) { rc = listening_get_next(seq, rc); @@ -2075,13 +2088,18 @@ static inline int empty_bucket(struct tcp_iter_state *st) hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain); } +/* + * Get first established socket starting from bucket given in st->bucket. + * If st->bucket is zero, the very first socket in the hash is returned. + */ static void *established_get_first(struct seq_file *seq) { struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); void *rc = NULL; - for (st->bucket = 0; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { + st->offset = 0; + for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { struct sock *sk; struct hlist_nulls_node *node; struct inet_timewait_sock *tw; @@ -2126,6 +2144,7 @@ static void *established_get_next(struct seq_file *seq, void *cur) struct net *net = seq_file_net(seq); ++st->num; + ++st->offset; if (st->state == TCP_SEQ_STATE_TIME_WAIT) { tw = cur; @@ -2142,6 +2161,7 @@ get_tw: st->state = TCP_SEQ_STATE_ESTABLISHED; /* Look for next non empty bucket */ + st->offset = 0; while (++st->bucket <= tcp_hashinfo.ehash_mask && empty_bucket(st)) ; @@ -2169,7 +2189,11 @@ out: static void *established_get_idx(struct seq_file *seq, loff_t pos) { - void *rc = established_get_first(seq); + struct tcp_iter_state *st = seq->private; + void *rc; + + st->bucket = 0; + rc = established_get_first(seq); while (rc && pos) { rc = established_get_next(seq, rc); @@ -2194,24 +2218,72 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos) return rc; } +static void *tcp_seek_last_pos(struct seq_file *seq) +{ + struct tcp_iter_state *st = seq->private; + int offset = st->offset; + int orig_num = st->num; + void *rc = NULL; + + switch (st->state) { + case TCP_SEQ_STATE_OPENREQ: + case TCP_SEQ_STATE_LISTENING: + if (st->bucket >= INET_LHTABLE_SIZE) + break; + st->state = TCP_SEQ_STATE_LISTENING; + rc = listening_get_next(seq, NULL); + while (offset-- && rc) + rc = listening_get_next(seq, rc); + if (rc) + break; + st->bucket = 0; + /* Fallthrough */ + case TCP_SEQ_STATE_ESTABLISHED: + case TCP_SEQ_STATE_TIME_WAIT: + st->state = TCP_SEQ_STATE_ESTABLISHED; + if (st->bucket > tcp_hashinfo.ehash_mask) + break; + 
rc = established_get_first(seq); + while (offset-- && rc) + rc = established_get_next(seq, rc); + } + + st->num = orig_num; + + return rc; +} + static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) { struct tcp_iter_state *st = seq->private; + void *rc; + + if (*pos && *pos == st->last_pos) { + rc = tcp_seek_last_pos(seq); + if (rc) + goto out; + } + st->state = TCP_SEQ_STATE_LISTENING; st->num = 0; - return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; + st->bucket = 0; + st->offset = 0; + rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; + +out: + st->last_pos = *pos; + return rc; } static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct tcp_iter_state *st = seq->private; void *rc = NULL; - struct tcp_iter_state *st; if (v == SEQ_START_TOKEN) { rc = tcp_get_idx(seq, 0); goto out; } - st = seq->private; switch (st->state) { case TCP_SEQ_STATE_OPENREQ: @@ -2219,6 +2291,8 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) rc = listening_get_next(seq, v); if (!rc) { st->state = TCP_SEQ_STATE_ESTABLISHED; + st->bucket = 0; + st->offset = 0; rc = established_get_first(seq); } break; @@ -2229,6 +2303,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) } out: ++*pos; + st->last_pos = *pos; return rc; } @@ -2267,6 +2342,7 @@ static int tcp_seq_open(struct inode *inode, struct file *file) s = ((struct seq_file *)file->private_data)->private; s->family = afinfo->family; + s->last_pos = 0; return 0; } -- cgit v1.2.2 From fe33147a58e7d1d3086bf823aabfd491d843be82 Mon Sep 17 00:00:00 2001 From: Alex Lorca Date: Mon, 7 Jun 2010 01:01:22 -0700 Subject: net-caif: Added missing lock validator constants CAIF is using "xxx-AF_MAX" strings for the lock validator. It should use its own strings. Signed-off-by: Alex Lorca Signed-off-by: David S. 
Miller --- net/core/sock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 2cf7f9f7e775..f9ce0db41cd6 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -156,7 +156,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = { "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , - "sk_lock-AF_IEEE802154", + "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_MAX" }; static const char *const af_family_slock_key_strings[AF_MAX+1] = { @@ -172,7 +172,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = { "slock-27" , "slock-28" , "slock-AF_CAN" , "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , - "slock-AF_IEEE802154", + "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_MAX" }; static const char *const af_family_clock_key_strings[AF_MAX+1] = { @@ -188,7 +188,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = { "clock-27" , "clock-28" , "clock-AF_CAN" , "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , - "clock-AF_IEEE802154", + "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_MAX" }; -- cgit v1.2.2 From 1789a640f55658d9a54c1868cc3405e4d85dbd8e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Jun 2010 22:23:57 +0000 Subject: raw: avoid two atomics in xmit Avoid two atomic ops per raw_send_hdrinc() call Avoid two atomic ops per raw6_send_hdrinc() call Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/raw.c | 8 +++++--- net/ipv6/raw.c | 12 +++++++----- 2 files changed, 12 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 2c7a1639388a..66cc3befcd44 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -314,7 +314,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) } static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, - struct rtable *rt, + struct rtable **rtp, unsigned int flags) { struct inet_sock *inet = inet_sk(sk); @@ -323,6 +323,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, struct sk_buff *skb; unsigned int iphlen; int err; + struct rtable *rt = *rtp; if (length > rt->u.dst.dev->mtu) { ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, @@ -341,7 +342,8 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb_dst_set(skb, dst_clone(&rt->u.dst)); + skb_dst_set(skb, &rt->u.dst); + *rtp = NULL; skb_reset_network_header(skb); iph = ip_hdr(skb); @@ -576,7 +578,7 @@ back_from_confirm: if (inet->hdrincl) err = raw_send_hdrinc(sk, msg->msg_iov, len, - rt, msg->msg_flags); + &rt, msg->msg_flags); else { if (!ipc.addr) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 864eb8e03b1b..968b96490729 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -602,13 +602,14 @@ out: } static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, - struct flowi *fl, struct rt6_info *rt, + struct flowi *fl, struct dst_entry **dstp, unsigned int flags) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6hdr *iph; struct sk_buff *skb; int err; + struct rt6_info *rt = (struct rt6_info *)*dstp; if (length > rt->u.dst.dev->mtu) { ipv6_local_error(sk, EMSGSIZE, fl, rt->u.dst.dev->mtu); @@ -626,7 +627,8 @@ static int 
rawv6_send_hdrinc(struct sock *sk, void *from, int length, skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb_dst_set(skb, dst_clone(&rt->u.dst)); + skb_dst_set(skb, &rt->u.dst); + *dstp = NULL; skb_put(skb, length); skb_reset_network_header(skb); @@ -886,9 +888,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, goto do_confirm; back_from_confirm: - if (inet->hdrincl) { - err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, (struct rt6_info*)dst, msg->msg_flags); - } else { + if (inet->hdrincl) + err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, &dst, msg->msg_flags); + else { lock_sock(sk); err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst, -- cgit v1.2.2 From f2a03367c072150c881fa23ce3d3f76b8236018f Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Fri, 4 Jun 2010 01:33:33 +0000 Subject: htb: remove two unnecessary assignments remove two unnecessary assignments we don't need to assign NULL when initialize structure objects. Signed-off-by: Changli Gao ---- net/sched/sch_htb.c | 2 -- 1 file changed, 2 deletions(-) Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 0b52b8de562c..4be8d04b262d 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1550,7 +1550,6 @@ static const struct Qdisc_class_ops htb_class_ops = { }; static struct Qdisc_ops htb_qdisc_ops __read_mostly = { - .next = NULL, .cl_ops = &htb_class_ops, .id = "htb", .priv_size = sizeof(struct htb_sched), @@ -1561,7 +1560,6 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = { .init = htb_init, .reset = htb_reset, .destroy = htb_destroy, - .change = NULL /* htb_change */, .dump = htb_dump, .owner = THIS_MODULE, }; -- cgit v1.2.2 From 9dacaf17a60101a55d456cc7b00e269d8145aa0d Mon Sep 17 00:00:00 2001 From: jamal Date: Fri, 4 Jun 2010 02:43:06 +0000 Subject: net sched: make pedit check for clones instead Now that the core path doesnt set OK to munge we detect writable skbs by looking to see if they are cloned. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_pedit.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 50e3d945e1f4..a0593c9640db 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -127,8 +127,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, int i, munged = 0; unsigned int off; - if (!(skb->tc_verd & TC_OK2MUNGE)) { - /* should we set skb->cloned? */ + if (skb_cloned(skb)) { if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { return p->tcf_action; } -- cgit v1.2.2 From 271c1dfa61bc90a57648ff96f3eb92d4b4d4f11e Mon Sep 17 00:00:00 2001 From: jamal Date: Fri, 4 Jun 2010 02:06:22 +0000 Subject: net: Remove unnecessary net action assertion The extra assertion to allow packet munging only when there are no other ptypes listening which may have worked around an old bug is unnecessary. It is sufficient to check if the skb is cloned before trampling on it. Thanks to Herbert Xu for being persistent and patient in getting this across. [Note that cloning checks and assertions are the general rule used by tc actions (documentation/networking/tc-actions-env-rules.txt)]. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- net/core/dev.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index b65347c2cf2a..c8d127718ff1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2663,9 +2663,6 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, if (*pt_prev) { *ret = deliver_skb(skb, *pt_prev, orig_dev); *pt_prev = NULL; - } else { - /* Huh? Why does turning on AF_PACKET affect this? */ - skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); } switch (ing_filter(skb)) { -- cgit v1.2.2 From 035320d54758e21227987e3aae0d46e7a04f4ddc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 6 Jun 2010 23:48:40 +0000 Subject: ipmr: dont corrupt lists ipmr_rules_exit() and ip6mr_rules_exit() free a list of items, but forget to properly remove these items from list. List head is not changed and still points to freed memory. This can trigger a fault later when icmpv6_sk_exit() is called. Fix is to either reinit list, or use list_del() to properly remove items from list before freeing them. bugzilla report : https://bugzilla.kernel.org/show_bug.cgi?id=16120 Introduced by commit d1db275dd3f6e4 (ipv6: ip6mr: support multiple tables) and commit f0ad0860d01e (ipv4: ipmr: support multiple tables) Reported-by: Alex Zhavnerchik Signed-off-by: Eric Dumazet CC: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 4 +++- net/ipv6/ip6mr.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 856123fe32f9..757f25eb9b4b 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -267,8 +267,10 @@ static void __net_exit ipmr_rules_exit(struct net *net) { struct mr_table *mrt, *next; - list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) + list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { + list_del(&mrt->list); kfree(mrt); + } fib_rules_unregister(net->ipv4.mr_rules_ops); } #else diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 89c0b077c7aa..66078dad7fe8 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -254,8 +254,10 @@ static void __net_exit ip6mr_rules_exit(struct net *net) { struct mr6_table *mrt, *next; - list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) + list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) { + list_del(&mrt->list); ip6mr_free_table(mrt); + } fib_rules_unregister(net->ipv6.mr6_rules_ops); } #else -- cgit v1.2.2 From 35dd0509b21e4b5bab36b9eb80c8dab0322f5007 Mon Sep 17 00:00:00 2001 From: Holger Schurig Date: Mon, 7 Jun 2010 16:33:49 +0200 Subject: mac80211: fix function pointer check This makes "iw wlan0 dump survey" work again with mac80211-based drivers that support it, e.g. ath5k. Signed-off-by: Holger Schurig Signed-off-by: John W. Linville --- net/mac80211/driver-ops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 4f2271316650..9c1da0809160 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -349,7 +349,7 @@ static inline int drv_get_survey(struct ieee80211_local *local, int idx, struct survey_info *survey) { int ret = -EOPNOTSUPP; - if (local->ops->conf_tx) + if (local->ops->get_survey) ret = local->ops->get_survey(&local->hw, idx, survey); /* trace_drv_get_survey(local, idx, survey, ret); */ return ret; -- cgit v1.2.2 From 11b7c60988e5fbabb4e150612931cc068559af16 Mon Sep 17 00:00:00 2001 From: "John W. 
Linville" Date: Mon, 7 Jun 2010 15:02:17 -0400 Subject: mac80211: fix lock leak w/ ARP filtering and w/o CONFIG_INET "mac80211: make ARP filtering depend on CONFIG_INET" introduced this potential locking leak. Reported-by: Juuso Oikarinen Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 3623bb7c4bf4..9420cf14fae4 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2078,10 +2078,9 @@ static enum work_done_result ieee80211_assoc_done(struct ieee80211_work *wk, cfg80211_send_assoc_timeout(wk->sdata->dev, wk->filter_ta); return WORK_DONE_DESTROY; -#ifdef CONFIG_INET } else { mutex_unlock(&wk->sdata->u.mgd.mtx); - +#ifdef CONFIG_INET /* * configure ARP filter IP addresses to the driver, * intentionally outside the mgd mutex. -- cgit v1.2.2 From 66018506e15bea62de4eefc3298f170b4bfcf5ef Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Jun 2010 03:12:08 +0000 Subject: ip: Router Alert RCU conversion Straightforward conversion to RCU. One rwlock becomes a spinlock, and is static. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_input.c | 11 +++-------- net/ipv4/ip_sockglue.c | 23 ++++++++++++++--------- 2 files changed, 17 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index d52c9da644cf..d274078b1665 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -146,7 +146,7 @@ #include /* - * Process Router Attention IP option + * Process Router Attention IP option (RFC 2113) */ int ip_call_ra_chain(struct sk_buff *skb) { @@ -155,8 +155,7 @@ int ip_call_ra_chain(struct sk_buff *skb) struct sock *last = NULL; struct net_device *dev = skb->dev; - read_lock(&ip_ra_lock); - for (ra = ip_ra_chain; ra; ra = ra->next) { + for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) { struct sock *sk = ra->sk; /* If socket is bound to an interface, only report @@ -167,10 +166,8 @@ int ip_call_ra_chain(struct sk_buff *skb) sk->sk_bound_dev_if == dev->ifindex) && net_eq(sock_net(sk), dev_net(dev))) { if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) { - read_unlock(&ip_ra_lock); + if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) return 1; - } } if (last) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); @@ -183,10 +180,8 @@ int ip_call_ra_chain(struct sk_buff *skb) if (last) { raw_rcv(last, skb); - read_unlock(&ip_ra_lock); return 1; } - read_unlock(&ip_ra_lock); return 0; } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index ce231780a2b1..08b9519a24f4 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -239,7 +239,12 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc) sent to multicast group to reach destination designated router. */ struct ip_ra_chain *ip_ra_chain; -DEFINE_RWLOCK(ip_ra_lock); +static DEFINE_SPINLOCK(ip_ra_lock); + +static void ip_ra_free_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct ip_ra_chain, rcu)); +} int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct sock *)) @@ -251,35 +256,35 @@ int ip_ra_control(struct sock *sk, unsigned char on, new_ra = on ? 
kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; - write_lock_bh(&ip_ra_lock); + spin_lock_bh(&ip_ra_lock); for (rap = &ip_ra_chain; (ra = *rap) != NULL; rap = &ra->next) { if (ra->sk == sk) { if (on) { - write_unlock_bh(&ip_ra_lock); + spin_unlock_bh(&ip_ra_lock); kfree(new_ra); return -EADDRINUSE; } - *rap = ra->next; - write_unlock_bh(&ip_ra_lock); + rcu_assign_pointer(*rap, ra->next); + spin_unlock_bh(&ip_ra_lock); if (ra->destructor) ra->destructor(sk); sock_put(sk); - kfree(ra); + call_rcu(&ra->rcu, ip_ra_free_rcu); return 0; } } if (new_ra == NULL) { - write_unlock_bh(&ip_ra_lock); + spin_unlock_bh(&ip_ra_lock); return -ENOBUFS; } new_ra->sk = sk; new_ra->destructor = destructor; new_ra->next = ra; - *rap = new_ra; + rcu_assign_pointer(*rap, new_ra); sock_hold(sk); - write_unlock_bh(&ip_ra_lock); + spin_unlock_bh(&ip_ra_lock); return 0; } -- cgit v1.2.2 From 9a57a9d291980302b4a3184fbc47dbddac71903e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Jun 2010 03:17:10 +0000 Subject: igmp: avoid two atomic ops in igmp_rcv() in_dev_get() -> __in_dev_get_rcu() in a rcu protected function. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 250cb5e1af48..3294f547c481 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -916,18 +916,19 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, read_unlock(&in_dev->mc_list_lock); } +/* called in rcu_read_lock() section */ int igmp_rcv(struct sk_buff *skb) { /* This basically follows the spec line by line -- see RFC1112 */ struct igmphdr *ih; - struct in_device *in_dev = in_dev_get(skb->dev); + struct in_device *in_dev = __in_dev_get_rcu(skb->dev); int len = skb->len; if (in_dev == NULL) goto drop; if (!pskb_may_pull(skb, sizeof(struct igmphdr))) - goto drop_ref; + goto drop; switch (skb->ip_summed) { case CHECKSUM_COMPLETE: @@ -937,7 +938,7 @@ int igmp_rcv(struct sk_buff *skb) case CHECKSUM_NONE: skb->csum = 0; if (__skb_checksum_complete(skb)) - goto drop_ref; + goto drop; } ih = igmp_hdr(skb); @@ -957,7 +958,6 @@ int igmp_rcv(struct sk_buff *skb) break; case IGMP_PIM: #ifdef CONFIG_IP_PIMSM_V1 - in_dev_put(in_dev); return pim_rcv_v1(skb); #endif case IGMPV3_HOST_MEMBERSHIP_REPORT: @@ -971,8 +971,6 @@ int igmp_rcv(struct sk_buff *skb) break; } -drop_ref: - in_dev_put(in_dev); drop: kfree_skb(skb); return 0; -- cgit v1.2.2 From ed7865a47d4759e85bbd7bed44bee411d868eaad Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Jun 2010 21:49:44 -0700 Subject: ipv4: avoid two atomic ops in ip_rt_redirect() in_dev_get() -> __in_dev_get_rcu() in a rcu protected function. [ Fix build with CONFIG_IP_ROUTE_VERBOSE disabled. -DaveM ] Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/route.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 7b8eacd5ac26..883b5c7195ac 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1343,11 +1343,12 @@ static void rt_del(unsigned hash, struct rtable *rt) spin_unlock_bh(rt_hash_lock_addr(hash)); } +/* called in rcu_read_lock() section */ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, __be32 saddr, struct net_device *dev) { int i, k; - struct in_device *in_dev = in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); struct rtable *rth, **rthp; __be32 skeys[2] = { saddr, 0 }; int ikeys[2] = { dev->ifindex, 0 }; @@ -1383,7 +1384,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rthp=&rt_hash_table[hash].chain; - rcu_read_lock(); while ((rth = rcu_dereference(*rthp)) != NULL) { struct rtable *rt; @@ -1405,12 +1405,10 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, break; dst_hold(&rth->u.dst); - rcu_read_unlock(); rt = dst_alloc(&ipv4_dst_ops); if (rt == NULL) { ip_rt_put(rth); - in_dev_put(in_dev); return; } @@ -1463,12 +1461,10 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, ip_rt_put(rt); goto do_next; } - rcu_read_unlock(); do_next: ; } } - in_dev_put(in_dev); return; reject_redirect: @@ -1479,7 +1475,7 @@ reject_redirect: &old_gw, dev->name, &new_gw, &saddr, &daddr); #endif - in_dev_put(in_dev); + ; } static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) -- cgit v1.2.2 From 6e8b11b43b0c2e901734e2cdd70c6e325a90c4ef Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Jun 2010 03:54:46 +0000 Subject: net: avoid two atomic ops in ip_rcv_options() in_dev_get() -> __in_dev_get_rcu() in a rcu protected function. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_input.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index d274078b1665..08a3b121f908 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -293,18 +293,16 @@ static inline int ip_rcv_options(struct sk_buff *skb) } if (unlikely(opt->srr)) { - struct in_device *in_dev = in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); + if (in_dev) { if (!IN_DEV_SOURCE_ROUTE(in_dev)) { if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) printk(KERN_INFO "source route option %pI4 -> %pI4\n", &iph->saddr, &iph->daddr); - in_dev_put(in_dev); goto drop; } - - in_dev_put(in_dev); } if (ip_options_rcv_srr(skb)) -- cgit v1.2.2 From bb69ae049fcc986fcd742eb90ca0d44a7a49c9f1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Jun 2010 11:42:13 +0000 Subject: anycast: Some RCU conversions - dev_get_by_flags() changed to dev_get_by_flags_rcu() - ipv6_sock_ac_join() dont touch dev & idev refcounts - ipv6_sock_ac_drop() dont touch dev & idev refcounts - ipv6_sock_ac_close() dont touch dev & idev refcounts - ipv6_dev_ac_dec() dount touch idev refcount - ipv6_chk_acast_addr() dont touch idev refcount Signed-off-by: Eric Dumazet CC: Hideaki YOSHIFUJI Signed-off-by: David S. 
Miller --- net/core/dev.c | 14 +++------ net/ipv6/anycast.c | 90 +++++++++++++++++++++++++----------------------------- 2 files changed, 47 insertions(+), 57 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index c8d127718ff1..6f330cee79a6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -803,35 +803,31 @@ struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) EXPORT_SYMBOL(dev_getfirstbyhwtype); /** - * dev_get_by_flags - find any device with given flags + * dev_get_by_flags_rcu - find any device with given flags * @net: the applicable net namespace * @if_flags: IFF_* values * @mask: bitmask of bits in if_flags to check * * Search for any interface with the given flags. Returns NULL if a device - * is not found or a pointer to the device. The device returned has - * had a reference added and the pointer is safe until the user calls - * dev_put to indicate they have finished with it. + * is not found or a pointer to the device. Must be called inside + * rcu_read_lock(), and result refcount is unchanged. */ -struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags, +struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags, unsigned short mask) { struct net_device *dev, *ret; ret = NULL; - rcu_read_lock(); for_each_netdev_rcu(net, dev) { if (((dev->flags ^ if_flags) & mask) == 0) { - dev_hold(dev); ret = dev; break; } } - rcu_read_unlock(); return ret; } -EXPORT_SYMBOL(dev_get_by_flags); +EXPORT_SYMBOL(dev_get_by_flags_rcu); /** * dev_valid_name - check if name is okay for network device diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index b5b07054508a..f058fbd808c8 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -77,41 +77,40 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr) pac->acl_next = NULL; ipv6_addr_copy(&pac->acl_addr, addr); + rcu_read_lock(); if (ifindex == 0) { struct rt6_info *rt; rt = rt6_lookup(net, addr, NULL, 0, 0); if (rt) { dev = rt->rt6i_dev; - dev_hold(dev); dst_release(&rt->u.dst); } else if (ishost) { err = -EADDRNOTAVAIL; - goto out_free_pac; + goto error; } else { /* router, no matching interface: just pick one */ - - dev = dev_get_by_flags(net, IFF_UP, IFF_UP|IFF_LOOPBACK); + dev = dev_get_by_flags_rcu(net, IFF_UP, + IFF_UP | IFF_LOOPBACK); } } else - dev = dev_get_by_index(net, ifindex); + dev = dev_get_by_index_rcu(net, ifindex); if (dev == NULL) { err = -ENODEV; - goto out_free_pac; + goto error; } - idev = in6_dev_get(dev); + idev = __in6_dev_get(dev); if (!idev) { if (ifindex) err = -ENODEV; else err = -EADDRNOTAVAIL; - goto out_dev_put; + goto error; } /* reset ishost, now that we have a specific device */ ishost = !idev->cnf.forwarding; - in6_dev_put(idev); pac->acl_ifindex = dev->ifindex; @@ -124,26 +123,22 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr) if (ishost) err = -EADDRNOTAVAIL; if (err) - goto out_dev_put; + goto error; } err = ipv6_dev_ac_inc(dev, addr); - if (err) - goto out_dev_put; - - write_lock_bh(&ipv6_sk_ac_lock); - pac->acl_next = np->ipv6_ac_list; - np->ipv6_ac_list = pac; - write_unlock_bh(&ipv6_sk_ac_lock); - - dev_put(dev); - - return 0; + if (!err) { + write_lock_bh(&ipv6_sk_ac_lock); + pac->acl_next = np->ipv6_ac_list; + np->ipv6_ac_list = pac; + write_unlock_bh(&ipv6_sk_ac_lock); + pac = NULL; + } -out_dev_put: - dev_put(dev); -out_free_pac: - sock_kfree_s(sk, pac, sizeof(*pac)); +error: + rcu_read_unlock(); + if (pac) + sock_kfree_s(sk, pac, sizeof(*pac)); 
return err; } @@ -176,11 +171,12 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr) write_unlock_bh(&ipv6_sk_ac_lock); - dev = dev_get_by_index(net, pac->acl_ifindex); - if (dev) { + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, pac->acl_ifindex); + if (dev) ipv6_dev_ac_dec(dev, &pac->acl_addr); - dev_put(dev); - } + rcu_read_unlock(); + sock_kfree_s(sk, pac, sizeof(*pac)); return 0; } @@ -199,13 +195,12 @@ void ipv6_sock_ac_close(struct sock *sk) write_unlock_bh(&ipv6_sk_ac_lock); prev_index = 0; + rcu_read_lock(); while (pac) { struct ipv6_ac_socklist *next = pac->acl_next; if (pac->acl_ifindex != prev_index) { - if (dev) - dev_put(dev); - dev = dev_get_by_index(net, pac->acl_ifindex); + dev = dev_get_by_index_rcu(net, pac->acl_ifindex); prev_index = pac->acl_ifindex; } if (dev) @@ -213,8 +208,7 @@ void ipv6_sock_ac_close(struct sock *sk) sock_kfree_s(sk, pac, sizeof(*pac)); pac = next; } - if (dev) - dev_put(dev); + rcu_read_unlock(); } #if 0 @@ -363,33 +357,32 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr) return 0; } +/* called with rcu_read_lock() */ static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr) { - int ret; - struct inet6_dev *idev = in6_dev_get(dev); + struct inet6_dev *idev = __in6_dev_get(dev); + if (idev == NULL) return -ENODEV; - ret = __ipv6_dev_ac_dec(idev, addr); - in6_dev_put(idev); - return ret; + return __ipv6_dev_ac_dec(idev, addr); } /* * check if the interface has this anycast address + * called with rcu_read_lock() */ static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr) { struct inet6_dev *idev; struct ifacaddr6 *aca; - idev = in6_dev_get(dev); + idev = __in6_dev_get(dev); if (idev) { read_lock_bh(&idev->lock); for (aca = idev->ac_list; aca; aca = aca->aca_next) if (ipv6_addr_equal(&aca->aca_addr, addr)) break; read_unlock_bh(&idev->lock); - in6_dev_put(idev); return aca != NULL; } return 0; @@ -403,14 +396,15 @@ int ipv6_chk_acast_addr(struct net *net, struct net_device *dev, { int found = 0; - if (dev) - return ipv6_chk_acast_dev(dev, addr); rcu_read_lock(); - for_each_netdev_rcu(net, dev) - if (ipv6_chk_acast_dev(dev, addr)) { - found = 1; - break; - } + if (dev) + found = ipv6_chk_acast_dev(dev, addr); + else + for_each_netdev_rcu(net, dev) + if (ipv6_chk_acast_dev(dev, addr)) { + found = 1; + break; + } rcu_read_unlock(); return found; } -- cgit v1.2.2 From 90b726097ba0dcc1f9725182513e669a30e77db5 Mon Sep 17 00:00:00 2001 From: Juuso Oikarinen Date: Mon, 7 Jun 2010 10:52:12 +0300 Subject: mac80211: Add netif state checking to ieee80211_ifa_changed There's a window for ieee80211_ifa_changed() to get called whilst the managed mode mutex has not been initialized when opening and stopping the interface. Currently this causes a kernel BUG like the following: [ 132.460013] kernel BUG at /home/wifi/iwlwifi-2.6/net/mac80211/main.c:380! [ 132.460013] invalid opcode: 0000 [#1] SMP The mutex is initialized during open(), hence once netif_running() is true, the mutex should be valid. Fix by adding a netif_running() check to the function. Reported-by: Reinette Chatre Signed-off-by: Juuso Oikarinen Tested-by: Reinette Chatre Signed-off-by: John W. 
Linville --- net/mac80211/main.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 5706156d1ecf..88b671a16a41 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -359,6 +359,9 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, struct ieee80211_sub_if_data *sdata; struct ieee80211_if_managed *ifmgd; + if (!netif_running(ndev)) + return NOTIFY_DONE; + /* Make sure it's our interface that got changed */ if (!wdev) return NOTIFY_DONE; -- cgit v1.2.2 From 5bfddbd46a95c978f4d3c992339cbdf4f4b790a3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 8 Jun 2010 16:09:52 +0200 Subject: netfilter: nf_conntrack: IPS_UNTRACKED bit NOTRACK makes all cpus share a cache line on nf_conntrack_untracked twice per packet. This is bad for performance. __read_mostly annotation is also a bad choice. This patch introduces IPS_UNTRACKED bit so that we can use later a per_cpu untrack structure more easily. A new helper, nf_ct_untracked_get() returns a pointer to nf_conntrack_untracked. Another one, nf_ct_untracked_status_or() is used by nf_nat_init() to add IPS_NAT_DONE_MASK bits to untracked status. nf_ct_is_untracked() prototype is changed to work on a nf_conn pointer. Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_nat_core.c | 2 +- net/ipv4/netfilter/nf_nat_standalone.c | 2 +- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +- net/netfilter/nf_conntrack_core.c | 11 ++++++++--- net/netfilter/nf_conntrack_netlink.c | 2 +- net/netfilter/xt_CT.c | 4 ++-- net/netfilter/xt_NOTRACK.c | 2 +- net/netfilter/xt_TEE.c | 4 ++-- net/netfilter/xt_cluster.c | 2 +- net/netfilter/xt_conntrack.c | 11 ++++++----- net/netfilter/xt_socket.c | 2 +- net/netfilter/xt_state.c | 14 ++++++++------ 12 files changed, 33 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 4f8bddb760c9..c7719b283ada 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -742,7 +742,7 @@ static int __init nf_nat_init(void) spin_unlock_bh(&nf_nat_lock); /* Initialize fake conntrack so that NAT will skip it */ - nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK; + nf_ct_untracked_status_or(IPS_NAT_DONE_MASK); l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index beb25819c9c9..6723c682250d 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -98,7 +98,7 @@ nf_nat_fn(unsigned int hooknum, return NF_ACCEPT; /* Don't try to NAT if this packet is not conntracked */ - if (ct == &nf_conntrack_untracked) + if (nf_ct_is_untracked(ct)) return NF_ACCEPT; nat = nfct_nat(ct); diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 9be81776415e..1df3c8b6bf47 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -208,7 +208,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, type = icmp6h->icmp6_type - 130; if (type >= 0 && type < sizeof(noct_valid_new) && noct_valid_new[type]) { - skb->nfct = &nf_conntrack_untracked.ct_general; + skb->nfct = &nf_ct_untracked_get()->ct_general; skb->nfctinfo = IP_CT_NEW; nf_conntrack_get(skb->nfct); return NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 
eeeb8bc73982..6c1da212380d 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -62,7 +62,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); unsigned int nf_conntrack_max __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_max); -struct nf_conn nf_conntrack_untracked __read_mostly; +struct nf_conn nf_conntrack_untracked; EXPORT_SYMBOL_GPL(nf_conntrack_untracked); static int nf_conntrack_hash_rnd_initted; @@ -1321,6 +1321,12 @@ EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, &nf_conntrack_htable_size, 0600); +void nf_ct_untracked_status_or(unsigned long bits) +{ + nf_conntrack_untracked.status |= bits; +} +EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or); + static int nf_conntrack_init_init_net(void) { int max_factor = 8; @@ -1368,8 +1374,7 @@ static int nf_conntrack_init_init_net(void) #endif atomic_set(&nf_conntrack_untracked.ct_general.use, 1); /* - and look it like as a confirmed connection */ - set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status); - + nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED); return 0; #ifdef CONFIG_NF_CONNTRACK_ZONES diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c42ff6aa441d..5bae1cd15eea 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -480,7 +480,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) int err; /* ignore our fake conntrack entry */ - if (ct == &nf_conntrack_untracked) + if (nf_ct_is_untracked(ct)) return 0; if (events & (1 << IPCT_DESTROY)) { diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 562bf3266e04..0cb6053f02fd 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -67,7 +67,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par) return -EINVAL; if (info->flags & XT_CT_NOTRACK) { - ct = &nf_conntrack_untracked; + ct = nf_ct_untracked_get(); atomic_inc(&ct->ct_general.use); goto out; } @@ -132,7 +132,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par) struct nf_conn *ct = info->ct; struct nf_conn_help *help; - if (ct != &nf_conntrack_untracked) { + if (!nf_ct_is_untracked(ct)) { help = nfct_help(ct); if (help) module_put(help->helper->me); diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c index 512b9123252f..9d782181b6c8 100644 --- a/net/netfilter/xt_NOTRACK.c +++ b/net/netfilter/xt_NOTRACK.c @@ -23,7 +23,7 @@ notrack_tg(struct sk_buff *skb, const struct xt_action_param *par) If there is a real ct entry correspondig to this packet, it'll hang aroun till timing out. We don't deal with it for performance reasons. JK */ - skb->nfct = &nf_conntrack_untracked.ct_general; + skb->nfct = &nf_ct_untracked_get()->ct_general; skb->nfctinfo = IP_CT_NEW; nf_conntrack_get(skb->nfct); diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 859d9fd429c8..7a118267c4c4 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -104,7 +104,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) #ifdef WITH_CONNTRACK /* Avoid counting cloned packets towards the original connection. 
*/ nf_conntrack_put(skb->nfct); - skb->nfct = &nf_conntrack_untracked.ct_general; + skb->nfct = &nf_ct_untracked_get()->ct_general; skb->nfctinfo = IP_CT_NEW; nf_conntrack_get(skb->nfct); #endif @@ -177,7 +177,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) #ifdef WITH_CONNTRACK nf_conntrack_put(skb->nfct); - skb->nfct = &nf_conntrack_untracked.ct_general; + skb->nfct = &nf_ct_untracked_get()->ct_general; skb->nfctinfo = IP_CT_NEW; nf_conntrack_get(skb->nfct); #endif diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index 30b95a1c1c89..f4af1bfafb1c 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -120,7 +120,7 @@ xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par) if (ct == NULL) return false; - if (ct == &nf_conntrack_untracked) + if (nf_ct_is_untracked(ct)) return false; if (ct->master) diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 39681f10291c..e536710ad916 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -123,11 +123,12 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, ct = nf_ct_get(skb, &ctinfo); - if (ct == &nf_conntrack_untracked) - statebit = XT_CONNTRACK_STATE_UNTRACKED; - else if (ct != NULL) - statebit = XT_CONNTRACK_STATE_BIT(ctinfo); - else + if (ct) { + if (nf_ct_is_untracked(ct)) + statebit = XT_CONNTRACK_STATE_UNTRACKED; + else + statebit = XT_CONNTRACK_STATE_BIT(ctinfo); + } else statebit = XT_CONNTRACK_STATE_INVALID; if (info->match_flags & XT_CONNTRACK_STATE) { diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 3d54c236a1ba..1ca89908cbad 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -127,7 +127,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, * reply packet of an established SNAT-ted connection. */ ct = nf_ct_get(skb, &ctinfo); - if (ct && (ct != &nf_conntrack_untracked) && + if (ct && !nf_ct_is_untracked(ct) && ((iph->protocol != IPPROTO_ICMP && ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) || (iph->protocol == IPPROTO_ICMP && diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index e12e053d3782..a507922d80cd 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -26,14 +26,16 @@ state_mt(const struct sk_buff *skb, struct xt_action_param *par) const struct xt_state_info *sinfo = par->matchinfo; enum ip_conntrack_info ctinfo; unsigned int statebit; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - if (nf_ct_is_untracked(skb)) - statebit = XT_STATE_UNTRACKED; - else if (!nf_ct_get(skb, &ctinfo)) + if (!ct) statebit = XT_STATE_INVALID; - else - statebit = XT_STATE_BIT(ctinfo); - + else { + if (nf_ct_is_untracked(ct)) + statebit = XT_STATE_UNTRACKED; + else + statebit = XT_STATE_BIT(ctinfo); + } return (sinfo->statemask & statebit); } -- cgit v1.2.2 From b054b747a694927879c94dd11af54d04346aed7d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 7 Jun 2010 21:50:07 +0200 Subject: mac80211: fix deauth before assoc When we receive a deauthentication frame before having successfully associated, we neither print a message nor abort assocation. The former makes it hard to debug, while the latter later causes a warning in cfg80211 when, as will typically be the case, association timed out. This warning was reported by many, e.g. in https://bugzilla.kernel.org/show_bug.cgi?id=15981, but I couldn't initially pinpoint it. 
I verified the fix by hacking hostapd to send a deauth frame instead of an association response. Cc: stable@kernel.org Signed-off-by: Johannes Berg Tested-by: Miles Lane Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 3310e70aa52f..f803f8b72a93 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1760,9 +1760,45 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, mutex_unlock(&ifmgd->mtx); if (skb->len >= 24 + 2 /* mgmt + deauth reason */ && - (fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_DEAUTH) - cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); + (fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_DEAUTH) { + struct ieee80211_local *local = sdata->local; + struct ieee80211_work *wk; + + mutex_lock(&local->work_mtx); + list_for_each_entry(wk, &local->work_list, list) { + if (wk->sdata != sdata) + continue; + + if (wk->type != IEEE80211_WORK_ASSOC) + continue; + + if (memcmp(mgmt->bssid, wk->filter_ta, ETH_ALEN)) + continue; + if (memcmp(mgmt->sa, wk->filter_ta, ETH_ALEN)) + continue; + /* + * Printing the message only here means we can't + * spuriously print it, but it also means that it + * won't be printed when the frame comes in before + * we even tried to associate or in similar cases. + * + * Ultimately, I suspect cfg80211 should print the + * messages instead. + */ + printk(KERN_DEBUG + "%s: deauthenticated from %pM (Reason: %u)\n", + sdata->name, mgmt->bssid, + le16_to_cpu(mgmt->u.deauth.reason_code)); + + list_del_rcu(&wk->list); + free_work(wk); + break; + } + mutex_unlock(&local->work_mtx); + + cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); + } out: kfree_skb(skb); } -- cgit v1.2.2 From b3c5163fe0193a74016dba1bb22491e0d1e9aaa4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Jun 2010 14:43:38 +0200 Subject: netfilter: nf_conntrack: per_cpu untracking NOTRACK makes all cpus share a cache line on nf_conntrack_untracked twice per packet, slowing down performance. This patch converts it to a per_cpu variable. 
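The accessor side of the conversion lives in the nf_conntrack header rather than under net/, so it does not appear in the diffstat below. A minimal sketch of what those helpers plausibly look like after this change follows; the bodies are an assumption made for illustration, not code taken from the patch.

/* Assumed shape of the header-side helpers after the conversion: each CPU
 * hands out its own untracked entry, so NOTRACK stops bouncing a shared
 * cache line between CPUs. */
DECLARE_PER_CPU(struct nf_conn, nf_conntrack_untracked);

static inline struct nf_conn *nf_ct_untracked_get(void)
{
	/* callers run in softirq context, so a raw per-CPU access suffices */
	return &__raw_get_cpu_var(nf_conntrack_untracked);
}

static inline bool nf_ct_is_untracked(const struct nf_conn *ct)
{
	return test_bit(IPS_UNTRACKED_BIT, &ct->status);
}

With per-CPU instances, cleanup can no longer watch a single refcount, which is why the diff below sums the per-CPU use counts in untrack_refs() before letting the module unload proceed.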
Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_core.c | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 6c1da212380d..9c661413b826 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -62,8 +62,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); unsigned int nf_conntrack_max __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_max); -struct nf_conn nf_conntrack_untracked; -EXPORT_SYMBOL_GPL(nf_conntrack_untracked); +DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked); +EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); static int nf_conntrack_hash_rnd_initted; static unsigned int nf_conntrack_hash_rnd; @@ -1183,10 +1183,21 @@ static void nf_ct_release_dying_list(struct net *net) spin_unlock_bh(&nf_conntrack_lock); } +static int untrack_refs(void) +{ + int cnt = 0, cpu; + + for_each_possible_cpu(cpu) { + struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu); + + cnt += atomic_read(&ct->ct_general.use) - 1; + } + return cnt; +} + static void nf_conntrack_cleanup_init_net(void) { - /* wait until all references to nf_conntrack_untracked are dropped */ - while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1) + while (untrack_refs() > 0) schedule(); nf_conntrack_helper_fini(); @@ -1323,14 +1334,17 @@ module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, void nf_ct_untracked_status_or(unsigned long bits) { - nf_conntrack_untracked.status |= bits; + int cpu; + + for_each_possible_cpu(cpu) + per_cpu(nf_conntrack_untracked, cpu).status |= bits; } EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or); static int nf_conntrack_init_init_net(void) { int max_factor = 8; - int ret; + int ret, cpu; /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB * machine has 512 buckets. >= 1GB machines have 16384 buckets. */ @@ -1369,10 +1383,12 @@ static int nf_conntrack_init_init_net(void) goto err_extend; #endif /* Set up fake conntrack: to never be deleted, not in any hashes */ -#ifdef CONFIG_NET_NS - nf_conntrack_untracked.ct_net = &init_net; -#endif - atomic_set(&nf_conntrack_untracked.ct_general.use, 1); + for_each_possible_cpu(cpu) { + struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu); + + write_pnet(&ct->ct_net, &init_net); + atomic_set(&ct->ct_general.use, 1); + } /* - and look it like as a confirmed connection */ nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED); return 0; -- cgit v1.2.2 From 2bf074825403e0e0d623bac7573580773b78abef Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Wed, 9 Jun 2010 14:47:40 +0200 Subject: netfilter: xt_sctp: use WORD_ROUND macro to calculate length of multiple of 4 bytes Use WORD_ROUND to round an int up to the next multiple of 4. 
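Assuming WORD_ROUND() carries its usual meaning of rounding a length up to the next multiple of 4 (the macro itself is defined in the SCTP headers, not in this diff), the substitution is behaviour-preserving. A stand-alone check, purely for illustration:

#include <stdio.h>

/* assumed to match the SCTP helper: round a length up to a multiple of 4 */
#define WORD_ROUND(s) (((s) + 3) & ~3)

int main(void)
{
	unsigned int lengths[] = { 4, 5, 7, 8, 13, 16 };
	unsigned int i;

	for (i = 0; i < sizeof(lengths) / sizeof(lengths[0]); i++)
		printf("chunk length %2u -> padded %2u\n",
		       lengths[i], WORD_ROUND(lengths[i]));

	/* prints 4->4, 5->8, 7->8, 8->8, 13->16, 16->16, the same values the
	 * open-coded (ntohs(sch->length) + 3) & ~3 produced before */
	return 0;
}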
Signed-off-by: Shan Wei Signed-off-by: Patrick McHardy --- net/netfilter/xt_sctp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index c04fcf385c59..ef36a56a02c6 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -67,7 +68,7 @@ match_packet(const struct sk_buff *skb, ++i, offset, sch->type, htons(sch->length), sch->flags); #endif - offset += (ntohs(sch->length) + 3) & ~3; + offset += WORD_ROUND(ntohs(sch->length)); pr_debug("skb->len: %d\toffset: %d\n", skb->len, offset); -- cgit v1.2.2 From 5756d346c7cdefcd84a8ac4901167cdfb5447b69 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Jun 2010 15:47:41 +0200 Subject: netfilter: ip_queue: rwlock to spinlock conversion Converts queue_lock rwlock to a spinlock. (readlocked part can be changed by reads of integer values) One atomic operation instead of four per ipq_enqueue_packet() call. Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ip_queue.c | 57 +++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index a4e5fc5df4bf..d2c1311cb28d 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -42,7 +42,7 @@ typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long); static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; -static DEFINE_RWLOCK(queue_lock); +static DEFINE_SPINLOCK(queue_lock); static int peer_pid __read_mostly; static unsigned int copy_range __read_mostly; static unsigned int queue_total; @@ -72,10 +72,10 @@ __ipq_set_mode(unsigned char mode, unsigned int range) break; case IPQ_COPY_PACKET: - copy_mode = mode; + if (range > 0xFFFF) + range = 0xFFFF; copy_range = range; - if (copy_range > 0xFFFF) - copy_range = 0xFFFF; + copy_mode = mode; break; default: @@ -101,7 +101,7 @@ ipq_find_dequeue_entry(unsigned long id) { struct nf_queue_entry *entry = NULL, *i; - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); list_for_each_entry(i, &queue_list, list) { if ((unsigned long)i == id) { @@ -115,7 +115,7 @@ ipq_find_dequeue_entry(unsigned long id) queue_total--; } - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); return entry; } @@ -136,9 +136,9 @@ __ipq_flush(ipq_cmpfn cmpfn, unsigned long data) static void ipq_flush(ipq_cmpfn cmpfn, unsigned long data) { - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); __ipq_flush(cmpfn, data); - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); } static struct sk_buff * @@ -152,9 +152,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) struct nlmsghdr *nlh; struct timeval tv; - read_lock_bh(&queue_lock); - - switch (copy_mode) { + switch (ACCESS_ONCE(copy_mode)) { case IPQ_COPY_META: case IPQ_COPY_NONE: size = NLMSG_SPACE(sizeof(*pmsg)); @@ -162,26 +160,21 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) case IPQ_COPY_PACKET: if (entry->skb->ip_summed == CHECKSUM_PARTIAL && - (*errp = skb_checksum_help(entry->skb))) { - read_unlock_bh(&queue_lock); + (*errp = skb_checksum_help(entry->skb))) return NULL; - } - if (copy_range == 0 || copy_range > entry->skb->len) + + data_len = ACCESS_ONCE(copy_range); + if (data_len == 0 || data_len > entry->skb->len) data_len = 
entry->skb->len; - else - data_len = copy_range; size = NLMSG_SPACE(sizeof(*pmsg) + data_len); break; default: *errp = -EINVAL; - read_unlock_bh(&queue_lock); return NULL; } - read_unlock_bh(&queue_lock); - skb = alloc_skb(size, GFP_ATOMIC); if (!skb) goto nlmsg_failure; @@ -242,7 +235,7 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) if (nskb == NULL) return status; - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); if (!peer_pid) goto err_out_free_nskb; @@ -266,14 +259,14 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) __ipq_enqueue_entry(entry); - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); return status; err_out_free_nskb: kfree_skb(nskb); err_out_unlock: - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); return status; } @@ -342,9 +335,9 @@ ipq_set_mode(unsigned char mode, unsigned int range) { int status; - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); status = __ipq_set_mode(mode, range); - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); return status; } @@ -440,11 +433,11 @@ __ipq_rcv_skb(struct sk_buff *skb) if (security_netlink_recv(skb, CAP_NET_ADMIN)) RCV_SKB_FAIL(-EPERM); - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); if (peer_pid) { if (peer_pid != pid) { - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); RCV_SKB_FAIL(-EBUSY); } } else { @@ -452,7 +445,7 @@ __ipq_rcv_skb(struct sk_buff *skb) peer_pid = pid; } - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); status = ipq_receive_peer(NLMSG_DATA(nlh), type, nlmsglen - NLMSG_LENGTH(0)); @@ -497,10 +490,10 @@ ipq_rcv_nl_event(struct notifier_block *this, struct netlink_notify *n = ptr; if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) { - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid)) __ipq_reset(); - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); } return NOTIFY_DONE; } @@ -527,7 +520,7 @@ static ctl_table ipq_table[] = { #ifdef CONFIG_PROC_FS static int ip_queue_show(struct seq_file *m, void *v) { - read_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); seq_printf(m, "Peer PID : %d\n" @@ -545,7 +538,7 @@ static int ip_queue_show(struct seq_file *m, void *v) queue_dropped, queue_user_dropped); - read_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); return 0; } -- cgit v1.2.2 From aea9d711f3d68c656ad31ab578ecfb0bb5cd7f97 Mon Sep 17 00:00:00 2001 From: Sven Wegener Date: Wed, 9 Jun 2010 16:10:57 +0200 Subject: ipvs: Add missing locking during connection table hashing and unhashing The code that hashes and unhashes connections from the connection table is missing locking of the connection being modified, which opens up a race condition and results in memory corruption when this race condition is hit. Here is what happens in pretty verbose form: CPU 0 CPU 1 ------------ ------------ An active connection is terminated and we schedule ip_vs_conn_expire() on this CPU to expire this connection. IRQ assignment is changed to this CPU, but the expire timer stays scheduled on the other CPU. New connection from same ip:port comes in right before the timer expires, we find the inactive connection in our connection table and get a reference to it. We proper lock the connection in tcp_state_transition() and read the connection flags in set_tcp_state(). 
ip_vs_conn_expire() gets called, we unhash the connection from our connection table and remove the hashed flag in ip_vs_conn_unhash(), without proper locking! While still holding proper locks we write the connection flags in set_tcp_state() and this sets the hashed flag again. ip_vs_conn_expire() fails to expire the connection, because the other CPU has incremented the reference count. We try to re-insert the connection into our connection table, but this fails in ip_vs_conn_hash(), because the hashed flag has been set by the other CPU. We re-schedule execution of ip_vs_conn_expire(). Now this connection has the hashed flag set, but isn't actually hashed in our connection table and has a dangling list_head. We drop the reference we held on the connection and schedule the expire timer for timeouting the connection on this CPU. Further packets won't be able to find this connection in our connection table. ip_vs_conn_expire() gets called again, we think it's already hashed, but the list_head is dangling and while removing the connection from our connection table we write to the memory location where this list_head points to. The result will probably be a kernel oops at some other point in time. This race condition is pretty subtle, but it can be triggered remotely. It needs the IRQ assignment change or another circumstance where packets coming from the same ip:port for the same service are being processed on different CPUs. And it involves hitting the exact time at which ip_vs_conn_expire() gets called. It can be avoided by making sure that all packets from one connection are always processed on the same CPU and can be made harder to exploit by changing the connection timeouts to some custom values. Signed-off-by: Sven Wegener Cc: stable@kernel.org Acked-by: Simon Horman Signed-off-by: Patrick McHardy --- net/netfilter/ipvs/ip_vs_conn.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index d8f7e8ef67b4..ff04e9edbed6 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -162,6 +162,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); ct_write_lock(hash); + spin_lock(&cp->lock); if (!(cp->flags & IP_VS_CONN_F_HASHED)) { list_add(&cp->c_list, &ip_vs_conn_tab[hash]); @@ -174,6 +175,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) ret = 0; } + spin_unlock(&cp->lock); ct_write_unlock(hash); return ret; @@ -193,6 +195,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); ct_write_lock(hash); + spin_lock(&cp->lock); if (cp->flags & IP_VS_CONN_F_HASHED) { list_del(&cp->c_list); @@ -202,6 +205,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) } else ret = 0; + spin_unlock(&cp->lock); ct_write_unlock(hash); return ret; -- cgit v1.2.2 From 144ad2a6c56b6109ff0f64074863ae5cf1c1698a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Jun 2010 16:25:08 +0200 Subject: netfilter: ip6_queue: rwlock to spinlock conversion Converts queue_lock rwlock to a spinlock. (readlocked part can be changed by reads of integer values) One atomic operation instead of four per ipq_enqueue_packet() call. 
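A minimal sketch of the resulting locking pattern, using hypothetical names (my_copy_mode, my_copy_range, my_set_mode, my_copy_len) rather than the ip_queue symbols: writers keep the spinlock, while the hot read path drops the lock entirely and snapshots each integer once with ACCESS_ONCE().

#include <linux/spinlock.h>
#include <linux/compiler.h>

enum { MY_COPY_NONE, MY_COPY_META, MY_COPY_PACKET };

static DEFINE_SPINLOCK(my_cfg_lock);	/* serializes writers only */
static unsigned char my_copy_mode = MY_COPY_NONE;
static unsigned int  my_copy_range;

/* writer: clamp the range first, publish the mode last */
static void my_set_mode(unsigned char mode, unsigned int range)
{
	spin_lock_bh(&my_cfg_lock);
	if (mode == MY_COPY_PACKET) {
		if (range > 0xFFFF)
			range = 0xFFFF;
		my_copy_range = range;
	}
	my_copy_mode = mode;
	spin_unlock_bh(&my_cfg_lock);
}

/* reader: lockless; a single snapshot of each integer is enough */
static unsigned int my_copy_len(unsigned int pkt_len)
{
	unsigned int range;

	if (ACCESS_ONCE(my_copy_mode) != MY_COPY_PACKET)
		return 0;

	range = ACCESS_ONCE(my_copy_range);
	if (range == 0 || range > pkt_len)
		range = pkt_len;
	return range;
}

The store ordering in my_set_mode() mirrors the patch: the clamped range is written before the mode that enables it, so a lockless reader never sees packet-copy mode paired with an unclamped range.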
Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/ip6_queue.c | 57 ++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 8c201743d96d..413ab0754e1f 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -43,7 +43,7 @@ typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long); static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; -static DEFINE_RWLOCK(queue_lock); +static DEFINE_SPINLOCK(queue_lock); static int peer_pid __read_mostly; static unsigned int copy_range __read_mostly; static unsigned int queue_total; @@ -73,10 +73,10 @@ __ipq_set_mode(unsigned char mode, unsigned int range) break; case IPQ_COPY_PACKET: - copy_mode = mode; + if (range > 0xFFFF) + range = 0xFFFF; copy_range = range; - if (copy_range > 0xFFFF) - copy_range = 0xFFFF; + copy_mode = mode; break; default: @@ -102,7 +102,7 @@ ipq_find_dequeue_entry(unsigned long id) { struct nf_queue_entry *entry = NULL, *i; - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); list_for_each_entry(i, &queue_list, list) { if ((unsigned long)i == id) { @@ -116,7 +116,7 @@ ipq_find_dequeue_entry(unsigned long id) queue_total--; } - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); return entry; } @@ -137,9 +137,9 @@ __ipq_flush(ipq_cmpfn cmpfn, unsigned long data) static void ipq_flush(ipq_cmpfn cmpfn, unsigned long data) { - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); __ipq_flush(cmpfn, data); - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); } static struct sk_buff * @@ -153,9 +153,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) struct nlmsghdr *nlh; struct timeval tv; - read_lock_bh(&queue_lock); - - switch (copy_mode) { + switch (ACCESS_ONCE(copy_mode)) { case IPQ_COPY_META: case IPQ_COPY_NONE: size = NLMSG_SPACE(sizeof(*pmsg)); @@ -163,26 +161,21 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) case IPQ_COPY_PACKET: if (entry->skb->ip_summed == CHECKSUM_PARTIAL && - (*errp = skb_checksum_help(entry->skb))) { - read_unlock_bh(&queue_lock); + (*errp = skb_checksum_help(entry->skb))) return NULL; - } - if (copy_range == 0 || copy_range > entry->skb->len) + + data_len = ACCESS_ONCE(copy_range); + if (data_len == 0 || data_len > entry->skb->len) data_len = entry->skb->len; - else - data_len = copy_range; size = NLMSG_SPACE(sizeof(*pmsg) + data_len); break; default: *errp = -EINVAL; - read_unlock_bh(&queue_lock); return NULL; } - read_unlock_bh(&queue_lock); - skb = alloc_skb(size, GFP_ATOMIC); if (!skb) goto nlmsg_failure; @@ -242,7 +235,7 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) if (nskb == NULL) return status; - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); if (!peer_pid) goto err_out_free_nskb; @@ -266,14 +259,14 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) __ipq_enqueue_entry(entry); - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); return status; err_out_free_nskb: kfree_skb(nskb); err_out_unlock: - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); return status; } @@ -342,9 +335,9 @@ ipq_set_mode(unsigned char mode, unsigned int range) { int status; - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); status = __ipq_set_mode(mode, range); - 
write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); return status; } @@ -441,11 +434,11 @@ __ipq_rcv_skb(struct sk_buff *skb) if (security_netlink_recv(skb, CAP_NET_ADMIN)) RCV_SKB_FAIL(-EPERM); - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); if (peer_pid) { if (peer_pid != pid) { - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); RCV_SKB_FAIL(-EBUSY); } } else { @@ -453,7 +446,7 @@ __ipq_rcv_skb(struct sk_buff *skb) peer_pid = pid; } - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); status = ipq_receive_peer(NLMSG_DATA(nlh), type, nlmsglen - NLMSG_LENGTH(0)); @@ -498,10 +491,10 @@ ipq_rcv_nl_event(struct notifier_block *this, struct netlink_notify *n = ptr; if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW) { - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid)) __ipq_reset(); - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); } return NOTIFY_DONE; } @@ -528,7 +521,7 @@ static ctl_table ipq_table[] = { #ifdef CONFIG_PROC_FS static int ip6_queue_show(struct seq_file *m, void *v) { - read_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); seq_printf(m, "Peer PID : %d\n" @@ -546,7 +539,7 @@ static int ip6_queue_show(struct seq_file *m, void *v) queue_dropped, queue_user_dropped); - read_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); return 0; } -- cgit v1.2.2 From c463ac972315a0c86bb20b8d35225baa75caf899 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Jun 2010 18:07:06 +0200 Subject: netfilter: nfnetlink_queue: some optimizations - Use an atomic_t for id_sequence to avoid a spin_lock/spin_unlock pair - Group highly modified struct nfqnl_instance fields together Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/netfilter/nfnetlink_queue.c | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 12e1ab37fcd8..d05605b38f6f 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -46,17 +46,19 @@ struct nfqnl_instance { int peer_pid; unsigned int queue_maxlen; unsigned int copy_range; - unsigned int queue_total; unsigned int queue_dropped; unsigned int queue_user_dropped; - unsigned int id_sequence; /* 'sequence' of pkt ids */ u_int16_t queue_num; /* number of this queue */ u_int8_t copy_mode; - - spinlock_t lock; - +/* + * Following fields are dirtied for each queued packet, + * keep them in same cache line if possible. 
+ */ + spinlock_t lock; + unsigned int queue_total; + atomic_t id_sequence; /* 'sequence' of pkt ids */ struct list_head queue_list; /* packets in queue */ }; @@ -238,32 +240,24 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, outdev = entry->outdev; - spin_lock_bh(&queue->lock); - - switch ((enum nfqnl_config_mode)queue->copy_mode) { + switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) { case NFQNL_COPY_META: case NFQNL_COPY_NONE: break; case NFQNL_COPY_PACKET: if (entskb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_help(entskb)) { - spin_unlock_bh(&queue->lock); + skb_checksum_help(entskb)) return NULL; - } - if (queue->copy_range == 0 - || queue->copy_range > entskb->len) + + data_len = ACCESS_ONCE(queue->copy_range); + if (data_len == 0 || data_len > entskb->len) data_len = entskb->len; - else - data_len = queue->copy_range; size += nla_total_size(data_len); break; } - entry->id = queue->id_sequence++; - - spin_unlock_bh(&queue->lock); skb = alloc_skb(size, GFP_ATOMIC); if (!skb) @@ -278,6 +272,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, nfmsg->version = NFNETLINK_V0; nfmsg->res_id = htons(queue->queue_num); + entry->id = atomic_inc_return(&queue->id_sequence); pmsg.packet_id = htonl(entry->id); pmsg.hw_protocol = entskb->protocol; pmsg.hook = entry->hook; @@ -866,7 +861,7 @@ static int seq_show(struct seq_file *s, void *v) inst->peer_pid, inst->queue_total, inst->copy_mode, inst->copy_range, inst->queue_dropped, inst->queue_user_dropped, - inst->id_sequence, 1); + atomic_read(&inst->id_sequence), 1); } static const struct seq_operations nfqnl_seq_ops = { -- cgit v1.2.2 From bed1be20867d17a3eb2fb5e1613ebdc50c83b8aa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Jun 2010 18:14:58 +0200 Subject: netfilter: nfnetlink_log: RCU conversion - instances_lock becomes a spinlock - lockless lookups While nfnetlink_log probably not performance critical, using less rwlocks in our code is always welcomed... 
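A minimal sketch of the RCU pattern this conversion follows, with hypothetical names (my_obj, my_lookup, my_remove), the four-argument hlist_for_each_entry_rcu() of this kernel era, and refcounting elided: lockless lookups, a spinlock only for add/remove, and call_rcu_bh() to defer the kfree().

#include <linux/kernel.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/types.h>

struct my_obj {
	struct hlist_node hlist;
	u16 group;
	struct rcu_head rcu;
	/* refcount (atomic_t use) elided for brevity */
};

static struct hlist_head my_bucket;		/* one bucket for brevity */
static DEFINE_SPINLOCK(my_bucket_lock);		/* add/remove only */

/* lockless lookup; callers hold rcu_read_lock_bh() around this */
static struct my_obj *my_lookup(u16 group)
{
	struct my_obj *obj;
	struct hlist_node *pos;

	hlist_for_each_entry_rcu(obj, pos, &my_bucket, hlist)
		if (obj->group == group)
			return obj;
	return NULL;
}

static void my_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct my_obj, rcu));
}

static void my_remove(struct my_obj *obj)
{
	spin_lock_bh(&my_bucket_lock);
	hlist_del_rcu(&obj->hlist);
	spin_unlock_bh(&my_bucket_lock);
	/* free only after current _bh read-side sections have finished */
	call_rcu_bh(&obj->rcu, my_free_rcu);
}

Callers bracket my_lookup() with rcu_read_lock_bh()/rcu_read_unlock_bh() and must take a reference before leaving the read-side section, which is what instance_lookup_get() does in the patch.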
Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/netfilter/nfnetlink_log.c | 49 ++++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index fc9a211e629e..8ec23ec568e7 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -66,9 +66,10 @@ struct nfulnl_instance { u_int16_t group_num; /* number of this queue */ u_int16_t flags; u_int8_t copy_mode; + struct rcu_head rcu; }; -static DEFINE_RWLOCK(instances_lock); +static DEFINE_SPINLOCK(instances_lock); static atomic_t global_seq; #define INSTANCE_BUCKETS 16 @@ -88,7 +89,7 @@ __instance_lookup(u_int16_t group_num) struct nfulnl_instance *inst; head = &instance_table[instance_hashfn(group_num)]; - hlist_for_each_entry(inst, pos, head, hlist) { + hlist_for_each_entry_rcu(inst, pos, head, hlist) { if (inst->group_num == group_num) return inst; } @@ -106,22 +107,26 @@ instance_lookup_get(u_int16_t group_num) { struct nfulnl_instance *inst; - read_lock_bh(&instances_lock); + rcu_read_lock_bh(); inst = __instance_lookup(group_num); if (inst) instance_get(inst); - read_unlock_bh(&instances_lock); + rcu_read_unlock_bh(); return inst; } +static void nfulnl_instance_free_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct nfulnl_instance, rcu)); + module_put(THIS_MODULE); +} + static void instance_put(struct nfulnl_instance *inst) { - if (inst && atomic_dec_and_test(&inst->use)) { - kfree(inst); - module_put(THIS_MODULE); - } + if (inst && atomic_dec_and_test(&inst->use)) + call_rcu_bh(&inst->rcu, nfulnl_instance_free_rcu); } static void nfulnl_timer(unsigned long data); @@ -132,7 +137,7 @@ instance_create(u_int16_t group_num, int pid) struct nfulnl_instance *inst; int err; - write_lock_bh(&instances_lock); + spin_lock_bh(&instances_lock); if (__instance_lookup(group_num)) { err = -EEXIST; goto out_unlock; @@ -169,12 +174,12 @@ instance_create(u_int16_t group_num, int pid) hlist_add_head(&inst->hlist, &instance_table[instance_hashfn(group_num)]); - write_unlock_bh(&instances_lock); + spin_unlock_bh(&instances_lock); return inst; out_unlock: - write_unlock_bh(&instances_lock); + spin_unlock_bh(&instances_lock); return ERR_PTR(err); } @@ -200,9 +205,9 @@ __instance_destroy(struct nfulnl_instance *inst) static inline void instance_destroy(struct nfulnl_instance *inst) { - write_lock_bh(&instances_lock); + spin_lock_bh(&instances_lock); __instance_destroy(inst); - write_unlock_bh(&instances_lock); + spin_unlock_bh(&instances_lock); } static int @@ -672,7 +677,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this, int i; /* destroy all instances for this pid */ - write_lock_bh(&instances_lock); + spin_lock_bh(&instances_lock); for (i = 0; i < INSTANCE_BUCKETS; i++) { struct hlist_node *tmp, *t2; struct nfulnl_instance *inst; @@ -684,7 +689,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this, __instance_destroy(inst); } } - write_unlock_bh(&instances_lock); + spin_unlock_bh(&instances_lock); } return NOTIFY_DONE; } @@ -861,19 +866,19 @@ static struct hlist_node *get_first(struct iter_state *st) for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { if (!hlist_empty(&instance_table[st->bucket])) - return instance_table[st->bucket].first; + return rcu_dereference_bh(instance_table[st->bucket].first); } return NULL; } static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h) { - h = h->next; + h = rcu_dereference_bh(h->next); while (!h) { 
if (++st->bucket >= INSTANCE_BUCKETS) return NULL; - h = instance_table[st->bucket].first; + h = rcu_dereference_bh(instance_table[st->bucket].first); } return h; } @@ -890,9 +895,9 @@ static struct hlist_node *get_idx(struct iter_state *st, loff_t pos) } static void *seq_start(struct seq_file *seq, loff_t *pos) - __acquires(instances_lock) + __acquires(rcu_bh) { - read_lock_bh(&instances_lock); + rcu_read_lock_bh(); return get_idx(seq->private, *pos); } @@ -903,9 +908,9 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos) } static void seq_stop(struct seq_file *s, void *v) - __releases(instances_lock) + __releases(rcu_bh) { - read_unlock_bh(&instances_lock); + rcu_read_unlock_bh(); } static int seq_show(struct seq_file *s, void *v) -- cgit v1.2.2 From 08c801f8d45387a1b46066aad1789a9bb9c4b645 Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Tue, 8 Jun 2010 17:51:27 -0600 Subject: net: Print num_rx_queues imbalance warning only when there are allocated queues BugLink: http://bugs.launchpad.net/bugs/591416 There are a number of network drivers (bridge, bonding, etc) that are not yet receive multi-queue enabled and use alloc_netdev(), so don't print a num_rx_queues imbalance warning in that case. Also, only print the warning once for those drivers that _are_ multi-queue enabled. Signed-off-by: Tim Gardner Acked-by: Eric Dumazet --- net/core/dev.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index d03470f5260a..14a85682af38 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2253,11 +2253,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, if (skb_rx_queue_recorded(skb)) { u16 index = skb_get_rx_queue(skb); if (unlikely(index >= dev->num_rx_queues)) { - if (net_ratelimit()) { - pr_warning("%s received packet on queue " - "%u, but number of RX queues is %u\n", - dev->name, index, dev->num_rx_queues); - } + WARN_ONCE(dev->num_rx_queues > 1, "%s received packet " + "on queue %u, but number of RX queues is %u\n", + dev->name, index, dev->num_rx_queues); goto done; } rxqueue = dev->_rx + index; -- cgit v1.2.2 From 88e7594a9775e54dcd421cb246406dce62e48bee Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 7 Jun 2010 03:27:39 +0000 Subject: phonet: use call_rcu for phonet device free Use call_rcu rather than synchronize_rcu. Signed-off-by: Jiri Pirko Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/phonet/pn_dev.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index c33da6576942..b18e48fae975 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -162,6 +162,14 @@ int phonet_address_add(struct net_device *dev, u8 addr) return err; } +static void phonet_device_rcu_free(struct rcu_head *head) +{ + struct phonet_device *pnd; + + pnd = container_of(head, struct phonet_device, rcu); + kfree(pnd); +} + int phonet_address_del(struct net_device *dev, u8 addr) { struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); @@ -179,10 +187,9 @@ int phonet_address_del(struct net_device *dev, u8 addr) pnd = NULL; mutex_unlock(&pndevs->lock); - if (pnd) { - synchronize_rcu(); - kfree(pnd); - } + if (pnd) + call_rcu(&pnd->rcu, phonet_device_rcu_free); + return err; } -- cgit v1.2.2 From aea34e7ae7a40bc72f9f11b5658160dfb4b90c48 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 7 Jun 2010 04:51:58 +0000 Subject: caif: fix a couple range checks The extra ! 
character means that these conditions are always false. Signed-off-by: Dan Carpenter Acked-by: Sjur Braendeland Signed-off-by: David S. Miller --- net/caif/cfrfml.c | 2 +- net/caif/cfveil.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c index cd2830fec935..fd27b172fb5d 100644 --- a/net/caif/cfrfml.c +++ b/net/caif/cfrfml.c @@ -83,7 +83,7 @@ static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt) if (!cfsrvl_ready(service, &ret)) return ret; - if (!cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) { + if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) { pr_err("CAIF: %s():Packet too large - size=%d\n", __func__, cfpkt_getlen(pkt)); return -EOVERFLOW; diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c index 0fd827f49491..e04f7d964e83 100644 --- a/net/caif/cfveil.c +++ b/net/caif/cfveil.c @@ -84,7 +84,7 @@ static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt) return ret; caif_assert(layr->dn != NULL); caif_assert(layr->dn->transmit != NULL); - if (!cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) { + if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) { pr_warning("CAIF: %s(): Packet too large - size=%d\n", __func__, cfpkt_getlen(pkt)); return -EOVERFLOW; -- cgit v1.2.2 From cfa087f689402438e3cb0f077e649d01c871b0e7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Jun 2010 22:34:35 +0000 Subject: icmp: RCU conversion in icmp_address_reply() - rcu_read_lock() already held by caller - use __in_dev_get_rcu() instead of in_dev_get() / in_dev_put() - remove goto out; Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index d65e9215bcd7..bdb6c71e72a6 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -925,6 +925,7 @@ static void icmp_address(struct sk_buff *skb) /* * RFC1812 (4.3.3.9). A router SHOULD listen all replies, and complain * loudly if an inconsistency is found. + * called with rcu_read_lock() */ static void icmp_address_reply(struct sk_buff *skb) @@ -935,12 +936,12 @@ static void icmp_address_reply(struct sk_buff *skb) struct in_ifaddr *ifa; if (skb->len < 4 || !(rt->rt_flags&RTCF_DIRECTSRC)) - goto out; + return; - in_dev = in_dev_get(dev); + in_dev = __in_dev_get_rcu(dev); if (!in_dev) - goto out; - rcu_read_lock(); + return; + if (in_dev->ifa_list && IN_DEV_LOG_MARTIANS(in_dev) && IN_DEV_FORWARD(in_dev)) { @@ -958,9 +959,6 @@ static void icmp_address_reply(struct sk_buff *skb) mp, dev->name, &rt->rt_src); } } - rcu_read_unlock(); - in_dev_put(in_dev); -out:; } static void icmp_discard(struct sk_buff *skb) -- cgit v1.2.2 From 96b52e61be1ad4d4f8de39b9deaf253da804ea3b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Jun 2010 21:05:02 +0000 Subject: ipv6: mcast: RCU conversions - ipv6_sock_mc_join() : doesnt touch dev refcount - ipv6_sock_mc_drop() : doesnt touch dev/idev refcounts - ip6_mc_find_dev() becomes ip6_mc_find_dev_rcu() (called from rcu), and doesnt touch dev/idev refcounts - ipv6_sock_mc_close() : doesnt touch dev/idev refcounts - ip6_mc_source() uses ip6_mc_find_dev_rcu() - ip6_mc_msfilter() uses ip6_mc_find_dev_rcu() - ip6_mc_msfget() uses ip6_mc_find_dev_rcu() - ipv6_dev_mc_dec(), ipv6_chk_mcast_addr(), igmp6_event_query(), igmp6_event_report(), mld_sendpack(), igmp6_send() dont touch idev refcount Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv6/mcast.c | 183 ++++++++++++++++++++++++++----------------------------- 1 file changed, 87 insertions(+), 96 deletions(-) (limited to 'net') diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 8752e8084806..3e36d1538b6e 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -152,18 +152,19 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) mc_lst->next = NULL; ipv6_addr_copy(&mc_lst->addr, addr); + rcu_read_lock(); if (ifindex == 0) { struct rt6_info *rt; rt = rt6_lookup(net, addr, NULL, 0, 0); if (rt) { dev = rt->rt6i_dev; - dev_hold(dev); dst_release(&rt->u.dst); } } else - dev = dev_get_by_index(net, ifindex); + dev = dev_get_by_index_rcu(net, ifindex); if (dev == NULL) { + rcu_read_unlock(); sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); return -ENODEV; } @@ -180,8 +181,8 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) err = ipv6_dev_mc_inc(dev, addr); if (err) { + rcu_read_unlock(); sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); - dev_put(dev); return err; } @@ -190,7 +191,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) np->ipv6_mc_list = mc_lst; write_unlock_bh(&ipv6_sk_mc_lock); - dev_put(dev); + rcu_read_unlock(); return 0; } @@ -213,18 +214,17 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) *lnk = mc_lst->next; write_unlock_bh(&ipv6_sk_mc_lock); - dev = dev_get_by_index(net, mc_lst->ifindex); + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, mc_lst->ifindex); if (dev != NULL) { - struct inet6_dev *idev = in6_dev_get(dev); + struct inet6_dev *idev = __in6_dev_get(dev); (void) ip6_mc_leave_src(sk, mc_lst, idev); - if (idev) { + if (idev) __ipv6_dev_mc_dec(idev, &mc_lst->addr); - in6_dev_put(idev); - } - dev_put(dev); } else (void) ip6_mc_leave_src(sk, mc_lst, NULL); + rcu_read_unlock(); sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); return 0; } @@ -234,43 +234,36 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) return -EADDRNOTAVAIL; } -static struct inet6_dev *ip6_mc_find_dev(struct net *net, - struct in6_addr *group, - int ifindex) +/* called with rcu_read_lock() */ +static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, + struct in6_addr *group, + int ifindex) { struct net_device *dev = NULL; struct inet6_dev *idev = NULL; if (ifindex == 0) { - struct rt6_info *rt; + struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, 0); - rt = rt6_lookup(net, group, NULL, 0, 0); if (rt) { dev = rt->rt6i_dev; dev_hold(dev); dst_release(&rt->u.dst); } } else - dev = dev_get_by_index(net, ifindex); + dev = dev_get_by_index_rcu(net, ifindex); if (!dev) - goto nodev; - idev = in6_dev_get(dev); + return NULL; + idev = __in6_dev_get(dev); if (!idev) - goto release; + return NULL;; read_lock_bh(&idev->lock); - if (idev->dead) - goto unlock_release; - + if (idev->dead) { + read_unlock_bh(&idev->lock); + return NULL; + } return idev; - -unlock_release: - read_unlock_bh(&idev->lock); - in6_dev_put(idev); -release: - dev_put(dev); -nodev: - return NULL; } void ipv6_sock_mc_close(struct sock *sk) @@ -286,19 +279,17 @@ void ipv6_sock_mc_close(struct sock *sk) np->ipv6_mc_list = mc_lst->next; write_unlock_bh(&ipv6_sk_mc_lock); - dev = dev_get_by_index(net, mc_lst->ifindex); + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, mc_lst->ifindex); if (dev) { - struct inet6_dev *idev = in6_dev_get(dev); + struct inet6_dev *idev = __in6_dev_get(dev); (void) ip6_mc_leave_src(sk, mc_lst, idev); - if (idev) { + if (idev) 
__ipv6_dev_mc_dec(idev, &mc_lst->addr); - in6_dev_put(idev); - } - dev_put(dev); } else (void) ip6_mc_leave_src(sk, mc_lst, NULL); - + rcu_read_unlock(); sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); write_lock_bh(&ipv6_sk_mc_lock); @@ -327,14 +318,17 @@ int ip6_mc_source(int add, int omode, struct sock *sk, if (!ipv6_addr_is_multicast(group)) return -EINVAL; - idev = ip6_mc_find_dev(net, group, pgsr->gsr_interface); - if (!idev) + rcu_read_lock(); + idev = ip6_mc_find_dev_rcu(net, group, pgsr->gsr_interface); + if (!idev) { + rcu_read_unlock(); return -ENODEV; + } dev = idev->dev; err = -EADDRNOTAVAIL; - read_lock_bh(&ipv6_sk_mc_lock); + read_lock(&ipv6_sk_mc_lock); for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface) continue; @@ -358,7 +352,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, pmc->sfmode = omode; } - write_lock_bh(&pmc->sflock); + write_lock(&pmc->sflock); pmclocked = 1; psl = pmc->sflist; @@ -433,11 +427,10 @@ int ip6_mc_source(int add, int omode, struct sock *sk, ip6_mc_add_src(idev, group, omode, 1, source, 1); done: if (pmclocked) - write_unlock_bh(&pmc->sflock); - read_unlock_bh(&ipv6_sk_mc_lock); + write_unlock(&pmc->sflock); + read_unlock(&ipv6_sk_mc_lock); read_unlock_bh(&idev->lock); - in6_dev_put(idev); - dev_put(dev); + rcu_read_unlock(); if (leavegroup) return ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group); return err; @@ -463,14 +456,17 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) gsf->gf_fmode != MCAST_EXCLUDE) return -EINVAL; - idev = ip6_mc_find_dev(net, group, gsf->gf_interface); + rcu_read_lock(); + idev = ip6_mc_find_dev_rcu(net, group, gsf->gf_interface); - if (!idev) + if (!idev) { + rcu_read_unlock(); return -ENODEV; + } dev = idev->dev; err = 0; - read_lock_bh(&ipv6_sk_mc_lock); + read_lock(&ipv6_sk_mc_lock); if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) { leavegroup = 1; @@ -512,7 +508,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) (void) ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0); } - write_lock_bh(&pmc->sflock); + write_lock(&pmc->sflock); psl = pmc->sflist; if (psl) { (void) ip6_mc_del_src(idev, group, pmc->sfmode, @@ -522,13 +518,12 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) (void) ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0); pmc->sflist = newpsl; pmc->sfmode = gsf->gf_fmode; - write_unlock_bh(&pmc->sflock); + write_unlock(&pmc->sflock); err = 0; done: - read_unlock_bh(&ipv6_sk_mc_lock); + read_unlock(&ipv6_sk_mc_lock); read_unlock_bh(&idev->lock); - in6_dev_put(idev); - dev_put(dev); + rcu_read_unlock(); if (leavegroup) err = ipv6_sock_mc_drop(sk, gsf->gf_interface, group); return err; @@ -551,11 +546,13 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, if (!ipv6_addr_is_multicast(group)) return -EINVAL; - idev = ip6_mc_find_dev(net, group, gsf->gf_interface); + rcu_read_lock(); + idev = ip6_mc_find_dev_rcu(net, group, gsf->gf_interface); - if (!idev) + if (!idev) { + rcu_read_unlock(); return -ENODEV; - + } dev = idev->dev; err = -EADDRNOTAVAIL; @@ -577,8 +574,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, psl = pmc->sflist; count = psl ? psl->sl_count : 0; read_unlock_bh(&idev->lock); - in6_dev_put(idev); - dev_put(dev); + rcu_read_unlock(); copycount = count < gsf->gf_numsrc ? 
count : gsf->gf_numsrc; gsf->gf_numsrc = count; @@ -604,8 +600,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, return 0; done: read_unlock_bh(&idev->lock); - in6_dev_put(idev); - dev_put(dev); + rcu_read_unlock(); return err; } @@ -822,6 +817,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr) struct ifmcaddr6 *mc; struct inet6_dev *idev; + /* we need to take a reference on idev */ idev = in6_dev_get(dev); if (idev == NULL) @@ -860,7 +856,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr) setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc); ipv6_addr_copy(&mc->mca_addr, addr); - mc->idev = idev; + mc->idev = idev; /* (reference taken) */ mc->mca_users = 1; /* mca_stamp should be updated upon changes */ mc->mca_cstamp = mc->mca_tstamp = jiffies; @@ -915,16 +911,18 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr) int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr) { - struct inet6_dev *idev = in6_dev_get(dev); + struct inet6_dev *idev; int err; - if (!idev) - return -ENODEV; - - err = __ipv6_dev_mc_dec(idev, addr); + rcu_read_lock(); - in6_dev_put(idev); + idev = __in6_dev_get(dev); + if (!idev) + err = -ENODEV; + else + err = __ipv6_dev_mc_dec(idev, addr); + rcu_read_unlock(); return err; } @@ -965,7 +963,8 @@ int ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, struct ifmcaddr6 *mc; int rv = 0; - idev = in6_dev_get(dev); + rcu_read_lock(); + idev = __in6_dev_get(dev); if (idev) { read_lock_bh(&idev->lock); for (mc = idev->mc_list; mc; mc=mc->next) { @@ -992,8 +991,8 @@ int ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, rv = 1; /* don't filter unspecified source */ } read_unlock_bh(&idev->lock); - in6_dev_put(idev); } + rcu_read_unlock(); return rv; } @@ -1104,6 +1103,7 @@ static int mld_marksources(struct ifmcaddr6 *pmc, int nsrcs, return 1; } +/* called with rcu_read_lock() */ int igmp6_event_query(struct sk_buff *skb) { struct mld2_query *mlh2 = NULL; @@ -1127,7 +1127,7 @@ int igmp6_event_query(struct sk_buff *skb) if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) return -EINVAL; - idev = in6_dev_get(skb->dev); + idev = __in6_dev_get(skb->dev); if (idev == NULL) return 0; @@ -1137,10 +1137,8 @@ int igmp6_event_query(struct sk_buff *skb) group_type = ipv6_addr_type(group); if (group_type != IPV6_ADDR_ANY && - !(group_type&IPV6_ADDR_MULTICAST)) { - in6_dev_put(idev); + !(group_type&IPV6_ADDR_MULTICAST)) return -EINVAL; - } if (len == 24) { int switchback; @@ -1161,10 +1159,9 @@ int igmp6_event_query(struct sk_buff *skb) } else if (len >= 28) { int srcs_offset = sizeof(struct mld2_query) - sizeof(struct icmp6hdr); - if (!pskb_may_pull(skb, srcs_offset)) { - in6_dev_put(idev); + if (!pskb_may_pull(skb, srcs_offset)) return -EINVAL; - } + mlh2 = (struct mld2_query *)skb_transport_header(skb); max_delay = (MLDV2_MRC(ntohs(mlh2->mld2q_mrc))*HZ)/1000; if (!max_delay) @@ -1173,28 +1170,23 @@ int igmp6_event_query(struct sk_buff *skb) if (mlh2->mld2q_qrv) idev->mc_qrv = mlh2->mld2q_qrv; if (group_type == IPV6_ADDR_ANY) { /* general query */ - if (mlh2->mld2q_nsrcs) { - in6_dev_put(idev); + if (mlh2->mld2q_nsrcs) return -EINVAL; /* no sources allowed */ - } + mld_gq_start_timer(idev); - in6_dev_put(idev); return 0; } /* mark sources to include, if group & source-specific */ if (mlh2->mld2q_nsrcs != 0) { if (!pskb_may_pull(skb, srcs_offset + - ntohs(mlh2->mld2q_nsrcs) * sizeof(struct in6_addr))) 
{ - in6_dev_put(idev); + ntohs(mlh2->mld2q_nsrcs) * sizeof(struct in6_addr))) return -EINVAL; - } + mlh2 = (struct mld2_query *)skb_transport_header(skb); mark = 1; } - } else { - in6_dev_put(idev); + } else return -EINVAL; - } read_lock_bh(&idev->lock); if (group_type == IPV6_ADDR_ANY) { @@ -1227,12 +1219,11 @@ int igmp6_event_query(struct sk_buff *skb) } } read_unlock_bh(&idev->lock); - in6_dev_put(idev); return 0; } - +/* called with rcu_read_lock() */ int igmp6_event_report(struct sk_buff *skb) { struct ifmcaddr6 *ma; @@ -1260,7 +1251,7 @@ int igmp6_event_report(struct sk_buff *skb) !(addr_type&IPV6_ADDR_LINKLOCAL)) return -EINVAL; - idev = in6_dev_get(skb->dev); + idev = __in6_dev_get(skb->dev); if (idev == NULL) return -ENODEV; @@ -1280,7 +1271,6 @@ int igmp6_event_report(struct sk_buff *skb) } } read_unlock_bh(&idev->lock); - in6_dev_put(idev); return 0; } @@ -1396,12 +1386,14 @@ static void mld_sendpack(struct sk_buff *skb) struct mld2_report *pmr = (struct mld2_report *)skb_transport_header(skb); int payload_len, mldlen; - struct inet6_dev *idev = in6_dev_get(skb->dev); + struct inet6_dev *idev; struct net *net = dev_net(skb->dev); int err; struct flowi fl; struct dst_entry *dst; + rcu_read_lock(); + idev = __in6_dev_get(skb->dev); IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); payload_len = (skb->tail - skb->network_header) - sizeof(*pip6); @@ -1441,8 +1433,7 @@ out: } else IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS); - if (likely(idev != NULL)) - in6_dev_put(idev); + rcu_read_unlock(); return; err_out: @@ -1779,7 +1770,8 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) IPPROTO_ICMPV6, csum_partial(hdr, len, 0)); - idev = in6_dev_get(skb->dev); + rcu_read_lock(); + idev = __in6_dev_get(skb->dev); dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr); if (!dst) { @@ -1806,8 +1798,7 @@ out: } else IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); - if (likely(idev != NULL)) - in6_dev_put(idev); + rcu_read_unlock(); return; err_out: -- cgit v1.2.2 From 00d9d6a185de89edc0649ca4ead58f0283dfcbac Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Jun 2010 22:24:44 +0000 Subject: ipv6: fix ICMP6_MIB_OUTERRORS In commit 1f8438a85366 (icmp: Account for ICMP out errors), I did a typo on IPV6 side, using ICMP6_MIB_OUTMSGS instead of ICMP6_MIB_OUTERRORS Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index ce7992982557..03e62f94ff8e 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -483,7 +483,7 @@ route_done: np->tclass, NULL, &fl, (struct rt6_info*)dst, MSG_DONTWAIT, np->dontfrag); if (err) { - ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); + ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); goto out_put; } @@ -565,7 +565,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) np->dontfrag); if (err) { - ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); + ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); goto out_put; } -- cgit v1.2.2 From 597a264b1a9c7e36d1728f677c66c5c1f7e3b837 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 3 Jun 2010 09:30:11 +0000 Subject: net: deliver skbs on inactive slaves to exact matches Currently, the accelerated receive path for VLAN's will drop packets if the real device is an inactive slave and is not one of the special pkts tested for in skb_bond_should_drop(). 
This behavior is different then the non-accelerated path and for pkts over a bonded vlan. For example, vlanx -> bond0 -> ethx will be dropped in the vlan path and not delivered to any packet handlers at all. However, bond0 -> vlanx -> ethx and bond0 -> ethx will be delivered to handlers that match the exact dev, because the VLAN path checks the real_dev which is not a slave and netif_recv_skb() doesn't drop frames but only delivers them to exact matches. This patch adds a sk_buff flag which is used for tagging skbs that would previously been dropped and allows the skb to continue to skb_netif_recv(). Here we add logic to check for the deliver_no_wcard flag and if it is set only deliver to handlers that match exactly. This makes both paths above consistent and gives pkt handlers a way to identify skbs that come from inactive slaves. Without this patch in some configurations skbs will be delivered to handlers with exact matches and in others be dropped out right in the vlan path. I have tested the following 4 configurations in failover modes and load balancing modes. # bond0 -> ethx # vlanx -> bond0 -> ethx # bond0 -> vlanx -> ethx # bond0 -> ethx | vlanx -> -- Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- net/8021q/vlan_core.c | 4 ++-- net/core/dev.c | 17 ++++++++++++++--- 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index bd537fc10254..50f58f5f1c34 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -12,7 +12,7 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, return NET_RX_DROP; if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master))) - goto drop; + skb->deliver_no_wcard = 1; skb->skb_iif = skb->dev->ifindex; __vlan_hwaccel_put_tag(skb, vlan_tci); @@ -84,7 +84,7 @@ vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp, struct sk_buff *p; if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master))) - goto drop; + skb->deliver_no_wcard = 1; skb->skb_iif = skb->dev->ifindex; __vlan_hwaccel_put_tag(skb, vlan_tci); diff --git a/net/core/dev.c b/net/core/dev.c index 14a85682af38..2b3bf53bc687 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2810,13 +2810,24 @@ static int __netif_receive_skb(struct sk_buff *skb) if (!skb->skb_iif) skb->skb_iif = skb->dev->ifindex; + /* + * bonding note: skbs received on inactive slaves should only + * be delivered to pkt handlers that are exact matches. Also + * the deliver_no_wcard flag will be set. If packet handlers + * are sensitive to duplicate packets these skbs will need to + * be dropped at the handler. The vlan accel path may have + * already set the deliver_no_wcard flag. + */ null_or_orig = NULL; orig_dev = skb->dev; master = ACCESS_ONCE(orig_dev->master); - if (master) { - if (skb_bond_should_drop(skb, master)) + if (skb->deliver_no_wcard) + null_or_orig = orig_dev; + else if (master) { + if (skb_bond_should_drop(skb, master)) { + skb->deliver_no_wcard = 1; null_or_orig = orig_dev; /* deliver only exact match */ - else + } else skb->dev = master; } -- cgit v1.2.2 From 592fcb9dfafaa02dd0edc207bf5d3a0ee7a1f8df Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Jun 2010 16:21:07 +0000 Subject: ip: ip_ra_control() rcu fix commit 66018506e15b (ip: Router Alert RCU conversion) introduced RCU lookups to ip_call_ra_chain(). 
It missed proper deinit phase : When ip_ra_control() deletes an ip_ra_chain, it should make sure ip_call_ra_chain() users can not start to use socket during the rcu grace period. It should also delay the sock_put() after the grace period, or we risk a premature socket freeing and corruptions, as raw sockets are not rcu protected yet. This delay avoids using expensive atomic_inc_not_zero() in ip_call_ra_chain(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 08b9519a24f4..47fff528ff39 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -241,9 +241,13 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc) struct ip_ra_chain *ip_ra_chain; static DEFINE_SPINLOCK(ip_ra_lock); -static void ip_ra_free_rcu(struct rcu_head *head) + +static void ip_ra_destroy_rcu(struct rcu_head *head) { - kfree(container_of(head, struct ip_ra_chain, rcu)); + struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu); + + sock_put(ra->saved_sk); + kfree(ra); } int ip_ra_control(struct sock *sk, unsigned char on, @@ -264,13 +268,20 @@ int ip_ra_control(struct sock *sk, unsigned char on, kfree(new_ra); return -EADDRINUSE; } + /* dont let ip_call_ra_chain() use sk again */ + ra->sk = NULL; rcu_assign_pointer(*rap, ra->next); spin_unlock_bh(&ip_ra_lock); if (ra->destructor) ra->destructor(sk); - sock_put(sk); - call_rcu(&ra->rcu, ip_ra_free_rcu); + /* + * Delay sock_put(sk) and kfree(ra) after one rcu grace + * period. This guarantee ip_call_ra_chain() dont need + * to mess with socket refcounts. + */ + ra->saved_sk = sk; + call_rcu(&ra->rcu, ip_ra_destroy_rcu); return 0; } } -- cgit v1.2.2 From ae638c47dc040b8def16d05dc6acdd527628f231 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 8 Jun 2010 23:39:10 +0000 Subject: pkt_sched: gen_estimator: add a new lock gen_kill_estimator() / gen_new_estimator() is not always called with RTNL held. net/netfilter/xt_RATEEST.c is one user of these API that do not hold RTNL, so random corruptions can occur between "tc" and "iptables". Add a new fine grained lock instead of trying to use RTNL in netfilter. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/gen_estimator.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index cf8e70392fe0..785e5276a300 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -107,6 +107,7 @@ static DEFINE_RWLOCK(est_lock); /* Protects against soft lockup during large deletion */ static struct rb_root est_root = RB_ROOT; +static DEFINE_SPINLOCK(est_tree_lock); static void est_timer(unsigned long arg) { @@ -201,7 +202,6 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats * * Returns 0 on success or a negative error code. 
* - * NOTE: Called under rtnl_mutex */ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est, @@ -232,6 +232,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, est->last_packets = bstats->packets; est->avpps = rate_est->pps<<10; + spin_lock(&est_tree_lock); if (!elist[idx].timer.function) { INIT_LIST_HEAD(&elist[idx].list); setup_timer(&elist[idx].timer, est_timer, idx); @@ -242,6 +243,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, list_add_rcu(&est->list, &elist[idx].list); gen_add_node(est); + spin_unlock(&est_tree_lock); return 0; } @@ -261,13 +263,13 @@ static void __gen_kill_estimator(struct rcu_head *head) * * Removes the rate estimator specified by &bstats and &rate_est. * - * NOTE: Called under rtnl_mutex */ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est) { struct gen_estimator *e; + spin_lock(&est_tree_lock); while ((e = gen_find_node(bstats, rate_est))) { rb_erase(&e->node, &est_root); @@ -278,6 +280,7 @@ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, list_del_rcu(&e->list); call_rcu(&e->e_rcu, __gen_kill_estimator); } + spin_unlock(&est_tree_lock); } EXPORT_SYMBOL(gen_kill_estimator); @@ -312,8 +315,14 @@ EXPORT_SYMBOL(gen_replace_estimator); bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, const struct gnet_stats_rate_est *rate_est) { + bool res; + ASSERT_RTNL(); - return gen_find_node(bstats, rate_est) != NULL; + spin_lock(&est_tree_lock); + res = gen_find_node(bstats, rate_est) != NULL; + spin_unlock(&est_tree_lock); + + return res; } EXPORT_SYMBOL(gen_estimator_active); -- cgit v1.2.2 From 07a0f0f07a68014c92c752a5598102372bddf46e Mon Sep 17 00:00:00 2001 From: Daniel Turull Date: Thu, 10 Jun 2010 23:08:11 -0700 Subject: pktgen: Fix accuracy of inter-packet delay. This patch correct a bug in the delay of pktgen. It makes sure the inter-packet interval is accurate. Signed-off-by: Daniel Turull Signed-off-by: Robert Olsson Signed-off-by: David S. Miller --- net/core/pktgen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 2ad68da418df..1dacd7ba8dbb 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2170,7 +2170,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) end_time = ktime_now(); pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time)); - pkt_dev->next_tx = ktime_add_ns(end_time, pkt_dev->delay); + pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay); } static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev) -- cgit v1.2.2 From d8d1f30b95a635dbd610dcc5eb641aca8f4768cf Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Thu, 10 Jun 2010 23:31:35 -0700 Subject: net-next: remove useless union keyword remove useless union keyword in rtable, rt6_info and dn_route. Since there is only one member in a union, the union keyword isn't useful. Signed-off-by: Changli Gao Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/atm/clip.c | 2 +- net/bridge/br_device.c | 2 +- net/bridge/br_netfilter.c | 20 +- net/dccp/ipv4.c | 4 +- net/decnet/dn_route.c | 158 ++++++------ net/ethernet/eth.c | 5 +- net/ipv4/af_inet.c | 4 +- net/ipv4/arp.c | 12 +- net/ipv4/datagram.c | 2 +- net/ipv4/icmp.c | 18 +- net/ipv4/igmp.c | 10 +- net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/ip_forward.c | 10 +- net/ipv4/ip_gre.c | 14 +- net/ipv4/ip_input.c | 4 +- net/ipv4/ip_output.c | 60 ++--- net/ipv4/ipip.c | 8 +- net/ipv4/ipmr.c | 8 +- net/ipv4/netfilter.c | 8 +- net/ipv4/raw.c | 16 +- net/ipv4/route.c | 420 ++++++++++++++++---------------- net/ipv4/syncookies.c | 6 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/udp.c | 4 +- net/ipv4/xfrm4_policy.c | 2 +- net/ipv6/addrconf.c | 10 +- net/ipv6/anycast.c | 6 +- net/ipv6/fib6_rules.c | 10 +- net/ipv6/ip6_fib.c | 30 +-- net/ipv6/ip6_output.c | 38 +-- net/ipv6/ip6_tunnel.c | 8 +- net/ipv6/mcast.c | 4 +- net/ipv6/ndisc.c | 8 +- net/ipv6/raw.c | 12 +- net/ipv6/route.c | 300 +++++++++++------------ net/ipv6/sit.c | 8 +- net/l2tp/l2tp_ip.c | 6 +- net/netfilter/ipvs/ip_vs_xmit.c | 86 +++---- net/netfilter/nf_conntrack_h323_main.c | 12 +- net/netfilter/nf_conntrack_netbios_ns.c | 2 +- net/netfilter/xt_TCPMSS.c | 4 +- net/netfilter/xt_TEE.c | 4 +- net/rxrpc/ar-peer.c | 4 +- net/sctp/protocol.c | 4 +- 44 files changed, 674 insertions(+), 683 deletions(-) (limited to 'net') diff --git a/net/atm/clip.c b/net/atm/clip.c index 313aba11316b..95fdd1185067 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -522,7 +522,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip) error = ip_route_output_key(&init_net, &rt, &fl); if (error) return error; - neigh = __neigh_lookup(&clip_tbl, &ip, rt->u.dst.dev, 1); + neigh = __neigh_lookup(&clip_tbl, &ip, rt->dst.dev, 1); ip_rt_put(rt); if (!neigh) return -ENOMEM; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index eedf2c94820e..b898364beaf5 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -127,7 +127,7 @@ static int br_change_mtu(struct net_device *dev, int new_mtu) #ifdef CONFIG_BRIDGE_NETFILTER /* remember the MTU in the rtable for PMTU */ - br->fake_rtable.u.dst.metrics[RTAX_MTU - 1] = new_mtu; + br->fake_rtable.dst.metrics[RTAX_MTU - 1] = new_mtu; #endif return 0; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 44420992f72f..0685b2558ab5 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -117,12 +117,12 @@ void br_netfilter_rtable_init(struct net_bridge *br) { struct rtable *rt = &br->fake_rtable; - atomic_set(&rt->u.dst.__refcnt, 1); - rt->u.dst.dev = br->dev; - rt->u.dst.path = &rt->u.dst; - rt->u.dst.metrics[RTAX_MTU - 1] = 1500; - rt->u.dst.flags = DST_NOXFRM; - rt->u.dst.ops = &fake_dst_ops; + atomic_set(&rt->dst.__refcnt, 1); + rt->dst.dev = br->dev; + rt->dst.path = &rt->dst; + rt->dst.metrics[RTAX_MTU - 1] = 1500; + rt->dst.flags = DST_NOXFRM; + rt->dst.ops = &fake_dst_ops; } static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) @@ -244,8 +244,8 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) kfree_skb(skb); return 0; } - dst_hold(&rt->u.dst); - skb_dst_set(skb, &rt->u.dst); + dst_hold(&rt->dst); + skb_dst_set(skb, &rt->dst); skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); @@ -396,8 +396,8 @@ bridged_dnat: kfree_skb(skb); return 0; } - dst_hold(&rt->u.dst); - skb_dst_set(skb, &rt->u.dst); + dst_hold(&rt->dst); + skb_dst_set(skb, &rt->dst); } skb->dev = nf_bridge->physindev; diff --git 
a/net/dccp/ipv4.c b/net/dccp/ipv4.c index d9b11ef8694c..d4a166f0f391 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -105,7 +105,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) goto failure; /* OK, now commit destination to socket. */ - sk_setup_caps(sk, &rt->u.dst); + sk_setup_caps(sk, &rt->dst); dp->dccps_iss = secure_dccp_sequence_number(inet->inet_saddr, inet->inet_daddr, @@ -475,7 +475,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, return NULL; } - return &rt->u.dst; + return &rt->dst; } static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 812e6dff6067..6585ea6d1182 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -146,13 +146,13 @@ static __inline__ unsigned dn_hash(__le16 src, __le16 dst) static inline void dnrt_free(struct dn_route *rt) { - call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); + call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); } static inline void dnrt_drop(struct dn_route *rt) { - dst_release(&rt->u.dst); - call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); + dst_release(&rt->dst); + call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); } static void dn_dst_check_expire(unsigned long dummy) @@ -167,13 +167,13 @@ static void dn_dst_check_expire(unsigned long dummy) spin_lock(&dn_rt_hash_table[i].lock); while((rt=*rtp) != NULL) { - if (atomic_read(&rt->u.dst.__refcnt) || - (now - rt->u.dst.lastuse) < expire) { - rtp = &rt->u.dst.dn_next; + if (atomic_read(&rt->dst.__refcnt) || + (now - rt->dst.lastuse) < expire) { + rtp = &rt->dst.dn_next; continue; } - *rtp = rt->u.dst.dn_next; - rt->u.dst.dn_next = NULL; + *rtp = rt->dst.dn_next; + rt->dst.dn_next = NULL; dnrt_free(rt); } spin_unlock(&dn_rt_hash_table[i].lock); @@ -198,13 +198,13 @@ static int dn_dst_gc(struct dst_ops *ops) rtp = &dn_rt_hash_table[i].chain; while((rt=*rtp) != NULL) { - if (atomic_read(&rt->u.dst.__refcnt) || - (now - rt->u.dst.lastuse) < expire) { - rtp = &rt->u.dst.dn_next; + if (atomic_read(&rt->dst.__refcnt) || + (now - rt->dst.lastuse) < expire) { + rtp = &rt->dst.dn_next; continue; } - *rtp = rt->u.dst.dn_next; - rt->u.dst.dn_next = NULL; + *rtp = rt->dst.dn_next; + rt->dst.dn_next = NULL; dnrt_drop(rt); break; } @@ -287,25 +287,25 @@ static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route * while((rth = *rthp) != NULL) { if (compare_keys(&rth->fl, &rt->fl)) { /* Put it first */ - *rthp = rth->u.dst.dn_next; - rcu_assign_pointer(rth->u.dst.dn_next, + *rthp = rth->dst.dn_next; + rcu_assign_pointer(rth->dst.dn_next, dn_rt_hash_table[hash].chain); rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth); - dst_use(&rth->u.dst, now); + dst_use(&rth->dst, now); spin_unlock_bh(&dn_rt_hash_table[hash].lock); dnrt_drop(rt); *rp = rth; return 0; } - rthp = &rth->u.dst.dn_next; + rthp = &rth->dst.dn_next; } - rcu_assign_pointer(rt->u.dst.dn_next, dn_rt_hash_table[hash].chain); + rcu_assign_pointer(rt->dst.dn_next, dn_rt_hash_table[hash].chain); rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt); - dst_use(&rt->u.dst, now); + dst_use(&rt->dst, now); spin_unlock_bh(&dn_rt_hash_table[hash].lock); *rp = rt; return 0; @@ -323,8 +323,8 @@ static void dn_run_flush(unsigned long dummy) goto nothing_to_declare; for(; rt; rt=next) { - next = rt->u.dst.dn_next; - rt->u.dst.dn_next = NULL; + next = rt->dst.dn_next; + rt->dst.dn_next = NULL; dst_free((struct dst_entry *)rt); } @@ -743,7 +743,7 @@ static int dn_forward(struct sk_buff *skb) 
/* Ensure that we have enough space for headers */ rt = (struct dn_route *)skb_dst(skb); header_len = dn_db->use_long ? 21 : 6; - if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+header_len)) + if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+header_len)) goto drop; /* @@ -752,7 +752,7 @@ static int dn_forward(struct sk_buff *skb) if (++cb->hops > 30) goto drop; - skb->dev = rt->u.dst.dev; + skb->dev = rt->dst.dev; /* * If packet goes out same interface it came in on, then set @@ -792,7 +792,7 @@ static int dn_rt_bug(struct sk_buff *skb) static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) { struct dn_fib_info *fi = res->fi; - struct net_device *dev = rt->u.dst.dev; + struct net_device *dev = rt->dst.dev; struct neighbour *n; unsigned mss; @@ -800,25 +800,25 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) if (DN_FIB_RES_GW(*res) && DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) rt->rt_gateway = DN_FIB_RES_GW(*res); - memcpy(rt->u.dst.metrics, fi->fib_metrics, - sizeof(rt->u.dst.metrics)); + memcpy(rt->dst.metrics, fi->fib_metrics, + sizeof(rt->dst.metrics)); } rt->rt_type = res->type; - if (dev != NULL && rt->u.dst.neighbour == NULL) { + if (dev != NULL && rt->dst.neighbour == NULL) { n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev); if (IS_ERR(n)) return PTR_ERR(n); - rt->u.dst.neighbour = n; + rt->dst.neighbour = n; } - if (dst_metric(&rt->u.dst, RTAX_MTU) == 0 || - dst_metric(&rt->u.dst, RTAX_MTU) > rt->u.dst.dev->mtu) - rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu; - mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->u.dst)); - if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0 || - dst_metric(&rt->u.dst, RTAX_ADVMSS) > mss) - rt->u.dst.metrics[RTAX_ADVMSS-1] = mss; + if (dst_metric(&rt->dst, RTAX_MTU) == 0 || + dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu) + rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu; + mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst)); + if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0 || + dst_metric(&rt->dst, RTAX_ADVMSS) > mss) + rt->dst.metrics[RTAX_ADVMSS-1] = mss; return 0; } @@ -1096,8 +1096,8 @@ make_route: if (rt == NULL) goto e_nobufs; - atomic_set(&rt->u.dst.__refcnt, 1); - rt->u.dst.flags = DST_HOST; + atomic_set(&rt->dst.__refcnt, 1); + rt->dst.flags = DST_HOST; rt->fl.fld_src = oldflp->fld_src; rt->fl.fld_dst = oldflp->fld_dst; @@ -1113,17 +1113,17 @@ make_route: rt->rt_dst_map = fl.fld_dst; rt->rt_src_map = fl.fld_src; - rt->u.dst.dev = dev_out; + rt->dst.dev = dev_out; dev_hold(dev_out); - rt->u.dst.neighbour = neigh; + rt->dst.neighbour = neigh; neigh = NULL; - rt->u.dst.lastuse = jiffies; - rt->u.dst.output = dn_output; - rt->u.dst.input = dn_rt_bug; + rt->dst.lastuse = jiffies; + rt->dst.output = dn_output; + rt->dst.input = dn_rt_bug; rt->rt_flags = flags; if (flags & RTCF_LOCAL) - rt->u.dst.input = dn_nsp_rx; + rt->dst.input = dn_nsp_rx; err = dn_rt_set_next_hop(rt, &res); if (err) @@ -1152,7 +1152,7 @@ e_nobufs: err = -ENOBUFS; goto done; e_neighbour: - dst_free(&rt->u.dst); + dst_free(&rt->dst); goto e_nobufs; } @@ -1168,15 +1168,15 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *fl if (!(flags & MSG_TRYHARD)) { rcu_read_lock_bh(); for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt; - rt = rcu_dereference_bh(rt->u.dst.dn_next)) { + rt = rcu_dereference_bh(rt->dst.dn_next)) { if ((flp->fld_dst == rt->fl.fld_dst) && (flp->fld_src == rt->fl.fld_src) && (flp->mark == rt->fl.mark) && (rt->fl.iif == 0) && (rt->fl.oif == flp->oif)) { - 
dst_use(&rt->u.dst, jiffies); + dst_use(&rt->dst, jiffies); rcu_read_unlock_bh(); - *pprt = &rt->u.dst; + *pprt = &rt->dst; return 0; } } @@ -1375,29 +1375,29 @@ make_route: rt->fl.iif = in_dev->ifindex; rt->fl.mark = fl.mark; - rt->u.dst.flags = DST_HOST; - rt->u.dst.neighbour = neigh; - rt->u.dst.dev = out_dev; - rt->u.dst.lastuse = jiffies; - rt->u.dst.output = dn_rt_bug; + rt->dst.flags = DST_HOST; + rt->dst.neighbour = neigh; + rt->dst.dev = out_dev; + rt->dst.lastuse = jiffies; + rt->dst.output = dn_rt_bug; switch(res.type) { case RTN_UNICAST: - rt->u.dst.input = dn_forward; + rt->dst.input = dn_forward; break; case RTN_LOCAL: - rt->u.dst.output = dn_output; - rt->u.dst.input = dn_nsp_rx; - rt->u.dst.dev = in_dev; + rt->dst.output = dn_output; + rt->dst.input = dn_nsp_rx; + rt->dst.dev = in_dev; flags |= RTCF_LOCAL; break; default: case RTN_UNREACHABLE: case RTN_BLACKHOLE: - rt->u.dst.input = dst_discard; + rt->dst.input = dst_discard; } rt->rt_flags = flags; - if (rt->u.dst.dev) - dev_hold(rt->u.dst.dev); + if (rt->dst.dev) + dev_hold(rt->dst.dev); err = dn_rt_set_next_hop(rt, &res); if (err) @@ -1405,7 +1405,7 @@ make_route: hash = dn_hash(rt->fl.fld_src, rt->fl.fld_dst); dn_insert_route(rt, hash, &rt); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); done: if (neigh) @@ -1427,7 +1427,7 @@ e_nobufs: goto done; e_neighbour: - dst_free(&rt->u.dst); + dst_free(&rt->dst); goto done; } @@ -1442,13 +1442,13 @@ static int dn_route_input(struct sk_buff *skb) rcu_read_lock(); for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL; - rt = rcu_dereference(rt->u.dst.dn_next)) { + rt = rcu_dereference(rt->dst.dn_next)) { if ((rt->fl.fld_src == cb->src) && (rt->fl.fld_dst == cb->dst) && (rt->fl.oif == 0) && (rt->fl.mark == skb->mark) && (rt->fl.iif == cb->iif)) { - dst_use(&rt->u.dst, jiffies); + dst_use(&rt->dst, jiffies); rcu_read_unlock(); skb_dst_set(skb, (struct dst_entry *)rt); return 0; @@ -1487,8 +1487,8 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, r->rtm_src_len = 16; RTA_PUT(skb, RTA_SRC, 2, &rt->fl.fld_src); } - if (rt->u.dst.dev) - RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex); + if (rt->dst.dev) + RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->dst.dev->ifindex); /* * Note to self - change this if input routes reverse direction when * they deal only with inputs and not with replies like they do @@ -1497,11 +1497,11 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, RTA_PUT(skb, RTA_PREFSRC, 2, &rt->rt_local_src); if (rt->rt_daddr != rt->rt_gateway) RTA_PUT(skb, RTA_GATEWAY, 2, &rt->rt_gateway); - if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) + if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0) goto rtattr_failure; - expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0; - if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, expires, - rt->u.dst.error) < 0) + expires = rt->dst.expires ? 
rt->dst.expires - jiffies : 0; + if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, expires, + rt->dst.error) < 0) goto rtattr_failure; if (rt->fl.iif) RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif); @@ -1568,8 +1568,8 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void local_bh_enable(); memset(cb, 0, sizeof(struct dn_skb_cb)); rt = (struct dn_route *)skb_dst(skb); - if (!err && -rt->u.dst.error) - err = rt->u.dst.error; + if (!err && -rt->dst.error) + err = rt->dst.error; } else { int oif = 0; if (rta[RTA_OIF - 1]) @@ -1583,7 +1583,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void skb->dev = NULL; if (err) goto out_free; - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; @@ -1632,10 +1632,10 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock_bh(); for(rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0; rt; - rt = rcu_dereference_bh(rt->u.dst.dn_next), idx++) { + rt = rcu_dereference_bh(rt->dst.dn_next), idx++) { if (idx < s_idx) continue; - skb_dst_set(skb, dst_clone(&rt->u.dst)); + skb_dst_set(skb, dst_clone(&rt->dst)); if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1, NLM_F_MULTI) <= 0) { @@ -1678,7 +1678,7 @@ static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_rou { struct dn_rt_cache_iter_state *s = seq->private; - rt = rt->u.dst.dn_next; + rt = rt->dst.dn_next; while(!rt) { rcu_read_unlock_bh(); if (--s->bucket < 0) @@ -1719,12 +1719,12 @@ static int dn_rt_cache_seq_show(struct seq_file *seq, void *v) char buf1[DN_ASCBUF_LEN], buf2[DN_ASCBUF_LEN]; seq_printf(seq, "%-8s %-7s %-7s %04d %04d %04d\n", - rt->u.dst.dev ? rt->u.dst.dev->name : "*", + rt->dst.dev ? rt->dst.dev->name : "*", dn_addr2asc(le16_to_cpu(rt->rt_daddr), buf1), dn_addr2asc(le16_to_cpu(rt->rt_saddr), buf2), - atomic_read(&rt->u.dst.__refcnt), - rt->u.dst.__use, - (int) dst_metric(&rt->u.dst, RTAX_RTT)); + atomic_read(&rt->dst.__refcnt), + rt->dst.__use, + (int) dst_metric(&rt->dst, RTAX_RTT)); return 0; } diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 61ec0329316c..215c83986a9d 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -158,7 +158,6 @@ EXPORT_SYMBOL(eth_rebuild_header); __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) { struct ethhdr *eth; - unsigned char *rawp; skb->dev = dev; skb_reset_mac_header(skb); @@ -199,15 +198,13 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) if (ntohs(eth->h_proto) >= 1536) return eth->h_proto; - rawp = skb->data; - /* * This is a magic hack to spot IPX packets. Older Novell breaks * the protocol design and runs IPX over 802.3 without an 802.2 LLC * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This * won't work for fault tolerant netware but does for the rest. 
*/ - if (*(unsigned short *)rawp == 0xFFFF) + if (skb->len >= 2 && *(unsigned short *)(skb->data) == 0xFFFF) return htons(ETH_P_802_3); /* diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 551ce564b035..d99e7e020189 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1100,7 +1100,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) if (err) return err; - sk_setup_caps(sk, &rt->u.dst); + sk_setup_caps(sk, &rt->dst); new_saddr = rt->rt_src; @@ -1166,7 +1166,7 @@ int inet_sk_rebuild_header(struct sock *sk) err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0); } if (!err) - sk_setup_caps(sk, &rt->u.dst); + sk_setup_caps(sk, &rt->dst); else { /* Routing failed... */ sk->sk_route_caps = 0; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 917d2d66162e..cf78f41830ca 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -427,7 +427,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) if (ip_route_output_key(net, &rt, &fl) < 0) return 1; - if (rt->u.dst.dev != dev) { + if (rt->dst.dev != dev) { NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER); flag = 1; } @@ -532,7 +532,7 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, struct in_device *out_dev; int imi, omi = -1; - if (rt->u.dst.dev == dev) + if (rt->dst.dev == dev) return 0; if (!IN_DEV_PROXY_ARP(in_dev)) @@ -545,7 +545,7 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, /* place to check for proxy_arp for routes */ - out_dev = __in_dev_get_rcu(rt->u.dst.dev); + out_dev = __in_dev_get_rcu(rt->dst.dev); if (out_dev) omi = IN_DEV_MEDIUM_ID(out_dev); @@ -576,7 +576,7 @@ static inline int arp_fwd_pvlan(struct in_device *in_dev, __be32 sip, __be32 tip) { /* Private VLAN is only concerned about the same ethernet segment */ - if (rt->u.dst.dev != dev) + if (rt->dst.dev != dev) return 0; /* Don't reply on self probes (often done by windowz boxes)*/ @@ -1042,7 +1042,7 @@ static int arp_req_set(struct net *net, struct arpreq *r, struct rtable * rt; if ((err = ip_route_output_key(net, &rt, &fl)) != 0) return err; - dev = rt->u.dst.dev; + dev = rt->dst.dev; ip_rt_put(rt); if (!dev) return -EINVAL; @@ -1149,7 +1149,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r, struct rtable * rt; if ((err = ip_route_output_key(net, &rt, &fl)) != 0) return err; - dev = rt->u.dst.dev; + dev = rt->dst.dev; ip_rt_put(rt); if (!dev) return -EINVAL; diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index fb2465811b48..fe3daa7f07a9 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -69,7 +69,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sk->sk_state = TCP_ESTABLISHED; inet->inet_id = jiffies; - sk_dst_set(sk, &rt->u.dst); + sk_dst_set(sk, &rt->dst); return(0); } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index bdb6c71e72a6..7569b21a3a2d 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -271,7 +271,7 @@ int xrlim_allow(struct dst_entry *dst, int timeout) static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt, int type, int code) { - struct dst_entry *dst = &rt->u.dst; + struct dst_entry *dst = &rt->dst; int rc = 1; if (type > NR_ICMP_TYPES) @@ -327,7 +327,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, struct sock *sk; struct sk_buff *skb; - sk = icmp_sk(dev_net((*rt)->u.dst.dev)); + sk = icmp_sk(dev_net((*rt)->dst.dev)); if (ip_append_data(sk, icmp_glue_bits, icmp_param, icmp_param->data_len+icmp_param->head_len, icmp_param->head_len, @@ -359,7 +359,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, 
struct sk_buff *skb) { struct ipcm_cookie ipc; struct rtable *rt = skb_rtable(skb); - struct net *net = dev_net(rt->u.dst.dev); + struct net *net = dev_net(rt->dst.dev); struct sock *sk; struct inet_sock *inet; __be32 daddr; @@ -427,7 +427,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) if (!rt) goto out; - net = dev_net(rt->u.dst.dev); + net = dev_net(rt->dst.dev); /* * Find the original header. It is expected to be valid, of course. @@ -596,9 +596,9 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) /* Ugh! */ orefdst = skb_in->_skb_refdst; /* save old refdst */ err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src, - RT_TOS(tos), rt2->u.dst.dev); + RT_TOS(tos), rt2->dst.dev); - dst_release(&rt2->u.dst); + dst_release(&rt2->dst); rt2 = skb_rtable(skb_in); skb_in->_skb_refdst = orefdst; /* restore old refdst */ } @@ -610,7 +610,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) XFRM_LOOKUP_ICMP); switch (err) { case 0: - dst_release(&rt->u.dst); + dst_release(&rt->dst); rt = rt2; break; case -EPERM: @@ -629,7 +629,7 @@ route_done: /* RFC says return as much as we can without exceeding 576 bytes. */ - room = dst_mtu(&rt->u.dst); + room = dst_mtu(&rt->dst); if (room > 576) room = 576; room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen; @@ -972,7 +972,7 @@ int icmp_rcv(struct sk_buff *skb) { struct icmphdr *icmph; struct rtable *rt = skb_rtable(skb); - struct net *net = dev_net(rt->u.dst.dev); + struct net *net = dev_net(rt->dst.dev); if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { struct sec_path *sp = skb_sec_path(skb); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 3294f547c481..b5580d422994 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -312,7 +312,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) return NULL; } - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); skb->dev = dev; skb_reserve(skb, LL_RESERVED_SPACE(dev)); @@ -330,7 +330,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) pip->saddr = rt->rt_src; pip->protocol = IPPROTO_IGMP; pip->tot_len = 0; /* filled in later */ - ip_select_ident(pip, &rt->u.dst, NULL); + ip_select_ident(pip, &rt->dst, NULL); ((u8*)&pip[1])[0] = IPOPT_RA; ((u8*)&pip[1])[1] = 4; ((u8*)&pip[1])[2] = 0; @@ -660,7 +660,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, return -1; } - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); skb_reserve(skb, LL_RESERVED_SPACE(dev)); @@ -676,7 +676,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, iph->daddr = dst; iph->saddr = rt->rt_src; iph->protocol = IPPROTO_IGMP; - ip_select_ident(iph, &rt->u.dst, NULL); + ip_select_ident(iph, &rt->dst, NULL); ((u8*)&iph[1])[0] = IPOPT_RA; ((u8*)&iph[1])[1] = 4; ((u8*)&iph[1])[2] = 0; @@ -1425,7 +1425,7 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) } if (!dev && !ip_route_output_key(net, &rt, &fl)) { - dev = rt->u.dst.dev; + dev = rt->dst.dev; ip_rt_put(rt); } if (dev) { diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 70eb3507c406..57c9e4d7b805 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -383,7 +383,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, goto no_route; if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) goto route_err; - return &rt->u.dst; + return &rt->dst; route_err: ip_rt_put(rt); diff --git 
a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 56cdf68a074c..99461f09320f 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -87,16 +87,16 @@ int ip_forward(struct sk_buff *skb) if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) goto sr_failed; - if (unlikely(skb->len > dst_mtu(&rt->u.dst) && !skb_is_gso(skb) && + if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) && (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) { - IP_INC_STATS(dev_net(rt->u.dst.dev), IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(dst_mtu(&rt->u.dst))); + htonl(dst_mtu(&rt->dst))); goto drop; } /* We are about to mangle packet. Copy it! */ - if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len)) + if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+rt->dst.header_len)) goto drop; iph = ip_hdr(skb); @@ -113,7 +113,7 @@ int ip_forward(struct sk_buff *skb) skb->priority = rt_tos2priority(iph->tos); return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, - rt->u.dst.dev, ip_forward_finish); + rt->dst.dev, ip_forward_finish); sr_failed: /* diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 32618e11076d..749e54889e82 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -745,7 +745,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev goto tx_error; } } - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; if (tdev == dev) { ip_rt_put(rt); @@ -755,7 +755,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev df = tiph->frag_off; if (df) - mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen; + mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen; else mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; @@ -803,7 +803,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev tunnel->err_count = 0; } - max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->u.dst.header_len; + max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len; if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { @@ -830,7 +830,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* * Push down and install the IPIP header. 
@@ -853,7 +853,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; #endif else - iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT); + iph->ttl = dst_metric(&rt->dst, RTAX_HOPLIMIT); } ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; @@ -915,7 +915,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) .proto = IPPROTO_GRE }; struct rtable *rt; if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; ip_rt_put(rt); } @@ -1174,7 +1174,7 @@ static int ipgre_open(struct net_device *dev) struct rtable *rt; if (ip_route_output_key(dev_net(dev), &rt, &fl)) return -EADDRNOTAVAIL; - dev = rt->u.dst.dev; + dev = rt->dst.dev; ip_rt_put(rt); if (__in_dev_get_rtnl(dev) == NULL) return -EADDRNOTAVAIL; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 08a3b121f908..db47a5a00ed2 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -356,10 +356,10 @@ static int ip_rcv_finish(struct sk_buff *skb) rt = skb_rtable(skb); if (rt->rt_type == RTN_MULTICAST) { - IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCAST, + IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INMCAST, skb->len); } else if (rt->rt_type == RTN_BROADCAST) - IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INBCAST, + IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INBCAST, skb->len); return dst_input(skb); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 9a4a6c96cb0d..6cbeb2e108de 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -151,15 +151,15 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, iph->version = 4; iph->ihl = 5; iph->tos = inet->tos; - if (ip_dont_fragment(sk, &rt->u.dst)) + if (ip_dont_fragment(sk, &rt->dst)) iph->frag_off = htons(IP_DF); else iph->frag_off = 0; - iph->ttl = ip_select_ttl(inet, &rt->u.dst); + iph->ttl = ip_select_ttl(inet, &rt->dst); iph->daddr = rt->rt_dst; iph->saddr = rt->rt_src; iph->protocol = sk->sk_protocol; - ip_select_ident(iph, &rt->u.dst, sk); + ip_select_ident(iph, &rt->dst, sk); if (opt && opt->optlen) { iph->ihl += opt->optlen>>2; @@ -240,7 +240,7 @@ int ip_mc_output(struct sk_buff *skb) { struct sock *sk = skb->sk; struct rtable *rt = skb_rtable(skb); - struct net_device *dev = rt->u.dst.dev; + struct net_device *dev = rt->dst.dev; /* * If the indicated interface is up and running, send the packet. 
@@ -359,9 +359,9 @@ int ip_queue_xmit(struct sk_buff *skb) if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) goto no_route; } - sk_setup_caps(sk, &rt->u.dst); + sk_setup_caps(sk, &rt->dst); } - skb_dst_set_noref(skb, &rt->u.dst); + skb_dst_set_noref(skb, &rt->dst); packet_routed: if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) @@ -372,11 +372,11 @@ packet_routed: skb_reset_network_header(skb); iph = ip_hdr(skb); *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); - if (ip_dont_fragment(sk, &rt->u.dst) && !skb->local_df) + if (ip_dont_fragment(sk, &rt->dst) && !skb->local_df) iph->frag_off = htons(IP_DF); else iph->frag_off = 0; - iph->ttl = ip_select_ttl(inet, &rt->u.dst); + iph->ttl = ip_select_ttl(inet, &rt->dst); iph->protocol = sk->sk_protocol; iph->saddr = rt->rt_src; iph->daddr = rt->rt_dst; @@ -387,7 +387,7 @@ packet_routed: ip_options_build(skb, opt, inet->inet_daddr, rt, 0); } - ip_select_ident_more(iph, &rt->u.dst, sk, + ip_select_ident_more(iph, &rt->dst, sk, (skb_shinfo(skb)->gso_segs ?: 1) - 1); skb->priority = sk->sk_priority; @@ -452,7 +452,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) struct rtable *rt = skb_rtable(skb); int err = 0; - dev = rt->u.dst.dev; + dev = rt->dst.dev; /* * Point into the IP datagram header. @@ -473,7 +473,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) */ hlen = iph->ihl * 4; - mtu = dst_mtu(&rt->u.dst) - hlen; /* Size of data space */ + mtu = dst_mtu(&rt->dst) - hlen; /* Size of data space */ #ifdef CONFIG_BRIDGE_NETFILTER if (skb->nf_bridge) mtu -= nf_bridge_mtu_reduction(skb); @@ -586,7 +586,7 @@ slow_path: * we need to make room for the encapsulating header */ pad = nf_bridge_pad(skb); - ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad); + ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, pad); mtu -= pad; /* @@ -833,13 +833,13 @@ int ip_append_data(struct sock *sk, */ *rtp = NULL; inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? - rt->u.dst.dev->mtu : - dst_mtu(rt->u.dst.path); - inet->cork.dst = &rt->u.dst; + rt->dst.dev->mtu : + dst_mtu(rt->dst.path); + inet->cork.dst = &rt->dst; inet->cork.length = 0; sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_off = 0; - if ((exthdrlen = rt->u.dst.header_len) != 0) { + if ((exthdrlen = rt->dst.header_len) != 0) { length += exthdrlen; transhdrlen += exthdrlen; } @@ -852,7 +852,7 @@ int ip_append_data(struct sock *sk, exthdrlen = 0; mtu = inet->cork.fragsize; } - hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); + hh_len = LL_RESERVED_SPACE(rt->dst.dev); fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; @@ -869,14 +869,14 @@ int ip_append_data(struct sock *sk, */ if (transhdrlen && length + fragheaderlen <= mtu && - rt->u.dst.dev->features & NETIF_F_V4_CSUM && + rt->dst.dev->features & NETIF_F_V4_CSUM && !exthdrlen) csummode = CHECKSUM_PARTIAL; inet->cork.length += length; if (((length> mtu) || !skb_queue_empty(&sk->sk_write_queue)) && (sk->sk_protocol == IPPROTO_UDP) && - (rt->u.dst.dev->features & NETIF_F_UFO)) { + (rt->dst.dev->features & NETIF_F_UFO)) { err = ip_ufo_append_data(sk, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, mtu, flags); @@ -924,7 +924,7 @@ alloc_new_skb: fraglen = datalen + fragheaderlen; if ((flags & MSG_MORE) && - !(rt->u.dst.dev->features&NETIF_F_SG)) + !(rt->dst.dev->features&NETIF_F_SG)) alloclen = mtu; else alloclen = datalen + fragheaderlen; @@ -935,7 +935,7 @@ alloc_new_skb: * the last. 
*/ if (datalen == length + fraggap) - alloclen += rt->u.dst.trailer_len; + alloclen += rt->dst.trailer_len; if (transhdrlen) { skb = sock_alloc_send_skb(sk, @@ -1008,7 +1008,7 @@ alloc_new_skb: if (copy > length) copy = length; - if (!(rt->u.dst.dev->features&NETIF_F_SG)) { + if (!(rt->dst.dev->features&NETIF_F_SG)) { unsigned int off; off = skb->len; @@ -1103,10 +1103,10 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, if (inet->cork.flags & IPCORK_OPT) opt = inet->cork.opt; - if (!(rt->u.dst.dev->features&NETIF_F_SG)) + if (!(rt->dst.dev->features&NETIF_F_SG)) return -EOPNOTSUPP; - hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); + hh_len = LL_RESERVED_SPACE(rt->dst.dev); mtu = inet->cork.fragsize; fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); @@ -1122,7 +1122,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, inet->cork.length += size; if ((sk->sk_protocol == IPPROTO_UDP) && - (rt->u.dst.dev->features & NETIF_F_UFO)) { + (rt->dst.dev->features & NETIF_F_UFO)) { skb_shinfo(skb)->gso_size = mtu - fragheaderlen; skb_shinfo(skb)->gso_type = SKB_GSO_UDP; } @@ -1274,8 +1274,8 @@ int ip_push_pending_frames(struct sock *sk) * If local_df is set too, we still allow to fragment this frame * locally. */ if (inet->pmtudisc >= IP_PMTUDISC_DO || - (skb->len <= dst_mtu(&rt->u.dst) && - ip_dont_fragment(sk, &rt->u.dst))) + (skb->len <= dst_mtu(&rt->dst) && + ip_dont_fragment(sk, &rt->dst))) df = htons(IP_DF); if (inet->cork.flags & IPCORK_OPT) @@ -1284,7 +1284,7 @@ int ip_push_pending_frames(struct sock *sk) if (rt->rt_type == RTN_MULTICAST) ttl = inet->mc_ttl; else - ttl = ip_select_ttl(inet, &rt->u.dst); + ttl = ip_select_ttl(inet, &rt->dst); iph = (struct iphdr *)skb->data; iph->version = 4; @@ -1295,7 +1295,7 @@ int ip_push_pending_frames(struct sock *sk) } iph->tos = inet->tos; iph->frag_off = df; - ip_select_ident(iph, &rt->u.dst, sk); + ip_select_ident(iph, &rt->dst, sk); iph->ttl = ttl; iph->protocol = sk->sk_protocol; iph->saddr = rt->rt_src; @@ -1308,7 +1308,7 @@ int ip_push_pending_frames(struct sock *sk) * on dst refcount */ inet->cork.dst = NULL; - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); if (iph->protocol == IPPROTO_ICMP) icmp_out_count(net, ((struct icmphdr *) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 7fd636711037..ec036731a70b 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -435,7 +435,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_error_icmp; } } - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; if (tdev == dev) { ip_rt_put(rt); @@ -446,7 +446,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) df |= old_iph->frag_off & htons(IP_DF); if (df) { - mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); + mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); if (mtu < 68) { stats->collisions++; @@ -503,7 +503,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* * Push down and install the IPIP header. 
@@ -552,7 +552,7 @@ static void ipip_tunnel_bind_dev(struct net_device *dev) .proto = IPPROTO_IPIP }; struct rtable *rt; if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; ip_rt_put(rt); } dev->flags |= IFF_POINTOPOINT; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 856123fe32f9..8418afc357ee 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1551,9 +1551,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, goto out_free; } - dev = rt->u.dst.dev; + dev = rt->dst.dev; - if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) { + if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) { /* Do not fragment multicasts. Alas, IPv4 does not allow to send ICMP, so that packets will disappear to blackhole. @@ -1564,7 +1564,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, goto out_free; } - encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len; + encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len; if (skb_cow(skb, encap)) { ip_rt_put(rt); @@ -1575,7 +1575,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, vif->bytes_out += skb->len; skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); ip_decrease_ttl(ip_hdr(skb)); /* FIXME: forward and output firewalls used to be called here. diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 07de855e2175..cfbc79af21c3 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -43,7 +43,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) /* Drop old route. */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); } else { /* non-local src, find valid iif to satisfy * rp-filter when calling ip_route_input. 
*/ @@ -53,11 +53,11 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) orefdst = skb->_skb_refdst; if (ip_route_input(skb, iph->daddr, iph->saddr, - RT_TOS(iph->tos), rt->u.dst.dev) != 0) { - dst_release(&rt->u.dst); + RT_TOS(iph->tos), rt->dst.dev) != 0) { + dst_release(&rt->dst); return -1; } - dst_release(&rt->u.dst); + dst_release(&rt->dst); refdst_drop(orefdst); } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 66cc3befcd44..009a7b2aa1ef 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -325,24 +325,24 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, int err; struct rtable *rt = *rtp; - if (length > rt->u.dst.dev->mtu) { + if (length > rt->dst.dev->mtu) { ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, - rt->u.dst.dev->mtu); + rt->dst.dev->mtu); return -EMSGSIZE; } if (flags&MSG_PROBE) goto out; skb = sock_alloc_send_skb(sk, - length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15, + length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15, flags & MSG_DONTWAIT, &err); if (skb == NULL) goto error; - skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev)); + skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev)); skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); *rtp = NULL; skb_reset_network_header(skb); @@ -375,7 +375,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, iph->check = 0; iph->tot_len = htons(length); if (!iph->id) - ip_select_ident(iph, &rt->u.dst, NULL); + ip_select_ident(iph, &rt->dst, NULL); iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } @@ -384,7 +384,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, skb_transport_header(skb))->type); err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL, - rt->u.dst.dev, dst_output); + rt->dst.dev, dst_output); if (err > 0) err = net_xmit_errno(err); if (err) @@ -606,7 +606,7 @@ out: return len; do_confirm: - dst_confirm(&rt->u.dst); + dst_confirm(&rt->dst); if (!(msg->msg_flags & MSG_PROBE) || len) goto back_from_confirm; err = 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 883b5c7195ac..a291edbbc97f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -286,10 +286,10 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq) rcu_read_lock_bh(); r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); while (r) { - if (dev_net(r->u.dst.dev) == seq_file_net(seq) && + if (dev_net(r->dst.dev) == seq_file_net(seq) && r->rt_genid == st->genid) return r; - r = rcu_dereference_bh(r->u.dst.rt_next); + r = rcu_dereference_bh(r->dst.rt_next); } rcu_read_unlock_bh(); } @@ -301,7 +301,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq, { struct rt_cache_iter_state *st = seq->private; - r = r->u.dst.rt_next; + r = r->dst.rt_next; while (!r) { rcu_read_unlock_bh(); do { @@ -319,7 +319,7 @@ static struct rtable *rt_cache_get_next(struct seq_file *seq, { struct rt_cache_iter_state *st = seq->private; while ((r = __rt_cache_get_next(seq, r)) != NULL) { - if (dev_net(r->u.dst.dev) != seq_file_net(seq)) + if (dev_net(r->dst.dev) != seq_file_net(seq)) continue; if (r->rt_genid == st->genid) break; @@ -377,19 +377,19 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", - r->u.dst.dev ? r->u.dst.dev->name : "*", + r->dst.dev ? 
r->dst.dev->name : "*", (__force u32)r->rt_dst, (__force u32)r->rt_gateway, - r->rt_flags, atomic_read(&r->u.dst.__refcnt), - r->u.dst.__use, 0, (__force u32)r->rt_src, - (dst_metric(&r->u.dst, RTAX_ADVMSS) ? - (int)dst_metric(&r->u.dst, RTAX_ADVMSS) + 40 : 0), - dst_metric(&r->u.dst, RTAX_WINDOW), - (int)((dst_metric(&r->u.dst, RTAX_RTT) >> 3) + - dst_metric(&r->u.dst, RTAX_RTTVAR)), + r->rt_flags, atomic_read(&r->dst.__refcnt), + r->dst.__use, 0, (__force u32)r->rt_src, + (dst_metric(&r->dst, RTAX_ADVMSS) ? + (int)dst_metric(&r->dst, RTAX_ADVMSS) + 40 : 0), + dst_metric(&r->dst, RTAX_WINDOW), + (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + + dst_metric(&r->dst, RTAX_RTTVAR)), r->fl.fl4_tos, - r->u.dst.hh ? atomic_read(&r->u.dst.hh->hh_refcnt) : -1, - r->u.dst.hh ? (r->u.dst.hh->hh_output == + r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1, + r->dst.hh ? (r->dst.hh->hh_output == dev_queue_xmit) : 0, r->rt_spec_dst, &len); @@ -608,13 +608,13 @@ static inline int ip_rt_proc_init(void) static inline void rt_free(struct rtable *rt) { - call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); + call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); } static inline void rt_drop(struct rtable *rt) { ip_rt_put(rt); - call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); + call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); } static inline int rt_fast_clean(struct rtable *rth) @@ -622,13 +622,13 @@ static inline int rt_fast_clean(struct rtable *rth) /* Kill broadcast/multicast entries very aggresively, if they collide in hash table with more useful entries */ return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) && - rth->fl.iif && rth->u.dst.rt_next; + rth->fl.iif && rth->dst.rt_next; } static inline int rt_valuable(struct rtable *rth) { return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || - rth->u.dst.expires; + rth->dst.expires; } static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) @@ -636,15 +636,15 @@ static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long t unsigned long age; int ret = 0; - if (atomic_read(&rth->u.dst.__refcnt)) + if (atomic_read(&rth->dst.__refcnt)) goto out; ret = 1; - if (rth->u.dst.expires && - time_after_eq(jiffies, rth->u.dst.expires)) + if (rth->dst.expires && + time_after_eq(jiffies, rth->dst.expires)) goto out; - age = jiffies - rth->u.dst.lastuse; + age = jiffies - rth->dst.lastuse; ret = 0; if ((age <= tmo1 && !rt_fast_clean(rth)) || (age <= tmo2 && rt_valuable(rth))) @@ -660,7 +660,7 @@ out: return ret; */ static inline u32 rt_score(struct rtable *rt) { - u32 score = jiffies - rt->u.dst.lastuse; + u32 score = jiffies - rt->dst.lastuse; score = ~score & ~(3<<30); @@ -700,12 +700,12 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) { - return net_eq(dev_net(rt1->u.dst.dev), dev_net(rt2->u.dst.dev)); + return net_eq(dev_net(rt1->dst.dev), dev_net(rt2->dst.dev)); } static inline int rt_is_expired(struct rtable *rth) { - return rth->rt_genid != rt_genid(dev_net(rth->u.dst.dev)); + return rth->rt_genid != rt_genid(dev_net(rth->dst.dev)); } /* @@ -734,7 +734,7 @@ static void rt_do_flush(int process_context) rth = rt_hash_table[i].chain; /* defer releasing the head of the list after spin_unlock */ - for (tail = rth; tail; tail = tail->u.dst.rt_next) + for (tail = rth; tail; tail = tail->dst.rt_next) if (!rt_is_expired(tail)) break; if (rth != tail) @@ -743,9 +743,9 @@ static void rt_do_flush(int process_context) /* call rt_free on 
entries after the tail requiring flush */ prev = &rt_hash_table[i].chain; for (p = *prev; p; p = next) { - next = p->u.dst.rt_next; + next = p->dst.rt_next; if (!rt_is_expired(p)) { - prev = &p->u.dst.rt_next; + prev = &p->dst.rt_next; } else { *prev = next; rt_free(p); @@ -760,7 +760,7 @@ static void rt_do_flush(int process_context) spin_unlock_bh(rt_hash_lock_addr(i)); for (; rth != tail; rth = next) { - next = rth->u.dst.rt_next; + next = rth->dst.rt_next; rt_free(rth); } } @@ -791,7 +791,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth) while (aux != rth) { if (compare_hash_inputs(&aux->fl, &rth->fl)) return 0; - aux = aux->u.dst.rt_next; + aux = aux->dst.rt_next; } return ONE; } @@ -831,18 +831,18 @@ static void rt_check_expire(void) length = 0; spin_lock_bh(rt_hash_lock_addr(i)); while ((rth = *rthp) != NULL) { - prefetch(rth->u.dst.rt_next); + prefetch(rth->dst.rt_next); if (rt_is_expired(rth)) { - *rthp = rth->u.dst.rt_next; + *rthp = rth->dst.rt_next; rt_free(rth); continue; } - if (rth->u.dst.expires) { + if (rth->dst.expires) { /* Entry is expired even if it is in use */ - if (time_before_eq(jiffies, rth->u.dst.expires)) { + if (time_before_eq(jiffies, rth->dst.expires)) { nofree: tmo >>= 1; - rthp = &rth->u.dst.rt_next; + rthp = &rth->dst.rt_next; /* * We only count entries on * a chain with equal hash inputs once @@ -858,7 +858,7 @@ nofree: goto nofree; /* Cleanup aged off entries. */ - *rthp = rth->u.dst.rt_next; + *rthp = rth->dst.rt_next; rt_free(rth); } spin_unlock_bh(rt_hash_lock_addr(i)); @@ -999,10 +999,10 @@ static int rt_garbage_collect(struct dst_ops *ops) if (!rt_is_expired(rth) && !rt_may_expire(rth, tmo, expire)) { tmo >>= 1; - rthp = &rth->u.dst.rt_next; + rthp = &rth->dst.rt_next; continue; } - *rthp = rth->u.dst.rt_next; + *rthp = rth->dst.rt_next; rt_free(rth); goal--; } @@ -1068,7 +1068,7 @@ static int slow_chain_length(const struct rtable *head) while (rth) { length += has_noalias(head, rth); - rth = rth->u.dst.rt_next; + rth = rth->dst.rt_next; } return length >> FRACT_BITS; } @@ -1090,7 +1090,7 @@ restart: candp = NULL; now = jiffies; - if (!rt_caching(dev_net(rt->u.dst.dev))) { + if (!rt_caching(dev_net(rt->dst.dev))) { /* * If we're not caching, just tell the caller we * were successful and don't touch the route. The @@ -1108,7 +1108,7 @@ restart: */ if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { - int err = arp_bind_neighbour(&rt->u.dst); + int err = arp_bind_neighbour(&rt->dst); if (err) { if (net_ratelimit()) printk(KERN_WARNING @@ -1127,19 +1127,19 @@ restart: spin_lock_bh(rt_hash_lock_addr(hash)); while ((rth = *rthp) != NULL) { if (rt_is_expired(rth)) { - *rthp = rth->u.dst.rt_next; + *rthp = rth->dst.rt_next; rt_free(rth); continue; } if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) { /* Put it first */ - *rthp = rth->u.dst.rt_next; + *rthp = rth->dst.rt_next; /* * Since lookup is lockfree, the deletion * must be visible to another weakly ordered CPU before * the insertion at the start of the hash chain. 
*/ - rcu_assign_pointer(rth->u.dst.rt_next, + rcu_assign_pointer(rth->dst.rt_next, rt_hash_table[hash].chain); /* * Since lookup is lockfree, the update writes @@ -1147,18 +1147,18 @@ restart: */ rcu_assign_pointer(rt_hash_table[hash].chain, rth); - dst_use(&rth->u.dst, now); + dst_use(&rth->dst, now); spin_unlock_bh(rt_hash_lock_addr(hash)); rt_drop(rt); if (rp) *rp = rth; else - skb_dst_set(skb, &rth->u.dst); + skb_dst_set(skb, &rth->dst); return 0; } - if (!atomic_read(&rth->u.dst.__refcnt)) { + if (!atomic_read(&rth->dst.__refcnt)) { u32 score = rt_score(rth); if (score <= min_score) { @@ -1170,7 +1170,7 @@ restart: chain_length++; - rthp = &rth->u.dst.rt_next; + rthp = &rth->dst.rt_next; } if (cand) { @@ -1181,17 +1181,17 @@ restart: * only 2 entries per bucket. We will see. */ if (chain_length > ip_rt_gc_elasticity) { - *candp = cand->u.dst.rt_next; + *candp = cand->dst.rt_next; rt_free(cand); } } else { if (chain_length > rt_chain_length_max && slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) { - struct net *net = dev_net(rt->u.dst.dev); + struct net *net = dev_net(rt->dst.dev); int num = ++net->ipv4.current_rt_cache_rebuild_count; if (!rt_caching(net)) { printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n", - rt->u.dst.dev->name, num); + rt->dst.dev->name, num); } rt_emergency_hash_rebuild(net); spin_unlock_bh(rt_hash_lock_addr(hash)); @@ -1206,7 +1206,7 @@ restart: route or unicast forwarding path. */ if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { - int err = arp_bind_neighbour(&rt->u.dst); + int err = arp_bind_neighbour(&rt->dst); if (err) { spin_unlock_bh(rt_hash_lock_addr(hash)); @@ -1237,14 +1237,14 @@ restart: } } - rt->u.dst.rt_next = rt_hash_table[hash].chain; + rt->dst.rt_next = rt_hash_table[hash].chain; #if RT_CACHE_DEBUG >= 2 - if (rt->u.dst.rt_next) { + if (rt->dst.rt_next) { struct rtable *trt; printk(KERN_DEBUG "rt_cache @%02x: %pI4", hash, &rt->rt_dst); - for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) + for (trt = rt->dst.rt_next; trt; trt = trt->dst.rt_next) printk(" . %pI4", &trt->rt_dst); printk("\n"); } @@ -1262,7 +1262,7 @@ skip_hashing: if (rp) *rp = rt; else - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); return 0; } @@ -1334,11 +1334,11 @@ static void rt_del(unsigned hash, struct rtable *rt) ip_rt_put(rt); while ((aux = *rthp) != NULL) { if (aux == rt || rt_is_expired(aux)) { - *rthp = aux->u.dst.rt_next; + *rthp = aux->dst.rt_next; rt_free(aux); continue; } - rthp = &aux->u.dst.rt_next; + rthp = &aux->dst.rt_next; } spin_unlock_bh(rt_hash_lock_addr(hash)); } @@ -1392,19 +1392,19 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rth->fl.oif != ikeys[k] || rth->fl.iif != 0 || rt_is_expired(rth) || - !net_eq(dev_net(rth->u.dst.dev), net)) { - rthp = &rth->u.dst.rt_next; + !net_eq(dev_net(rth->dst.dev), net)) { + rthp = &rth->dst.rt_next; continue; } if (rth->rt_dst != daddr || rth->rt_src != saddr || - rth->u.dst.error || + rth->dst.error || rth->rt_gateway != old_gw || - rth->u.dst.dev != dev) + rth->dst.dev != dev) break; - dst_hold(&rth->u.dst); + dst_hold(&rth->dst); rt = dst_alloc(&ipv4_dst_ops); if (rt == NULL) { @@ -1414,20 +1414,20 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, /* Copy all the information. 
*/ *rt = *rth; - rt->u.dst.__use = 1; - atomic_set(&rt->u.dst.__refcnt, 1); - rt->u.dst.child = NULL; - if (rt->u.dst.dev) - dev_hold(rt->u.dst.dev); + rt->dst.__use = 1; + atomic_set(&rt->dst.__refcnt, 1); + rt->dst.child = NULL; + if (rt->dst.dev) + dev_hold(rt->dst.dev); if (rt->idev) in_dev_hold(rt->idev); - rt->u.dst.obsolete = -1; - rt->u.dst.lastuse = jiffies; - rt->u.dst.path = &rt->u.dst; - rt->u.dst.neighbour = NULL; - rt->u.dst.hh = NULL; + rt->dst.obsolete = -1; + rt->dst.lastuse = jiffies; + rt->dst.path = &rt->dst; + rt->dst.neighbour = NULL; + rt->dst.hh = NULL; #ifdef CONFIG_XFRM - rt->u.dst.xfrm = NULL; + rt->dst.xfrm = NULL; #endif rt->rt_genid = rt_genid(net); rt->rt_flags |= RTCF_REDIRECTED; @@ -1436,23 +1436,23 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rt->rt_gateway = new_gw; /* Redirect received -> path was valid */ - dst_confirm(&rth->u.dst); + dst_confirm(&rth->dst); if (rt->peer) atomic_inc(&rt->peer->refcnt); - if (arp_bind_neighbour(&rt->u.dst) || - !(rt->u.dst.neighbour->nud_state & + if (arp_bind_neighbour(&rt->dst) || + !(rt->dst.neighbour->nud_state & NUD_VALID)) { - if (rt->u.dst.neighbour) - neigh_event_send(rt->u.dst.neighbour, NULL); + if (rt->dst.neighbour) + neigh_event_send(rt->dst.neighbour, NULL); ip_rt_put(rth); rt_drop(rt); goto do_next; } - netevent.old = &rth->u.dst; - netevent.new = &rt->u.dst; + netevent.old = &rth->dst; + netevent.new = &rt->dst; call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); @@ -1488,8 +1488,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) ip_rt_put(rt); ret = NULL; } else if ((rt->rt_flags & RTCF_REDIRECTED) || - (rt->u.dst.expires && - time_after_eq(jiffies, rt->u.dst.expires))) { + (rt->dst.expires && + time_after_eq(jiffies, rt->dst.expires))) { unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, rt->fl.oif, rt_genid(dev_net(dst->dev))); @@ -1527,7 +1527,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) int log_martians; rcu_read_lock(); - in_dev = __in_dev_get_rcu(rt->u.dst.dev); + in_dev = __in_dev_get_rcu(rt->dst.dev); if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) { rcu_read_unlock(); return; @@ -1538,30 +1538,30 @@ void ip_rt_send_redirect(struct sk_buff *skb) /* No redirected packets during ip_rt_redirect_silence; * reset the algorithm. */ - if (time_after(jiffies, rt->u.dst.rate_last + ip_rt_redirect_silence)) - rt->u.dst.rate_tokens = 0; + if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence)) + rt->dst.rate_tokens = 0; /* Too many ignored redirects; do not send anything - * set u.dst.rate_last to the last seen redirected packet. + * set dst.rate_last to the last seen redirected packet. */ - if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) { - rt->u.dst.rate_last = jiffies; + if (rt->dst.rate_tokens >= ip_rt_redirect_number) { + rt->dst.rate_last = jiffies; return; } /* Check for load limit; set rate_last to the latest sent * redirect. 
*/ - if (rt->u.dst.rate_tokens == 0 || + if (rt->dst.rate_tokens == 0 || time_after(jiffies, - (rt->u.dst.rate_last + - (ip_rt_redirect_load << rt->u.dst.rate_tokens)))) { + (rt->dst.rate_last + + (ip_rt_redirect_load << rt->dst.rate_tokens)))) { icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); - rt->u.dst.rate_last = jiffies; - ++rt->u.dst.rate_tokens; + rt->dst.rate_last = jiffies; + ++rt->dst.rate_tokens; #ifdef CONFIG_IP_ROUTE_VERBOSE if (log_martians && - rt->u.dst.rate_tokens == ip_rt_redirect_number && + rt->dst.rate_tokens == ip_rt_redirect_number && net_ratelimit()) printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", &rt->rt_src, rt->rt_iif, @@ -1576,7 +1576,7 @@ static int ip_error(struct sk_buff *skb) unsigned long now; int code; - switch (rt->u.dst.error) { + switch (rt->dst.error) { case EINVAL: default: goto out; @@ -1585,7 +1585,7 @@ static int ip_error(struct sk_buff *skb) break; case ENETUNREACH: code = ICMP_NET_UNREACH; - IP_INC_STATS_BH(dev_net(rt->u.dst.dev), + IP_INC_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INNOROUTES); break; case EACCES: @@ -1594,12 +1594,12 @@ static int ip_error(struct sk_buff *skb) } now = jiffies; - rt->u.dst.rate_tokens += now - rt->u.dst.rate_last; - if (rt->u.dst.rate_tokens > ip_rt_error_burst) - rt->u.dst.rate_tokens = ip_rt_error_burst; - rt->u.dst.rate_last = now; - if (rt->u.dst.rate_tokens >= ip_rt_error_cost) { - rt->u.dst.rate_tokens -= ip_rt_error_cost; + rt->dst.rate_tokens += now - rt->dst.rate_last; + if (rt->dst.rate_tokens > ip_rt_error_burst) + rt->dst.rate_tokens = ip_rt_error_burst; + rt->dst.rate_last = now; + if (rt->dst.rate_tokens >= ip_rt_error_cost) { + rt->dst.rate_tokens -= ip_rt_error_cost; icmp_send(skb, ICMP_DEST_UNREACH, code, 0); } @@ -1644,7 +1644,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, rcu_read_lock(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; - rth = rcu_dereference(rth->u.dst.rt_next)) { + rth = rcu_dereference(rth->dst.rt_next)) { unsigned short mtu = new_mtu; if (rth->fl.fl4_dst != daddr || @@ -1653,8 +1653,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, rth->rt_src != iph->saddr || rth->fl.oif != ikeys[k] || rth->fl.iif != 0 || - dst_metric_locked(&rth->u.dst, RTAX_MTU) || - !net_eq(dev_net(rth->u.dst.dev), net) || + dst_metric_locked(&rth->dst, RTAX_MTU) || + !net_eq(dev_net(rth->dst.dev), net) || rt_is_expired(rth)) continue; @@ -1662,22 +1662,22 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, /* BSD 4.2 compatibility hack :-( */ if (mtu == 0 && - old_mtu >= dst_mtu(&rth->u.dst) && + old_mtu >= dst_mtu(&rth->dst) && old_mtu >= 68 + (iph->ihl << 2)) old_mtu -= iph->ihl << 2; mtu = guess_mtu(old_mtu); } - if (mtu <= dst_mtu(&rth->u.dst)) { - if (mtu < dst_mtu(&rth->u.dst)) { - dst_confirm(&rth->u.dst); + if (mtu <= dst_mtu(&rth->dst)) { + if (mtu < dst_mtu(&rth->dst)) { + dst_confirm(&rth->dst); if (mtu < ip_rt_min_pmtu) { mtu = ip_rt_min_pmtu; - rth->u.dst.metrics[RTAX_LOCK-1] |= + rth->dst.metrics[RTAX_LOCK-1] |= (1 << RTAX_MTU); } - rth->u.dst.metrics[RTAX_MTU-1] = mtu; - dst_set_expires(&rth->u.dst, + rth->dst.metrics[RTAX_MTU-1] = mtu; + dst_set_expires(&rth->dst, ip_rt_mtu_expires); } est_mtu = mtu; @@ -1750,7 +1750,7 @@ static void ipv4_link_failure(struct sk_buff *skb) rt = skb_rtable(skb); if (rt) - dst_set_expires(&rt->u.dst, 0); + dst_set_expires(&rt->dst, 0); } static int ip_rt_bug(struct sk_buff *skb) @@ -1778,11 +1778,11 @@ void ip_rt_get_source(u8 
*addr, struct rtable *rt) if (rt->fl.iif == 0) src = rt->rt_src; - else if (fib_lookup(dev_net(rt->u.dst.dev), &rt->fl, &res) == 0) { + else if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) { src = FIB_RES_PREFSRC(res); fib_res_put(&res); } else - src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway, + src = inet_select_addr(rt->dst.dev, rt->rt_gateway, RT_SCOPE_UNIVERSE); memcpy(addr, &src, 4); } @@ -1790,10 +1790,10 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) #ifdef CONFIG_NET_CLS_ROUTE static void set_class_tag(struct rtable *rt, u32 tag) { - if (!(rt->u.dst.tclassid & 0xFFFF)) - rt->u.dst.tclassid |= tag & 0xFFFF; - if (!(rt->u.dst.tclassid & 0xFFFF0000)) - rt->u.dst.tclassid |= tag & 0xFFFF0000; + if (!(rt->dst.tclassid & 0xFFFF)) + rt->dst.tclassid |= tag & 0xFFFF; + if (!(rt->dst.tclassid & 0xFFFF0000)) + rt->dst.tclassid |= tag & 0xFFFF0000; } #endif @@ -1805,30 +1805,30 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) rt->rt_gateway = FIB_RES_GW(*res); - memcpy(rt->u.dst.metrics, fi->fib_metrics, - sizeof(rt->u.dst.metrics)); + memcpy(rt->dst.metrics, fi->fib_metrics, + sizeof(rt->dst.metrics)); if (fi->fib_mtu == 0) { - rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu; - if (dst_metric_locked(&rt->u.dst, RTAX_MTU) && + rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu; + if (dst_metric_locked(&rt->dst, RTAX_MTU) && rt->rt_gateway != rt->rt_dst && - rt->u.dst.dev->mtu > 576) - rt->u.dst.metrics[RTAX_MTU-1] = 576; + rt->dst.dev->mtu > 576) + rt->dst.metrics[RTAX_MTU-1] = 576; } #ifdef CONFIG_NET_CLS_ROUTE - rt->u.dst.tclassid = FIB_RES_NH(*res).nh_tclassid; + rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid; #endif } else - rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu; - - if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) - rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; - if (dst_mtu(&rt->u.dst) > IP_MAX_MTU) - rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; - if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0) - rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40, + rt->dst.metrics[RTAX_MTU-1]= rt->dst.dev->mtu; + + if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0) + rt->dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; + if (dst_mtu(&rt->dst) > IP_MAX_MTU) + rt->dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; + if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0) + rt->dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->dst.dev->mtu - 40, ip_rt_min_advmss); - if (dst_metric(&rt->u.dst, RTAX_ADVMSS) > 65535 - 40) - rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40; + if (dst_metric(&rt->dst, RTAX_ADVMSS) > 65535 - 40) + rt->dst.metrics[RTAX_ADVMSS-1] = 65535 - 40; #ifdef CONFIG_NET_CLS_ROUTE #ifdef CONFIG_IP_MULTIPLE_TABLES @@ -1873,13 +1873,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (!rth) goto e_nobufs; - rth->u.dst.output = ip_rt_bug; - rth->u.dst.obsolete = -1; + rth->dst.output = ip_rt_bug; + rth->dst.obsolete = -1; - atomic_set(&rth->u.dst.__refcnt, 1); - rth->u.dst.flags= DST_HOST; + atomic_set(&rth->dst.__refcnt, 1); + rth->dst.flags= DST_HOST; if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) - rth->u.dst.flags |= DST_NOPOLICY; + rth->dst.flags |= DST_NOPOLICY; rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; @@ -1887,13 +1887,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->fl.fl4_src = saddr; rth->rt_src = saddr; #ifdef CONFIG_NET_CLS_ROUTE - 
rth->u.dst.tclassid = itag; + rth->dst.tclassid = itag; #endif rth->rt_iif = rth->fl.iif = dev->ifindex; - rth->u.dst.dev = init_net.loopback_dev; - dev_hold(rth->u.dst.dev); - rth->idev = in_dev_get(rth->u.dst.dev); + rth->dst.dev = init_net.loopback_dev; + dev_hold(rth->dst.dev); + rth->idev = in_dev_get(rth->dst.dev); rth->fl.oif = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; @@ -1901,13 +1901,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_flags = RTCF_MULTICAST; rth->rt_type = RTN_MULTICAST; if (our) { - rth->u.dst.input= ip_local_deliver; + rth->dst.input= ip_local_deliver; rth->rt_flags |= RTCF_LOCAL; } #ifdef CONFIG_IP_MROUTE if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev)) - rth->u.dst.input = ip_mr_input; + rth->dst.input = ip_mr_input; #endif RT_CACHE_STAT_INC(in_slow_mc); @@ -2016,12 +2016,12 @@ static int __mkroute_input(struct sk_buff *skb, goto cleanup; } - atomic_set(&rth->u.dst.__refcnt, 1); - rth->u.dst.flags= DST_HOST; + atomic_set(&rth->dst.__refcnt, 1); + rth->dst.flags= DST_HOST; if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) - rth->u.dst.flags |= DST_NOPOLICY; + rth->dst.flags |= DST_NOPOLICY; if (IN_DEV_CONF_GET(out_dev, NOXFRM)) - rth->u.dst.flags |= DST_NOXFRM; + rth->dst.flags |= DST_NOXFRM; rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; @@ -2031,16 +2031,16 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_gateway = daddr; rth->rt_iif = rth->fl.iif = in_dev->dev->ifindex; - rth->u.dst.dev = (out_dev)->dev; - dev_hold(rth->u.dst.dev); - rth->idev = in_dev_get(rth->u.dst.dev); + rth->dst.dev = (out_dev)->dev; + dev_hold(rth->dst.dev); + rth->idev = in_dev_get(rth->dst.dev); rth->fl.oif = 0; rth->rt_spec_dst= spec_dst; - rth->u.dst.obsolete = -1; - rth->u.dst.input = ip_forward; - rth->u.dst.output = ip_output; - rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev)); + rth->dst.obsolete = -1; + rth->dst.input = ip_forward; + rth->dst.output = ip_output; + rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); rt_set_nexthop(rth, res, itag); @@ -2074,7 +2074,7 @@ static int ip_mkroute_input(struct sk_buff *skb, /* put it into the cache */ hash = rt_hash(daddr, saddr, fl->iif, - rt_genid(dev_net(rth->u.dst.dev))); + rt_genid(dev_net(rth->dst.dev))); return rt_intern_hash(hash, rth, NULL, skb, fl->iif); } @@ -2197,14 +2197,14 @@ local_input: if (!rth) goto e_nobufs; - rth->u.dst.output= ip_rt_bug; - rth->u.dst.obsolete = -1; + rth->dst.output= ip_rt_bug; + rth->dst.obsolete = -1; rth->rt_genid = rt_genid(net); - atomic_set(&rth->u.dst.__refcnt, 1); - rth->u.dst.flags= DST_HOST; + atomic_set(&rth->dst.__refcnt, 1); + rth->dst.flags= DST_HOST; if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) - rth->u.dst.flags |= DST_NOPOLICY; + rth->dst.flags |= DST_NOPOLICY; rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; @@ -2212,20 +2212,20 @@ local_input: rth->fl.fl4_src = saddr; rth->rt_src = saddr; #ifdef CONFIG_NET_CLS_ROUTE - rth->u.dst.tclassid = itag; + rth->dst.tclassid = itag; #endif rth->rt_iif = rth->fl.iif = dev->ifindex; - rth->u.dst.dev = net->loopback_dev; - dev_hold(rth->u.dst.dev); - rth->idev = in_dev_get(rth->u.dst.dev); + rth->dst.dev = net->loopback_dev; + dev_hold(rth->dst.dev); + rth->idev = in_dev_get(rth->dst.dev); rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; - rth->u.dst.input= ip_local_deliver; + rth->dst.input= ip_local_deliver; rth->rt_flags = flags|RTCF_LOCAL; if (res.type == RTN_UNREACHABLE) { - rth->u.dst.input= ip_error; - rth->u.dst.error= -err; + 
rth->dst.input= ip_error; + rth->dst.error= -err; rth->rt_flags &= ~RTCF_LOCAL; } rth->rt_type = res.type; @@ -2291,21 +2291,21 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, hash = rt_hash(daddr, saddr, iif, rt_genid(net)); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; - rth = rcu_dereference(rth->u.dst.rt_next)) { + rth = rcu_dereference(rth->dst.rt_next)) { if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | (rth->fl.iif ^ iif) | rth->fl.oif | (rth->fl.fl4_tos ^ tos)) == 0 && rth->fl.mark == skb->mark && - net_eq(dev_net(rth->u.dst.dev), net) && + net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { if (noref) { - dst_use_noref(&rth->u.dst, jiffies); - skb_dst_set_noref(skb, &rth->u.dst); + dst_use_noref(&rth->dst, jiffies); + skb_dst_set_noref(skb, &rth->dst); } else { - dst_use(&rth->u.dst, jiffies); - skb_dst_set(skb, &rth->u.dst); + dst_use(&rth->dst, jiffies); + skb_dst_set(skb, &rth->dst); } RT_CACHE_STAT_INC(in_hit); rcu_read_unlock(); @@ -2412,12 +2412,12 @@ static int __mkroute_output(struct rtable **result, goto cleanup; } - atomic_set(&rth->u.dst.__refcnt, 1); - rth->u.dst.flags= DST_HOST; + atomic_set(&rth->dst.__refcnt, 1); + rth->dst.flags= DST_HOST; if (IN_DEV_CONF_GET(in_dev, NOXFRM)) - rth->u.dst.flags |= DST_NOXFRM; + rth->dst.flags |= DST_NOXFRM; if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) - rth->u.dst.flags |= DST_NOPOLICY; + rth->dst.flags |= DST_NOPOLICY; rth->fl.fl4_dst = oldflp->fl4_dst; rth->fl.fl4_tos = tos; @@ -2429,35 +2429,35 @@ static int __mkroute_output(struct rtable **result, rth->rt_iif = oldflp->oif ? : dev_out->ifindex; /* get references to the devices that are to be hold by the routing cache entry */ - rth->u.dst.dev = dev_out; + rth->dst.dev = dev_out; dev_hold(dev_out); rth->idev = in_dev_get(dev_out); rth->rt_gateway = fl->fl4_dst; rth->rt_spec_dst= fl->fl4_src; - rth->u.dst.output=ip_output; - rth->u.dst.obsolete = -1; + rth->dst.output=ip_output; + rth->dst.obsolete = -1; rth->rt_genid = rt_genid(dev_net(dev_out)); RT_CACHE_STAT_INC(out_slow_tot); if (flags & RTCF_LOCAL) { - rth->u.dst.input = ip_local_deliver; + rth->dst.input = ip_local_deliver; rth->rt_spec_dst = fl->fl4_dst; } if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { rth->rt_spec_dst = fl->fl4_src; if (flags & RTCF_LOCAL && !(dev_out->flags & IFF_LOOPBACK)) { - rth->u.dst.output = ip_mc_output; + rth->dst.output = ip_mc_output; RT_CACHE_STAT_INC(out_slow_mc); } #ifdef CONFIG_IP_MROUTE if (res->type == RTN_MULTICAST) { if (IN_DEV_MFORWARD(in_dev) && !ipv4_is_local_multicast(oldflp->fl4_dst)) { - rth->u.dst.input = ip_mr_input; - rth->u.dst.output = ip_mc_output; + rth->dst.input = ip_mr_input; + rth->dst.output = ip_mc_output; } } #endif @@ -2712,7 +2712,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, rcu_read_lock_bh(); for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; - rth = rcu_dereference_bh(rth->u.dst.rt_next)) { + rth = rcu_dereference_bh(rth->dst.rt_next)) { if (rth->fl.fl4_dst == flp->fl4_dst && rth->fl.fl4_src == flp->fl4_src && rth->fl.iif == 0 && @@ -2720,9 +2720,9 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, rth->fl.mark == flp->mark && !((rth->fl.fl4_tos ^ flp->fl4_tos) & (IPTOS_RT_MASK | RTO_ONLINK)) && - net_eq(dev_net(rth->u.dst.dev), net) && + net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { - dst_use(&rth->u.dst, jiffies); + dst_use(&rth->dst, jiffies); RT_CACHE_STAT_INC(out_hit); 
rcu_read_unlock_bh(); *rp = rth; @@ -2759,15 +2759,15 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi dst_alloc(&ipv4_dst_blackhole_ops); if (rt) { - struct dst_entry *new = &rt->u.dst; + struct dst_entry *new = &rt->dst; atomic_set(&new->__refcnt, 1); new->__use = 1; new->input = dst_discard; new->output = dst_discard; - memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); + memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32)); - new->dev = ort->u.dst.dev; + new->dev = ort->dst.dev; if (new->dev) dev_hold(new->dev); @@ -2791,7 +2791,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi dst_free(new); } - dst_release(&(*rp)->u.dst); + dst_release(&(*rp)->dst); *rp = rt; return (rt ? 0 : -ENOMEM); } @@ -2861,11 +2861,11 @@ static int rt_fill_info(struct net *net, r->rtm_src_len = 32; NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src); } - if (rt->u.dst.dev) - NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex); + if (rt->dst.dev) + NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); #ifdef CONFIG_NET_CLS_ROUTE - if (rt->u.dst.tclassid) - NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid); + if (rt->dst.tclassid) + NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); #endif if (rt->fl.iif) NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); @@ -2875,11 +2875,11 @@ static int rt_fill_info(struct net *net, if (rt->rt_dst != rt->rt_gateway) NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); - if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) + if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0) goto nla_put_failure; - error = rt->u.dst.error; - expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0; + error = rt->dst.error; + expires = rt->dst.expires ? rt->dst.expires - jiffies : 0; if (rt->peer) { id = atomic_read(&rt->peer->ip_id_count) & 0xffff; if (rt->peer->tcp_ts_stamp) { @@ -2911,7 +2911,7 @@ static int rt_fill_info(struct net *net, NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); } - if (rtnl_put_cacheinfo(skb, &rt->u.dst, id, ts, tsage, + if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, expires, error) < 0) goto nla_put_failure; @@ -2976,8 +2976,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void local_bh_enable(); rt = skb_rtable(skb); - if (err == 0 && rt->u.dst.error) - err = -rt->u.dst.error; + if (err == 0 && rt->dst.error) + err = -rt->dst.error; } else { struct flowi fl = { .nl_u = { @@ -2995,7 +2995,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void if (err) goto errout_free; - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; @@ -3031,12 +3031,12 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; rcu_read_lock_bh(); for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt; - rt = rcu_dereference_bh(rt->u.dst.rt_next), idx++) { - if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx) + rt = rcu_dereference_bh(rt->dst.rt_next), idx++) { + if (!net_eq(dev_net(rt->dst.dev), net) || idx < s_idx) continue; if (rt_is_expired(rt)) continue; - skb_dst_set_noref(skb, &rt->u.dst); + skb_dst_set_noref(skb, &rt->dst); if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1, NLM_F_MULTI) <= 0) { diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 5c48124332de..02bef6aa8b30 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -354,15 +354,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct 
sk_buff *skb, } /* Try to redo what tcp_v4_send_synack did. */ - req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW); + req->window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW); tcp_select_initial_window(tcp_full_space(sk), req->mss, &req->rcv_wnd, &req->window_clamp, ireq->wscale_ok, &rcv_wscale, - dst_metric(&rt->u.dst, RTAX_INITRWND)); + dst_metric(&rt->dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; - ret = get_cookie_sock(sk, skb, req, &rt->u.dst); + ret = get_cookie_sock(sk, skb, req, &rt->dst); out: return ret; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7f976af27bf0..7f9515c0379f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -237,7 +237,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* OK, now commit destination to socket. */ sk->sk_gso_type = SKB_GSO_TCPV4; - sk_setup_caps(sk, &rt->u.dst); + sk_setup_caps(sk, &rt->dst); if (!tp->write_seq) tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index eec4ff456e33..32e0bef60d0a 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -914,7 +914,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, !sock_flag(sk, SOCK_BROADCAST)) goto out; if (connected) - sk_dst_set(sk, dst_clone(&rt->u.dst)); + sk_dst_set(sk, dst_clone(&rt->dst)); } if (msg->msg_flags&MSG_CONFIRM) @@ -978,7 +978,7 @@ out: return err; do_confirm: - dst_confirm(&rt->u.dst); + dst_confirm(&rt->dst); if (!(msg->msg_flags&MSG_PROBE) || len) goto back_from_confirm; err = 0; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 1705476670ef..349327092c9e 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -37,7 +37,7 @@ static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, fl.fl4_src = saddr->a4; err = __ip_route_output_key(net, &rt, &fl); - dst = &rt->u.dst; + dst = &rt->dst; if (err) dst = ERR_PTR(err); return dst; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e1a698df5706..b97bb1f30808 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -557,7 +557,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) pr_warning("Freeing alive inet6 address %p\n", ifp); return; } - dst_release(&ifp->rt->u.dst); + dst_release(&ifp->rt->dst); call_rcu(&ifp->rcu, inet6_ifa_finish_destroy_rcu); } @@ -823,7 +823,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) rt->rt6i_flags |= RTF_EXPIRES; } } - dst_release(&rt->u.dst); + dst_release(&rt->dst); } out: @@ -1863,7 +1863,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) dev, expires, flags); } if (rt) - dst_release(&rt->u.dst); + dst_release(&rt->dst); } /* Try to figure out our local address for this prefix */ @@ -4093,11 +4093,11 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) if (ifp->idev->cnf.forwarding) addrconf_leave_anycast(ifp); addrconf_leave_solict(ifp->idev, &ifp->addr); - dst_hold(&ifp->rt->u.dst); + dst_hold(&ifp->rt->dst); if (ifp->state == INET6_IFADDR_STATE_DEAD && ip6_del_rt(ifp->rt)) - dst_free(&ifp->rt->u.dst); + dst_free(&ifp->rt->dst); break; } } diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index f058fbd808c8..0e5e943446f0 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -84,7 +84,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr) rt = rt6_lookup(net, addr, NULL, 0, 0); if (rt) { dev = rt->rt6i_dev; - dst_release(&rt->u.dst); + dst_release(&rt->dst); } else if 
(ishost) { err = -EADDRNOTAVAIL; goto error; @@ -244,7 +244,7 @@ static void aca_put(struct ifacaddr6 *ac) { if (atomic_dec_and_test(&ac->aca_refcnt)) { in6_dev_put(ac->aca_idev); - dst_release(&ac->aca_rt->u.dst); + dst_release(&ac->aca_rt->dst); kfree(ac); } } @@ -350,7 +350,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr) write_unlock_bh(&idev->lock); addrconf_leave_solict(idev, &aca->aca_addr); - dst_hold(&aca->aca_rt->u.dst); + dst_hold(&aca->aca_rt->dst); ip6_del_rt(aca->aca_rt); aca_put(aca); diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 8e44f8f9c188..b1108ede18e1 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -43,8 +43,8 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl, if (arg.result) return arg.result; - dst_hold(&net->ipv6.ip6_null_entry->u.dst); - return &net->ipv6.ip6_null_entry->u.dst; + dst_hold(&net->ipv6.ip6_null_entry->dst); + return &net->ipv6.ip6_null_entry->dst; } static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, @@ -86,7 +86,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, struct in6_addr saddr; if (ipv6_dev_get_saddr(net, - ip6_dst_idev(&rt->u.dst)->dev, + ip6_dst_idev(&rt->dst)->dev, &flp->fl6_dst, rt6_flags2srcprefs(flags), &saddr)) @@ -99,12 +99,12 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, goto out; } again: - dst_release(&rt->u.dst); + dst_release(&rt->dst); rt = NULL; goto out; discard_pkt: - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); out: arg->result = rt; return rt == NULL ? -EAGAIN : 0; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 92a122b7795d..b6a585909d35 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -165,7 +165,7 @@ static __inline__ void node_free(struct fib6_node * fn) static __inline__ void rt6_release(struct rt6_info *rt) { if (atomic_dec_and_test(&rt->rt6i_ref)) - dst_free(&rt->u.dst); + dst_free(&rt->dst); } static void fib6_link_table(struct net *net, struct fib6_table *tb) @@ -278,7 +278,7 @@ static int fib6_dump_node(struct fib6_walker_t *w) int res; struct rt6_info *rt; - for (rt = w->leaf; rt; rt = rt->u.dst.rt6_next) { + for (rt = w->leaf; rt; rt = rt->dst.rt6_next) { res = rt6_dump_route(rt, w->args); if (res < 0) { /* Frame is full, suspend walking */ @@ -619,7 +619,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, ins = &fn->leaf; - for (iter = fn->leaf; iter; iter=iter->u.dst.rt6_next) { + for (iter = fn->leaf; iter; iter=iter->dst.rt6_next) { /* * Search for duplicates */ @@ -647,7 +647,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, if (iter->rt6i_metric > rt->rt6i_metric) break; - ins = &iter->u.dst.rt6_next; + ins = &iter->dst.rt6_next; } /* Reset round-robin state, if necessary */ @@ -658,7 +658,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, * insert node */ - rt->u.dst.rt6_next = iter; + rt->dst.rt6_next = iter; *ins = rt; rt->rt6i_node = fn; atomic_inc(&rt->rt6i_ref); @@ -799,7 +799,7 @@ out: atomic_inc(&pn->leaf->rt6i_ref); } #endif - dst_free(&rt->u.dst); + dst_free(&rt->dst); } return err; @@ -810,7 +810,7 @@ out: st_failure: if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) fib6_repair_tree(info->nl_net, fn); - dst_free(&rt->u.dst); + dst_free(&rt->dst); return err; #endif } @@ -1108,7 +1108,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, RT6_TRACE("fib6_del_route\n"); /* Unlink it */ - *rtp = rt->u.dst.rt6_next; + *rtp = 
rt->dst.rt6_next; rt->rt6i_node = NULL; net->ipv6.rt6_stats->fib_rt_entries--; net->ipv6.rt6_stats->fib_discarded_routes++; @@ -1122,14 +1122,14 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, FOR_WALKERS(w) { if (w->state == FWS_C && w->leaf == rt) { RT6_TRACE("walker %p adjusted by delroute\n", w); - w->leaf = rt->u.dst.rt6_next; + w->leaf = rt->dst.rt6_next; if (w->leaf == NULL) w->state = FWS_U; } } read_unlock(&fib6_walker_lock); - rt->u.dst.rt6_next = NULL; + rt->dst.rt6_next = NULL; /* If it was last route, expunge its radix tree node */ if (fn->leaf == NULL) { @@ -1168,7 +1168,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) struct rt6_info **rtp; #if RT6_DEBUG >= 2 - if (rt->u.dst.obsolete>0) { + if (rt->dst.obsolete>0) { WARN_ON(fn != NULL); return -ENOENT; } @@ -1195,7 +1195,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) * Walk the leaf entries looking for ourself */ - for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.dst.rt6_next) { + for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->dst.rt6_next) { if (*rtp == rt) { fib6_del_route(fn, rtp, info); return 0; @@ -1334,7 +1334,7 @@ static int fib6_clean_node(struct fib6_walker_t *w) .nl_net = c->net, }; - for (rt = w->leaf; rt; rt = rt->u.dst.rt6_next) { + for (rt = w->leaf; rt; rt = rt->dst.rt6_next) { res = c->func(rt, c->arg); if (res < 0) { w->leaf = rt; @@ -1448,8 +1448,8 @@ static int fib6_age(struct rt6_info *rt, void *arg) } gc_args.more++; } else if (rt->rt6i_flags & RTF_CACHE) { - if (atomic_read(&rt->u.dst.__refcnt) == 0 && - time_after_eq(now, rt->u.dst.lastuse + gc_args.timeout)) { + if (atomic_read(&rt->dst.__refcnt) == 0 && + time_after_eq(now, rt->dst.lastuse + gc_args.timeout)) { RT6_TRACE("aging clone %p\n", rt); return -1; } else if ((rt->rt6i_flags & RTF_GATEWAY) && diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 89425af0684c..d40b330c0ee6 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -698,7 +698,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr)); - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); for (;;) { /* Prepare header of the next frame, @@ -726,7 +726,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) err = output(skb); if(!err) - IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), + IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGCREATES); if (err || !frag) @@ -740,9 +740,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) kfree(tmp_hdr); if (err == 0) { - IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), + IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGOKS); - dst_release(&rt->u.dst); + dst_release(&rt->dst); return 0; } @@ -752,9 +752,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) frag = skb; } - IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), + IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGFAILS); - dst_release(&rt->u.dst); + dst_release(&rt->dst); return err; } @@ -785,7 +785,7 @@ slow_path: * Allocate buffer. 
*/ - if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { + if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) { NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); @@ -798,7 +798,7 @@ slow_path: */ ip6_copy_metadata(frag, skb); - skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev)); + skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev)); skb_put(frag, len + hlen + sizeof(struct frag_hdr)); skb_reset_network_header(frag); fh = (struct frag_hdr *)(skb_network_header(frag) + hlen); @@ -1156,24 +1156,24 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, /* need source address above miyazawa*/ } - dst_hold(&rt->u.dst); - inet->cork.dst = &rt->u.dst; + dst_hold(&rt->dst); + inet->cork.dst = &rt->dst; inet->cork.fl = *fl; np->cork.hop_limit = hlimit; np->cork.tclass = tclass; mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? - rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path); + rt->dst.dev->mtu : dst_mtu(rt->dst.path); if (np->frag_size < mtu) { if (np->frag_size) mtu = np->frag_size; } inet->cork.fragsize = mtu; - if (dst_allfrag(rt->u.dst.path)) + if (dst_allfrag(rt->dst.path)) inet->cork.flags |= IPCORK_ALLFRAG; inet->cork.length = 0; sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_off = 0; - exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) - + exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len; length += exthdrlen; transhdrlen += exthdrlen; @@ -1186,7 +1186,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, mtu = inet->cork.fragsize; } - hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); + hh_len = LL_RESERVED_SPACE(rt->dst.dev); fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + (opt ? opt->opt_nflen : 0); @@ -1224,7 +1224,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, } if (proto == IPPROTO_UDP && - (rt->u.dst.dev->features & NETIF_F_UFO)) { + (rt->dst.dev->features & NETIF_F_UFO)) { err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len, fragheaderlen, @@ -1270,7 +1270,7 @@ alloc_new_skb: fraglen = datalen + fragheaderlen; if ((flags & MSG_MORE) && - !(rt->u.dst.dev->features&NETIF_F_SG)) + !(rt->dst.dev->features&NETIF_F_SG)) alloclen = mtu; else alloclen = datalen + fragheaderlen; @@ -1281,7 +1281,7 @@ alloc_new_skb: * because we have no idea if we're the last one. */ if (datalen == length + fraggap) - alloclen += rt->u.dst.trailer_len; + alloclen += rt->dst.trailer_len; /* * We just reserve space for fragment header. 
@@ -1358,7 +1358,7 @@ alloc_new_skb: if (copy > length) copy = length; - if (!(rt->u.dst.dev->features&NETIF_F_SG)) { + if (!(rt->dst.dev->features&NETIF_F_SG)) { unsigned int off; off = skb->len; @@ -1503,7 +1503,7 @@ int ip6_push_pending_frames(struct sock *sk) skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb_dst_set(skb, dst_clone(&rt->u.dst)); + skb_dst_set(skb, dst_clone(&rt->dst)); IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); if (proto == IPPROTO_ICMPV6) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 8f39893d8081..0fd027f3f47e 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -552,7 +552,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) goto out; - skb2->dev = rt->u.dst.dev; + skb2->dev = rt->dst.dev; /* route "incoming" packet */ if (rt->rt_flags & RTCF_LOCAL) { @@ -562,7 +562,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, fl.fl4_src = eiph->saddr; fl.fl4_tos = eiph->tos; if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) || - rt->u.dst.dev->type != ARPHRD_TUNNEL) { + rt->dst.dev->type != ARPHRD_TUNNEL) { ip_rt_put(rt); goto out; } @@ -626,7 +626,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, icmpv6_send(skb2, rel_type, rel_code, rel_info); if (rt) - dst_release(&rt->u.dst); + dst_release(&rt->dst); kfree_skb(skb2); } @@ -1135,7 +1135,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) if (dev->mtu < IPV6_MIN_MTU) dev->mtu = IPV6_MIN_MTU; } - dst_release(&rt->u.dst); + dst_release(&rt->dst); } } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 3e36d1538b6e..d1444b95ad7e 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -158,7 +158,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) rt = rt6_lookup(net, addr, NULL, 0, 0); if (rt) { dev = rt->rt6i_dev; - dst_release(&rt->u.dst); + dst_release(&rt->dst); } } else dev = dev_get_by_index_rcu(net, ifindex); @@ -248,7 +248,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, if (rt) { dev = rt->rt6i_dev; dev_hold(dev); - dst_release(&rt->u.dst); + dst_release(&rt->dst); } } else dev = dev_get_by_index_rcu(net, ifindex); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 0abdc242ddb7..1fc46fc60efd 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1229,7 +1229,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) ND_PRINTK0(KERN_ERR "ICMPv6 RA: %s() got default router without neighbour.\n", __func__); - dst_release(&rt->u.dst); + dst_release(&rt->dst); in6_dev_put(in6_dev); return; } @@ -1244,7 +1244,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (ra_msg->icmph.icmp6_hop_limit) { in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; if (rt) - rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit; + rt->dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit; } skip_defrtr: @@ -1363,7 +1363,7 @@ skip_linkparms: in6_dev->cnf.mtu6 = mtu; if (rt) - rt->u.dst.metrics[RTAX_MTU-1] = mtu; + rt->dst.metrics[RTAX_MTU-1] = mtu; rt6_mtu_change(skb->dev, mtu); } @@ -1384,7 +1384,7 @@ skip_linkparms: } out: if (rt) - dst_release(&rt->u.dst); + dst_release(&rt->dst); else if (neigh) neigh_release(neigh); in6_dev_put(in6_dev); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 968b96490729..e677937a07fc 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -611,23 +611,23 @@ static int rawv6_send_hdrinc(struct sock 
*sk, void *from, int length, int err; struct rt6_info *rt = (struct rt6_info *)*dstp; - if (length > rt->u.dst.dev->mtu) { - ipv6_local_error(sk, EMSGSIZE, fl, rt->u.dst.dev->mtu); + if (length > rt->dst.dev->mtu) { + ipv6_local_error(sk, EMSGSIZE, fl, rt->dst.dev->mtu); return -EMSGSIZE; } if (flags&MSG_PROBE) goto out; skb = sock_alloc_send_skb(sk, - length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15, + length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15, flags & MSG_DONTWAIT, &err); if (skb == NULL) goto error; - skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev)); + skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev)); skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); *dstp = NULL; skb_put(skb, length); @@ -643,7 +643,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, - rt->u.dst.dev, dst_output); + rt->dst.dev, dst_output); if (err > 0) err = net_xmit_errno(err); if (err) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 252d76199c41..f7702850d45c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -126,16 +126,14 @@ static struct dst_ops ip6_dst_blackhole_ops = { }; static struct rt6_info ip6_null_entry_template = { - .u = { - .dst = { - .__refcnt = ATOMIC_INIT(1), - .__use = 1, - .obsolete = -1, - .error = -ENETUNREACH, - .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, - .input = ip6_pkt_discard, - .output = ip6_pkt_discard_out, - } + .dst = { + .__refcnt = ATOMIC_INIT(1), + .__use = 1, + .obsolete = -1, + .error = -ENETUNREACH, + .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, + .input = ip6_pkt_discard, + .output = ip6_pkt_discard_out, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), .rt6i_protocol = RTPROT_KERNEL, @@ -149,16 +147,14 @@ static int ip6_pkt_prohibit(struct sk_buff *skb); static int ip6_pkt_prohibit_out(struct sk_buff *skb); static struct rt6_info ip6_prohibit_entry_template = { - .u = { - .dst = { - .__refcnt = ATOMIC_INIT(1), - .__use = 1, - .obsolete = -1, - .error = -EACCES, - .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, - .input = ip6_pkt_prohibit, - .output = ip6_pkt_prohibit_out, - } + .dst = { + .__refcnt = ATOMIC_INIT(1), + .__use = 1, + .obsolete = -1, + .error = -EACCES, + .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, + .input = ip6_pkt_prohibit, + .output = ip6_pkt_prohibit_out, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), .rt6i_protocol = RTPROT_KERNEL, @@ -167,16 +163,14 @@ static struct rt6_info ip6_prohibit_entry_template = { }; static struct rt6_info ip6_blk_hole_entry_template = { - .u = { - .dst = { - .__refcnt = ATOMIC_INIT(1), - .__use = 1, - .obsolete = -1, - .error = -EINVAL, - .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, - .input = dst_discard, - .output = dst_discard, - } + .dst = { + .__refcnt = ATOMIC_INIT(1), + .__use = 1, + .obsolete = -1, + .error = -EINVAL, + .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, + .input = dst_discard, + .output = dst_discard, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), .rt6i_protocol = RTPROT_KERNEL, @@ -249,7 +243,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net, if (!oif && ipv6_addr_any(saddr)) goto out; - for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) { + for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) { struct net_device *dev = sprt->rt6i_dev; if (oif) { @@ -407,10 +401,10 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, match = NULL; for (rt = rr_head; rt && 
rt->rt6i_metric == metric; - rt = rt->u.dst.rt6_next) + rt = rt->dst.rt6_next) match = find_match(rt, oif, strict, &mpri, match); for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric; - rt = rt->u.dst.rt6_next) + rt = rt->dst.rt6_next) match = find_match(rt, oif, strict, &mpri, match); return match; @@ -432,7 +426,7 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) if (!match && (strict & RT6_LOOKUP_F_REACHABLE)) { - struct rt6_info *next = rt0->u.dst.rt6_next; + struct rt6_info *next = rt0->dst.rt6_next; /* no entries matched; do round-robin */ if (!next || next->rt6i_metric != rt0->rt6i_metric) @@ -517,7 +511,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, rt->rt6i_expires = jiffies + HZ * lifetime; rt->rt6i_flags |= RTF_EXPIRES; } - dst_release(&rt->u.dst); + dst_release(&rt->dst); } return 0; } @@ -555,7 +549,7 @@ restart: rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags); BACKTRACK(net, &fl->fl6_src); out: - dst_use(&rt->u.dst, jiffies); + dst_use(&rt->dst, jiffies); read_unlock_bh(&table->tb6_lock); return rt; @@ -643,7 +637,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); rt->rt6i_dst.plen = 128; rt->rt6i_flags |= RTF_CACHE; - rt->u.dst.flags |= DST_HOST; + rt->dst.flags |= DST_HOST; #ifdef CONFIG_IPV6_SUBTREES if (rt->rt6i_src.plen && saddr) { @@ -677,7 +671,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad if (net_ratelimit()) printk(KERN_WARNING "Neighbour table overflow.\n"); - dst_free(&rt->u.dst); + dst_free(&rt->dst); return NULL; } rt->rt6i_nexthop = neigh; @@ -694,7 +688,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); rt->rt6i_dst.plen = 128; rt->rt6i_flags |= RTF_CACHE; - rt->u.dst.flags |= DST_HOST; + rt->dst.flags |= DST_HOST; rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop); } return rt; @@ -726,7 +720,7 @@ restart: rt->rt6i_flags & RTF_CACHE) goto out; - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) @@ -739,10 +733,10 @@ restart: #endif } - dst_release(&rt->u.dst); + dst_release(&rt->dst); rt = nrt ? : net->ipv6.ip6_null_entry; - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); if (nrt) { err = ip6_ins_rt(nrt); if (!err) @@ -756,7 +750,7 @@ restart: * Race condition! In the gap, when table->tb6_lock was * released someone could insert this route. Relookup. 
*/ - dst_release(&rt->u.dst); + dst_release(&rt->dst); goto relookup; out: @@ -764,11 +758,11 @@ out: reachable = 0; goto restart_2; } - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); out2: - rt->u.dst.lastuse = jiffies; - rt->u.dst.__use++; + rt->dst.lastuse = jiffies; + rt->dst.__use++; return rt; } @@ -835,15 +829,15 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl struct dst_entry *new = NULL; if (rt) { - new = &rt->u.dst; + new = &rt->dst; atomic_set(&new->__refcnt, 1); new->__use = 1; new->input = dst_discard; new->output = dst_discard; - memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); - new->dev = ort->u.dst.dev; + memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32)); + new->dev = ort->dst.dev; if (new->dev) dev_hold(new->dev); rt->rt6i_idev = ort->rt6i_idev; @@ -912,7 +906,7 @@ static void ip6_link_failure(struct sk_buff *skb) rt = (struct rt6_info *) skb_dst(skb); if (rt) { if (rt->rt6i_flags&RTF_CACHE) { - dst_set_expires(&rt->u.dst, 0); + dst_set_expires(&rt->dst, 0); rt->rt6i_flags |= RTF_EXPIRES; } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) rt->rt6i_node->fn_sernum = -1; @@ -986,14 +980,14 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, rt->rt6i_dev = dev; rt->rt6i_idev = idev; rt->rt6i_nexthop = neigh; - atomic_set(&rt->u.dst.__refcnt, 1); - rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255; - rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); - rt->u.dst.output = ip6_output; + atomic_set(&rt->dst.__refcnt, 1); + rt->dst.metrics[RTAX_HOPLIMIT-1] = 255; + rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); + rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst)); + rt->dst.output = ip6_output; #if 0 /* there's no chance to use these for ndisc */ - rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST + rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST ? DST_HOST : 0; ipv6_addr_copy(&rt->rt6i_dst.addr, addr); @@ -1001,14 +995,14 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, #endif spin_lock_bh(&icmp6_dst_lock); - rt->u.dst.next = icmp6_dst_gc_list; - icmp6_dst_gc_list = &rt->u.dst; + rt->dst.next = icmp6_dst_gc_list; + icmp6_dst_gc_list = &rt->dst; spin_unlock_bh(&icmp6_dst_lock); fib6_force_start_gc(net); out: - return &rt->u.dst; + return &rt->dst; } int icmp6_dst_gc(void) @@ -1159,7 +1153,7 @@ int ip6_route_add(struct fib6_config *cfg) goto out; } - rt->u.dst.obsolete = -1; + rt->dst.obsolete = -1; rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ? 
jiffies + clock_t_to_jiffies(cfg->fc_expires) : 0; @@ -1171,16 +1165,16 @@ int ip6_route_add(struct fib6_config *cfg) addr_type = ipv6_addr_type(&cfg->fc_dst); if (addr_type & IPV6_ADDR_MULTICAST) - rt->u.dst.input = ip6_mc_input; + rt->dst.input = ip6_mc_input; else - rt->u.dst.input = ip6_forward; + rt->dst.input = ip6_forward; - rt->u.dst.output = ip6_output; + rt->dst.output = ip6_output; ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); rt->rt6i_dst.plen = cfg->fc_dst_len; if (rt->rt6i_dst.plen == 128) - rt->u.dst.flags = DST_HOST; + rt->dst.flags = DST_HOST; #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); @@ -1208,9 +1202,9 @@ int ip6_route_add(struct fib6_config *cfg) goto out; } } - rt->u.dst.output = ip6_pkt_discard_out; - rt->u.dst.input = ip6_pkt_discard; - rt->u.dst.error = -ENETUNREACH; + rt->dst.output = ip6_pkt_discard_out; + rt->dst.input = ip6_pkt_discard; + rt->dst.error = -ENETUNREACH; rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; goto install_route; } @@ -1244,7 +1238,7 @@ int ip6_route_add(struct fib6_config *cfg) goto out; if (dev) { if (dev != grt->rt6i_dev) { - dst_release(&grt->u.dst); + dst_release(&grt->dst); goto out; } } else { @@ -1255,7 +1249,7 @@ int ip6_route_add(struct fib6_config *cfg) } if (!(grt->rt6i_flags&RTF_GATEWAY)) err = 0; - dst_release(&grt->u.dst); + dst_release(&grt->dst); if (err) goto out; @@ -1294,18 +1288,18 @@ install_route: goto out; } - rt->u.dst.metrics[type - 1] = nla_get_u32(nla); + rt->dst.metrics[type - 1] = nla_get_u32(nla); } } } - if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) - rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; - if (!dst_mtu(&rt->u.dst)) - rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); - if (!dst_metric(&rt->u.dst, RTAX_ADVMSS)) - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); - rt->u.dst.dev = dev; + if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0) + rt->dst.metrics[RTAX_HOPLIMIT-1] = -1; + if (!dst_mtu(&rt->dst)) + rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); + if (!dst_metric(&rt->dst, RTAX_ADVMSS)) + rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst)); + rt->dst.dev = dev; rt->rt6i_idev = idev; rt->rt6i_table = table; @@ -1319,7 +1313,7 @@ out: if (idev) in6_dev_put(idev); if (rt) - dst_free(&rt->u.dst); + dst_free(&rt->dst); return err; } @@ -1336,7 +1330,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) write_lock_bh(&table->tb6_lock); err = fib6_del(rt, info); - dst_release(&rt->u.dst); + dst_release(&rt->dst); write_unlock_bh(&table->tb6_lock); @@ -1369,7 +1363,7 @@ static int ip6_route_del(struct fib6_config *cfg) &cfg->fc_src, cfg->fc_src_len); if (fn) { - for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { + for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { if (cfg->fc_ifindex && (rt->rt6i_dev == NULL || rt->rt6i_dev->ifindex != cfg->fc_ifindex)) @@ -1379,7 +1373,7 @@ static int ip6_route_del(struct fib6_config *cfg) continue; if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) continue; - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); return __ip6_del_rt(rt, &cfg->fc_nlinfo); @@ -1421,7 +1415,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, read_lock_bh(&table->tb6_lock); fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); restart: - for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { + for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { /* * Current route is on-link; redirect is always invalid. 
* @@ -1445,7 +1439,7 @@ restart: rt = net->ipv6.ip6_null_entry; BACKTRACK(net, &fl->fl6_src); out: - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); @@ -1513,10 +1507,10 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, * Look, redirects are sent only in response to data packets, * so that this nexthop apparently is reachable. --ANK */ - dst_confirm(&rt->u.dst); + dst_confirm(&rt->dst); /* Duplicate redirect: silently ignore. */ - if (neigh == rt->u.dst.neighbour) + if (neigh == rt->dst.neighbour) goto out; nrt = ip6_rt_copy(rt); @@ -1529,20 +1523,20 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, ipv6_addr_copy(&nrt->rt6i_dst.addr, dest); nrt->rt6i_dst.plen = 128; - nrt->u.dst.flags |= DST_HOST; + nrt->dst.flags |= DST_HOST; ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); nrt->rt6i_nexthop = neigh_clone(neigh); /* Reset pmtu, it may be better */ - nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); - nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev), - dst_mtu(&nrt->u.dst)); + nrt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); + nrt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev), + dst_mtu(&nrt->dst)); if (ip6_ins_rt(nrt)) goto out; - netevent.old = &rt->u.dst; - netevent.new = &nrt->u.dst; + netevent.old = &rt->dst; + netevent.new = &nrt->dst; call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); if (rt->rt6i_flags&RTF_CACHE) { @@ -1551,7 +1545,7 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, } out: - dst_release(&rt->u.dst); + dst_release(&rt->dst); } /* @@ -1570,7 +1564,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, if (rt == NULL) return; - if (pmtu >= dst_mtu(&rt->u.dst)) + if (pmtu >= dst_mtu(&rt->dst)) goto out; if (pmtu < IPV6_MIN_MTU) { @@ -1588,7 +1582,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, They are sent only in response to data packets, so that this nexthop apparently is reachable. --ANK */ - dst_confirm(&rt->u.dst); + dst_confirm(&rt->dst); /* Host route. If it is static, it would be better not to override it, but add new one, so that @@ -1596,10 +1590,10 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, would return automatically. */ if (rt->rt6i_flags & RTF_CACHE) { - rt->u.dst.metrics[RTAX_MTU-1] = pmtu; + rt->dst.metrics[RTAX_MTU-1] = pmtu; if (allfrag) - rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; - dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires); + rt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; + dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires); rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; goto out; } @@ -1615,9 +1609,9 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, nrt = rt6_alloc_clone(rt, daddr); if (nrt) { - nrt->u.dst.metrics[RTAX_MTU-1] = pmtu; + nrt->dst.metrics[RTAX_MTU-1] = pmtu; if (allfrag) - nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; + nrt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; /* According to RFC 1981, detecting PMTU increase shouldn't be * happened within 5 mins, the recommended timer is 10 mins. @@ -1625,13 +1619,13 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, * which is 10 mins. After 10 mins the decreased pmtu is expired * and detecting PMTU increase will be automatically happened. 
*/ - dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires); + dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires); nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; ip6_ins_rt(nrt); } out: - dst_release(&rt->u.dst); + dst_release(&rt->dst); } /* @@ -1644,18 +1638,18 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); if (rt) { - rt->u.dst.input = ort->u.dst.input; - rt->u.dst.output = ort->u.dst.output; - - memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); - rt->u.dst.error = ort->u.dst.error; - rt->u.dst.dev = ort->u.dst.dev; - if (rt->u.dst.dev) - dev_hold(rt->u.dst.dev); + rt->dst.input = ort->dst.input; + rt->dst.output = ort->dst.output; + + memcpy(rt->dst.metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32)); + rt->dst.error = ort->dst.error; + rt->dst.dev = ort->dst.dev; + if (rt->dst.dev) + dev_hold(rt->dst.dev); rt->rt6i_idev = ort->rt6i_idev; if (rt->rt6i_idev) in6_dev_hold(rt->rt6i_idev); - rt->u.dst.lastuse = jiffies; + rt->dst.lastuse = jiffies; rt->rt6i_expires = 0; ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway); @@ -1689,14 +1683,14 @@ static struct rt6_info *rt6_get_route_info(struct net *net, if (!fn) goto out; - for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { + for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { if (rt->rt6i_dev->ifindex != ifindex) continue; if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) continue; if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) continue; - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); break; } out: @@ -1744,14 +1738,14 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d return NULL; write_lock_bh(&table->tb6_lock); - for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) { + for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) { if (dev == rt->rt6i_dev && ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && ipv6_addr_equal(&rt->rt6i_gateway, addr)) break; } if (rt) - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); write_unlock_bh(&table->tb6_lock); return rt; } @@ -1790,9 +1784,9 @@ void rt6_purge_dflt_routers(struct net *net) restart: read_lock_bh(&table->tb6_lock); - for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) { + for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); ip6_del_rt(rt); goto restart; @@ -1930,15 +1924,15 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, dev_hold(net->loopback_dev); in6_dev_hold(idev); - rt->u.dst.flags = DST_HOST; - rt->u.dst.input = ip6_input; - rt->u.dst.output = ip6_output; + rt->dst.flags = DST_HOST; + rt->dst.input = ip6_input; + rt->dst.output = ip6_output; rt->rt6i_dev = net->loopback_dev; rt->rt6i_idev = idev; - rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); - rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; - rt->u.dst.obsolete = -1; + rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); + rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst)); + rt->dst.metrics[RTAX_HOPLIMIT-1] = -1; + rt->dst.obsolete = -1; rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; if (anycast) @@ -1947,7 +1941,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_flags |= RTF_LOCAL; neigh = ndisc_get_neigh(rt->rt6i_dev, 
&rt->rt6i_gateway); if (IS_ERR(neigh)) { - dst_free(&rt->u.dst); + dst_free(&rt->dst); /* We are casting this because that is the return * value type. But an errno encoded pointer is the @@ -1962,7 +1956,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_dst.plen = 128; rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); - atomic_set(&rt->u.dst.__refcnt, 1); + atomic_set(&rt->dst.__refcnt, 1); return rt; } @@ -2033,12 +2027,12 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) PMTU discouvery. */ if (rt->rt6i_dev == arg->dev && - !dst_metric_locked(&rt->u.dst, RTAX_MTU) && - (dst_mtu(&rt->u.dst) >= arg->mtu || - (dst_mtu(&rt->u.dst) < arg->mtu && - dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) { - rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu; - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu); + !dst_metric_locked(&rt->dst, RTAX_MTU) && + (dst_mtu(&rt->dst) >= arg->mtu || + (dst_mtu(&rt->dst) < arg->mtu && + dst_mtu(&rt->dst) == idev->cnf.mtu6))) { + rt->dst.metrics[RTAX_MTU-1] = arg->mtu; + rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu); } return 0; } @@ -2252,20 +2246,20 @@ static int rt6_fill_node(struct net *net, #endif NLA_PUT_U32(skb, RTA_IIF, iif); } else if (dst) { - struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst); + struct inet6_dev *idev = ip6_dst_idev(&rt->dst); struct in6_addr saddr_buf; if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, dst, 0, &saddr_buf) == 0) NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); } - if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) + if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0) goto nla_put_failure; - if (rt->u.dst.neighbour) - NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key); + if (rt->dst.neighbour) + NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key); - if (rt->u.dst.dev) + if (rt->dst.dev) NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); @@ -2277,8 +2271,8 @@ static int rt6_fill_node(struct net *net, else expires = INT_MAX; - if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, - expires, rt->u.dst.error) < 0) + if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, + expires, rt->dst.error) < 0) goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -2364,7 +2358,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, RTM_NEWROUTE, NETLINK_CB(in_skb).pid, @@ -2416,12 +2410,12 @@ static int ip6_route_dev_notify(struct notifier_block *this, struct net *net = dev_net(dev); if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { - net->ipv6.ip6_null_entry->u.dst.dev = dev; + net->ipv6.ip6_null_entry->dst.dev = dev; net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES - net->ipv6.ip6_prohibit_entry->u.dst.dev = dev; + net->ipv6.ip6_prohibit_entry->dst.dev = dev; net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); - net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev; + net->ipv6.ip6_blk_hole_entry->dst.dev = dev; net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); #endif } @@ -2464,8 +2458,8 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) seq_puts(m, "00000000000000000000000000000000"); } seq_printf(m, " %08x %08x %08x %08x %8s\n", - rt->rt6i_metric, 
atomic_read(&rt->u.dst.__refcnt), - rt->u.dst.__use, rt->rt6i_flags, + rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), + rt->dst.__use, rt->rt6i_flags, rt->rt6i_dev ? rt->rt6i_dev->name : ""); return 0; } @@ -2646,9 +2640,9 @@ static int __net_init ip6_route_net_init(struct net *net) GFP_KERNEL); if (!net->ipv6.ip6_null_entry) goto out_ip6_dst_ops; - net->ipv6.ip6_null_entry->u.dst.path = + net->ipv6.ip6_null_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_null_entry; - net->ipv6.ip6_null_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; + net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; #ifdef CONFIG_IPV6_MULTIPLE_TABLES net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, @@ -2656,18 +2650,18 @@ static int __net_init ip6_route_net_init(struct net *net) GFP_KERNEL); if (!net->ipv6.ip6_prohibit_entry) goto out_ip6_null_entry; - net->ipv6.ip6_prohibit_entry->u.dst.path = + net->ipv6.ip6_prohibit_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_prohibit_entry; - net->ipv6.ip6_prohibit_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; + net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, sizeof(*net->ipv6.ip6_blk_hole_entry), GFP_KERNEL); if (!net->ipv6.ip6_blk_hole_entry) goto out_ip6_prohibit_entry; - net->ipv6.ip6_blk_hole_entry->u.dst.path = + net->ipv6.ip6_blk_hole_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; - net->ipv6.ip6_blk_hole_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; + net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; #endif net->ipv6.sysctl.flush_delay = 0; @@ -2742,12 +2736,12 @@ int __init ip6_route_init(void) /* Registering of the loopback is done before this portion of code, * the loopback reference in rt6_info will not be taken, do it * manually for init_net */ - init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES - init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev; init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); - init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); #endif ret = fib6_init(); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 702c532ec21e..4699cd3c3118 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -712,7 +712,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, stats->tx_carrier_errors++; goto tx_error_icmp; } - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; if (tdev == dev) { ip_rt_put(rt); @@ -721,7 +721,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, } if (df) { - mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); + mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); if (mtu < 68) { stats->collisions++; @@ -780,7 +780,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); IPCB(skb)->flags = 0; skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* * Push down and install the IPIP header. 
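/*
 * Not part of the diff above: a condensed sketch of the structural change
 * behind every rt->u.dst -> rt->dst hunk in this series. struct rtable and
 * struct rt6_info used to wrap their dst_entry in a single-member union
 * named 'u' (visible in the ip6_null_entry_template initializers earlier in
 * this patch), and the patch drops that wrapper. Field lists are abbreviated,
 * rtable_mtu() is a hypothetical helper for illustration, and <net/dst.h> is
 * assumed for struct dst_entry and dst_mtu().
 *
 *	Before:
 *		struct rtable {
 *			union {
 *				struct dst_entry	dst;
 *			} u;
 *			...
 *		};
 */

/* After: the dst_entry is embedded directly, so accessors lose the ".u". */
struct rtable {
	struct dst_entry	dst;	/* was rt->u.dst, now rt->dst */
	/* ... flow key, rt_dst, rt_src, rt_gateway, ... (unchanged) */
};

/* Hence the repeated one-line conversions, e.g. dst_mtu(&rt->u.dst): */
static inline unsigned int rtable_mtu(const struct rtable *rt)
{
	return dst_mtu(&rt->dst);
}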
@@ -829,7 +829,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) .proto = IPPROTO_IPV6 }; struct rtable *rt; if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; ip_rt_put(rt); } dev->flags |= IFF_POINTOPOINT; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 0852512d392c..226a0ae3bcfd 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -348,7 +348,7 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len sk->sk_state = TCP_ESTABLISHED; inet->inet_id = jiffies; - sk_dst_set(sk, &rt->u.dst); + sk_dst_set(sk, &rt->dst); write_lock_bh(&l2tp_ip_lock); hlist_del_init(&sk->sk_bind_node); @@ -496,9 +496,9 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) goto no_route; } - sk_setup_caps(sk, &rt->u.dst); + sk_setup_caps(sk, &rt->dst); } - skb_dst_set(skb, dst_clone(&rt->u.dst)); + skb_dst_set(skb, dst_clone(&rt->dst)); /* Queue the packet to IP for output */ rc = ip_queue_xmit(skb); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 93c15a107b2c..02b078e11cf3 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -90,10 +90,10 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) &dest->addr.ip); return NULL; } - __ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst)); + __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst)); IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n", &dest->addr.ip, - atomic_read(&rt->u.dst.__refcnt), rtos); + atomic_read(&rt->dst.__refcnt), rtos); } spin_unlock(&dest->dst_lock); } else { @@ -148,10 +148,10 @@ __ip_vs_get_out_rt_v6(struct ip_vs_conn *cp) &dest->addr.in6); return NULL; } - __ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst)); + __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst)); IP_VS_DBG(10, "new dst %pI6, refcnt=%d\n", &dest->addr.in6, - atomic_read(&rt->u.dst.__refcnt)); + atomic_read(&rt->dst.__refcnt)); } spin_unlock(&dest->dst_lock); } else { @@ -198,7 +198,7 @@ do { \ (skb)->ipvs_property = 1; \ skb_forward_csum(skb); \ NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ - (rt)->u.dst.dev, dst_output); \ + (rt)->dst.dev, dst_output); \ } while (0) @@ -245,7 +245,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, } /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { ip_rt_put(rt); icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); @@ -265,7 +265,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -309,9 +309,9 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, } /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if (skb->len > mtu) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; @@ -323,13 +323,13 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, */ skb = skb_share_check(skb, GFP_ATOMIC); if (unlikely(skb == NULL)) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); return NF_STOLEN; } /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; 
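/*
 * Not part of the diff: a stripped-down sketch of the boilerplate the
 * surrounding ip_vs_*_xmit() hunks keep touching, namely the MTU check
 * against dst_mtu(&rt->dst) followed by swapping the skb's old route for
 * the freshly looked-up one. ipvs_prepare_xmit() is a hypothetical helper;
 * the calls it makes (dst_mtu, ip_rt_put, icmp_send, skb_dst_drop,
 * skb_dst_set) are the ones visible in the hunks themselves. IPv4 case
 * only, with the error paths reduced to a single return code.
 */
static int ipvs_prepare_xmit(struct sk_buff *skb, struct rtable *rt)
{
	unsigned int mtu = dst_mtu(&rt->dst);

	/* Packet exceeds the route MTU and DF is set: bounce an ICMP error. */
	if (skb->len > mtu && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
		ip_rt_put(rt);
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		return -EMSGSIZE;
	}

	/* Drop the old route and hand the new rt reference to the skb. */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);
	return 0;
}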
@@ -376,7 +376,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { ip_rt_put(rt); icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); @@ -388,12 +388,12 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (!skb_make_writable(skb, sizeof(struct iphdr))) goto tx_error_put; - if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) + if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* mangle the packet */ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) @@ -452,9 +452,9 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if (skb->len > mtu) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit_v6(): frag needed for"); @@ -465,12 +465,12 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) goto tx_error_put; - if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) + if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* mangle the packet */ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) @@ -498,7 +498,7 @@ tx_error: kfree_skb(skb); return NF_STOLEN; tx_error_put: - dst_release(&rt->u.dst); + dst_release(&rt->dst); goto tx_error; } #endif @@ -549,9 +549,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos)))) goto tx_error_icmp; - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; - mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); + mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); if (mtu < 68) { ip_rt_put(rt); IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); @@ -601,7 +601,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* * Push down and install the IPIP header. @@ -615,7 +615,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, iph->daddr = rt->rt_dst; iph->saddr = rt->rt_src; iph->ttl = old_iph->ttl; - ip_select_ident(iph, &rt->u.dst, NULL); + ip_select_ident(iph, &rt->dst, NULL); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -660,12 +660,12 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (!rt) goto tx_error_icmp; - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; - mtu = dst_mtu(&rt->u.dst) - sizeof(struct ipv6hdr); + mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); /* TODO IPv6: do we need this check in IPv6? 
*/ if (mtu < 1280) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); IP_VS_DBG_RL("%s(): mtu less than 1280\n", __func__); goto tx_error; } @@ -674,7 +674,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - dst_release(&rt->u.dst); + dst_release(&rt->dst); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; } @@ -689,7 +689,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); kfree_skb(skb); IP_VS_ERR_RL("%s(): no memory\n", __func__); return NF_STOLEN; @@ -707,7 +707,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* * Push down and install the IPIP header. @@ -760,7 +760,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) { icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); ip_rt_put(rt); @@ -780,7 +780,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -813,10 +813,10 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if (skb->len > mtu) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - dst_release(&rt->u.dst); + dst_release(&rt->dst); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; } @@ -827,13 +827,13 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, */ skb = skb_share_check(skb, GFP_ATOMIC); if (unlikely(skb == NULL)) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); return NF_STOLEN; } /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -888,7 +888,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) { ip_rt_put(rt); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); @@ -900,12 +900,12 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (!skb_make_writable(skb, offset)) goto tx_error_put; - if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) + if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; /* drop the old route when skb is not shared */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); ip_vs_nat_icmp(skb, pp, cp, 0); @@ -963,9 +963,9 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if (skb->len > mtu) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; @@ -975,12 +975,12 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (!skb_make_writable(skb, offset)) goto tx_error_put; - if 
(skb_cow(skb, rt->u.dst.dev->hard_header_len)) + if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; /* drop the old route when skb is not shared */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); ip_vs_nat_icmp_v6(skb, pp, cp, 0); @@ -1001,7 +1001,7 @@ out: LeaveFunction(10); return rc; tx_error_put: - dst_release(&rt->u.dst); + dst_release(&rt->dst); goto tx_error; } #endif diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 6eaee7c8a337..b969025cf82f 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -734,11 +734,11 @@ static int callforward_do_filter(const union nf_inet_addr *src, if (!afinfo->route((struct dst_entry **)&rt1, &fl1)) { if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) { if (rt1->rt_gateway == rt2->rt_gateway && - rt1->u.dst.dev == rt2->u.dst.dev) + rt1->dst.dev == rt2->dst.dev) ret = 1; - dst_release(&rt2->u.dst); + dst_release(&rt2->dst); } - dst_release(&rt1->u.dst); + dst_release(&rt1->dst); } break; } @@ -753,11 +753,11 @@ static int callforward_do_filter(const union nf_inet_addr *src, if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) { if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway, sizeof(rt1->rt6i_gateway)) && - rt1->u.dst.dev == rt2->u.dst.dev) + rt1->dst.dev == rt2->dst.dev) ret = 1; - dst_release(&rt2->u.dst); + dst_release(&rt2->dst); } - dst_release(&rt1->u.dst); + dst_release(&rt1->dst); } break; } diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index 497b2224536f..aadde018a072 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c @@ -61,7 +61,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, goto out; rcu_read_lock(); - in_dev = __in_dev_get_rcu(rt->u.dst.dev); + in_dev = __in_dev_get_rcu(rt->dst.dev); if (in_dev != NULL) { for_primary_ifa(in_dev) { if (ifa->ifa_broadcast == iph->daddr) { diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 62ec021fbd50..1841388c770a 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -165,8 +165,8 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, rcu_read_unlock(); if (rt != NULL) { - mtu = dst_mtu(&rt->u.dst); - dst_release(&rt->u.dst); + mtu = dst_mtu(&rt->dst); + dst_release(&rt->dst); } return mtu; } diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 859d9fd429c8..c77a85bbd9eb 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -77,8 +77,8 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info) return false; skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); - skb->dev = rt->u.dst.dev; + skb_dst_set(skb, &rt->dst); + skb->dev = rt->dst.dev; skb->protocol = htons(ETH_P_IP); return true; } diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c index f0f85b0123f7..9f1729bd60de 100644 --- a/net/rxrpc/ar-peer.c +++ b/net/rxrpc/ar-peer.c @@ -64,8 +64,8 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) return; } - peer->if_mtu = dst_mtu(&rt->u.dst); - dst_release(&rt->u.dst); + peer->if_mtu = dst_mtu(&rt->dst); + dst_release(&rt->dst); _leave(" [if_mtu %u]", peer->if_mtu); } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 182749867c72..a0e1a7fdebbf 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -490,7 +490,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, __func__, &fl.fl4_dst, &fl.fl4_src); if 
(!ip_route_output_key(&init_net, &rt, &fl)) { - dst = &rt->u.dst; + dst = &rt->dst; } /* If there is no association or if a source address is passed, no @@ -534,7 +534,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, fl.fl4_src = laddr->a.v4.sin_addr.s_addr; fl.fl_ip_sport = laddr->a.v4.sin_port; if (!ip_route_output_key(&init_net, &rt, &fl)) { - dst = &rt->u.dst; + dst = &rt->dst; goto out_unlock; } } -- cgit v1.2.2 From c7de2cf053420d63bac85133469c965d4b1083e1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Jun 2010 02:09:23 +0000 Subject: pkt_sched: gen_kill_estimator() rcu fixes gen_kill_estimator() API is incomplete or not well documented, since caller should make sure an RCU grace period is respected before freeing stats_lock. This was partially addressed in commit 5d944c640b4 (gen_estimator: deadlock fix), but same problem exist for all gen_kill_estimator() users, if lock they use is not already RCU protected. A code review shows xt_RATEEST.c, act_api.c, act_police.c have this problem. Other are ok because they use qdisc lock, already RCU protected. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/gen_estimator.c | 1 + net/netfilter/xt_RATEEST.c | 12 +++++++++++- net/sched/act_api.c | 11 ++++++++++- net/sched/act_police.c | 12 +++++++++++- 4 files changed, 33 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 785e5276a300..9fbe7f7429b0 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -263,6 +263,7 @@ static void __gen_kill_estimator(struct rcu_head *head) * * Removes the rate estimator specified by &bstats and &rate_est. * + * Note : Caller should respect an RCU grace period before freeing stats_lock */ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est) diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 69c01e10f8af..de079abd5bc8 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -60,13 +60,22 @@ struct xt_rateest *xt_rateest_lookup(const char *name) } EXPORT_SYMBOL_GPL(xt_rateest_lookup); +static void xt_rateest_free_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct xt_rateest, rcu)); +} + void xt_rateest_put(struct xt_rateest *est) { mutex_lock(&xt_rateest_mutex); if (--est->refcnt == 0) { hlist_del(&est->list); gen_kill_estimator(&est->bstats, &est->rstats); - kfree(est); + /* + * gen_estimator est_timer() might access est->lock or bstats, + * wait a RCU grace period before freeing 'est' + */ + call_rcu(&est->rcu, xt_rateest_free_rcu); } mutex_unlock(&xt_rateest_mutex); } @@ -179,6 +188,7 @@ static int __init xt_rateest_tg_init(void) static void __exit xt_rateest_tg_fini(void) { xt_unregister_target(&xt_rateest_tg_reg); + rcu_barrier(); /* Wait for completion of call_rcu()'s (xt_rateest_free_rcu) */ } diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 972378f47f3c..23b25f89e7e0 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -26,6 +26,11 @@ #include #include +static void tcf_common_free_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct tcf_common, tcfc_rcu)); +} + void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) { unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask); @@ -38,7 +43,11 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) write_unlock_bh(hinfo->lock); gen_kill_estimator(&p->tcfc_bstats, &p->tcfc_rate_est); - kfree(p); + /* 
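The xt_RATEEST, act_api and act_police hunks above all follow one shape, sketched here with invented names: an object that an RCU reader or a still-running estimator timer may touch is never kfree()d directly; it is queued with call_rcu() so the free happens only after a grace period, and module unload calls rcu_barrier() so no pending callback can outlive the code it points at.

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {				/* hypothetical object */
	int payload;
	struct rcu_head rcu;
};

static void foo_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct foo, rcu));
}

static void foo_put_last_ref(struct foo *f)
{
	/* readers that found f before it was unlinked may still run */
	call_rcu(&f->rcu, foo_free_rcu);
}

static void foo_module_exit(void)
{
	rcu_barrier();	/* wait for all queued foo_free_rcu() callbacks */
}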
+ * gen_estimator est_timer() might access p->tcfc_lock + * or bstats, wait a RCU grace period before freeing p + */ + call_rcu(&p->tcfc_rcu, tcf_common_free_rcu); return; } } diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 654f73dff7c1..537a48732e9e 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -97,6 +97,11 @@ nla_put_failure: goto done; } +static void tcf_police_free_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct tcf_police, tcf_rcu)); +} + static void tcf_police_destroy(struct tcf_police *p) { unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK); @@ -113,7 +118,11 @@ static void tcf_police_destroy(struct tcf_police *p) qdisc_put_rtab(p->tcfp_R_tab); if (p->tcfp_P_tab) qdisc_put_rtab(p->tcfp_P_tab); - kfree(p); + /* + * gen_estimator est_timer() might access p->tcf_lock + * or bstats, wait a RCU grace period before freeing p + */ + call_rcu(&p->tcf_rcu, tcf_police_free_rcu); return; } } @@ -397,6 +406,7 @@ static void __exit police_cleanup_module(void) { tcf_unregister_action(&act_police_ops); + rcu_barrier(); /* Wait for completion of call_rcu()'s (tcf_police_free_rcu) */ } module_init(police_init_module); -- cgit v1.2.2 From 0c78a92fbd655ab990e2799f645707f05f548e2f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Jun 2010 16:33:05 +0000 Subject: econet: fix locking econet lacks proper locking. It holds econet_lock only when inserting or deleting an entry in econet_sklist, not during lookups. - convert econet_lock from rwlock to spinlock - use econet_lock in ec_listening_socket() lookup - use appropriate sock_hold() / sock_put() to avoid corruptions. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/econet/af_econet.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 2a5a8053e000..dc54bd0d083b 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -48,7 +48,7 @@ static const struct proto_ops econet_ops; static struct hlist_head econet_sklist; -static DEFINE_RWLOCK(econet_lock); +static DEFINE_SPINLOCK(econet_lock); static DEFINE_MUTEX(econet_mutex); /* Since there are only 256 possible network numbers (or fewer, depends @@ -98,16 +98,16 @@ struct ec_cb static void econet_remove_socket(struct hlist_head *list, struct sock *sk) { - write_lock_bh(&econet_lock); + spin_lock_bh(&econet_lock); sk_del_node_init(sk); - write_unlock_bh(&econet_lock); + spin_unlock_bh(&econet_lock); } static void econet_insert_socket(struct hlist_head *list, struct sock *sk) { - write_lock_bh(&econet_lock); + spin_lock_bh(&econet_lock); sk_add_node(sk, list); - write_unlock_bh(&econet_lock); + spin_unlock_bh(&econet_lock); } /* @@ -782,15 +782,19 @@ static struct sock *ec_listening_socket(unsigned char port, unsigned char struct sock *sk; struct hlist_node *node; + spin_lock(&econet_lock); sk_for_each(sk, node, &econet_sklist) { struct econet_sock *opt = ec_sk(sk); if ((opt->port == port || opt->port == 0) && (opt->station == station || opt->station == 0) && - (opt->net == net || opt->net == 0)) + (opt->net == net || opt->net == 0)) { + sock_hold(sk); goto found; + } } sk = NULL; found: + spin_unlock(&econet_lock); return sk; } @@ -852,7 +856,7 @@ static void aun_incoming(struct sk_buff *skb, struct aunhdr *ah, size_t len) { struct iphdr *ip = ip_hdr(skb); unsigned char stn = ntohl(ip->saddr) & 0xff; - struct sock *sk; + struct sock *sk = NULL; struct sk_buff *newskb; struct ec_device *edev = 
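The econet conversion above boils down to the lookup discipline sketched below (names invented, the match test is a placeholder): find the socket and take the reference under the same spinlock, so the socket cannot be freed between being found and being pinned, and make every caller balance the sock_hold() with a sock_put().

#include <linux/spinlock.h>
#include <net/sock.h>

static DEFINE_SPINLOCK(example_lock);
static struct hlist_head example_sklist;

static struct sock *example_lookup(unsigned char port)
{
	struct sock *sk;
	struct hlist_node *node;

	spin_lock(&example_lock);
	sk_for_each(sk, node, &example_sklist) {
		if (sk->sk_protocol == port) {	/* placeholder match test */
			sock_hold(sk);		/* pin before unlocking */
			goto found;
		}
	}
	sk = NULL;
found:
	spin_unlock(&example_lock);
	return sk;				/* caller must sock_put() */
}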
skb->dev->ec_ptr; @@ -882,10 +886,13 @@ static void aun_incoming(struct sk_buff *skb, struct aunhdr *ah, size_t len) } aun_send_response(ip->saddr, ah->handle, 3, 0); + sock_put(sk); return; bad: aun_send_response(ip->saddr, ah->handle, 4, 0); + if (sk) + sock_put(sk); } /* @@ -1050,7 +1057,7 @@ release: static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct ec_framehdr *hdr; - struct sock *sk; + struct sock *sk = NULL; struct ec_device *edev = dev->ec_ptr; if (!net_eq(dev_net(dev), &init_net)) @@ -1085,10 +1092,12 @@ static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet if (ec_queue_packet(sk, skb, edev->net, hdr->src_stn, hdr->cb, hdr->port)) goto drop; - + sock_put(sk); return NET_RX_SUCCESS; drop: + if (sk) + sock_put(sk); kfree_skb(skb); return NET_RX_DROP; } -- cgit v1.2.2 From 43d28b6515a6ea580a198df3e253e88236f08978 Mon Sep 17 00:00:00 2001 From: Daniel Turull Date: Wed, 9 Jun 2010 22:49:57 +0000 Subject: pktgen: increasing transmission granularity This patch increases the granularity of the rate generated by pktgen. The previous version of pktgen uses micro seconds (udelay) resolution when it was delayed causing gaps in the rates. It is changed to nanosecond (ndelay). Now any rate is possible. Also it allows to set, the desired rate in Mb/s or packets per second. The documentation has been updated. Signed-off-by: Daniel Turull Signed-off-by: Robert Olsson Signed-off-by: David S. Miller --- net/core/pktgen.c | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 1dacd7ba8dbb..6428653e9498 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -169,7 +169,7 @@ #include #include /* do_div */ -#define VERSION "2.73" +#define VERSION "2.74" #define IP_NAME_SZ 32 #define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ #define MPLS_STACK_BOTTOM htonl(0x00000100) @@ -980,6 +980,40 @@ static ssize_t pktgen_if_write(struct file *file, (unsigned long long) pkt_dev->delay); return count; } + if (!strcmp(name, "rate")) { + len = num_arg(&user_buffer[i], 10, &value); + if (len < 0) + return len; + + i += len; + if (!value) + return len; + pkt_dev->delay = pkt_dev->min_pkt_size*8*NSEC_PER_USEC/value; + if (debug) + printk(KERN_INFO + "pktgen: Delay set at: %llu ns\n", + pkt_dev->delay); + + sprintf(pg_result, "OK: rate=%lu", value); + return count; + } + if (!strcmp(name, "ratep")) { + len = num_arg(&user_buffer[i], 10, &value); + if (len < 0) + return len; + + i += len; + if (!value) + return len; + pkt_dev->delay = NSEC_PER_SEC/value; + if (debug) + printk(KERN_INFO + "pktgen: Delay set at: %llu ns\n", + pkt_dev->delay); + + sprintf(pg_result, "OK: rate=%lu", value); + return count; + } if (!strcmp(name, "udp_src_min")) { len = num_arg(&user_buffer[i], 10, &value); if (len < 0) @@ -2142,15 +2176,15 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); hrtimer_set_expires(&t.timer, spin_until); - remaining = ktime_to_us(hrtimer_expires_remaining(&t.timer)); + remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer)); if (remaining <= 0) { pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay); return; } start_time = ktime_now(); - if (remaining < 100) - udelay(remaining); /* really small just spin */ + if (remaining < 100000) + ndelay(remaining); /* really small just spin */ 
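The arithmetic behind the new "rate" and "ratep" keywords is easy to sanity-check outside the kernel; the standalone program below (illustrative only) uses the same formulas as the handlers above, with "rate" taken in Mbit/s and "ratep" in packets per second, both converted to an inter-packet delay in nanoseconds.

#include <stdio.h>

#define NSEC_PER_USEC	1000ULL
#define NSEC_PER_SEC	1000000000ULL

int main(void)
{
	unsigned long long min_pkt_size = 60;	/* bytes, example value    */
	unsigned long long rate_mbps = 100;	/* echo "rate 100"         */
	unsigned long long pps = 10000;		/* echo "ratep 10000"      */

	/* pkt_dev->delay = pkt_dev->min_pkt_size*8*NSEC_PER_USEC/value */
	printf("rate  %llu Mbit/s -> delay %llu ns\n",
	       rate_mbps, min_pkt_size * 8 * NSEC_PER_USEC / rate_mbps);

	/* pkt_dev->delay = NSEC_PER_SEC/value */
	printf("ratep %llu pkt/s  -> delay %llu ns\n",
	       pps, NSEC_PER_SEC / pps);
	return 0;
}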
else { /* see do_nanosleep */ hrtimer_init_sleeper(&t, current); -- cgit v1.2.2 From be1f3c2c027cc5ad735df6a45a542ed1db7ec48b Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 8 Jun 2010 07:19:54 +0000 Subject: net: Enable 64-bit net device statistics on 32-bit architectures Use struct rtnl_link_stats64 as the statistics structure. On 32-bit architectures, insert 32 bits of padding after/before each field of struct net_device_stats to make its layout compatible with struct rtnl_link_stats64. Add an anonymous union in net_device; move stats into the union and add struct rtnl_link_stats64 stats64. Add net_device_ops::ndo_get_stats64, implementations of which will return a pointer to struct rtnl_link_stats64. Drivers that implement this operation must not update the structure asynchronously. Change dev_get_stats() to call ndo_get_stats64 if available, and to return a pointer to struct rtnl_link_stats64. Change callers of dev_get_stats() accordingly. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/8021q/vlanproc.c | 13 +++++++------ net/core/dev.c | 19 +++++++++++-------- net/core/net-sysfs.c | 12 ++++++------ net/core/rtnetlink.c | 6 +++--- 4 files changed, 27 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index afead353e215..df56f5ce887c 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -278,8 +278,9 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset) { struct net_device *vlandev = (struct net_device *) seq->private; const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev); - const struct net_device_stats *stats; + const struct rtnl_link_stats64 *stats; static const char fmt[] = "%30s %12lu\n"; + static const char fmt64[] = "%30s %12llu\n"; int i; if (!is_vlan_dev(vlandev)) @@ -291,12 +292,12 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset) vlandev->name, dev_info->vlan_id, (int)(dev_info->flags & 1), vlandev->priv_flags); - seq_printf(seq, fmt, "total frames received", stats->rx_packets); - seq_printf(seq, fmt, "total bytes received", stats->rx_bytes); - seq_printf(seq, fmt, "Broadcast/Multicast Rcvd", stats->multicast); + seq_printf(seq, fmt64, "total frames received", stats->rx_packets); + seq_printf(seq, fmt64, "total bytes received", stats->rx_bytes); + seq_printf(seq, fmt64, "Broadcast/Multicast Rcvd", stats->multicast); seq_puts(seq, "\n"); - seq_printf(seq, fmt, "total frames transmitted", stats->tx_packets); - seq_printf(seq, fmt, "total bytes transmitted", stats->tx_bytes); + seq_printf(seq, fmt64, "total frames transmitted", stats->tx_packets); + seq_printf(seq, fmt64, "total bytes transmitted", stats->tx_bytes); seq_printf(seq, fmt, "total headroom inc", dev_info->cnt_inc_headroom_on_tx); seq_printf(seq, fmt, "total encap on xmit", diff --git a/net/core/dev.c b/net/core/dev.c index 277844901ce3..a1abc10db08a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3701,10 +3701,10 @@ void dev_seq_stop(struct seq_file *seq, void *v) static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) { - const struct net_device_stats *stats = dev_get_stats(dev); + const struct rtnl_link_stats64 *stats = dev_get_stats(dev); - seq_printf(seq, "%6s: %7lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " - "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", + seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " + "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n", dev->name, stats->rx_bytes, stats->rx_packets, stats->rx_errors, 
stats->rx_dropped + stats->rx_missed_errors, @@ -5281,18 +5281,21 @@ EXPORT_SYMBOL(dev_txq_stats_fold); * @dev: device to get statistics from * * Get network statistics from device. The device driver may provide - * its own method by setting dev->netdev_ops->get_stats; otherwise - * the internal statistics structure is used. + * its own method by setting dev->netdev_ops->get_stats64 or + * dev->netdev_ops->get_stats; otherwise the internal statistics + * structure is used. */ -const struct net_device_stats *dev_get_stats(struct net_device *dev) +const struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev) { const struct net_device_ops *ops = dev->netdev_ops; + if (ops->ndo_get_stats64) + return ops->ndo_get_stats64(dev); if (ops->ndo_get_stats) - return ops->ndo_get_stats(dev); + return (struct rtnl_link_stats64 *)ops->ndo_get_stats(dev); dev_txq_stats_fold(dev, &dev->stats); - return &dev->stats; + return &dev->stats64; } EXPORT_SYMBOL(dev_get_stats); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 99e7052d7323..ea3bb4c3b87d 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -29,6 +29,7 @@ static const char fmt_hex[] = "%#x\n"; static const char fmt_long_hex[] = "%#lx\n"; static const char fmt_dec[] = "%d\n"; static const char fmt_ulong[] = "%lu\n"; +static const char fmt_u64[] = "%llu\n"; static inline int dev_isalive(const struct net_device *dev) { @@ -324,14 +325,13 @@ static ssize_t netstat_show(const struct device *d, struct net_device *dev = to_net_dev(d); ssize_t ret = -EINVAL; - WARN_ON(offset > sizeof(struct net_device_stats) || - offset % sizeof(unsigned long) != 0); + WARN_ON(offset > sizeof(struct rtnl_link_stats64) || + offset % sizeof(u64) != 0); read_lock(&dev_base_lock); if (dev_isalive(dev)) { - const struct net_device_stats *stats = dev_get_stats(dev); - ret = sprintf(buf, fmt_ulong, - *(unsigned long *)(((u8 *) stats) + offset)); + const struct rtnl_link_stats64 *stats = dev_get_stats(dev); + ret = sprintf(buf, fmt_u64, *(u64 *)(((u8 *) stats) + offset)); } read_unlock(&dev_base_lock); return ret; @@ -343,7 +343,7 @@ static ssize_t show_##name(struct device *d, \ struct device_attribute *attr, char *buf) \ { \ return netstat_show(d, attr, buf, \ - offsetof(struct net_device_stats, name)); \ + offsetof(struct rtnl_link_stats64, name)); \ } \ static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1a2af24e9e3d..e645778e9b7e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -579,7 +579,7 @@ static unsigned int rtnl_dev_combine_flags(const struct net_device *dev, } static void copy_rtnl_link_stats(struct rtnl_link_stats *a, - const struct net_device_stats *b) + const struct rtnl_link_stats64 *b) { a->rx_packets = b->rx_packets; a->tx_packets = b->tx_packets; @@ -610,7 +610,7 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, a->tx_compressed = b->tx_compressed; } -static void copy_rtnl_link_stats64(void *v, const struct net_device_stats *b) +static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b) { struct rtnl_link_stats64 a; @@ -791,7 +791,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, { struct ifinfomsg *ifm; struct nlmsghdr *nlh; - const struct net_device_stats *stats; + const struct rtnl_link_stats64 *stats; struct nlattr *attr; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); -- cgit v1.2.2 From 0b5c25e8ac3a60bd01a52ca7405ba96aec8c16be Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: 
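On the driver side, the new hook added above hands back a pointer to struct rtnl_link_stats64. A minimal sketch (the driver, its private struct and its counters are invented for the example) that fills the returned snapshot inside the call rather than asynchronously, per the note in the changelog above:

#include <linux/netdevice.h>

struct example_priv {
	u64 rx_packets;				/* updated in the RX path */
	u64 rx_bytes;
	struct rtnl_link_stats64 stats64;	/* snapshot handed to core */
};

static struct rtnl_link_stats64 *example_get_stats64(struct net_device *dev)
{
	struct example_priv *priv = netdev_priv(dev);

	priv->stats64.rx_packets = priv->rx_packets;
	priv->stats64.rx_bytes   = priv->rx_bytes;
	return &priv->stats64;
}

static const struct net_device_ops example_netdev_ops = {
	.ndo_get_stats64 = example_get_stats64,
};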
Tue, 8 Jun 2010 08:23:01 +0000 Subject: irttp: Print device parameters and statistics as unsigned Device statistics have type unsigned long and several of the device-specific parameters printed here have type __u32. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/irda/irttp.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/irda/irttp.c b/net/irda/irttp.c index 47db1d8a0d92..285761e77d90 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -1853,23 +1853,23 @@ static int irttp_seq_show(struct seq_file *seq, void *v) self->remote_credit); seq_printf(seq, "send credit: %d\n", self->send_credit); - seq_printf(seq, " tx packets: %ld, ", + seq_printf(seq, " tx packets: %lu, ", self->stats.tx_packets); - seq_printf(seq, "rx packets: %ld, ", + seq_printf(seq, "rx packets: %lu, ", self->stats.rx_packets); - seq_printf(seq, "tx_queue len: %d ", + seq_printf(seq, "tx_queue len: %u ", skb_queue_len(&self->tx_queue)); - seq_printf(seq, "rx_queue len: %d\n", + seq_printf(seq, "rx_queue len: %u\n", skb_queue_len(&self->rx_queue)); seq_printf(seq, " tx_sdu_busy: %s, ", self->tx_sdu_busy? "TRUE":"FALSE"); seq_printf(seq, "rx_sdu_busy: %s\n", self->rx_sdu_busy? "TRUE":"FALSE"); - seq_printf(seq, " max_seg_size: %d, ", + seq_printf(seq, " max_seg_size: %u, ", self->max_seg_size); - seq_printf(seq, "tx_max_sdu_size: %d, ", + seq_printf(seq, "tx_max_sdu_size: %u, ", self->tx_max_sdu_size); - seq_printf(seq, "rx_max_sdu_size: %d\n", + seq_printf(seq, "rx_max_sdu_size: %u\n", self->rx_max_sdu_size); seq_printf(seq, " Used by (%s)\n\n", -- cgit v1.2.2 From e897082fe7a5b591dc4dd5599ac39081a7c8e482 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sun, 13 Jun 2010 10:36:30 +0000 Subject: net: fix deliver_no_wcard regression on loopback device deliver_no_wcard is not being set in skb_copy_header. In the skb_cloned case it is not being cleared and may cause the skb to be dropped when the loopback device pushes it back up the stack. Signed-off-by: John Fastabend Acked-by: Eric Dumazet Tested-by: Markus Trippelsdorf Signed-off-by: David S. Miller --- net/core/skbuff.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9f07e749d7b1..bcf2fa3e0ddc 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -532,6 +532,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->ip_summed = old->ip_summed; skb_copy_queue_mapping(new, old); new->priority = old->priority; + new->deliver_no_wcard = old->deliver_no_wcard; #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) new->ipvs_property = old->ipvs_property; #endif -- cgit v1.2.2 From e8d15e6460cb0eea00f2574a80d94496943403ba Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 13 Jun 2010 10:50:46 +0000 Subject: net: rxhash already set in __copy_skb_header No need to copy rxhash again in __skb_clone() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index bcf2fa3e0ddc..34432b4e96bb 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -570,7 +570,6 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) C(len); C(data_len); C(mac_len); - C(rxhash); n->hdr_len = skb->nohdr ? 
skb_headroom(skb) : skb->hdr_len; n->cloned = 1; n->nohdr = 0; -- cgit v1.2.2 From f5c5440d40a24c5dc8030cde0a03debe87de4afb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Jun 2010 16:15:23 +0200 Subject: netfilter: nfnetlink_log: RCU conversion, part 2 - must use atomic_inc_not_zero() in instance_lookup_get() - must use hlist_add_head_rcu() instead of hlist_add_head() - must use hlist_del_rcu() instead of hlist_del() - Introduce NFULNL_COPY_DISABLED to stop lockless reader from using an instance, before we do final instance_put() on it. Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/netfilter/nfnetlink_log.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 8ec23ec568e7..fb86a51bb65a 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -109,8 +109,8 @@ instance_lookup_get(u_int16_t group_num) rcu_read_lock_bh(); inst = __instance_lookup(group_num); - if (inst) - instance_get(inst); + if (inst && !atomic_inc_not_zero(&inst->use)) + inst = NULL; rcu_read_unlock_bh(); return inst; @@ -171,7 +171,7 @@ instance_create(u_int16_t group_num, int pid) inst->copy_mode = NFULNL_COPY_PACKET; inst->copy_range = NFULNL_COPY_RANGE_MAX; - hlist_add_head(&inst->hlist, + hlist_add_head_rcu(&inst->hlist, &instance_table[instance_hashfn(group_num)]); spin_unlock_bh(&instances_lock); @@ -185,18 +185,23 @@ out_unlock: static void __nfulnl_flush(struct nfulnl_instance *inst); +/* called with BH disabled */ static void __instance_destroy(struct nfulnl_instance *inst) { /* first pull it out of the global list */ - hlist_del(&inst->hlist); + hlist_del_rcu(&inst->hlist); /* then flush all pending packets from skb */ - spin_lock_bh(&inst->lock); + spin_lock(&inst->lock); + + /* lockless readers wont be able to use us */ + inst->copy_mode = NFULNL_COPY_DISABLED; + if (inst->skb) __nfulnl_flush(inst); - spin_unlock_bh(&inst->lock); + spin_unlock(&inst->lock); /* and finally put the refcount */ instance_put(inst); @@ -624,6 +629,7 @@ nfulnl_log_packet(u_int8_t pf, size += nla_total_size(data_len); break; + case NFULNL_COPY_DISABLED: default: goto unlock_and_release; } -- cgit v1.2.2 From c86ee67c7c4bbab2818f653eb00a70671821624a Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Mon, 14 Jun 2010 16:20:02 +0200 Subject: netfilter: kill redundant check code in which setting ip_summed value If the returned csum value is 0, We has set ip_summed with CHECKSUM_UNNECESSARY flag in __skb_checksum_complete_head(). So this patch kills the check and changes to return to upper caller directly. 
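The nfnetlink_log conversion above depends on the lookup-and-pin idiom sketched here (structure and names invented): under rcu_read_lock() an entry found on the hash list may already be dying, so the reference is taken with atomic_inc_not_zero(), which fails instead of resurrecting an object whose refcount has already reached zero.

#include <linux/types.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <asm/atomic.h>

struct example_instance {
	struct hlist_node hlist;
	atomic_t use;
	u16 group;
};

static struct example_instance *example_lookup_get(struct hlist_head *head,
						   u16 group)
{
	struct example_instance *inst;
	struct hlist_node *pos;

	rcu_read_lock();
	hlist_for_each_entry_rcu(inst, pos, head, hlist) {
		if (inst->group == group && atomic_inc_not_zero(&inst->use)) {
			rcu_read_unlock();
			return inst;	/* caller now owns a reference */
		}
	}
	rcu_read_unlock();
	return NULL;
}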
Signed-off-by: Shan Wei Signed-off-by: Patrick McHardy --- net/ipv4/netfilter.c | 4 +--- net/ipv6/netfilter.c | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 07de855e2175..acd1ea87ba51 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -212,9 +212,7 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol, skb->len - dataoff, 0); skb->ip_summed = CHECKSUM_NONE; - csum = __skb_checksum_complete_head(skb, dataoff + len); - if (!csum) - skb->ip_summed = CHECKSUM_UNNECESSARY; + return __skb_checksum_complete_head(skb, dataoff + len); } return csum; } diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index a74951c039b6..7155b2451d7c 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -151,9 +151,7 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook, protocol, csum_sub(0, hsum))); skb->ip_summed = CHECKSUM_NONE; - csum = __skb_checksum_complete_head(skb, dataoff + len); - if (!csum) - skb->ip_summed = CHECKSUM_UNNECESSARY; + return __skb_checksum_complete_head(skb, dataoff + len); } return csum; }; -- cgit v1.2.2 From 841a5940eb872d70dad2b9ee7f946d8fd13a8c22 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Mon, 14 Jun 2010 16:28:23 +0200 Subject: netfilter: defrag: remove one redundant atomic ops Instead of doing one atomic operation per frag, we can factorize them. Reported from Eric Dumazet. Signed-off-by: Shan Wei Acked-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/nf_conntrack_reasm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 6fb890187de0..bc5b86d477c1 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -442,7 +442,6 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) skb_shinfo(head)->frag_list = head->next; skb_reset_transport_header(head); skb_push(head, head->data - skb_network_header(head)); - atomic_sub(head->truesize, &nf_init_frags.mem); for (fp=head->next; fp; fp = fp->next) { head->data_len += fp->len; @@ -452,8 +451,8 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; - atomic_sub(fp->truesize, &nf_init_frags.mem); } + atomic_sub(head->truesize, &nf_init_frags.mem); head->next = NULL; head->dev = dev; -- cgit v1.2.2 From 0b041f8d1e6fb11a6134d37230da8c2182f99110 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Mon, 14 Jun 2010 16:30:47 +0200 Subject: netfilter: defrag: kill unused work parameter of frag_kfree_skb() The parameter (work) is unused, remove it. Reported from Eric Dumazet. Signed-off-by: Shan Wei Acked-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/nf_conntrack_reasm.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index bc5b86d477c1..9254008602d4 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -114,10 +114,8 @@ static void nf_skb_free(struct sk_buff *skb) } /* Memory Tracking Functions. 
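The first defrag change above (folding the per-fragment atomic_sub into a single one) is the usual "batch the atomic op" transformation, shown generically below with invented names: each loop iteration only accumulates into memory the caller owns, and the shared atomic counter is hit once for the whole batch instead of once per fragment.

#include <asm/atomic.h>

static void example_release(atomic_t *pool_mem, const unsigned int *sizes,
			    int n)
{
	unsigned int total = 0;
	int i;

	for (i = 0; i < n; i++)
		total += sizes[i];	/* no shared-cacheline traffic here */

	atomic_sub(total, pool_mem);	/* one atomic RMW for the batch */
}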
*/ -static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work) +static void frag_kfree_skb(struct sk_buff *skb) { - if (work) - *work -= skb->truesize; atomic_sub(skb->truesize, &nf_init_frags.mem); nf_skb_free(skb); kfree_skb(skb); @@ -335,7 +333,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, fq->q.fragments = next; fq->q.meat -= free_it->len; - frag_kfree_skb(free_it, NULL); + frag_kfree_skb(free_it); } } -- cgit v1.2.2 From 9d38d85de0270e3927bffab94973a9c78d1dc800 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 9 Jun 2010 17:20:33 +0200 Subject: cfg80211/mac80211: allow action frame TX/RX in IBSS When in IBSS mode, currently action frame TX and RX cannot be used. Allow using it to talk to any peer, or for public action frames. Also, while at it, restructure the code in mac80211 to make it easier to add this for other interface types in the future. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 52 +++++++++++++++++++++++++++++++++++++++++++--- net/mac80211/ieee80211_i.h | 5 ----- net/mac80211/mlme.c | 38 --------------------------------- net/mac80211/rx.c | 3 +-- net/wireless/mlme.c | 5 +++-- net/wireless/nl80211.c | 6 ++++-- 6 files changed, 57 insertions(+), 52 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 952845e7072a..59f597d0c6a0 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1555,9 +1555,55 @@ static int ieee80211_action(struct wiphy *wiphy, struct net_device *dev, bool channel_type_valid, const u8 *buf, size_t len, u64 *cookie) { - return ieee80211_mgd_action(IEEE80211_DEV_TO_SUB_IF(dev), chan, - channel_type, channel_type_valid, - buf, len, cookie); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb; + struct sta_info *sta; + const struct ieee80211_mgmt *mgmt = (void *)buf; + u32 flags = IEEE80211_TX_INTFL_NL80211_FRAME_TX | + IEEE80211_TX_CTL_REQ_TX_STATUS; + + /* Check that we are on the requested channel for transmission */ + if (chan != local->tmp_channel && + chan != local->oper_channel) + return -EBUSY; + if (channel_type_valid && + (channel_type != local->tmp_channel_type && + channel_type != local->_oper_channel_type)) + return -EBUSY; + + switch (sdata->vif.type) { + case NL80211_IFTYPE_ADHOC: + if (mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) + break; + rcu_read_lock(); + sta = sta_info_get(sdata, mgmt->da); + rcu_read_unlock(); + if (!sta) + return -ENOLINK; + break; + case NL80211_IFTYPE_STATION: + if (!(sdata->u.mgd.flags & IEEE80211_STA_MFP_ENABLED)) + flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + break; + default: + return -EOPNOTSUPP; + } + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + len); + if (!skb) + return -ENOMEM; + skb_reserve(skb, local->hw.extra_tx_headroom); + + memcpy(skb_put(skb, len), buf, len); + + IEEE80211_SKB_CB(skb)->flags = flags; + + skb->dev = sdata->dev; + ieee80211_tx_skb(sdata, skb); + + *cookie = (unsigned long) skb; + return 0; } struct cfg80211_ops mac80211_config_ops = { diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 4d3883e20fc1..5782a537f74a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -986,11 +986,6 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, struct cfg80211_disassoc_request *req, void *cookie); -int ieee80211_mgd_action(struct 
ieee80211_sub_if_data *sdata, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, - const u8 *buf, size_t len, u64 *cookie); ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); void ieee80211_send_pspoll(struct ieee80211_local *local, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2ab4e86d9929..1373b3dde8b4 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2353,44 +2353,6 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, return 0; } -int ieee80211_mgd_action(struct ieee80211_sub_if_data *sdata, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, - const u8 *buf, size_t len, u64 *cookie) -{ - struct ieee80211_local *local = sdata->local; - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct sk_buff *skb; - - /* Check that we are on the requested channel for transmission */ - if (chan != local->tmp_channel && - chan != local->oper_channel) - return -EBUSY; - if (channel_type_valid && - (channel_type != local->tmp_channel_type && - channel_type != local->_oper_channel_type)) - return -EBUSY; - - skb = dev_alloc_skb(local->hw.extra_tx_headroom + len); - if (!skb) - return -ENOMEM; - skb_reserve(skb, local->hw.extra_tx_headroom); - - memcpy(skb_put(skb, len), buf, len); - - if (!(ifmgd->flags & IEEE80211_STA_MFP_ENABLED)) - IEEE80211_SKB_CB(skb)->flags |= - IEEE80211_TX_INTFL_DONT_ENCRYPT; - IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_NL80211_FRAME_TX | - IEEE80211_TX_CTL_REQ_TX_STATUS; - skb->dev = sdata->dev; - ieee80211_tx_skb(sdata, skb); - - *cookie = (unsigned long) skb; - return 0; -} - void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, enum nl80211_cqm_rssi_threshold_event rssi_event, gfp_t gfp) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6e7d6d48fe1e..3adcda502b7d 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2045,8 +2045,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) */ status = IEEE80211_SKB_RXCB(rx->skb); - if (sdata->vif.type == NL80211_IFTYPE_STATION && - cfg80211_rx_action(rx->sdata->dev, status->freq, + if (cfg80211_rx_action(rx->sdata->dev, status->freq, rx->skb->data, rx->skb->len, GFP_ATOMIC)) goto handled; diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index f69ae19f497f..9f95354f859f 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -846,8 +846,9 @@ int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, if (!wdev->current_bss || memcmp(wdev->current_bss->pub.bssid, mgmt->bssid, ETH_ALEN) != 0 || - memcmp(wdev->current_bss->pub.bssid, mgmt->da, - ETH_ALEN) != 0) + (wdev->iftype == NL80211_IFTYPE_STATION && + memcmp(wdev->current_bss->pub.bssid, mgmt->da, + ETH_ALEN) != 0)) return -ENOTCONN; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 90ab3c8519be..c65e67e9231c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4653,7 +4653,8 @@ static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info) if (err) goto unlock_rtnl; - if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC) { err = -EOPNOTSUPP; goto out; } @@ -4703,7 +4704,8 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info) goto out; } - if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { + if (dev->ieee80211_ptr->iftype != 
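The frame-construction half of the relocated TX path above follows the usual pattern for management frames, sketched here with an invented helper ('headroom' stands in for local->hw.extra_tx_headroom): reserve the driver's headroom first so nothing has to be reallocated later, then append the caller-supplied action frame body.

#include <linux/skbuff.h>
#include <linux/string.h>
#include <linux/types.h>

static struct sk_buff *example_build_frame(unsigned int headroom,
					   const u8 *buf, size_t len)
{
	struct sk_buff *skb = dev_alloc_skb(headroom + len);

	if (!skb)
		return NULL;

	skb_reserve(skb, headroom);		/* room for driver headers */
	memcpy(skb_put(skb, len), buf, len);	/* copy the frame body */
	return skb;
}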
NL80211_IFTYPE_STATION && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC) { err = -EOPNOTSUPP; goto out; } -- cgit v1.2.2 From 2a419056c15478d2df3f3e9d4fa64e34eb1faa7d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 10 Jun 2010 10:21:29 +0200 Subject: mac80211: simplify station/aggregation code A number of places use RCU locking for accessing the station list, even though they do not need to. Use mutex locking instead to prepare for the locking changes I want to make. The mlme code is also using a WLAN_STA_DISASSOC flag that has the same meaning as WLAN_STA_BLOCK_BA, so use that. While doing so, combine places where we loop over stations twice, and optimise away some of the loops by checking if the hardware supports aggregation at all first. Also fix a more theoretical race condition: right now we could resume, set up an aggregation session, and right after tear it down again due to the code that is needed for hardware reconfiguration here. Also mark add a comment to that code marking it as a workaround. Finally, remove a pointless aggregation disabling loop when an interface is stopped, directly after that we remove all stations from it which will also disable all aggregation sessions that may still be active, and does so in a race-free way unlike the current loop that doesn't block new sessions. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-tx.c | 10 +--------- net/mac80211/iface.c | 13 ------------- net/mac80211/mlme.c | 6 +++--- net/mac80211/pm.c | 16 ++++------------ net/mac80211/sta_info.h | 4 ---- net/mac80211/util.c | 31 ++++++++++++++++--------------- 6 files changed, 24 insertions(+), 56 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index d1b6664a2532..9b9f21be0ff7 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -239,17 +239,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) sdata->vif.type != NL80211_IFTYPE_AP) return -EINVAL; - if (test_sta_flags(sta, WLAN_STA_DISASSOC)) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Disassociation is in progress. " - "Denying BA session request\n"); -#endif - return -EINVAL; - } - if (test_sta_flags(sta, WLAN_STA_BLOCK_BA)) { #ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Suspend in progress. " + printk(KERN_DEBUG "BA sessions blocked. " "Denying BA session request\n"); #endif return -EINVAL; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 1afa9ec81fe8..906fc2be0cfb 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -339,7 +339,6 @@ static int ieee80211_stop(struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; - struct sta_info *sta; unsigned long flags; struct sk_buff *skb, *tmp; u32 hw_reconf_flags = 0; @@ -355,18 +354,6 @@ static int ieee80211_stop(struct net_device *dev) */ ieee80211_work_purge(sdata); - /* - * Now delete all active aggregation sessions. - */ - rcu_read_lock(); - - list_for_each_entry_rcu(sta, &local->sta_list, list) { - if (sta->sdata == sdata) - ieee80211_sta_tear_down_BA_sessions(sta); - } - - rcu_read_unlock(); - /* * Remove all stations associated with this interface. 
* diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 1373b3dde8b4..0154d74905c9 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -898,13 +898,13 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, netif_tx_stop_all_queues(sdata->dev); netif_carrier_off(sdata->dev); - rcu_read_lock(); + mutex_lock(&local->sta_mtx); sta = sta_info_get(sdata, bssid); if (sta) { - set_sta_flags(sta, WLAN_STA_DISASSOC); + set_sta_flags(sta, WLAN_STA_BLOCK_BA); ieee80211_sta_tear_down_BA_sessions(sta); } - rcu_read_unlock(); + mutex_unlock(&local->sta_mtx); changed |= ieee80211_reset_erp_info(sdata); diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 75202b295a4e..e145a949b820 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -40,22 +40,14 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) list_for_each_entry(sdata, &local->interfaces, list) ieee80211_disable_keys(sdata); - /* Tear down aggregation sessions */ - - rcu_read_lock(); - - if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { - list_for_each_entry_rcu(sta, &local->sta_list, list) { + /* tear down aggregation sessions and remove STAs */ + mutex_lock(&local->sta_mtx); + list_for_each_entry(sta, &local->sta_list, list) { + if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { set_sta_flags(sta, WLAN_STA_BLOCK_BA); ieee80211_sta_tear_down_BA_sessions(sta); } - } - - rcu_read_unlock(); - /* remove STAs */ - mutex_lock(&local->sta_mtx); - list_for_each_entry(sta, &local->sta_list, list) { if (sta->uploaded) { sdata = sta->sdata; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 813da34db733..786bbd3103b1 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -42,9 +42,6 @@ * be in the queues * @WLAN_STA_PSPOLL: Station sent PS-poll while driver was keeping * station in power-save mode, reply when the driver unblocks. - * @WLAN_STA_DISASSOC: Disassociation in progress. - * This is used to reject TX BA session requests when disassociation - * is in progress. */ enum ieee80211_sta_info_flags { WLAN_STA_AUTH = 1<<0, @@ -60,7 +57,6 @@ enum ieee80211_sta_info_flags { WLAN_STA_BLOCK_BA = 1<<11, WLAN_STA_PS_DRIVER = 1<<12, WLAN_STA_PSPOLL = 1<<13, - WLAN_STA_DISASSOC = 1<<14, }; #define STA_TID_NUM 16 diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 5b79d552780a..a54cf146ed50 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1138,18 +1138,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) } mutex_unlock(&local->sta_mtx); - /* Clear Suspend state so that ADDBA requests can be processed */ - - rcu_read_lock(); - - if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { - list_for_each_entry_rcu(sta, &local->sta_list, list) { - clear_sta_flags(sta, WLAN_STA_BLOCK_BA); - } - } - - rcu_read_unlock(); - /* setup RTS threshold */ drv_set_rts_threshold(local, hw->wiphy->rts_threshold); @@ -1202,13 +1190,26 @@ int ieee80211_reconfig(struct ieee80211_local *local) } } - rcu_read_lock(); + /* + * Clear the WLAN_STA_BLOCK_BA flag so new aggregation + * sessions can be established after a resume. + * + * Also tear down aggregation sessions since reconfiguring + * them in a hardware restart scenario is not easily done + * right now, and the hardware will have lost information + * about the sessions, but we and the AP still think they + * are active. This is really a workaround though. 
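The locking switch made in the mlme.c, pm.c and util.c hunks here is sketched generically below, with names that are not mac80211 API: RCU iteration is fine for read-only walks, but these paths modify per-station state, so they take the mutex that protects list mutation and walk the list the plain, non-RCU way.

#include <linux/list.h>
#include <linux/mutex.h>

struct example_sta {
	struct list_head list;
	unsigned long flags;
};

static DEFINE_MUTEX(example_sta_mtx);
static LIST_HEAD(example_sta_list);

static void example_block_all(unsigned long block_flag)
{
	struct example_sta *sta;

	mutex_lock(&example_sta_mtx);
	list_for_each_entry(sta, &example_sta_list, list)
		sta->flags |= block_flag;	/* safe: list writers excluded */
	mutex_unlock(&example_sta_mtx);
}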
+ */ if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { - list_for_each_entry_rcu(sta, &local->sta_list, list) { + mutex_lock(&local->sta_mtx); + + list_for_each_entry(sta, &local->sta_list, list) { ieee80211_sta_tear_down_BA_sessions(sta); + clear_sta_flags(sta, WLAN_STA_BLOCK_BA); } + + mutex_unlock(&local->sta_mtx); } - rcu_read_unlock(); /* add back keys */ list_for_each_entry(sdata, &local->interfaces, list) -- cgit v1.2.2 From 35f20c14a103ca2c7062999e934a513b83d84de6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 10 Jun 2010 10:21:30 +0200 Subject: mac80211: use common skb queue IBSS, managed and mesh modes all have an skb queue, and in the future we want to also use it in other modes, so make them all use a common skb queue already. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ibss.c | 7 +++---- net/mac80211/ieee80211_i.h | 7 ++----- net/mac80211/iface.c | 21 +++++++++++---------- net/mac80211/mesh.c | 14 ++------------ net/mac80211/mlme.c | 5 ++--- 5 files changed, 20 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index d7a96ced2c83..a9ff904620db 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -779,7 +779,7 @@ static void ieee80211_ibss_work(struct work_struct *work) return; ifibss = &sdata->u.ibss; - while ((skb = skb_dequeue(&ifibss->skb_queue))) + while ((skb = skb_dequeue(&sdata->skb_queue))) ieee80211_ibss_rx_queued_mgmt(sdata, skb); if (!test_and_clear_bit(IEEE80211_IBSS_REQ_RUN, &ifibss->request)) @@ -850,7 +850,6 @@ void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata) INIT_WORK(&ifibss->work, ieee80211_ibss_work); setup_timer(&ifibss->timer, ieee80211_ibss_timer, (unsigned long) sdata); - skb_queue_head_init(&ifibss->skb_queue); } /* scan finished notification */ @@ -890,7 +889,7 @@ ieee80211_ibss_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) case IEEE80211_STYPE_BEACON: case IEEE80211_STYPE_PROBE_REQ: case IEEE80211_STYPE_AUTH: - skb_queue_tail(&sdata->u.ibss.skb_queue, skb); + skb_queue_tail(&sdata->skb_queue, skb); ieee80211_queue_work(&local->hw, &sdata->u.ibss.work); return RX_QUEUED; } @@ -983,7 +982,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) synchronize_rcu(); kfree_skb(skb); - skb_queue_purge(&sdata->u.ibss.skb_queue); + skb_queue_purge(&sdata->skb_queue); memset(sdata->u.ibss.bssid, 0, ETH_ALEN); sdata->u.ibss.ssid_len = 0; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 5782a537f74a..3cc3867c5418 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -340,8 +340,6 @@ struct ieee80211_if_managed { u16 aid; - struct sk_buff_head skb_queue; - unsigned long timers_running; /* used for quiesce/restart */ bool powersave; /* powersave requested for this iface */ enum ieee80211_smps_mode req_smps, /* requested smps mode */ @@ -388,8 +386,6 @@ struct ieee80211_if_ibss { struct timer_list timer; struct work_struct work; - struct sk_buff_head skb_queue; - unsigned long request; unsigned long last_scan_completed; @@ -420,7 +416,6 @@ struct ieee80211_if_mesh { struct timer_list housekeeping_timer; struct timer_list mesh_path_timer; struct timer_list mesh_path_root_timer; - struct sk_buff_head skb_queue; unsigned long timers_running; @@ -517,6 +512,8 @@ struct ieee80211_sub_if_data { u16 sequence_number; + struct sk_buff_head skb_queue; + /* * AP this belongs to: self in AP mode and * corresponding AP in VLAN mode, NULL for diff --git a/net/mac80211/iface.c 
b/net/mac80211/iface.c index 906fc2be0cfb..56167a3d872d 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -465,21 +465,11 @@ static int ieee80211_stop(struct net_device *dev) cancel_work_sync(&sdata->u.mgd.monitor_work); cancel_work_sync(&sdata->u.mgd.beacon_connection_loss_work); - /* - * When we get here, the interface is marked down. - * Call synchronize_rcu() to wait for the RX path - * should it be using the interface and enqueuing - * frames at this very time on another CPU. - */ - synchronize_rcu(); - skb_queue_purge(&sdata->u.mgd.skb_queue); /* fall through */ case NL80211_IFTYPE_ADHOC: if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { del_timer_sync(&sdata->u.ibss.timer); cancel_work_sync(&sdata->u.ibss.work); - synchronize_rcu(); - skb_queue_purge(&sdata->u.ibss.skb_queue); } /* fall through */ case NL80211_IFTYPE_MESH_POINT: @@ -495,6 +485,15 @@ static int ieee80211_stop(struct net_device *dev) } /* fall through */ default: + /* + * When we get here, the interface is marked down. + * Call synchronize_rcu() to wait for the RX path + * should it be using the interface and enqueuing + * frames at this very time on another CPU. + */ + synchronize_rcu(); + skb_queue_purge(&sdata->skb_queue); + if (local->scan_sdata == sdata) ieee80211_scan_cancel(local); @@ -721,6 +720,8 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, /* only monitor differs */ sdata->dev->type = ARPHRD_ETHER; + skb_queue_head_init(&sdata->skb_queue); + switch (type) { case NL80211_IFTYPE_AP: skb_queue_head_init(&sdata->u.ap.ps_bc_buf); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index bde81031727a..065533a37abe 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -537,15 +537,6 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) * it no longer is. */ cancel_work_sync(&sdata->u.mesh.work); - - /* - * When we get here, the interface is marked down. - * Call synchronize_rcu() to wait for the RX path - * should it be using the interface and enqueuing - * frames at this very time on another CPU. 
- */ - rcu_barrier(); /* Wait for RX path and call_rcu()'s */ - skb_queue_purge(&sdata->u.mesh.skb_queue); } static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, @@ -650,7 +641,7 @@ static void ieee80211_mesh_work(struct work_struct *work) if (local->scanning) return; - while ((skb = skb_dequeue(&ifmsh->skb_queue))) + while ((skb = skb_dequeue(&sdata->skb_queue))) ieee80211_mesh_rx_queued_mgmt(sdata, skb); if (ifmsh->preq_queue_len && @@ -690,7 +681,6 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) setup_timer(&ifmsh->housekeeping_timer, ieee80211_mesh_housekeeping_timer, (unsigned long) sdata); - skb_queue_head_init(&sdata->u.mesh.skb_queue); ifmsh->mshcfg.dot11MeshRetryTimeout = MESH_RET_T; ifmsh->mshcfg.dot11MeshConfirmTimeout = MESH_CONF_T; @@ -750,7 +740,7 @@ ieee80211_mesh_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) case IEEE80211_STYPE_ACTION: case IEEE80211_STYPE_PROBE_RESP: case IEEE80211_STYPE_BEACON: - skb_queue_tail(&ifmsh->skb_queue, skb); + skb_queue_tail(&sdata->skb_queue, skb); ieee80211_queue_work(&local->hw, &ifmsh->work); return RX_QUEUED; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0154d74905c9..854ef4ed2cfa 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1652,7 +1652,7 @@ ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, case IEEE80211_STYPE_DEAUTH: case IEEE80211_STYPE_DISASSOC: case IEEE80211_STYPE_ACTION: - skb_queue_tail(&sdata->u.mgd.skb_queue, skb); + skb_queue_tail(&sdata->skb_queue, skb); ieee80211_queue_work(&local->hw, &sdata->u.mgd.work); return RX_QUEUED; } @@ -1810,7 +1810,7 @@ static void ieee80211_sta_work(struct work_struct *work) ifmgd = &sdata->u.mgd; /* first process frames to avoid timing out while a frame is pending */ - while ((skb = skb_dequeue(&ifmgd->skb_queue))) + while ((skb = skb_dequeue(&sdata->skb_queue))) ieee80211_sta_rx_queued_mgmt(sdata, skb); /* then process the rest of the work */ @@ -1967,7 +1967,6 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) (unsigned long) sdata); setup_timer(&ifmgd->chswitch_timer, ieee80211_chswitch_timer, (unsigned long) sdata); - skb_queue_head_init(&ifmgd->skb_queue); ifmgd->flags = 0; -- cgit v1.2.2 From 64592c8fc0e99d445fc3fdedddeb6088e20086f1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 10 Jun 2010 10:21:31 +0200 Subject: mac80211: use common work struct IBSS, managed and mesh modes all have their own work struct, and in the future we want to also use it in other modes to process frames from the now common skb queue. This also makes the skb queue and work safe to use from other interface types. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/ibss.c | 13 ++++++------- net/mac80211/ieee80211_i.h | 4 +--- net/mac80211/iface.c | 6 ++---- net/mac80211/mesh.c | 22 +++++++++------------- net/mac80211/mesh_hwmp.c | 4 ++-- net/mac80211/mesh_pathtbl.c | 4 ++-- net/mac80211/mlme.c | 12 +++++------- net/mac80211/pm.c | 2 ++ 8 files changed, 29 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index a9ff904620db..18c4266cca1f 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -761,7 +761,7 @@ static void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, static void ieee80211_ibss_work(struct work_struct *work) { struct ieee80211_sub_if_data *sdata = - container_of(work, struct ieee80211_sub_if_data, u.ibss.work); + container_of(work, struct ieee80211_sub_if_data, work); struct ieee80211_local *local = sdata->local; struct ieee80211_if_ibss *ifibss; struct sk_buff *skb; @@ -804,7 +804,7 @@ static void ieee80211_queue_ibss_work(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; set_bit(IEEE80211_IBSS_REQ_RUN, &ifibss->request); - ieee80211_queue_work(&local->hw, &ifibss->work); + ieee80211_queue_work(&local->hw, &sdata->work); } static void ieee80211_ibss_timer(unsigned long data) @@ -827,7 +827,6 @@ void ieee80211_ibss_quiesce(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - cancel_work_sync(&ifibss->work); if (del_timer_sync(&ifibss->timer)) ifibss->timer_running = true; } @@ -847,7 +846,7 @@ void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - INIT_WORK(&ifibss->work, ieee80211_ibss_work); + INIT_WORK(&sdata->work, ieee80211_ibss_work); setup_timer(&ifibss->timer, ieee80211_ibss_timer, (unsigned long) sdata); } @@ -890,7 +889,7 @@ ieee80211_ibss_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) case IEEE80211_STYPE_PROBE_REQ: case IEEE80211_STYPE_AUTH: skb_queue_tail(&sdata->skb_queue, skb); - ieee80211_queue_work(&local->hw, &sdata->u.ibss.work); + ieee80211_queue_work(&local->hw, &sdata->work); return RX_QUEUED; } @@ -956,7 +955,7 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, ieee80211_recalc_idle(sdata->local); set_bit(IEEE80211_IBSS_REQ_RUN, &sdata->u.ibss.request); - ieee80211_queue_work(&sdata->local->hw, &sdata->u.ibss.work); + ieee80211_queue_work(&sdata->local->hw, &sdata->work); return 0; } @@ -967,7 +966,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) del_timer_sync(&sdata->u.ibss.timer); clear_bit(IEEE80211_IBSS_REQ_RUN, &sdata->u.ibss.request); - cancel_work_sync(&sdata->u.ibss.work); + cancel_work_sync(&sdata->work); clear_bit(IEEE80211_IBSS_REQ_RUN, &sdata->u.ibss.request); sta_info_flush(sdata->local, sdata); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 3cc3867c5418..2873f6374d1a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -325,7 +325,6 @@ struct ieee80211_if_managed { struct timer_list conn_mon_timer; struct timer_list bcn_mon_timer; struct timer_list chswitch_timer; - struct work_struct work; struct work_struct monitor_work; struct work_struct chswitch_work; struct work_struct beacon_connection_loss_work; @@ -384,7 +383,6 @@ enum ieee80211_ibss_request { struct ieee80211_if_ibss { struct timer_list timer; - struct work_struct work; unsigned long request; unsigned long last_scan_completed; @@ -412,7 +410,6 @@ struct ieee80211_if_ibss { }; struct 
ieee80211_if_mesh { - struct work_struct work; struct timer_list housekeeping_timer; struct timer_list mesh_path_timer; struct timer_list mesh_path_root_timer; @@ -512,6 +509,7 @@ struct ieee80211_sub_if_data { u16 sequence_number; + struct work_struct work; struct sk_buff_head skb_queue; /* diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 56167a3d872d..de7ddc303a5f 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -460,17 +460,14 @@ static int ieee80211_stop(struct net_device *dev) * whether the interface is running, which, at this point, * it no longer is. */ - cancel_work_sync(&sdata->u.mgd.work); cancel_work_sync(&sdata->u.mgd.chswitch_work); cancel_work_sync(&sdata->u.mgd.monitor_work); cancel_work_sync(&sdata->u.mgd.beacon_connection_loss_work); /* fall through */ case NL80211_IFTYPE_ADHOC: - if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { + if (sdata->vif.type == NL80211_IFTYPE_ADHOC) del_timer_sync(&sdata->u.ibss.timer); - cancel_work_sync(&sdata->u.ibss.work); - } /* fall through */ case NL80211_IFTYPE_MESH_POINT: if (ieee80211_vif_is_mesh(&sdata->vif)) { @@ -485,6 +482,7 @@ static int ieee80211_stop(struct net_device *dev) } /* fall through */ default: + cancel_work_sync(&sdata->work); /* * When we get here, the interface is marked down. * Call synchronize_rcu() to wait for the RX path diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 065533a37abe..be9aa980e941 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -54,7 +54,7 @@ static void ieee80211_mesh_housekeeping_timer(unsigned long data) return; } - ieee80211_queue_work(&local->hw, &ifmsh->work); + ieee80211_queue_work(&local->hw, &sdata->work); } /** @@ -345,7 +345,7 @@ static void ieee80211_mesh_path_timer(unsigned long data) return; } - ieee80211_queue_work(&local->hw, &ifmsh->work); + ieee80211_queue_work(&local->hw, &sdata->work); } static void ieee80211_mesh_path_root_timer(unsigned long data) @@ -362,7 +362,7 @@ static void ieee80211_mesh_path_root_timer(unsigned long data) return; } - ieee80211_queue_work(&local->hw, &ifmsh->work); + ieee80211_queue_work(&local->hw, &sdata->work); } void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh) @@ -484,9 +484,6 @@ void ieee80211_mesh_quiesce(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - /* might restart the timer but that doesn't matter */ - cancel_work_sync(&ifmsh->work); - /* use atomic bitops in case both timers fire at the same time */ if (del_timer_sync(&ifmsh->housekeeping_timer)) @@ -518,7 +515,7 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) set_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags); ieee80211_mesh_root_setup(ifmsh); - ieee80211_queue_work(&local->hw, &ifmsh->work); + ieee80211_queue_work(&local->hw, &sdata->work); sdata->vif.bss_conf.beacon_int = MESH_DEFAULT_BEACON_INTERVAL; ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON | BSS_CHANGED_BEACON_ENABLED | @@ -536,7 +533,7 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) * whether the interface is running, which, at this point, * it no longer is. 
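What the IBSS, managed and mesh hunks above converge on is the per-interface deferral pattern sketched below (generic names, not the mac80211 API): the RX path queues the frame on one shared queue and kicks one shared work item, and the work function drains the queue in process context.

#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/workqueue.h>

struct example_iface {
	struct sk_buff_head skb_queue;
	struct work_struct work;
};

static void example_work(struct work_struct *work)
{
	struct example_iface *ifc =
		container_of(work, struct example_iface, work);
	struct sk_buff *skb;

	while ((skb = skb_dequeue(&ifc->skb_queue)))
		kfree_skb(skb);		/* stand-in for real processing */
}

static void example_iface_init(struct example_iface *ifc)
{
	skb_queue_head_init(&ifc->skb_queue);
	INIT_WORK(&ifc->work, example_work);
}

static void example_rx_defer(struct example_iface *ifc, struct sk_buff *skb)
{
	skb_queue_tail(&ifc->skb_queue, skb);
	schedule_work(&ifc->work);
}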
*/ - cancel_work_sync(&sdata->u.mesh.work); + cancel_work_sync(&sdata->work); } static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, @@ -630,7 +627,7 @@ static void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, static void ieee80211_mesh_work(struct work_struct *work) { struct ieee80211_sub_if_data *sdata = - container_of(work, struct ieee80211_sub_if_data, u.mesh.work); + container_of(work, struct ieee80211_sub_if_data, work); struct ieee80211_local *local = sdata->local; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct sk_buff *skb; @@ -669,7 +666,7 @@ void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) if (ieee80211_vif_is_mesh(&sdata->vif)) - ieee80211_queue_work(&local->hw, &sdata->u.mesh.work); + ieee80211_queue_work(&local->hw, &sdata->work); rcu_read_unlock(); } @@ -677,7 +674,7 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - INIT_WORK(&ifmsh->work, ieee80211_mesh_work); + INIT_WORK(&sdata->work, ieee80211_mesh_work); setup_timer(&ifmsh->housekeeping_timer, ieee80211_mesh_housekeeping_timer, (unsigned long) sdata); @@ -726,7 +723,6 @@ ieee80211_rx_result ieee80211_mesh_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; - struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_mgmt *mgmt; u16 fc; @@ -741,7 +737,7 @@ ieee80211_mesh_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) case IEEE80211_STYPE_PROBE_RESP: case IEEE80211_STYPE_BEACON: skb_queue_tail(&sdata->skb_queue, skb); - ieee80211_queue_work(&local->hw, &ifmsh->work); + ieee80211_queue_work(&local->hw, &sdata->work); return RX_QUEUED; } diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 0705018d8d1e..829e08a657d0 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -805,14 +805,14 @@ static void mesh_queue_preq(struct mesh_path *mpath, u8 flags) spin_unlock(&ifmsh->mesh_preq_queue_lock); if (time_after(jiffies, ifmsh->last_preq + min_preq_int_jiff(sdata))) - ieee80211_queue_work(&sdata->local->hw, &ifmsh->work); + ieee80211_queue_work(&sdata->local->hw, &sdata->work); else if (time_before(jiffies, ifmsh->last_preq)) { /* avoid long wait if did not send preqs for a long time * and jiffies wrapped around */ ifmsh->last_preq = jiffies - min_preq_int_jiff(sdata) - 1; - ieee80211_queue_work(&sdata->local->hw, &ifmsh->work); + ieee80211_queue_work(&sdata->local->hw, &sdata->work); } else mod_timer(&ifmsh->mesh_path_timer, ifmsh->last_preq + min_preq_int_jiff(sdata)); diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 181ffd6efd81..349e466cf08b 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -315,7 +315,7 @@ int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata) read_unlock(&pathtbl_resize_lock); if (grow) { set_bit(MESH_WORK_GROW_MPATH_TABLE, &ifmsh->wrkq_flags); - ieee80211_queue_work(&local->hw, &ifmsh->work); + ieee80211_queue_work(&local->hw, &sdata->work); } return 0; @@ -425,7 +425,7 @@ int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata) read_unlock(&pathtbl_resize_lock); if (grow) { set_bit(MESH_WORK_GROW_MPP_TABLE, &ifmsh->wrkq_flags); - ieee80211_queue_work(&local->hw, &ifmsh->work); + ieee80211_queue_work(&local->hw, &sdata->work); } return 0; diff --git a/net/mac80211/mlme.c 
b/net/mac80211/mlme.c index 854ef4ed2cfa..75b896d47ea1 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1653,7 +1653,7 @@ ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, case IEEE80211_STYPE_DISASSOC: case IEEE80211_STYPE_ACTION: skb_queue_tail(&sdata->skb_queue, skb); - ieee80211_queue_work(&local->hw, &sdata->u.mgd.work); + ieee80211_queue_work(&local->hw, &sdata->work); return RX_QUEUED; } @@ -1779,13 +1779,13 @@ static void ieee80211_sta_timer(unsigned long data) return; } - ieee80211_queue_work(&local->hw, &ifmgd->work); + ieee80211_queue_work(&local->hw, &sdata->work); } static void ieee80211_sta_work(struct work_struct *work) { struct ieee80211_sub_if_data *sdata = - container_of(work, struct ieee80211_sub_if_data, u.mgd.work); + container_of(work, struct ieee80211_sub_if_data, work); struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd; struct sk_buff *skb; @@ -1906,8 +1906,7 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata) ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.monitor_work); /* and do all the other regular work too */ - ieee80211_queue_work(&sdata->local->hw, - &sdata->u.mgd.work); + ieee80211_queue_work(&sdata->local->hw, &sdata->work); } } @@ -1922,7 +1921,6 @@ void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata) * time -- the code here is properly synchronised. */ - cancel_work_sync(&ifmgd->work); cancel_work_sync(&ifmgd->beacon_connection_loss_work); if (del_timer_sync(&ifmgd->timer)) set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running); @@ -1954,7 +1952,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_managed *ifmgd; ifmgd = &sdata->u.mgd; - INIT_WORK(&ifmgd->work, ieee80211_sta_work); + INIT_WORK(&sdata->work, ieee80211_sta_work); INIT_WORK(&ifmgd->monitor_work, ieee80211_sta_monitor_work); INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work); INIT_WORK(&ifmgd->beacon_connection_loss_work, diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index e145a949b820..d287fde0431d 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -64,6 +64,8 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) /* remove all interfaces */ list_for_each_entry(sdata, &local->interfaces, list) { + cancel_work_sync(&sdata->work); + switch(sdata->vif.type) { case NL80211_IFTYPE_STATION: ieee80211_sta_quiesce(sdata); -- cgit v1.2.2 From 1fa57d017366fb26b58af110a38b36a4f0214a62 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 10 Jun 2010 10:21:32 +0200 Subject: mac80211: use common work function Even with the previous patch, IBSS, managed and mesh modes all attach their own work function to the shared work struct, which means some duplicated code. Change that to only have a frame processing function and a further work function for each of them and share some common code. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/ibss.c | 29 +++------------------- net/mac80211/ieee80211_i.h | 11 ++++++++ net/mac80211/iface.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++ net/mac80211/mesh.c | 20 +++------------ net/mac80211/mlme.c | 35 +++----------------------- 5 files changed, 84 insertions(+), 73 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 18c4266cca1f..db5a4796ff3c 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -727,8 +727,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, true); } -static void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb) +void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { struct ieee80211_rx_status *rx_status; struct ieee80211_mgmt *mgmt; @@ -758,29 +758,9 @@ static void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, kfree_skb(skb); } -static void ieee80211_ibss_work(struct work_struct *work) +void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = - container_of(work, struct ieee80211_sub_if_data, work); - struct ieee80211_local *local = sdata->local; - struct ieee80211_if_ibss *ifibss; - struct sk_buff *skb; - - if (WARN_ON(local->suspended)) - return; - - if (!ieee80211_sdata_running(sdata)) - return; - - if (local->scanning) - return; - - if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_ADHOC)) - return; - ifibss = &sdata->u.ibss; - - while ((skb = skb_dequeue(&sdata->skb_queue))) - ieee80211_ibss_rx_queued_mgmt(sdata, skb); + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; if (!test_and_clear_bit(IEEE80211_IBSS_REQ_RUN, &ifibss->request)) return; @@ -846,7 +826,6 @@ void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - INIT_WORK(&sdata->work, ieee80211_ibss_work); setup_timer(&ifibss->timer, ieee80211_ibss_timer, (unsigned long) sdata); } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 2873f6374d1a..f8c8f101592b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -995,6 +995,9 @@ void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, u64 timestamp); void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata); +void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata); +void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); /* IBSS code */ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local); @@ -1009,6 +1012,14 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata); void ieee80211_ibss_quiesce(struct ieee80211_sub_if_data *sdata); void ieee80211_ibss_restart(struct ieee80211_sub_if_data *sdata); +void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata); +void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); + +/* mesh code */ +void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata); +void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); /* scan/BSS handling */ void ieee80211_scan_work(struct work_struct *work); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index de7ddc303a5f..14212ad41e5a 100644 --- 
a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -701,6 +701,67 @@ static void ieee80211_if_setup(struct net_device *dev) dev->destructor = free_netdev; } +static void ieee80211_iface_work(struct work_struct *work) +{ + struct ieee80211_sub_if_data *sdata = + container_of(work, struct ieee80211_sub_if_data, work); + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb; + + if (!ieee80211_sdata_running(sdata)) + return; + + if (local->scanning) + return; + + /* + * ieee80211_queue_work() should have picked up most cases, + * here we'll pick the rest. + */ + if (WARN(local->suspended, + "interface work scheduled while going to suspend\n")) + return; + + /* first process frames */ + while ((skb = skb_dequeue(&sdata->skb_queue))) { + switch (sdata->vif.type) { + case NL80211_IFTYPE_STATION: + ieee80211_sta_rx_queued_mgmt(sdata, skb); + break; + case NL80211_IFTYPE_ADHOC: + ieee80211_ibss_rx_queued_mgmt(sdata, skb); + break; + case NL80211_IFTYPE_MESH_POINT: + if (!ieee80211_vif_is_mesh(&sdata->vif)) + break; + ieee80211_mesh_rx_queued_mgmt(sdata, skb); + break; + default: + WARN(1, "frame for unexpected interface type"); + kfree_skb(skb); + break; + } + } + + /* then other type-dependent work */ + switch (sdata->vif.type) { + case NL80211_IFTYPE_STATION: + ieee80211_sta_work(sdata); + break; + case NL80211_IFTYPE_ADHOC: + ieee80211_ibss_work(sdata); + break; + case NL80211_IFTYPE_MESH_POINT: + if (!ieee80211_vif_is_mesh(&sdata->vif)) + break; + ieee80211_mesh_work(sdata); + break; + default: + break; + } +} + + /* * Helper function to initialise an interface to a specific type. */ @@ -719,6 +780,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, sdata->dev->type = ARPHRD_ETHER; skb_queue_head_init(&sdata->skb_queue); + INIT_WORK(&sdata->work, ieee80211_iface_work); switch (type) { case NL80211_IFTYPE_AP: diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index be9aa980e941..0f1f593c8477 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -596,8 +596,8 @@ static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata, } } -static void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb) +void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { struct ieee80211_rx_status *rx_status; struct ieee80211_if_mesh *ifmsh; @@ -624,22 +624,9 @@ static void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, kfree_skb(skb); } -static void ieee80211_mesh_work(struct work_struct *work) +void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = - container_of(work, struct ieee80211_sub_if_data, work); - struct ieee80211_local *local = sdata->local; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - struct sk_buff *skb; - - if (!ieee80211_sdata_running(sdata)) - return; - - if (local->scanning) - return; - - while ((skb = skb_dequeue(&sdata->skb_queue))) - ieee80211_mesh_rx_queued_mgmt(sdata, skb); if (ifmsh->preq_queue_len && time_after(jiffies, @@ -674,7 +661,6 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - INIT_WORK(&sdata->work, ieee80211_mesh_work); setup_timer(&ifmsh->housekeeping_timer, ieee80211_mesh_housekeeping_timer, (unsigned long) sdata); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 75b896d47ea1..2f828ffd5698 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1660,8 +1660,8 @@ 
ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, return RX_DROP_MONITOR; } -static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb) +void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_rx_status *rx_status; @@ -1782,36 +1782,10 @@ static void ieee80211_sta_timer(unsigned long data) ieee80211_queue_work(&local->hw, &sdata->work); } -static void ieee80211_sta_work(struct work_struct *work) +void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = - container_of(work, struct ieee80211_sub_if_data, work); struct ieee80211_local *local = sdata->local; - struct ieee80211_if_managed *ifmgd; - struct sk_buff *skb; - - if (!ieee80211_sdata_running(sdata)) - return; - - if (local->scanning) - return; - - if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION)) - return; - - /* - * ieee80211_queue_work() should have picked up most cases, - * here we'll pick the rest. - */ - if (WARN(local->suspended, "STA MLME work scheduled while " - "going to suspend\n")) - return; - - ifmgd = &sdata->u.mgd; - - /* first process frames to avoid timing out while a frame is pending */ - while ((skb = skb_dequeue(&sdata->skb_queue))) - ieee80211_sta_rx_queued_mgmt(sdata, skb); + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; /* then process the rest of the work */ mutex_lock(&ifmgd->mtx); @@ -1952,7 +1926,6 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_managed *ifmgd; ifmgd = &sdata->u.mgd; - INIT_WORK(&sdata->work, ieee80211_sta_work); INIT_WORK(&ifmgd->monitor_work, ieee80211_sta_monitor_work); INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work); INIT_WORK(&ifmgd->beacon_connection_loss_work, -- cgit v1.2.2 From 36b3a628a4e85d002ee8813ebd2a5caef6d3c1a7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 10 Jun 2010 10:21:33 +0200 Subject: mac80211: common work skb freeing All the management processing functions free the skb after they are done, so this can be done in the new common code instead. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/ibss.c | 2 -- net/mac80211/iface.c | 3 ++- net/mac80211/mesh.c | 2 -- net/mac80211/mlme.c | 5 +---- 4 files changed, 3 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index db5a4796ff3c..982690af1f61 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -754,8 +754,6 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ieee80211_rx_mgmt_auth_ibss(sdata, mgmt, skb->len); break; } - - kfree_skb(skb); } void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 14212ad41e5a..1bf276d7024b 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -738,9 +738,10 @@ static void ieee80211_iface_work(struct work_struct *work) break; default: WARN(1, "frame for unexpected interface type"); - kfree_skb(skb); break; } + + kfree_skb(skb); } /* then other type-dependent work */ diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 0f1f593c8477..83b346cc4860 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -620,8 +620,6 @@ void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ieee80211_mesh_rx_mgmt_action(sdata, mgmt, skb->len, rx_status); break; } - - kfree_skb(skb); } void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2f828ffd5698..4a5b29dac9d0 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1754,7 +1754,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, default: WARN(1, "unexpected: %d", rma); } - goto out; + return; } mutex_unlock(&ifmgd->mtx); @@ -1762,9 +1762,6 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, if (skb->len >= 24 + 2 /* mgmt + deauth reason */ && (fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_DEAUTH) cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); - - out: - kfree_skb(skb); } static void ieee80211_sta_timer(unsigned long data) -- cgit v1.2.2 From 77a121c3a88eb00a4b5e753d083dbb7d49fefb0a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 10 Jun 2010 10:21:34 +0200 Subject: mac80211: pull mgmt frame rx into rx handler Some code is duplicated between ibss, mesh and managed mode regarding the queueing of management frames. Since all modes now use a common skb queue and a common work function, we can pull the queueing code into the rx handler directly and remove the duplicated length checks etc. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/ibss.c | 26 -------------------- net/mac80211/ieee80211_i.h | 4 --- net/mac80211/mesh.c | 25 ------------------- net/mac80211/mesh.h | 2 -- net/mac80211/mlme.c | 27 -------------------- net/mac80211/rx.c | 61 ++++++++++++++++++++++++++++++++++++---------- 6 files changed, 48 insertions(+), 97 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 982690af1f61..bfd7286488c7 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -847,32 +847,6 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local) mutex_unlock(&local->iflist_mtx); } -ieee80211_rx_result -ieee80211_ibss_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) -{ - struct ieee80211_local *local = sdata->local; - struct ieee80211_mgmt *mgmt; - u16 fc; - - if (skb->len < 24) - return RX_DROP_MONITOR; - - mgmt = (struct ieee80211_mgmt *) skb->data; - fc = le16_to_cpu(mgmt->frame_control); - - switch (fc & IEEE80211_FCTL_STYPE) { - case IEEE80211_STYPE_PROBE_RESP: - case IEEE80211_STYPE_BEACON: - case IEEE80211_STYPE_PROBE_REQ: - case IEEE80211_STYPE_AUTH: - skb_queue_tail(&sdata->skb_queue, skb); - ieee80211_queue_work(&local->hw, &sdata->work); - return RX_QUEUED; - } - - return RX_DROP_MONITOR; -} - int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, struct cfg80211_ibss_params *params) { diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index f8c8f101592b..a3bcaa3239d9 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -981,8 +981,6 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, struct cfg80211_disassoc_request *req, void *cookie); -ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb); void ieee80211_send_pspoll(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency); @@ -1002,8 +1000,6 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, /* IBSS code */ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local); void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata); -ieee80211_rx_result -ieee80211_ibss_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, u8 *bssid, u8 *addr, u32 supp_rates, gfp_t gfp); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 83b346cc4860..c8a4f19ed13b 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -702,28 +702,3 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) INIT_LIST_HEAD(&ifmsh->preq_queue.list); spin_lock_init(&ifmsh->mesh_preq_queue_lock); } - -ieee80211_rx_result -ieee80211_mesh_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) -{ - struct ieee80211_local *local = sdata->local; - struct ieee80211_mgmt *mgmt; - u16 fc; - - if (skb->len < 24) - return RX_DROP_MONITOR; - - mgmt = (struct ieee80211_mgmt *) skb->data; - fc = le16_to_cpu(mgmt->frame_control); - - switch (fc & IEEE80211_FCTL_STYPE) { - case IEEE80211_STYPE_ACTION: - case IEEE80211_STYPE_PROBE_RESP: - case IEEE80211_STYPE_BEACON: - skb_queue_tail(&sdata->skb_queue, skb); - ieee80211_queue_work(&local->hw, &sdata->work); - return RX_QUEUED; - } - - return RX_CONTINUE; -} diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index c88087f1cd0f..ebd3f1d9d889 100644 --- 
a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -237,8 +237,6 @@ void ieee80211s_update_metric(struct ieee80211_local *local, struct sta_info *stainfo, struct sk_buff *skb); void ieee80211s_stop(void); void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata); -ieee80211_rx_result -ieee80211_mesh_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata); void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata); void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 4a5b29dac9d0..036f1bfa7b05 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1633,33 +1633,6 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_bss_info_change_notify(sdata, changed); } -ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb) -{ - struct ieee80211_local *local = sdata->local; - struct ieee80211_mgmt *mgmt; - u16 fc; - - if (skb->len < 24) - return RX_DROP_MONITOR; - - mgmt = (struct ieee80211_mgmt *) skb->data; - fc = le16_to_cpu(mgmt->frame_control); - - switch (fc & IEEE80211_FCTL_STYPE) { - case IEEE80211_STYPE_PROBE_RESP: - case IEEE80211_STYPE_BEACON: - case IEEE80211_STYPE_DEAUTH: - case IEEE80211_STYPE_DISASSOC: - case IEEE80211_STYPE_ACTION: - skb_queue_tail(&sdata->skb_queue, skb); - ieee80211_queue_work(&local->hw, &sdata->work); - return RX_QUEUED; - } - - return RX_DROP_MONITOR; -} - void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 3adcda502b7d..40fe2798cbf5 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1950,8 +1950,11 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) if (len < IEEE80211_MIN_ACTION_SIZE + 1) break; - if (sdata->vif.type == NL80211_IFTYPE_STATION) - return ieee80211_sta_rx_mgmt(sdata, rx->skb); + if (sdata->vif.type == NL80211_IFTYPE_STATION) { + skb_queue_tail(&sdata->skb_queue, rx->skb); + ieee80211_queue_work(&local->hw, &sdata->work); + return RX_QUEUED; + } switch (mgmt->u.action.u.addba_req.action_code) { case WLAN_ACTION_ADDBA_REQ: @@ -2003,7 +2006,9 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) if (memcmp(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN)) break; - return ieee80211_sta_rx_mgmt(sdata, rx->skb); + skb_queue_tail(&sdata->skb_queue, rx->skb); + ieee80211_queue_work(&local->hw, &sdata->work); + return RX_QUEUED; } break; case WLAN_CATEGORY_SA_QUERY: @@ -2021,9 +2026,11 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) break; case WLAN_CATEGORY_MESH_PLINK: case WLAN_CATEGORY_MESH_PATH_SEL: - if (ieee80211_vif_is_mesh(&sdata->vif)) - return ieee80211_mesh_rx_mgmt(sdata, rx->skb); - break; + if (!ieee80211_vif_is_mesh(&sdata->vif)) + break; + skb_queue_tail(&sdata->skb_queue, rx->skb); + ieee80211_queue_work(&local->hw, &sdata->work); + return RX_QUEUED; } /* @@ -2080,10 +2087,15 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) { struct ieee80211_sub_if_data *sdata = rx->sdata; ieee80211_rx_result rxs; + struct ieee80211_mgmt *mgmt = (void *)rx->skb->data; + __le16 stype; if (!(rx->flags & IEEE80211_RX_RA_MATCH)) return RX_DROP_MONITOR; + if (rx->skb->len < 24) + return RX_DROP_MONITOR; + if (ieee80211_drop_unencrypted_mgmt(rx)) return RX_DROP_UNUSABLE; @@ -2091,16 +2103,39 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) if (rxs != RX_CONTINUE) return rxs; - if 
(ieee80211_vif_is_mesh(&sdata->vif)) - return ieee80211_mesh_rx_mgmt(sdata, rx->skb); + stype = mgmt->frame_control & cpu_to_le16(IEEE80211_FCTL_STYPE); - if (sdata->vif.type == NL80211_IFTYPE_ADHOC) - return ieee80211_ibss_rx_mgmt(sdata, rx->skb); + if (!ieee80211_vif_is_mesh(&sdata->vif) && + sdata->vif.type != NL80211_IFTYPE_ADHOC && + sdata->vif.type != NL80211_IFTYPE_STATION) + return RX_DROP_MONITOR; - if (sdata->vif.type == NL80211_IFTYPE_STATION) - return ieee80211_sta_rx_mgmt(sdata, rx->skb); + switch (stype) { + case cpu_to_le16(IEEE80211_STYPE_BEACON): + case cpu_to_le16(IEEE80211_STYPE_PROBE_RESP): + /* process for all: mesh, mlme, ibss */ + break; + case cpu_to_le16(IEEE80211_STYPE_DEAUTH): + case cpu_to_le16(IEEE80211_STYPE_DISASSOC): + /* process only for station */ + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return RX_DROP_MONITOR; + break; + case cpu_to_le16(IEEE80211_STYPE_PROBE_REQ): + case cpu_to_le16(IEEE80211_STYPE_AUTH): + /* process only for ibss */ + if (sdata->vif.type != NL80211_IFTYPE_ADHOC) + return RX_DROP_MONITOR; + break; + default: + return RX_DROP_MONITOR; + } - return RX_DROP_MONITOR; + /* queue up frame and kick off work to process it */ + skb_queue_tail(&sdata->skb_queue, rx->skb); + ieee80211_queue_work(&rx->local->hw, &sdata->work); + + return RX_QUEUED; } static void ieee80211_rx_michael_mic_report(struct ieee80211_hdr *hdr, -- cgit v1.2.2 From bed7ee6e44cb7633a4f9821688a6c7ae977615ed Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 10 Jun 2010 10:21:35 +0200 Subject: mac80211: always process blockack action from workqueue To prepare for making the ampdu_action callback sleep, make mac80211 always process blockack action frames from the skb queue. This gets rid of the current special case for managed mode interfaces as well. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/iface.c | 31 ++++++++++++++++++++++++++++++- net/mac80211/mlme.c | 38 -------------------------------------- net/mac80211/rx.c | 29 +++++++++++++---------------- 3 files changed, 43 insertions(+), 55 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 1bf276d7024b..7a3dbde9979e 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -724,7 +724,36 @@ static void ieee80211_iface_work(struct work_struct *work) /* first process frames */ while ((skb = skb_dequeue(&sdata->skb_queue))) { - switch (sdata->vif.type) { + struct ieee80211_mgmt *mgmt = (void *)skb->data; + + if (ieee80211_is_action(mgmt->frame_control) && + mgmt->u.action.category == WLAN_CATEGORY_BACK) { + int len = skb->len; + struct sta_info *sta; + + rcu_read_lock(); + sta = sta_info_get(sdata, mgmt->sa); + if (sta) { + switch (mgmt->u.action.u.addba_req.action_code) { + case WLAN_ACTION_ADDBA_REQ: + ieee80211_process_addba_request( + local, sta, mgmt, len); + break; + case WLAN_ACTION_ADDBA_RESP: + ieee80211_process_addba_resp(local, sta, + mgmt, len); + break; + case WLAN_ACTION_DELBA: + ieee80211_process_delba(sdata, sta, + mgmt, len); + break; + default: + WARN_ON(1); + break; + } + } + rcu_read_unlock(); + } else switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: ieee80211_sta_rx_queued_mgmt(sdata, skb); break; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 036f1bfa7b05..583b34686a26 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1666,44 +1666,6 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, break; case IEEE80211_STYPE_ACTION: switch (mgmt->u.action.category) { - case WLAN_CATEGORY_BACK: { - struct ieee80211_local *local = sdata->local; - int len = skb->len; - struct sta_info *sta; - - rcu_read_lock(); - sta = sta_info_get(sdata, mgmt->sa); - if (!sta) { - rcu_read_unlock(); - break; - } - - local_bh_disable(); - - switch (mgmt->u.action.u.addba_req.action_code) { - case WLAN_ACTION_ADDBA_REQ: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.addba_req))) - break; - ieee80211_process_addba_request(local, sta, mgmt, len); - break; - case WLAN_ACTION_ADDBA_RESP: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.addba_resp))) - break; - ieee80211_process_addba_resp(local, sta, mgmt, len); - break; - case WLAN_ACTION_DELBA: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.delba))) - break; - ieee80211_process_delba(sdata, sta, mgmt, len); - break; - } - local_bh_enable(); - rcu_read_unlock(); - break; - } case WLAN_CATEGORY_SPECTRUM_MGMT: ieee80211_sta_process_chanswitch(sdata, &mgmt->u.action.u.chan_switch.sw_elem, diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 40fe2798cbf5..435c2166e0c5 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1950,33 +1950,29 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) if (len < IEEE80211_MIN_ACTION_SIZE + 1) break; - if (sdata->vif.type == NL80211_IFTYPE_STATION) { - skb_queue_tail(&sdata->skb_queue, rx->skb); - ieee80211_queue_work(&local->hw, &sdata->work); - return RX_QUEUED; - } - switch (mgmt->u.action.u.addba_req.action_code) { case WLAN_ACTION_ADDBA_REQ: if (len < (IEEE80211_MIN_ACTION_SIZE + sizeof(mgmt->u.action.u.addba_req))) - return RX_DROP_MONITOR; - ieee80211_process_addba_request(local, rx->sta, mgmt, len); - goto handled; + goto invalid; + break; case WLAN_ACTION_ADDBA_RESP: if (len < (IEEE80211_MIN_ACTION_SIZE + sizeof(mgmt->u.action.u.addba_resp))) - break; - 
ieee80211_process_addba_resp(local, rx->sta, mgmt, len); - goto handled; + goto invalid; + break; case WLAN_ACTION_DELBA: if (len < (IEEE80211_MIN_ACTION_SIZE + sizeof(mgmt->u.action.u.delba))) - break; - ieee80211_process_delba(sdata, rx->sta, mgmt, len); - goto handled; + goto invalid; + break; + default: + goto invalid; } - break; + + skb_queue_tail(&sdata->skb_queue, rx->skb); + ieee80211_queue_work(&local->hw, &sdata->work); + return RX_QUEUED; case WLAN_CATEGORY_SPECTRUM_MGMT: if (local->hw.conf.channel->band != IEEE80211_BAND_5GHZ) break; @@ -2033,6 +2029,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) return RX_QUEUED; } + invalid: /* * For AP mode, hostapd is responsible for handling any action * frames that we didn't handle, including returning unknown -- cgit v1.2.2 From 344eec67c7b8557234e149d254bca2ae9614d61e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 10 Jun 2010 10:21:36 +0200 Subject: mac80211: move blockack stop due to fragmentation There's a corner case where we receive a fragmented frame during a blockack session, in which case we will terminate that session. To simplify future work in this area that will culminate in allowing the driver callbacks for aggregation to sleep, move the processing of this case out of the RX path into the interface work. This will simplify future work because the new place for this code doesn't require that the function will always be atomic, which the RX path needs. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/iface.c | 32 +++++++++++++++++++++++++++++++- net/mac80211/rx.c | 5 ++--- 2 files changed, 33 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 7a3dbde9979e..87fc012e4ab3 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -707,6 +707,7 @@ static void ieee80211_iface_work(struct work_struct *work) container_of(work, struct ieee80211_sub_if_data, work); struct ieee80211_local *local = sdata->local; struct sk_buff *skb; + struct sta_info *sta; if (!ieee80211_sdata_running(sdata)) return; @@ -729,7 +730,6 @@ static void ieee80211_iface_work(struct work_struct *work) if (ieee80211_is_action(mgmt->frame_control) && mgmt->u.action.category == WLAN_CATEGORY_BACK) { int len = skb->len; - struct sta_info *sta; rcu_read_lock(); sta = sta_info_get(sdata, mgmt->sa); @@ -753,6 +753,36 @@ static void ieee80211_iface_work(struct work_struct *work) } } rcu_read_unlock(); + } else if (ieee80211_is_data_qos(mgmt->frame_control)) { + struct ieee80211_hdr *hdr = (void *)mgmt; + /* + * So the frame isn't mgmt, but frame_control + * is at the right place anyway, of course, so + * the if statement is correct. + * + * Warn if we have other data frame types here, + * they must not get here. + */ + WARN_ON(hdr->frame_control & + cpu_to_le16(IEEE80211_STYPE_NULLFUNC)); + WARN_ON(!(hdr->seq_ctrl & + cpu_to_le16(IEEE80211_SCTL_FRAG))); + /* + * This was a fragment of a frame, received while + * a block-ack session was active. That cannot be + * right, so terminate the session. 
+ */ + rcu_read_lock(); + sta = sta_info_get(sdata, mgmt->sa); + if (sta) { + u16 tid = *ieee80211_get_qos_ctl(hdr) & + IEEE80211_QOS_CTL_TID_MASK; + + __ieee80211_stop_rx_ba_session( + sta, tid, WLAN_BACK_RECIPIENT, + WLAN_REASON_QSTA_REQUIRE_SETUP); + } + rcu_read_unlock(); } else switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: ieee80211_sta_rx_queued_mgmt(sdata, skb); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 435c2166e0c5..b716fa2370b3 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -741,9 +741,8 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, sc = le16_to_cpu(hdr->seq_ctrl); if (sc & IEEE80211_SCTL_FRAG) { spin_unlock(&sta->lock); - __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT, - WLAN_REASON_QSTA_REQUIRE_SETUP); - dev_kfree_skb(skb); + skb_queue_tail(&rx->sdata->skb_queue, skb); + ieee80211_queue_work(&local->hw, &rx->sdata->work); return; } -- cgit v1.2.2 From c1475ca99edcc7216ddc45838ab2c3281c14ba22 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 10 Jun 2010 10:21:37 +0200 Subject: mac80211: move aggregation callback processing This moves the aggregation callback processing to the per-sdata skb queue and a work function rather than the tasklet. Unfortunately, this means that it extends the pkt_type hack to that skb queue. However, it will enable making ampdu_action API changes gradually, my current plan is to get rid of this again by forcing drivers to only return from ampdu_action() when everything is done, thus removing the callbacks completely. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-tx.c | 14 ++++++-------- net/mac80211/ieee80211_i.h | 11 +++++++---- net/mac80211/iface.c | 15 ++++++++++++--- net/mac80211/main.c | 13 ------------- net/mac80211/rx.c | 5 +++++ 5 files changed, 30 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 9b9f21be0ff7..c7b7ac40316a 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -502,11 +502,10 @@ void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, ra_tid = (struct ieee80211_ra_tid *) &skb->cb; memcpy(&ra_tid->ra, ra, ETH_ALEN); ra_tid->tid = tid; - ra_tid->vif = vif; - skb->pkt_type = IEEE80211_ADDBA_MSG; - skb_queue_tail(&local->skb_queue, skb); - tasklet_schedule(&local->tasklet); + skb->pkt_type = IEEE80211_SDATA_QUEUE_AGG_START; + skb_queue_tail(&sdata->skb_queue, skb); + ieee80211_queue_work(&local->hw, &sdata->work); } EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe); @@ -637,11 +636,10 @@ void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, ra_tid = (struct ieee80211_ra_tid *) &skb->cb; memcpy(&ra_tid->ra, ra, ETH_ALEN); ra_tid->tid = tid; - ra_tid->vif = vif; - skb->pkt_type = IEEE80211_DELBA_MSG; - skb_queue_tail(&local->skb_queue, skb); - tasklet_schedule(&local->tasklet); + skb->pkt_type = IEEE80211_SDATA_QUEUE_AGG_STOP; + skb_queue_tail(&sdata->skb_queue, skb); + ieee80211_queue_work(&local->hw, &sdata->work); } EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb_irqsafe); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index a3bcaa3239d9..bafe610dcf77 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -564,11 +564,15 @@ ieee80211_sdata_set_mesh_id(struct ieee80211_sub_if_data *sdata, #endif } +enum sdata_queue_type { + IEEE80211_SDATA_QUEUE_TYPE_FRAME = 0, + IEEE80211_SDATA_QUEUE_AGG_START = 1, + IEEE80211_SDATA_QUEUE_AGG_STOP = 2, +}; + enum { IEEE80211_RX_MSG = 1, IEEE80211_TX_STATUS_MSG 
= 2, - IEEE80211_DELBA_MSG = 3, - IEEE80211_ADDBA_MSG = 4, }; enum queue_stop_reason { @@ -870,9 +874,8 @@ IEEE80211_DEV_TO_SUB_IF(struct net_device *dev) return netdev_priv(dev); } -/* this struct represents 802.11n's RA/TID combination along with our vif */ +/* this struct represents 802.11n's RA/TID combination */ struct ieee80211_ra_tid { - struct ieee80211_vif *vif; u8 ra[ETH_ALEN]; u16 tid; }; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 87fc012e4ab3..9f00d3f174e4 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -482,7 +482,7 @@ static int ieee80211_stop(struct net_device *dev) } /* fall through */ default: - cancel_work_sync(&sdata->work); + flush_work(&sdata->work); /* * When we get here, the interface is marked down. * Call synchronize_rcu() to wait for the RX path @@ -708,6 +708,7 @@ static void ieee80211_iface_work(struct work_struct *work) struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct sta_info *sta; + struct ieee80211_ra_tid *ra_tid; if (!ieee80211_sdata_running(sdata)) return; @@ -727,8 +728,16 @@ static void ieee80211_iface_work(struct work_struct *work) while ((skb = skb_dequeue(&sdata->skb_queue))) { struct ieee80211_mgmt *mgmt = (void *)skb->data; - if (ieee80211_is_action(mgmt->frame_control) && - mgmt->u.action.category == WLAN_CATEGORY_BACK) { + if (skb->pkt_type == IEEE80211_SDATA_QUEUE_AGG_START) { + ra_tid = (void *)&skb->cb; + ieee80211_start_tx_ba_cb(&sdata->vif, ra_tid->ra, + ra_tid->tid); + } else if (skb->pkt_type == IEEE80211_SDATA_QUEUE_AGG_STOP) { + ra_tid = (void *)&skb->cb; + ieee80211_stop_tx_ba_cb(&sdata->vif, ra_tid->ra, + ra_tid->tid); + } else if (ieee80211_is_action(mgmt->frame_control) && + mgmt->u.action.category == WLAN_CATEGORY_BACK) { int len = skb->len; rcu_read_lock(); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 88b671a16a41..a2d10d44641e 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -259,7 +259,6 @@ static void ieee80211_tasklet_handler(unsigned long data) { struct ieee80211_local *local = (struct ieee80211_local *) data; struct sk_buff *skb; - struct ieee80211_ra_tid *ra_tid; while ((skb = skb_dequeue(&local->skb_queue)) || (skb = skb_dequeue(&local->skb_queue_unreliable))) { @@ -274,18 +273,6 @@ static void ieee80211_tasklet_handler(unsigned long data) skb->pkt_type = 0; ieee80211_tx_status(local_to_hw(local), skb); break; - case IEEE80211_DELBA_MSG: - ra_tid = (struct ieee80211_ra_tid *) &skb->cb; - ieee80211_stop_tx_ba_cb(ra_tid->vif, ra_tid->ra, - ra_tid->tid); - dev_kfree_skb(skb); - break; - case IEEE80211_ADDBA_MSG: - ra_tid = (struct ieee80211_ra_tid *) &skb->cb; - ieee80211_start_tx_ba_cb(ra_tid->vif, ra_tid->ra, - ra_tid->tid); - dev_kfree_skb(skb); - break ; default: WARN(1, "mac80211: Packet is of unknown type %d\n", skb->pkt_type); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index b716fa2370b3..84f11733b9fe 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -741,6 +741,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, sc = le16_to_cpu(hdr->seq_ctrl); if (sc & IEEE80211_SCTL_FRAG) { spin_unlock(&sta->lock); + skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; skb_queue_tail(&rx->sdata->skb_queue, skb); ieee80211_queue_work(&local->hw, &rx->sdata->work); return; @@ -1969,6 +1970,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) goto invalid; } + rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; skb_queue_tail(&sdata->skb_queue, rx->skb); ieee80211_queue_work(&local->hw, &sdata->work); return 
RX_QUEUED; @@ -2001,6 +2003,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) if (memcmp(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN)) break; + rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; skb_queue_tail(&sdata->skb_queue, rx->skb); ieee80211_queue_work(&local->hw, &sdata->work); return RX_QUEUED; @@ -2023,6 +2026,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) case WLAN_CATEGORY_MESH_PATH_SEL: if (!ieee80211_vif_is_mesh(&sdata->vif)) break; + rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; skb_queue_tail(&sdata->skb_queue, rx->skb); ieee80211_queue_work(&local->hw, &sdata->work); return RX_QUEUED; @@ -2128,6 +2132,7 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) } /* queue up frame and kick off work to process it */ + rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; skb_queue_tail(&sdata->skb_queue, rx->skb); ieee80211_queue_work(&rx->local->hw, &sdata->work); -- cgit v1.2.2 From a87f736d942c86255e3088c606f0e3eab6bbf784 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 10 Jun 2010 10:21:38 +0200 Subject: mac80211: use RCU for RX aggregation Currently we allocate some memory for each RX aggregation session and additionally keep a flag indicating whether or not it is valid. By using RCU to protect the pointer and making sure that the memory is fully set up before it becomes visible to the RX path, we can remove the need for the bool that indicates validity, as well as for locking on the RX path since it is always synchronised against itself, and we can guarantee that all other modifications are done when the structure is not visible to the RX path. The net result is that since we remove locking requirements from the RX path, we can in the future use any kind of lock for the setup and teardown code paths. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/agg-rx.c | 70 +++++++++++++++++++++------------------------- net/mac80211/debugfs_sta.c | 6 ++-- net/mac80211/rx.c | 34 ++++++++++------------ net/mac80211/sta_info.h | 14 ++++++++-- 4 files changed, 61 insertions(+), 63 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 6bb9a9a94960..bbf36d980232 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -19,25 +19,36 @@ #include "ieee80211_i.h" #include "driver-ops.h" +static void ieee80211_free_tid_rx(struct rcu_head *h) +{ + struct tid_ampdu_rx *tid_rx = + container_of(h, struct tid_ampdu_rx, rcu_head); + int i; + + for (i = 0; i < tid_rx->buf_size; i++) + dev_kfree_skb(tid_rx->reorder_buf[i]); + kfree(tid_rx->reorder_buf); + kfree(tid_rx->reorder_time); + kfree(tid_rx); +} + static void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason, bool from_timer) { struct ieee80211_local *local = sta->local; struct tid_ampdu_rx *tid_rx; - int i; spin_lock_bh(&sta->lock); - /* check if TID is in operational state */ - if (!sta->ampdu_mlme.tid_active_rx[tid]) { + tid_rx = sta->ampdu_mlme.tid_rx[tid]; + + if (!tid_rx) { spin_unlock_bh(&sta->lock); return; } - sta->ampdu_mlme.tid_active_rx[tid] = false; - - tid_rx = sta->ampdu_mlme.tid_rx[tid]; + rcu_assign_pointer(sta->ampdu_mlme.tid_rx[tid], NULL); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Rx BA session stop requested for %pM tid %u\n", @@ -54,26 +65,12 @@ static void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, ieee80211_send_delba(sta->sdata, sta->sta.addr, tid, 0, reason); - /* free the reordering buffer */ - for (i = 0; i < tid_rx->buf_size; i++) { - if (tid_rx->reorder_buf[i]) { - /* release the reordered frames */ - dev_kfree_skb(tid_rx->reorder_buf[i]); - tid_rx->stored_mpdu_num--; - tid_rx->reorder_buf[i] = NULL; - } - } - - /* free resources */ - kfree(tid_rx->reorder_buf); - kfree(tid_rx->reorder_time); - sta->ampdu_mlme.tid_rx[tid] = NULL; - spin_unlock_bh(&sta->lock); if (!from_timer) del_timer_sync(&tid_rx->session_timer); - kfree(tid_rx); + + call_rcu(&tid_rx->rcu_head, ieee80211_free_tid_rx); } void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, @@ -214,7 +211,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, /* examine state machine */ spin_lock_bh(&sta->lock); - if (sta->ampdu_mlme.tid_active_rx[tid]) { + if (sta->ampdu_mlme.tid_rx[tid]) { #ifdef CONFIG_MAC80211_HT_DEBUG if (net_ratelimit()) printk(KERN_DEBUG "unexpected AddBA Req from " @@ -225,9 +222,8 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, } /* prepare A-MPDU MLME for Rx aggregation */ - sta->ampdu_mlme.tid_rx[tid] = - kmalloc(sizeof(struct tid_ampdu_rx), GFP_ATOMIC); - if (!sta->ampdu_mlme.tid_rx[tid]) { + tid_agg_rx = kmalloc(sizeof(struct tid_ampdu_rx), GFP_ATOMIC); + if (!tid_agg_rx) { #ifdef CONFIG_MAC80211_HT_DEBUG if (net_ratelimit()) printk(KERN_ERR "allocate rx mlme to tid %d failed\n", @@ -235,14 +231,11 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, #endif goto end; } - /* rx timer */ - sta->ampdu_mlme.tid_rx[tid]->session_timer.function = - sta_rx_agg_session_timer_expired; - sta->ampdu_mlme.tid_rx[tid]->session_timer.data = - (unsigned long)&sta->timer_to_tid[tid]; - init_timer(&sta->ampdu_mlme.tid_rx[tid]->session_timer); - tid_agg_rx = sta->ampdu_mlme.tid_rx[tid]; + /* rx timer */ + tid_agg_rx->session_timer.function = sta_rx_agg_session_timer_expired; + 
tid_agg_rx->session_timer.data = (unsigned long)&sta->timer_to_tid[tid]; + init_timer(&tid_agg_rx->session_timer); /* prepare reordering buffer */ tid_agg_rx->reorder_buf = @@ -257,8 +250,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, #endif kfree(tid_agg_rx->reorder_buf); kfree(tid_agg_rx->reorder_time); - kfree(sta->ampdu_mlme.tid_rx[tid]); - sta->ampdu_mlme.tid_rx[tid] = NULL; + kfree(tid_agg_rx); goto end; } @@ -270,13 +262,12 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, if (ret) { kfree(tid_agg_rx->reorder_buf); + kfree(tid_agg_rx->reorder_time); kfree(tid_agg_rx); - sta->ampdu_mlme.tid_rx[tid] = NULL; goto end; } - /* change state and send addba resp */ - sta->ampdu_mlme.tid_active_rx[tid] = true; + /* update data */ tid_agg_rx->dialog_token = dialog_token; tid_agg_rx->ssn = start_seq_num; tid_agg_rx->head_seq_num = start_seq_num; @@ -284,6 +275,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, tid_agg_rx->timeout = timeout; tid_agg_rx->stored_mpdu_num = 0; status = WLAN_STATUS_SUCCESS; + + /* activate it for RX */ + rcu_assign_pointer(sta->ampdu_mlme.tid_rx[tid], tid_agg_rx); end: spin_unlock_bh(&sta->lock); diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 576e024715e3..95c1ea47ad35 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -125,12 +125,12 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, for (i = 0; i < STA_TID_NUM; i++) { p += scnprintf(p, sizeof(buf) + buf - p, "%02d", i); p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", - sta->ampdu_mlme.tid_active_rx[i]); + !!sta->ampdu_mlme.tid_rx[i]); p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x", - sta->ampdu_mlme.tid_active_rx[i] ? + sta->ampdu_mlme.tid_rx[i] ? sta->ampdu_mlme.tid_rx[i]->dialog_token : 0); p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.3x", - sta->ampdu_mlme.tid_active_rx[i] ? + sta->ampdu_mlme.tid_rx[i] ? sta->ampdu_mlme.tid_rx[i]->ssn : 0); p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 84f11733b9fe..ee01daccacbb 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -719,16 +719,13 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; - spin_lock(&sta->lock); - - if (!sta->ampdu_mlme.tid_active_rx[tid]) - goto dont_reorder_unlock; - - tid_agg_rx = sta->ampdu_mlme.tid_rx[tid]; + tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]); + if (!tid_agg_rx) + goto dont_reorder; /* qos null data frames are excluded */ if (unlikely(hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_NULLFUNC))) - goto dont_reorder_unlock; + goto dont_reorder; /* new, potentially un-ordered, ampdu frame - process it */ @@ -740,20 +737,22 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, /* if this mpdu is fragmented - terminate rx aggregation session */ sc = le16_to_cpu(hdr->seq_ctrl); if (sc & IEEE80211_SCTL_FRAG) { - spin_unlock(&sta->lock); skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; skb_queue_tail(&rx->sdata->skb_queue, skb); ieee80211_queue_work(&local->hw, &rx->sdata->work); return; } - if (ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb, frames)) { - spin_unlock(&sta->lock); + /* + * No locking needed -- we will only ever process one + * RX packet at a time, and thus own tid_agg_rx. 
All + * other code manipulating it needs to (and does) make + * sure that we cannot get to it any more before doing + * anything with it. + */ + if (ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb, frames)) return; - } - dont_reorder_unlock: - spin_unlock(&sta->lock); dont_reorder: __skb_queue_tail(frames, skb); } @@ -1830,13 +1829,11 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) &bar_data, sizeof(bar_data))) return RX_DROP_MONITOR; - spin_lock(&rx->sta->lock); tid = le16_to_cpu(bar_data.control) >> 12; - if (!rx->sta->ampdu_mlme.tid_active_rx[tid]) { - spin_unlock(&rx->sta->lock); + + tid_agg_rx = rcu_dereference(rx->sta->ampdu_mlme.tid_rx[tid]); + if (!tid_agg_rx) return RX_DROP_MONITOR; - } - tid_agg_rx = rx->sta->ampdu_mlme.tid_rx[tid]; start_seq_num = le16_to_cpu(bar_data.start_seq_num) >> 4; @@ -1849,7 +1846,6 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) ieee80211_release_reorder_frames(hw, tid_agg_rx, start_seq_num, frames); kfree_skb(skb); - spin_unlock(&rx->sta->lock); return RX_QUEUED; } diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 786bbd3103b1..16864a6045b4 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -102,8 +102,18 @@ struct tid_ampdu_tx { * @buf_size: buffer size for incoming A-MPDUs * @timeout: reset timer value (in TUs). * @dialog_token: dialog token for aggregation session + * @rcu_head: RCU head used for freeing this struct + * + * This structure is protected by RCU and the per-station + * spinlock. Assignments to the array holding it must hold + * the spinlock, only the RX path can access it under RCU + * lock-free. The RX path, since it is single-threaded, + * can even modify the structure without locking since the + * only other modifications to it are done when the struct + * can not yet or no longer be found by the RX path. */ struct tid_ampdu_rx { + struct rcu_head rcu_head; struct sk_buff **reorder_buf; unsigned long *reorder_time; struct timer_list session_timer; @@ -118,8 +128,7 @@ struct tid_ampdu_rx { /** * struct sta_ampdu_mlme - STA aggregation information. * - * @tid_active_rx: TID's state in Rx session state machine. - * @tid_rx: aggregation info for Rx per TID + * @tid_rx: aggregation info for Rx per TID -- RCU protected * @tid_state_tx: TID's state in Tx session state machine. * @tid_tx: aggregation info for Tx per TID * @addba_req_num: number of times addBA request has been sent. @@ -127,7 +136,6 @@ struct tid_ampdu_rx { */ struct sta_ampdu_mlme { /* rx */ - bool tid_active_rx[STA_TID_NUM]; struct tid_ampdu_rx *tid_rx[STA_TID_NUM]; /* tx */ u8 tid_state_tx[STA_TID_NUM]; -- cgit v1.2.2 From a622ab72b4dcfdf53e24b16e9530cb876979a00c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 10 Jun 2010 10:21:39 +0200 Subject: mac80211: use RCU for TX aggregation Currently we allocate some memory for each TX aggregation session and additionally keep a state bitmap indicating the state it is in. By using RCU to protect the pointer, moving the state into the structure and some locking trickery we can avoid locking when the TX agg session is fully operational. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/agg-tx.c | 229 +++++++++++++++++++------------------ net/mac80211/debugfs_sta.c | 8 +- net/mac80211/ht.c | 9 +- net/mac80211/ieee80211_i.h | 2 - net/mac80211/rc80211_minstrel_ht.c | 2 +- net/mac80211/sta_info.c | 12 +- net/mac80211/sta_info.h | 33 +++--- net/mac80211/tx.c | 88 +++++++++----- 8 files changed, 206 insertions(+), 177 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index c7b7ac40316a..7d8656d51c6b 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -125,25 +125,42 @@ void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u1 ieee80211_tx_skb(sdata, skb); } -int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, - enum ieee80211_back_parties initiator) +static void kfree_tid_tx(struct rcu_head *rcu_head) +{ + struct tid_ampdu_tx *tid_tx = + container_of(rcu_head, struct tid_ampdu_tx, rcu_head); + + kfree(tid_tx); +} + +static int ___ieee80211_stop_tx_ba_session( + struct sta_info *sta, u16 tid, + enum ieee80211_back_parties initiator) { struct ieee80211_local *local = sta->local; + struct tid_ampdu_tx *tid_tx = sta->ampdu_mlme.tid_tx[tid]; int ret; - u8 *state; + + lockdep_assert_held(&sta->lock); + + if (WARN_ON(!tid_tx)) + return -ENOENT; #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Tx BA session stop requested for %pM tid %u\n", sta->sta.addr, tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - state = &sta->ampdu_mlme.tid_state_tx[tid]; + set_bit(HT_AGG_STATE_STOPPING, &tid_tx->state); - if (*state == HT_AGG_STATE_OPERATIONAL) - sta->ampdu_mlme.addba_req_num[tid] = 0; + /* + * After this packets are no longer handed right through + * to the driver but are put onto tid_tx->pending instead, + * with locking to ensure proper access. 
+ */ + clear_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state); - *state = HT_AGG_STATE_REQ_STOP_BA_MSK | - (initiator << HT_AGG_STATE_INITIATOR_SHIFT); + tid_tx->stop_initiator = initiator; ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_TX_STOP, @@ -174,15 +191,13 @@ static void sta_addba_resp_timer_expired(unsigned long data) u16 tid = *(u8 *)data; struct sta_info *sta = container_of((void *)data, struct sta_info, timer_to_tid[tid]); - u8 *state; - - state = &sta->ampdu_mlme.tid_state_tx[tid]; + struct tid_ampdu_tx *tid_tx; /* check if the TID waits for addBA response */ spin_lock_bh(&sta->lock); - if ((*state & (HT_ADDBA_REQUESTED_MSK | HT_ADDBA_RECEIVED_MSK | - HT_AGG_STATE_REQ_STOP_BA_MSK)) != - HT_ADDBA_REQUESTED_MSK) { + tid_tx = sta->ampdu_mlme.tid_tx[tid]; + if (!tid_tx || + test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) { spin_unlock_bh(&sta->lock); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "timer expired on tid %d but we are not " @@ -210,7 +225,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) struct sta_info *sta = container_of(pubsta, struct sta_info, sta); struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; - u8 *state; + struct tid_ampdu_tx *tid_tx; int ret = 0; u16 start_seq_num; @@ -256,9 +271,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) goto err_unlock_sta; } - state = &sta->ampdu_mlme.tid_state_tx[tid]; + tid_tx = sta->ampdu_mlme.tid_tx[tid]; /* check if the TID is not in aggregation flow already */ - if (*state != HT_AGG_STATE_IDLE) { + if (tid_tx) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "BA request denied - session is not " "idle on tid %u\n", tid); @@ -279,9 +294,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) IEEE80211_QUEUE_STOP_REASON_AGGREGATION); /* prepare A-MPDU MLME for Tx aggregation */ - sta->ampdu_mlme.tid_tx[tid] = - kmalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC); - if (!sta->ampdu_mlme.tid_tx[tid]) { + tid_tx = kzalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC); + if (!tid_tx) { #ifdef CONFIG_MAC80211_HT_DEBUG if (net_ratelimit()) printk(KERN_ERR "allocate tx mlme to tid %d failed\n", @@ -291,33 +305,27 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) goto err_wake_queue; } - skb_queue_head_init(&sta->ampdu_mlme.tid_tx[tid]->pending); + skb_queue_head_init(&tid_tx->pending); /* Tx timer */ - sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.function = - sta_addba_resp_timer_expired; - sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.data = - (unsigned long)&sta->timer_to_tid[tid]; - init_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); - - /* Ok, the Addba frame hasn't been sent yet, but if the driver calls the - * call back right away, it must see that the flow has begun */ - *state |= HT_ADDBA_REQUESTED_MSK; + tid_tx->addba_resp_timer.function = sta_addba_resp_timer_expired; + tid_tx->addba_resp_timer.data = (unsigned long)&sta->timer_to_tid[tid]; + init_timer(&tid_tx->addba_resp_timer); start_seq_num = sta->tid_seq[tid] >> 4; ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START, pubsta, tid, &start_seq_num); - if (ret) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "BA request denied - HW unavailable for" " tid %d\n", tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - *state = HT_AGG_STATE_IDLE; goto err_free; } + rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], tid_tx); + /* Driver vetoed or OKed, but we can take packets again now */ 
ieee80211_wake_queue_by_reason( &local->hw, ieee80211_ac_from_tid(tid), @@ -325,32 +333,30 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) spin_unlock(&local->ampdu_lock); + /* activate the timer for the recipient's addBA response */ + tid_tx->addba_resp_timer.expires = jiffies + ADDBA_RESP_INTERVAL; + add_timer(&tid_tx->addba_resp_timer); +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid); +#endif + /* prepare tid data */ sta->ampdu_mlme.dialog_token_allocator++; - sta->ampdu_mlme.tid_tx[tid]->dialog_token = - sta->ampdu_mlme.dialog_token_allocator; - sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num; + tid_tx->dialog_token = sta->ampdu_mlme.dialog_token_allocator; + tid_tx->ssn = start_seq_num; + + sta->ampdu_mlme.addba_req_num[tid]++; spin_unlock_bh(&sta->lock); /* send AddBA request */ ieee80211_send_addba_request(sdata, pubsta->addr, tid, - sta->ampdu_mlme.tid_tx[tid]->dialog_token, - sta->ampdu_mlme.tid_tx[tid]->ssn, + tid_tx->dialog_token, tid_tx->ssn, 0x40, 5000); - sta->ampdu_mlme.addba_req_num[tid]++; - /* activate the timer for the recipient's addBA response */ - sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.expires = - jiffies + ADDBA_RESP_INTERVAL; - add_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid); -#endif return 0; err_free: - kfree(sta->ampdu_mlme.tid_tx[tid]); - sta->ampdu_mlme.tid_tx[tid] = NULL; + kfree(tid_tx); err_wake_queue: ieee80211_wake_queue_by_reason( &local->hw, ieee80211_ac_from_tid(tid), @@ -368,7 +374,8 @@ EXPORT_SYMBOL(ieee80211_start_tx_ba_session); * local->ampdu_lock across both calls. */ static void ieee80211_agg_splice_packets(struct ieee80211_local *local, - struct sta_info *sta, u16 tid) + struct tid_ampdu_tx *tid_tx, + u16 tid) { unsigned long flags; u16 queue = ieee80211_ac_from_tid(tid); @@ -377,31 +384,23 @@ static void ieee80211_agg_splice_packets(struct ieee80211_local *local, &local->hw, queue, IEEE80211_QUEUE_STOP_REASON_AGGREGATION); - if (!(sta->ampdu_mlme.tid_state_tx[tid] & HT_ADDBA_REQUESTED_MSK)) - return; - - if (WARN(!sta->ampdu_mlme.tid_tx[tid], - "TID %d gone but expected when splicing aggregates from" - "the pending queue\n", tid)) + if (WARN(!tid_tx, "TID %d gone but expected when splicing aggregates" + " from the pending queue\n", tid)) return; - if (!skb_queue_empty(&sta->ampdu_mlme.tid_tx[tid]->pending)) { + if (!skb_queue_empty(&tid_tx->pending)) { spin_lock_irqsave(&local->queue_stop_reason_lock, flags); /* copy over remaining packets */ - skb_queue_splice_tail_init( - &sta->ampdu_mlme.tid_tx[tid]->pending, - &local->pending[queue]); + skb_queue_splice_tail_init(&tid_tx->pending, + &local->pending[queue]); spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); } } -static void ieee80211_agg_splice_finish(struct ieee80211_local *local, - struct sta_info *sta, u16 tid) +static void ieee80211_agg_splice_finish(struct ieee80211_local *local, u16 tid) { - u16 queue = ieee80211_ac_from_tid(tid); - ieee80211_wake_queue_by_reason( - &local->hw, queue, + &local->hw, ieee80211_ac_from_tid(tid), IEEE80211_QUEUE_STOP_REASON_AGGREGATION); } @@ -409,19 +408,21 @@ static void ieee80211_agg_splice_finish(struct ieee80211_local *local, static void ieee80211_agg_tx_operational(struct ieee80211_local *local, struct sta_info *sta, u16 tid) { + lockdep_assert_held(&sta->lock); + #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Aggregation is on 
for tid %d\n", tid); #endif spin_lock(&local->ampdu_lock); - ieee80211_agg_splice_packets(local, sta, tid); + ieee80211_agg_splice_packets(local, sta->ampdu_mlme.tid_tx[tid], tid); /* - * NB: we rely on sta->lock being taken in the TX - * processing here when adding to the pending queue, - * otherwise we could only change the state of the - * session to OPERATIONAL _here_. + * Now mark as operational. This will be visible + * in the TX path, and lets it go lock-free in + * the common case. */ - ieee80211_agg_splice_finish(local, sta, tid); + set_bit(HT_AGG_STATE_OPERATIONAL, &sta->ampdu_mlme.tid_tx[tid]->state); + ieee80211_agg_splice_finish(local, tid); spin_unlock(&local->ampdu_lock); drv_ampdu_action(local, sta->sdata, @@ -434,7 +435,7 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; struct sta_info *sta; - u8 *state; + struct tid_ampdu_tx *tid_tx; trace_api_start_tx_ba_cb(sdata, ra, tid); @@ -456,25 +457,22 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) return; } - state = &sta->ampdu_mlme.tid_state_tx[tid]; spin_lock_bh(&sta->lock); + tid_tx = sta->ampdu_mlme.tid_tx[tid]; - if (WARN_ON(!(*state & HT_ADDBA_REQUESTED_MSK))) { + if (WARN_ON(!tid_tx)) { #ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "addBA was not requested yet, state is %d\n", - *state); + printk(KERN_DEBUG "addBA was not requested!\n"); #endif spin_unlock_bh(&sta->lock); rcu_read_unlock(); return; } - if (WARN_ON(*state & HT_ADDBA_DRV_READY_MSK)) + if (WARN_ON(test_and_set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state))) goto out; - *state |= HT_ADDBA_DRV_READY_MSK; - - if (*state == HT_AGG_STATE_OPERATIONAL) + if (test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) ieee80211_agg_tx_operational(local, sta, tid); out: @@ -512,14 +510,14 @@ EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe); int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, enum ieee80211_back_parties initiator) { - u8 *state; + struct tid_ampdu_tx *tid_tx; int ret; - /* check if the TID is in aggregation */ - state = &sta->ampdu_mlme.tid_state_tx[tid]; spin_lock_bh(&sta->lock); + tid_tx = sta->ampdu_mlme.tid_tx[tid]; - if (*state != HT_AGG_STATE_OPERATIONAL) { + /* check if the TID is in aggregation */ + if (!tid_tx || !test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) { ret = -ENOENT; goto unlock; } @@ -554,7 +552,7 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; struct sta_info *sta; - u8 *state; + struct tid_ampdu_tx *tid_tx; trace_api_stop_tx_ba_cb(sdata, ra, tid); @@ -580,39 +578,45 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) rcu_read_unlock(); return; } - state = &sta->ampdu_mlme.tid_state_tx[tid]; - /* NOTE: no need to use sta->lock in this state check, as - * ieee80211_stop_tx_ba_session will let only one stop call to - * pass through per sta/tid - */ - if ((*state & HT_AGG_STATE_REQ_STOP_BA_MSK) == 0) { + spin_lock_bh(&sta->lock); + tid_tx = sta->ampdu_mlme.tid_tx[tid]; + + if (!tid_tx || !test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "unexpected callback to A-MPDU stop\n"); #endif + spin_unlock_bh(&sta->lock); rcu_read_unlock(); return; } - if (*state & HT_AGG_STATE_INITIATOR_MSK) + if (tid_tx->stop_initiator == WLAN_BACK_INITIATOR) 
ieee80211_send_delba(sta->sdata, ra, tid, WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); - spin_lock_bh(&sta->lock); - spin_lock(&local->ampdu_lock); + /* + * When we get here, the TX path will not be lockless any more wrt. + * aggregation, since the OPERATIONAL bit has long been cleared. + * Thus it will block on getting the lock, if it occurs. So if we + * stop the queue now, we will not get any more packets, and any + * that might be being processed will wait for us here, thereby + * guaranteeing that no packets go to the tid_tx pending queue any + * more. + */ - ieee80211_agg_splice_packets(local, sta, tid); + spin_lock(&local->ampdu_lock); + ieee80211_agg_splice_packets(local, tid_tx, tid); - *state = HT_AGG_STATE_IDLE; - /* from now on packets are no longer put onto sta->pending */ - kfree(sta->ampdu_mlme.tid_tx[tid]); - sta->ampdu_mlme.tid_tx[tid] = NULL; + /* future packets must not find the tid_tx struct any more */ + rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], NULL); - ieee80211_agg_splice_finish(local, sta, tid); + ieee80211_agg_splice_finish(local, tid); + call_rcu(&tid_tx->rcu_head, kfree_tid_tx); spin_unlock(&local->ampdu_lock); - spin_unlock_bh(&sta->lock); + spin_unlock_bh(&sta->lock); rcu_read_unlock(); } EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb); @@ -649,40 +653,41 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, struct ieee80211_mgmt *mgmt, size_t len) { + struct tid_ampdu_tx *tid_tx; u16 capab, tid; - u8 *state; capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; - state = &sta->ampdu_mlme.tid_state_tx[tid]; - spin_lock_bh(&sta->lock); - if (!(*state & HT_ADDBA_REQUESTED_MSK)) + tid_tx = sta->ampdu_mlme.tid_tx[tid]; + + if (!tid_tx) goto out; - if (mgmt->u.action.u.addba_resp.dialog_token != - sta->ampdu_mlme.tid_tx[tid]->dialog_token) { + if (mgmt->u.action.u.addba_resp.dialog_token != tid_tx->dialog_token) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "wrong addBA response token, tid %d\n", tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ +#endif goto out; } - del_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); + del_timer(&tid_tx->addba_resp_timer); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "switched off addBA timer for tid %d\n", tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ +#endif if (le16_to_cpu(mgmt->u.action.u.addba_resp.status) == WLAN_STATUS_SUCCESS) { - u8 curstate = *state; - - *state |= HT_ADDBA_RECEIVED_MSK; + if (test_and_set_bit(HT_AGG_STATE_RESPONSE_RECEIVED, + &tid_tx->state)) { + /* ignore duplicate response */ + goto out; + } - if (*state != curstate && *state == HT_AGG_STATE_OPERATIONAL) + if (test_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state)) ieee80211_agg_tx_operational(local, sta, tid); sta->ampdu_mlme.addba_req_num[tid] = 0; diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 95c1ea47ad35..8a74ffb36ad4 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -134,15 +134,15 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, sta->ampdu_mlme.tid_rx[i]->ssn : 0); p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", - sta->ampdu_mlme.tid_state_tx[i]); + !!sta->ampdu_mlme.tid_tx[i]); p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x", - sta->ampdu_mlme.tid_state_tx[i] ? + sta->ampdu_mlme.tid_tx[i] ? sta->ampdu_mlme.tid_tx[i]->dialog_token : 0); p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.3x", - sta->ampdu_mlme.tid_state_tx[i] ? + sta->ampdu_mlme.tid_tx[i] ? 
sta->ampdu_mlme.tid_tx[i]->ssn : 0); p += scnprintf(p, sizeof(buf) + buf - p, "\t%03d", - sta->ampdu_mlme.tid_state_tx[i] ? + sta->ampdu_mlme.tid_tx[i] ? skb_queue_len(&sta->ampdu_mlme.tid_tx[i]->pending) : 0); p += scnprintf(p, sizeof(buf) + buf - p, "\n"); } diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 2ab106a0a491..1af173ed2d5e 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -176,13 +176,8 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, if (initiator == WLAN_BACK_INITIATOR) __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_INITIATOR, 0); - else { /* WLAN_BACK_RECIPIENT */ - spin_lock_bh(&sta->lock); - if (sta->ampdu_mlme.tid_state_tx[tid] & HT_ADDBA_REQUESTED_MSK) - ___ieee80211_stop_tx_ba_session(sta, tid, - WLAN_BACK_RECIPIENT); - spin_unlock_bh(&sta->lock); - } + else + __ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_RECIPIENT); } int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index bafe610dcf77..71bdd8b3c3f4 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1119,8 +1119,6 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, enum ieee80211_back_parties initiator); -int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, - enum ieee80211_back_parties initiator); /* Spectrum management */ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index c23f08251da4..7a04951fcb1f 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -365,7 +365,7 @@ minstrel_aggr_check(struct minstrel_priv *mp, struct ieee80211_sta *pubsta, stru return; tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; - if (likely(sta->ampdu_mlme.tid_state_tx[tid] != HT_AGG_STATE_IDLE)) + if (likely(sta->ampdu_mlme.tid_tx[tid])) return; ieee80211_start_tx_ba_session(pubsta, tid); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index c426c572d984..06d8e00a2537 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -246,14 +246,12 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, } for (i = 0; i < STA_TID_NUM; i++) { - /* timer_to_tid must be initialized with identity mapping to - * enable session_timer's data differentiation. refer to - * sta_rx_agg_session_timer_expired for useage */ + /* + * timer_to_tid must be initialized with identity mapping + * to enable session_timer's data differentiation. See + * sta_rx_agg_session_timer_expired for usage. 
+ */ sta->timer_to_tid[i] = i; - /* tx */ - sta->ampdu_mlme.tid_state_tx[i] = HT_AGG_STATE_IDLE; - sta->ampdu_mlme.tid_tx[i] = NULL; - sta->ampdu_mlme.addba_req_num[i] = 0; } skb_queue_head_init(&sta->ps_tx_buf); skb_queue_head_init(&sta->tx_filtered); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 16864a6045b4..05278e6288c9 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -61,33 +61,40 @@ enum ieee80211_sta_info_flags { #define STA_TID_NUM 16 #define ADDBA_RESP_INTERVAL HZ -#define HT_AGG_MAX_RETRIES (0x3) +#define HT_AGG_MAX_RETRIES 0x3 -#define HT_AGG_STATE_INITIATOR_SHIFT (4) - -#define HT_ADDBA_REQUESTED_MSK BIT(0) -#define HT_ADDBA_DRV_READY_MSK BIT(1) -#define HT_ADDBA_RECEIVED_MSK BIT(2) -#define HT_AGG_STATE_REQ_STOP_BA_MSK BIT(3) -#define HT_AGG_STATE_INITIATOR_MSK BIT(HT_AGG_STATE_INITIATOR_SHIFT) -#define HT_AGG_STATE_IDLE (0x0) -#define HT_AGG_STATE_OPERATIONAL (HT_ADDBA_REQUESTED_MSK | \ - HT_ADDBA_DRV_READY_MSK | \ - HT_ADDBA_RECEIVED_MSK) +#define HT_AGG_STATE_DRV_READY 0 +#define HT_AGG_STATE_RESPONSE_RECEIVED 1 +#define HT_AGG_STATE_OPERATIONAL 2 +#define HT_AGG_STATE_STOPPING 3 /** * struct tid_ampdu_tx - TID aggregation information (Tx). * + * @rcu_head: rcu head for freeing structure * @addba_resp_timer: timer for peer's response to addba request * @pending: pending frames queue -- use sta's spinlock to protect * @ssn: Starting Sequence Number expected to be aggregated. * @dialog_token: dialog token for aggregation session + * @state: session state (see above) + * @stop_initiator: initiator of a session stop + * + * This structure is protected by RCU and the per-station + * spinlock. Assignments to the array holding it must hold + * the spinlock, only the TX path can access it under RCU + * lock-free if, and only if, the state has the flag + * %HT_AGG_STATE_OPERATIONAL set. Otherwise, the TX path + * must also acquire the spinlock and re-check the state, + * see comments in the tx code touching it. */ struct tid_ampdu_tx { + struct rcu_head rcu_head; struct timer_list addba_resp_timer; struct sk_buff_head pending; + unsigned long state; u16 ssn; u8 dialog_token; + u8 stop_initiator; }; /** @@ -129,7 +136,6 @@ struct tid_ampdu_rx { * struct sta_ampdu_mlme - STA aggregation information. * * @tid_rx: aggregation info for Rx per TID -- RCU protected - * @tid_state_tx: TID's state in Tx session state machine. * @tid_tx: aggregation info for Tx per TID * @addba_req_num: number of times addBA request has been sent. 
* @dialog_token_allocator: dialog token enumerator for each new session; @@ -138,7 +144,6 @@ struct sta_ampdu_mlme { /* rx */ struct tid_ampdu_rx *tid_rx[STA_TID_NUM]; /* tx */ - u8 tid_state_tx[STA_TID_NUM]; struct tid_ampdu_tx *tid_tx[STA_TID_NUM]; u8 addba_req_num[STA_TID_NUM]; u8 dialog_token_allocator; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 680bcb7093db..7bf1f9c9ea34 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1092,6 +1092,54 @@ static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, return true; } +static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx, + struct sk_buff *skb, + struct ieee80211_tx_info *info, + struct tid_ampdu_tx *tid_tx, + int tid) +{ + bool queued = false; + + if (test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) { + info->flags |= IEEE80211_TX_CTL_AMPDU; + } else { + spin_lock(&tx->sta->lock); + /* + * Need to re-check now, because we may get here + * + * 1) in the window during which the setup is actually + * already done, but not marked yet because not all + * packets are spliced over to the driver pending + * queue yet -- if this happened we acquire the lock + * either before or after the splice happens, but + * need to recheck which of these cases happened. + * + * 2) during session teardown, if the OPERATIONAL bit + * was cleared due to the teardown but the pointer + * hasn't been assigned NULL yet (or we loaded it + * before it was assigned) -- in this case it may + * now be NULL which means we should just let the + * packet pass through because splicing the frames + * back is already done. + */ + tid_tx = tx->sta->ampdu_mlme.tid_tx[tid]; + + if (!tid_tx) { + /* do nothing, let packet pass through */ + } else if (test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) { + info->flags |= IEEE80211_TX_CTL_AMPDU; + } else { + queued = true; + info->control.vif = &tx->sdata->vif; + info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; + __skb_queue_tail(&tid_tx->pending, skb); + } + spin_unlock(&tx->sta->lock); + } + + return queued; +} + /* * initialises @tx */ @@ -1104,8 +1152,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); int hdrlen, tid; - u8 *qc, *state; - bool queued = false; + u8 *qc; memset(tx, 0, sizeof(*tx)); tx->skb = skb; @@ -1157,35 +1204,16 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, qc = ieee80211_get_qos_ctl(hdr); tid = *qc & IEEE80211_QOS_CTL_TID_MASK; - spin_lock(&tx->sta->lock); - /* - * XXX: This spinlock could be fairly expensive, but see the - * comment in agg-tx.c:ieee80211_agg_tx_operational(). - * One way to solve this would be to do something RCU-like - * for managing the tid_tx struct and using atomic bitops - * for the actual state -- by introducing an actual - * 'operational' bit that would be possible. It would - * require changing ieee80211_agg_tx_operational() to - * set that bit, and changing the way tid_tx is managed - * everywhere, including races between that bit and - * tid_tx going away (tid_tx being added can be easily - * committed to memory before the 'operational' bit). 
- */ - tid_tx = tx->sta->ampdu_mlme.tid_tx[tid]; - state = &tx->sta->ampdu_mlme.tid_state_tx[tid]; - if (*state == HT_AGG_STATE_OPERATIONAL) { - info->flags |= IEEE80211_TX_CTL_AMPDU; - } else if (*state != HT_AGG_STATE_IDLE) { - /* in progress */ - queued = true; - info->control.vif = &sdata->vif; - info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; - __skb_queue_tail(&tid_tx->pending, skb); - } - spin_unlock(&tx->sta->lock); + tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]); + if (tid_tx) { + bool queued; - if (unlikely(queued)) - return TX_QUEUED; + queued = ieee80211_tx_prep_agg(tx, skb, info, + tid_tx, tid); + + if (unlikely(queued)) + return TX_QUEUED; + } } if (is_multicast_ether_addr(hdr->addr1)) { -- cgit v1.2.2 From 5d22c89b9bea17a0e48e7534a9b237885e2c0809 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 10 Jun 2010 10:21:40 +0200 Subject: mac80211: remove non-irqsafe aggregation callbacks The non-irqsafe aggregation start/stop done callbacks are currently only used by ath9k_htc, and can cause callbacks into the driver again. This might lead to locking issues, which will only get worse as we modify locking. To avoid trouble, remove the non-irqsafe versions and change ath9k_htc to use those instead. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-tx.c | 2 -- net/mac80211/ieee80211_i.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 7d8656d51c6b..5a7ef51e3020 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -479,7 +479,6 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) spin_unlock_bh(&sta->lock); rcu_read_unlock(); } -EXPORT_SYMBOL(ieee80211_start_tx_ba_cb); void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, const u8 *ra, u16 tid) @@ -619,7 +618,6 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) spin_unlock_bh(&sta->lock); rcu_read_unlock(); } -EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb); void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, const u8 *ra, u16 tid) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 71bdd8b3c3f4..a3ae51308039 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1119,6 +1119,8 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, enum ieee80211_back_parties initiator); +void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid); +void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid); /* Spectrum management */ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, -- cgit v1.2.2 From a6a67db2bc89d2b1ff07e0817f11235c20d2c329 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 10 Jun 2010 10:21:41 +0200 Subject: mac80211: refcount aggregation queue stop mac80211 currently maintains the ampdu_lock to avoid starting a queue due to one aggregation session while another aggregation session needs the queue stopped. We can do better, however, and instead refcount the queue stops for this particular purpose, thus removing the need for the lock. This will help making ampdu_action able to sleep. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/agg-tx.c | 77 ++++++++++++++++++++++++++++------------------ net/mac80211/ieee80211_i.h | 8 +---- net/mac80211/main.c | 6 ++-- 3 files changed, 51 insertions(+), 40 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 5a7ef51e3020..7f2042d37904 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -220,6 +220,41 @@ static inline int ieee80211_ac_from_tid(int tid) return ieee802_1d_to_ac[tid & 7]; } +/* + * When multiple aggregation sessions on multiple stations + * are being created/destroyed simultaneously, we need to + * refcount the global queue stop caused by that in order + * to not get into a situation where one of the aggregation + * setup or teardown re-enables queues before the other is + * ready to handle that. + * + * These two functions take care of this issue by keeping + * a global "agg_queue_stop" refcount. + */ +static void __acquires(agg_queue) +ieee80211_stop_queue_agg(struct ieee80211_local *local, int tid) +{ + int queue = ieee80211_ac_from_tid(tid); + + if (atomic_inc_return(&local->agg_queue_stop[queue]) == 1) + ieee80211_stop_queue_by_reason( + &local->hw, queue, + IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + __acquire(agg_queue); +} + +static void __releases(agg_queue) +ieee80211_wake_queue_agg(struct ieee80211_local *local, int tid) +{ + int queue = ieee80211_ac_from_tid(tid); + + if (atomic_dec_return(&local->agg_queue_stop[queue]) == 0) + ieee80211_wake_queue_by_reason( + &local->hw, queue, + IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + __release(agg_queue); +} + int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); @@ -263,7 +298,6 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) } spin_lock_bh(&sta->lock); - spin_lock(&local->ampdu_lock); /* we have tried too many times, receiver does not want A-MPDU */ if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_MAX_RETRIES) { @@ -289,9 +323,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) * which would require us to put them to the AC pending * afterwards which just makes the code more complex. 
*/ - ieee80211_stop_queue_by_reason( - &local->hw, ieee80211_ac_from_tid(tid), - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + ieee80211_stop_queue_agg(local, tid); /* prepare A-MPDU MLME for Tx aggregation */ tid_tx = kzalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC); @@ -327,11 +359,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], tid_tx); /* Driver vetoed or OKed, but we can take packets again now */ - ieee80211_wake_queue_by_reason( - &local->hw, ieee80211_ac_from_tid(tid), - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); - - spin_unlock(&local->ampdu_lock); + ieee80211_wake_queue_agg(local, tid); /* activate the timer for the recipient's addBA response */ tid_tx->addba_resp_timer.expires = jiffies + ADDBA_RESP_INTERVAL; @@ -358,11 +386,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) err_free: kfree(tid_tx); err_wake_queue: - ieee80211_wake_queue_by_reason( - &local->hw, ieee80211_ac_from_tid(tid), - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + ieee80211_wake_queue_agg(local, tid); err_unlock_sta: - spin_unlock(&local->ampdu_lock); spin_unlock_bh(&sta->lock); return ret; } @@ -370,19 +395,16 @@ EXPORT_SYMBOL(ieee80211_start_tx_ba_session); /* * splice packets from the STA's pending to the local pending, - * requires a call to ieee80211_agg_splice_finish and holding - * local->ampdu_lock across both calls. + * requires a call to ieee80211_agg_splice_finish later */ -static void ieee80211_agg_splice_packets(struct ieee80211_local *local, - struct tid_ampdu_tx *tid_tx, - u16 tid) +static void __acquires(agg_queue) +ieee80211_agg_splice_packets(struct ieee80211_local *local, + struct tid_ampdu_tx *tid_tx, u16 tid) { + int queue = ieee80211_ac_from_tid(tid); unsigned long flags; - u16 queue = ieee80211_ac_from_tid(tid); - ieee80211_stop_queue_by_reason( - &local->hw, queue, - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + ieee80211_stop_queue_agg(local, tid); if (WARN(!tid_tx, "TID %d gone but expected when splicing aggregates" " from the pending queue\n", tid)) @@ -397,11 +419,10 @@ static void ieee80211_agg_splice_packets(struct ieee80211_local *local, } } -static void ieee80211_agg_splice_finish(struct ieee80211_local *local, u16 tid) +static void __releases(agg_queue) +ieee80211_agg_splice_finish(struct ieee80211_local *local, u16 tid) { - ieee80211_wake_queue_by_reason( - &local->hw, ieee80211_ac_from_tid(tid), - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + ieee80211_wake_queue_agg(local, tid); } /* caller must hold sta->lock */ @@ -414,7 +435,6 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, printk(KERN_DEBUG "Aggregation is on for tid %d\n", tid); #endif - spin_lock(&local->ampdu_lock); ieee80211_agg_splice_packets(local, sta->ampdu_mlme.tid_tx[tid], tid); /* * Now mark as operational. This will be visible @@ -423,7 +443,6 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, */ set_bit(HT_AGG_STATE_OPERATIONAL, &sta->ampdu_mlme.tid_tx[tid]->state); ieee80211_agg_splice_finish(local, tid); - spin_unlock(&local->ampdu_lock); drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_TX_OPERATIONAL, @@ -604,7 +623,6 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) * more. 
*/ - spin_lock(&local->ampdu_lock); ieee80211_agg_splice_packets(local, tid_tx, tid); /* future packets must not find the tid_tx struct any more */ @@ -613,7 +631,6 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) ieee80211_agg_splice_finish(local, tid); call_rcu(&tid_tx->rcu_head, kfree_tid_tx); - spin_unlock(&local->ampdu_lock); spin_unlock_bh(&sta->lock); rcu_read_unlock(); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index a3ae51308039..8a91b5d83870 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -723,13 +723,7 @@ struct ieee80211_local { struct sk_buff_head pending[IEEE80211_MAX_QUEUES]; struct tasklet_struct tx_pending_tasklet; - /* - * This lock is used to prevent concurrent A-MPDU - * session start/stop processing, this thus also - * synchronises the ->ampdu_action() callback to - * drivers and limits it to one at a time. - */ - spinlock_t ampdu_lock; + atomic_t agg_queue_stop[IEEE80211_MAX_QUEUES]; /* number of interfaces with corresponding IFF_ flags */ atomic_t iff_allmultis, iff_promiscs; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index a2d10d44641e..c2e46e88f3c9 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -463,8 +463,10 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, sta_info_init(local); - for (i = 0; i < IEEE80211_MAX_QUEUES; i++) + for (i = 0; i < IEEE80211_MAX_QUEUES; i++) { skb_queue_head_init(&local->pending[i]); + atomic_set(&local->agg_queue_stop[i], 0); + } tasklet_init(&local->tx_pending_tasklet, ieee80211_tx_pending, (unsigned long)local); @@ -475,8 +477,6 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, skb_queue_head_init(&local->skb_queue); skb_queue_head_init(&local->skb_queue_unreliable); - spin_lock_init(&local->ampdu_lock); - return local_to_hw(local); } EXPORT_SYMBOL(ieee80211_alloc_hw); -- cgit v1.2.2 From 0ab337032a0dfcd5f2527d3306d3deeba5f95b59 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 10 Jun 2010 10:21:42 +0200 Subject: mac80211: make TX aggregation start/stop request async When the driver or rate control requests starting or stopping an aggregation session, that currently causes a direct callback into the driver, which could potentially cause locking problems. Also, the functions need to be callable from contexts that cannot sleep, and thus will interfere with making the ampdu_action callback sleeping. To address these issues, add a new work item for each station that will process any start or stop requests out of line. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-tx.c | 177 ++++++++++++++++++++++++++++++++------------- net/mac80211/debugfs_sta.c | 5 +- net/mac80211/ht.c | 2 + net/mac80211/ieee80211_i.h | 1 + net/mac80211/sta_info.c | 1 + net/mac80211/sta_info.h | 6 +- net/mac80211/tx.c | 5 ++ 7 files changed, 139 insertions(+), 58 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 7f2042d37904..ee8bfe18b488 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -146,6 +146,13 @@ static int ___ieee80211_stop_tx_ba_session( if (WARN_ON(!tid_tx)) return -ENOENT; + if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) { + /* not even started yet! 
*/ + rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], NULL); + call_rcu(&tid_tx->rcu_head, kfree_tid_tx); + return 0; + } + #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Tx BA session stop requested for %pM tid %u\n", sta->sta.addr, tid); @@ -255,6 +262,94 @@ ieee80211_wake_queue_agg(struct ieee80211_local *local, int tid) __release(agg_queue); } +static void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) +{ + struct tid_ampdu_tx *tid_tx = sta->ampdu_mlme.tid_tx[tid]; + struct ieee80211_local *local = sta->local; + struct ieee80211_sub_if_data *sdata = sta->sdata; + u16 start_seq_num; + int ret; + + /* + * While we're asking the driver about the aggregation, + * stop the AC queue so that we don't have to worry + * about frames that came in while we were doing that, + * which would require us to put them to the AC pending + * afterwards which just makes the code more complex. + */ + ieee80211_stop_queue_agg(local, tid); + + clear_bit(HT_AGG_STATE_WANT_START, &tid_tx->state); + + /* + * This might be off by one due to a race that we can't + * really prevent here without synchronize_net() which + * can't be called now. + */ + start_seq_num = sta->tid_seq[tid] >> 4; + + ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START, + &sta->sta, tid, &start_seq_num); + if (ret) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "BA request denied - HW unavailable for" + " tid %d\n", tid); +#endif + rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], NULL); + ieee80211_wake_queue_agg(local, tid); + call_rcu(&tid_tx->rcu_head, kfree_tid_tx); + return; + } + + /* we can take packets again now */ + ieee80211_wake_queue_agg(local, tid); + + /* activate the timer for the recipient's addBA response */ + mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL); +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid); +#endif + + sta->ampdu_mlme.addba_req_num[tid]++; + + /* send AddBA request */ + ieee80211_send_addba_request(sdata, sta->sta.addr, tid, + tid_tx->dialog_token, start_seq_num, + 0x40, 5000); +} + +void ieee80211_tx_ba_session_work(struct work_struct *work) +{ + struct sta_info *sta = + container_of(work, struct sta_info, ampdu_mlme.work); + struct tid_ampdu_tx *tid_tx; + int tid; + + /* + * When this flag is set, new sessions should be + * blocked, and existing sessions will be torn + * down by the code that set the flag, so this + * need not run. 
+ */ + if (test_sta_flags(sta, WLAN_STA_BLOCK_BA)) + return; + + spin_lock_bh(&sta->lock); + for (tid = 0; tid < STA_TID_NUM; tid++) { + tid_tx = sta->ampdu_mlme.tid_tx[tid]; + if (!tid_tx) + continue; + + if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) + ieee80211_tx_ba_session_handle_start(sta, tid); + else if (test_and_clear_bit(HT_AGG_STATE_WANT_STOP, + &tid_tx->state)) + ___ieee80211_stop_tx_ba_session(sta, tid, + WLAN_BACK_INITIATOR); + } + spin_unlock_bh(&sta->lock); +} + int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); @@ -262,7 +357,6 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) struct ieee80211_local *local = sdata->local; struct tid_ampdu_tx *tid_tx; int ret = 0; - u16 start_seq_num; trace_api_start_tx_ba_session(pubsta, tid); @@ -316,15 +410,6 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) goto err_unlock_sta; } - /* - * While we're asking the driver about the aggregation, - * stop the AC queue so that we don't have to worry - * about frames that came in while we were doing that, - * which would require us to put them to the AC pending - * afterwards which just makes the code more complex. - */ - ieee80211_stop_queue_agg(local, tid); - /* prepare A-MPDU MLME for Tx aggregation */ tid_tx = kzalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC); if (!tid_tx) { @@ -334,59 +419,27 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) tid); #endif ret = -ENOMEM; - goto err_wake_queue; + goto err_unlock_sta; } skb_queue_head_init(&tid_tx->pending); + __set_bit(HT_AGG_STATE_WANT_START, &tid_tx->state); /* Tx timer */ tid_tx->addba_resp_timer.function = sta_addba_resp_timer_expired; tid_tx->addba_resp_timer.data = (unsigned long)&sta->timer_to_tid[tid]; init_timer(&tid_tx->addba_resp_timer); - start_seq_num = sta->tid_seq[tid] >> 4; - - ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START, - pubsta, tid, &start_seq_num); - if (ret) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "BA request denied - HW unavailable for" - " tid %d\n", tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - goto err_free; - } - - rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], tid_tx); - - /* Driver vetoed or OKed, but we can take packets again now */ - ieee80211_wake_queue_agg(local, tid); - - /* activate the timer for the recipient's addBA response */ - tid_tx->addba_resp_timer.expires = jiffies + ADDBA_RESP_INTERVAL; - add_timer(&tid_tx->addba_resp_timer); -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid); -#endif - - /* prepare tid data */ + /* assign a dialog token */ sta->ampdu_mlme.dialog_token_allocator++; tid_tx->dialog_token = sta->ampdu_mlme.dialog_token_allocator; - tid_tx->ssn = start_seq_num; - - sta->ampdu_mlme.addba_req_num[tid]++; - spin_unlock_bh(&sta->lock); + /* finally, assign it to the array */ + rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], tid_tx); - /* send AddBA request */ - ieee80211_send_addba_request(sdata, pubsta->addr, tid, - tid_tx->dialog_token, tid_tx->ssn, - 0x40, 5000); - return 0; + ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); - err_free: - kfree(tid_tx); - err_wake_queue: - ieee80211_wake_queue_agg(local, tid); + /* this flow continues off the work */ err_unlock_sta: spin_unlock_bh(&sta->lock); return ret; @@ -534,8 +587,7 @@ int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, spin_lock_bh(&sta->lock); 
tid_tx = sta->ampdu_mlme.tid_tx[tid]; - /* check if the TID is in aggregation */ - if (!tid_tx || !test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) { + if (!tid_tx) { ret = -ENOENT; goto unlock; } @@ -552,6 +604,8 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) struct sta_info *sta = container_of(pubsta, struct sta_info, sta); struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; + struct tid_ampdu_tx *tid_tx; + int ret = 0; trace_api_stop_tx_ba_session(pubsta, tid); @@ -561,7 +615,26 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) if (tid >= STA_TID_NUM) return -EINVAL; - return __ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR); + spin_lock_bh(&sta->lock); + tid_tx = sta->ampdu_mlme.tid_tx[tid]; + + if (!tid_tx) { + ret = -ENOENT; + goto unlock; + } + + if (test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { + /* already in progress stopping it */ + ret = 0; + goto unlock; + } + + set_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state); + ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); + + unlock: + spin_unlock_bh(&sta->lock); + return ret; } EXPORT_SYMBOL(ieee80211_stop_tx_ba_session); diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 8a74ffb36ad4..76839d4dfaac 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -121,7 +121,7 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, p += scnprintf(p, sizeof(buf) + buf - p, "next dialog_token: %#02x\n", sta->ampdu_mlme.dialog_token_allocator + 1); p += scnprintf(p, sizeof(buf) + buf - p, - "TID\t\tRX active\tDTKN\tSSN\t\tTX\tDTKN\tSSN\tpending\n"); + "TID\t\tRX active\tDTKN\tSSN\t\tTX\tDTKN\tpending\n"); for (i = 0; i < STA_TID_NUM; i++) { p += scnprintf(p, sizeof(buf) + buf - p, "%02d", i); p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", @@ -138,9 +138,6 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x", sta->ampdu_mlme.tid_tx[i] ? sta->ampdu_mlme.tid_tx[i]->dialog_token : 0); - p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.3x", - sta->ampdu_mlme.tid_tx[i] ? - sta->ampdu_mlme.tid_tx[i]->ssn : 0); p += scnprintf(p, sizeof(buf) + buf - p, "\t%03d", sta->ampdu_mlme.tid_tx[i] ? 
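
To summarize the request/worker split being introduced here, a condensed sketch drawn from this hunk and from ieee80211_tx_ba_session_work above (not a verbatim excerpt; the WANT_START leg is handled analogously): the caller, which may run in atomic context, only records intent under the spinlock and defers the actual session teardown to the per-station work item.

	/* requester side -- callable from contexts that cannot sleep */
	spin_lock_bh(&sta->lock);
	tid_tx = sta->ampdu_mlme.tid_tx[tid];
	if (tid_tx && !test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) {
		set_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state);
		ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work);
	}
	spin_unlock_bh(&sta->lock);

	/* worker side (ieee80211_tx_ba_session_work) -- acts on the request later */
	if (test_and_clear_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state))
		___ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR);
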
skb_queue_len(&sta->ampdu_mlme.tid_tx[i]->pending) : 0); diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 1af173ed2d5e..4dfba7808a24 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -105,6 +105,8 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta) { int i; + cancel_work_sync(&sta->ampdu_mlme.work); + for (i = 0; i < STA_TID_NUM; i++) { __ieee80211_stop_tx_ba_session(sta, i, WLAN_BACK_INITIATOR); __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8a91b5d83870..aec84c36c4c1 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1115,6 +1115,7 @@ int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, enum ieee80211_back_parties initiator); void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid); void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid); +void ieee80211_tx_ba_session_work(struct work_struct *work); /* Spectrum management */ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 06d8e00a2537..8aa8558ba21e 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -235,6 +235,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, spin_lock_init(&sta->lock); spin_lock_init(&sta->flaglock); INIT_WORK(&sta->drv_unblock_wk, sta_unblock); + INIT_WORK(&sta->ampdu_mlme.work, ieee80211_tx_ba_session_work); memcpy(sta->sta.addr, addr, ETH_ALEN); sta->local = local; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 05278e6288c9..040cbb0ac3af 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -67,6 +67,8 @@ enum ieee80211_sta_info_flags { #define HT_AGG_STATE_RESPONSE_RECEIVED 1 #define HT_AGG_STATE_OPERATIONAL 2 #define HT_AGG_STATE_STOPPING 3 +#define HT_AGG_STATE_WANT_START 4 +#define HT_AGG_STATE_WANT_STOP 5 /** * struct tid_ampdu_tx - TID aggregation information (Tx). @@ -74,7 +76,6 @@ enum ieee80211_sta_info_flags { * @rcu_head: rcu head for freeing structure * @addba_resp_timer: timer for peer's response to addba request * @pending: pending frames queue -- use sta's spinlock to protect - * @ssn: Starting Sequence Number expected to be aggregated. * @dialog_token: dialog token for aggregation session * @state: session state (see above) * @stop_initiator: initiator of a session stop @@ -92,7 +93,6 @@ struct tid_ampdu_tx { struct timer_list addba_resp_timer; struct sk_buff_head pending; unsigned long state; - u16 ssn; u8 dialog_token; u8 stop_initiator; }; @@ -139,11 +139,13 @@ struct tid_ampdu_rx { * @tid_tx: aggregation info for Tx per TID * @addba_req_num: number of times addBA request has been sent. 
* @dialog_token_allocator: dialog token enumerator for each new session; + * @work: work struct for starting/stopping aggregation */ struct sta_ampdu_mlme { /* rx */ struct tid_ampdu_rx *tid_rx[STA_TID_NUM]; /* tx */ + struct work_struct work; struct tid_ampdu_tx *tid_tx[STA_TID_NUM]; u8 addba_req_num[STA_TID_NUM]; u8 dialog_token_allocator; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 7bf1f9c9ea34..698d4718b1a4 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1102,6 +1102,11 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx, if (test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) { info->flags |= IEEE80211_TX_CTL_AMPDU; + } else if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) { + /* + * nothing -- this aggregation session is being started + * but that might still fail with the driver + */ } else { spin_lock(&tx->sta->lock); /* -- cgit v1.2.2 From 67c282c00c9c06733aae229662d209957f6d23a7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 10 Jun 2010 10:21:43 +0200 Subject: mac80211: move BA session work Move the block-ack session works into common code, since it will be needed for RX agg too in the next patches. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-tx.c | 39 +++------------------------------------ net/mac80211/ht.c | 32 ++++++++++++++++++++++++++++++++ net/mac80211/ieee80211_i.h | 5 ++++- net/mac80211/sta_info.c | 2 +- 4 files changed, 40 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index ee8bfe18b488..9a00a79a868b 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -133,9 +133,8 @@ static void kfree_tid_tx(struct rcu_head *rcu_head) kfree(tid_tx); } -static int ___ieee80211_stop_tx_ba_session( - struct sta_info *sta, u16 tid, - enum ieee80211_back_parties initiator) +int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, + enum ieee80211_back_parties initiator) { struct ieee80211_local *local = sta->local; struct tid_ampdu_tx *tid_tx = sta->ampdu_mlme.tid_tx[tid]; @@ -262,7 +261,7 @@ ieee80211_wake_queue_agg(struct ieee80211_local *local, int tid) __release(agg_queue); } -static void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) +void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) { struct tid_ampdu_tx *tid_tx = sta->ampdu_mlme.tid_tx[tid]; struct ieee80211_local *local = sta->local; @@ -318,38 +317,6 @@ static void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) 0x40, 5000); } -void ieee80211_tx_ba_session_work(struct work_struct *work) -{ - struct sta_info *sta = - container_of(work, struct sta_info, ampdu_mlme.work); - struct tid_ampdu_tx *tid_tx; - int tid; - - /* - * When this flag is set, new sessions should be - * blocked, and existing sessions will be torn - * down by the code that set the flag, so this - * need not run. 
- */ - if (test_sta_flags(sta, WLAN_STA_BLOCK_BA)) - return; - - spin_lock_bh(&sta->lock); - for (tid = 0; tid < STA_TID_NUM; tid++) { - tid_tx = sta->ampdu_mlme.tid_tx[tid]; - if (!tid_tx) - continue; - - if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) - ieee80211_tx_ba_session_handle_start(sta, tid); - else if (test_and_clear_bit(HT_AGG_STATE_WANT_STOP, - &tid_tx->state)) - ___ieee80211_stop_tx_ba_session(sta, tid, - WLAN_BACK_INITIATOR); - } - spin_unlock_bh(&sta->lock); -} - int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 4dfba7808a24..531a19d358df 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -114,6 +114,38 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta) } } +void ieee80211_ba_session_work(struct work_struct *work) +{ + struct sta_info *sta = + container_of(work, struct sta_info, ampdu_mlme.work); + struct tid_ampdu_tx *tid_tx; + int tid; + + /* + * When this flag is set, new sessions should be + * blocked, and existing sessions will be torn + * down by the code that set the flag, so this + * need not run. + */ + if (test_sta_flags(sta, WLAN_STA_BLOCK_BA)) + return; + + spin_lock_bh(&sta->lock); + for (tid = 0; tid < STA_TID_NUM; tid++) { + tid_tx = sta->ampdu_mlme.tid_tx[tid]; + if (!tid_tx) + continue; + + if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) + ieee80211_tx_ba_session_handle_start(sta, tid); + else if (test_and_clear_bit(HT_AGG_STATE_WANT_STOP, + &tid_tx->state)) + ___ieee80211_stop_tx_ba_session(sta, tid, + WLAN_BACK_INITIATOR); + } + spin_unlock_bh(&sta->lock); +} + void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, const u8 *da, u16 tid, u16 initiator, u16 reason_code) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index aec84c36c4c1..628a53db9f7e 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1113,9 +1113,12 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, enum ieee80211_back_parties initiator); +int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, + enum ieee80211_back_parties initiator); void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid); void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid); -void ieee80211_tx_ba_session_work(struct work_struct *work); +void ieee80211_ba_session_work(struct work_struct *work); +void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid); /* Spectrum management */ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 8aa8558ba21e..bd726c6204df 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -235,7 +235,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, spin_lock_init(&sta->lock); spin_lock_init(&sta->flaglock); INIT_WORK(&sta->drv_unblock_wk, sta_unblock); - INIT_WORK(&sta->ampdu_mlme.work, ieee80211_tx_ba_session_work); + INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work); memcpy(sta->sta.addr, addr, ETH_ALEN); sta->local = local; -- cgit v1.2.2 From 7c3b1dd8f483bf4bcb9776cd9cd3b37dd327d7ea Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 10 Jun 2010 10:21:44 +0200 Subject: mac80211: defer RX agg session teardown to work Since we want the code to be able to sleep in the 
future, it must not be called from the timer directly. To prepare, move it out into the aggregation work. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-rx.c | 24 ++++++++++-------------- net/mac80211/ht.c | 5 +++++ net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/sta_info.h | 3 +++ 4 files changed, 20 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index bbf36d980232..1c4320b01e3c 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -32,21 +32,18 @@ static void ieee80211_free_tid_rx(struct rcu_head *h) kfree(tid_rx); } -static void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, - u16 initiator, u16 reason, - bool from_timer) +void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, + u16 initiator, u16 reason) { struct ieee80211_local *local = sta->local; struct tid_ampdu_rx *tid_rx; - spin_lock_bh(&sta->lock); + lockdep_assert_held(&sta->lock); tid_rx = sta->ampdu_mlme.tid_rx[tid]; - if (!tid_rx) { - spin_unlock_bh(&sta->lock); + if (!tid_rx) return; - } rcu_assign_pointer(sta->ampdu_mlme.tid_rx[tid], NULL); @@ -65,10 +62,7 @@ static void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, ieee80211_send_delba(sta->sdata, sta->sta.addr, tid, 0, reason); - spin_unlock_bh(&sta->lock); - - if (!from_timer) - del_timer_sync(&tid_rx->session_timer); + del_timer_sync(&tid_rx->session_timer); call_rcu(&tid_rx->rcu_head, ieee80211_free_tid_rx); } @@ -76,7 +70,9 @@ static void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason) { - ___ieee80211_stop_rx_ba_session(sta, tid, initiator, reason, false); + spin_lock_bh(&sta->lock); + ___ieee80211_stop_rx_ba_session(sta, tid, initiator, reason); + spin_unlock_bh(&sta->lock); } /* @@ -97,8 +93,8 @@ static void sta_rx_agg_session_timer_expired(unsigned long data) #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid); #endif - ___ieee80211_stop_rx_ba_session(sta, *ptid, WLAN_BACK_RECIPIENT, - WLAN_REASON_QSTA_TIMEOUT, true); + set_bit(*ptid, sta->ampdu_mlme.tid_rx_timer_expired); + ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work); } static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid, diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 531a19d358df..730f8089678e 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -132,6 +132,11 @@ void ieee80211_ba_session_work(struct work_struct *work) spin_lock_bh(&sta->lock); for (tid = 0; tid < STA_TID_NUM; tid++) { + if (test_and_clear_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired)) + ___ieee80211_stop_rx_ba_session( + sta, tid, WLAN_BACK_RECIPIENT, + WLAN_REASON_QSTA_TIMEOUT); + tid_tx = sta->ampdu_mlme.tid_tx[tid]; if (!tid_tx) continue; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 628a53db9f7e..9d753a02a2e4 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1096,6 +1096,8 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps, const u8 *da, const u8 *bssid); +void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, + u16 initiator, u16 reason); void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason); void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta); diff --git a/net/mac80211/sta_info.h 
b/net/mac80211/sta_info.h index 040cbb0ac3af..500bafe0a0bb 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -140,10 +140,13 @@ struct tid_ampdu_rx { * @addba_req_num: number of times addBA request has been sent. * @dialog_token_allocator: dialog token enumerator for each new session; * @work: work struct for starting/stopping aggregation + * @tid_rx_timer_expired: bitmap indicating on which TIDs the + * RX timer expired until the work for it runs */ struct sta_ampdu_mlme { /* rx */ struct tid_ampdu_rx *tid_rx[STA_TID_NUM]; + unsigned long tid_rx_timer_expired[BITS_TO_LONGS(STA_TID_NUM)]; /* tx */ struct work_struct work; struct tid_ampdu_tx *tid_tx[STA_TID_NUM]; -- cgit v1.2.2 From f955ebb44798e0058c987a0817810d2542e87349 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 10 Jun 2010 10:21:45 +0200 Subject: mac80211: fix RX aggregation timer I noticed that when there was _no_ traffic at all on a given aggregation session, it would never time out. This won't happen unless you forced creating a session, but fix it anyway. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-rx.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 1c4320b01e3c..aa858a03951c 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -274,6 +274,10 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, /* activate it for RX */ rcu_assign_pointer(sta->ampdu_mlme.tid_rx[tid], tid_agg_rx); + + if (timeout) + mod_timer(&tid_agg_rx->session_timer, TU_TO_EXP_TIME(timeout)); + end: spin_unlock_bh(&sta->lock); -- cgit v1.2.2 From a93e364430de7f7a5f4eedd604ad1ab3d825dde5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 10 Jun 2010 10:21:46 +0200 Subject: mac80211: change RX aggregation locking To prepare for allowing drivers to sleep in ampdu_action, change the locking in the RX aggregation code to use a mutex, so that it would already allow drivers to sleep. But explicitly disable BHs around the callback for now since the TX part cannot yet sleep, and drivers' locking might require it. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-rx.c | 12 ++++++------ net/mac80211/driver-ops.h | 2 ++ net/mac80211/ht.c | 6 +++++- net/mac80211/iface.c | 8 ++++---- net/mac80211/sta_info.c | 1 + net/mac80211/sta_info.h | 3 +++ 6 files changed, 21 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index aa858a03951c..a843df26f384 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -6,7 +6,7 @@ * Copyright 2005-2006, Devicescape Software, Inc. 
* Copyright 2006-2007 Jiri Benc * Copyright 2007, Michael Wu - * Copyright 2007-2008, Intel Corporation + * Copyright 2007-2010, Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -38,7 +38,7 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, struct ieee80211_local *local = sta->local; struct tid_ampdu_rx *tid_rx; - lockdep_assert_held(&sta->lock); + lockdep_assert_held(&sta->ampdu_mlme.mtx); tid_rx = sta->ampdu_mlme.tid_rx[tid]; @@ -70,9 +70,9 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason) { - spin_lock_bh(&sta->lock); + mutex_lock(&sta->ampdu_mlme.mtx); ___ieee80211_stop_rx_ba_session(sta, tid, initiator, reason); - spin_unlock_bh(&sta->lock); + mutex_unlock(&sta->ampdu_mlme.mtx); } /* @@ -205,7 +205,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, /* examine state machine */ - spin_lock_bh(&sta->lock); + mutex_lock(&sta->ampdu_mlme.mtx); if (sta->ampdu_mlme.tid_rx[tid]) { #ifdef CONFIG_MAC80211_HT_DEBUG @@ -279,7 +279,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, mod_timer(&tid_agg_rx->session_timer, TU_TO_EXP_TIME(timeout)); end: - spin_unlock_bh(&sta->lock); + mutex_unlock(&sta->ampdu_mlme.mtx); end_no_lock: ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, tid, diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 7d18a3245e3d..e5e7ef175ca2 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -349,9 +349,11 @@ static inline int drv_ampdu_action(struct ieee80211_local *local, u16 *ssn) { int ret = -EOPNOTSUPP; + local_bh_disable(); if (local->ops->ampdu_action) ret = local->ops->ampdu_action(&local->hw, &sdata->vif, action, sta, tid, ssn); + local_bh_enable(); trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn, ret); return ret; } diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 730f8089678e..e29be64083c3 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -130,13 +130,17 @@ void ieee80211_ba_session_work(struct work_struct *work) if (test_sta_flags(sta, WLAN_STA_BLOCK_BA)) return; - spin_lock_bh(&sta->lock); + mutex_lock(&sta->ampdu_mlme.mtx); for (tid = 0; tid < STA_TID_NUM; tid++) { if (test_and_clear_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired)) ___ieee80211_stop_rx_ba_session( sta, tid, WLAN_BACK_RECIPIENT, WLAN_REASON_QSTA_TIMEOUT); + } + mutex_unlock(&sta->ampdu_mlme.mtx); + spin_lock_bh(&sta->lock); + for (tid = 0; tid < STA_TID_NUM; tid++) { tid_tx = sta->ampdu_mlme.tid_tx[tid]; if (!tid_tx) continue; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 9f00d3f174e4..490be2f3af27 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -740,7 +740,7 @@ static void ieee80211_iface_work(struct work_struct *work) mgmt->u.action.category == WLAN_CATEGORY_BACK) { int len = skb->len; - rcu_read_lock(); + mutex_lock(&local->sta_mtx); sta = sta_info_get(sdata, mgmt->sa); if (sta) { switch (mgmt->u.action.u.addba_req.action_code) { @@ -761,7 +761,7 @@ static void ieee80211_iface_work(struct work_struct *work) break; } } - rcu_read_unlock(); + mutex_unlock(&local->sta_mtx); } else if (ieee80211_is_data_qos(mgmt->frame_control)) { struct ieee80211_hdr *hdr = (void *)mgmt; /* @@ -781,7 +781,7 @@ static void ieee80211_iface_work(struct work_struct *work) * a block-ack session was active. 
That cannot be * right, so terminate the session. */ - rcu_read_lock(); + mutex_lock(&local->sta_mtx); sta = sta_info_get(sdata, mgmt->sa); if (sta) { u16 tid = *ieee80211_get_qos_ctl(hdr) & @@ -791,7 +791,7 @@ static void ieee80211_iface_work(struct work_struct *work) sta, tid, WLAN_BACK_RECIPIENT, WLAN_REASON_QSTA_REQUIRE_SETUP); } - rcu_read_unlock(); + mutex_unlock(&local->sta_mtx); } else switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: ieee80211_sta_rx_queued_mgmt(sdata, skb); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index bd726c6204df..f54d8ba7d788 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -236,6 +236,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, spin_lock_init(&sta->flaglock); INIT_WORK(&sta->drv_unblock_wk, sta_unblock); INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work); + mutex_init(&sta->ampdu_mlme.mtx); memcpy(sta->sta.addr, addr, ETH_ALEN); sta->local = local; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 500bafe0a0bb..10d0fcb417ae 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -142,8 +142,11 @@ struct tid_ampdu_rx { * @work: work struct for starting/stopping aggregation * @tid_rx_timer_expired: bitmap indicating on which TIDs the * RX timer expired until the work for it runs + * @mtx: mutex to protect all TX data (except non-NULL assignments + * to tid_tx[idx], which are protected by the sta spinlock) */ struct sta_ampdu_mlme { + struct mutex mtx; /* rx */ struct tid_ampdu_rx *tid_rx[STA_TID_NUM]; unsigned long tid_rx_timer_expired[BITS_TO_LONGS(STA_TID_NUM)]; -- cgit v1.2.2 From 83a5cbf73a13d0c8be019b22afec4407e4285aed Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 10 Jun 2010 10:21:47 +0200 Subject: mac80211: defer TX agg session teardown to work Since we want the code to be able to sleep in the future, it must not be called from the timer directly. To achieve that, simply call the function drivers would call, and also use RCU in the timer to get the struct so we don't need to rely on the spinlock in the future. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/agg-tx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 9a00a79a868b..0026604cfe30 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -200,11 +200,11 @@ static void sta_addba_resp_timer_expired(unsigned long data) struct tid_ampdu_tx *tid_tx; /* check if the TID waits for addBA response */ - spin_lock_bh(&sta->lock); - tid_tx = sta->ampdu_mlme.tid_tx[tid]; + rcu_read_lock(); + tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]); if (!tid_tx || test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) { - spin_unlock_bh(&sta->lock); + rcu_read_unlock(); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "timer expired on tid %d but we are not " "(or no longer) expecting addBA response there\n", @@ -217,8 +217,8 @@ static void sta_addba_resp_timer_expired(unsigned long data) printk(KERN_DEBUG "addBA response timer expired on tid %d\n", tid); #endif - ___ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR); - spin_unlock_bh(&sta->lock); + ieee80211_stop_tx_ba_session(&sta->sta, tid); + rcu_read_unlock(); } static inline int ieee80211_ac_from_tid(int tid) -- cgit v1.2.2 From cfcdbde35e2b621cf56bedc38a3a81e8c28addb9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 10 Jun 2010 10:21:48 +0200 Subject: mac80211: change TX aggregation locking To prepare for allowing drivers to sleep in ampdu_action, change the locking in the TX aggregation code to use the mutex the RX part already uses. The spinlock is still necessary around some code to avoid races with TX, but now we can also synchronize_net() to avoid getting an inconsistent sequence number. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-tx.c | 94 ++++++++++++++++++++++++++--------------------- net/mac80211/driver-ops.h | 3 ++ net/mac80211/ht.c | 8 +--- 3 files changed, 58 insertions(+), 47 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 0026604cfe30..5dff73eebefb 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -6,7 +6,7 @@ * Copyright 2005-2006, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc * Copyright 2007, Michael Wu - * Copyright 2007-2009, Intel Corporation + * Copyright 2007-2010, Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -140,18 +140,23 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, struct tid_ampdu_tx *tid_tx = sta->ampdu_mlme.tid_tx[tid]; int ret; - lockdep_assert_held(&sta->lock); + lockdep_assert_held(&sta->ampdu_mlme.mtx); - if (WARN_ON(!tid_tx)) + if (!tid_tx) return -ENOENT; + spin_lock_bh(&sta->lock); + if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) { /* not even started yet! 
*/ rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], NULL); + spin_unlock_bh(&sta->lock); call_rcu(&tid_tx->rcu_head, kfree_tid_tx); return 0; } + spin_unlock_bh(&sta->lock); + #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Tx BA session stop requested for %pM tid %u\n", sta->sta.addr, tid); @@ -269,6 +274,8 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) u16 start_seq_num; int ret; + lockdep_assert_held(&sta->ampdu_mlme.mtx); + /* * While we're asking the driver about the aggregation, * stop the AC queue so that we don't have to worry @@ -281,10 +288,11 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) clear_bit(HT_AGG_STATE_WANT_START, &tid_tx->state); /* - * This might be off by one due to a race that we can't - * really prevent here without synchronize_net() which - * can't be called now. + * make sure no packets are being processed to get + * valid starting sequence number */ + synchronize_net(); + start_seq_num = sta->tid_seq[tid] >> 4; ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START, @@ -294,7 +302,10 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) printk(KERN_DEBUG "BA request denied - HW unavailable for" " tid %d\n", tid); #endif + spin_lock_bh(&sta->lock); rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], NULL); + spin_unlock_bh(&sta->lock); + ieee80211_wake_queue_agg(local, tid); call_rcu(&tid_tx->rcu_head, kfree_tid_tx); return; @@ -309,7 +320,9 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid); #endif + spin_lock_bh(&sta->lock); sta->ampdu_mlme.addba_req_num[tid]++; + spin_unlock_bh(&sta->lock); /* send AddBA request */ ieee80211_send_addba_request(sdata, sta->sta.addr, tid, @@ -445,16 +458,25 @@ ieee80211_agg_splice_finish(struct ieee80211_local *local, u16 tid) ieee80211_wake_queue_agg(local, tid); } -/* caller must hold sta->lock */ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, struct sta_info *sta, u16 tid) { - lockdep_assert_held(&sta->lock); + lockdep_assert_held(&sta->ampdu_mlme.mtx); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Aggregation is on for tid %d\n", tid); #endif + drv_ampdu_action(local, sta->sdata, + IEEE80211_AMPDU_TX_OPERATIONAL, + &sta->sta, tid, NULL); + + /* + * synchronize with TX path, while splicing the TX path + * should block so it won't put more packets onto pending. + */ + spin_lock_bh(&sta->lock); + ieee80211_agg_splice_packets(local, sta->ampdu_mlme.tid_tx[tid], tid); /* * Now mark as operational. 
This will be visible @@ -464,9 +486,7 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, set_bit(HT_AGG_STATE_OPERATIONAL, &sta->ampdu_mlme.tid_tx[tid]->state); ieee80211_agg_splice_finish(local, tid); - drv_ampdu_action(local, sta->sdata, - IEEE80211_AMPDU_TX_OPERATIONAL, - &sta->sta, tid, NULL); + spin_unlock_bh(&sta->lock); } void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) @@ -486,37 +506,35 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) return; } - rcu_read_lock(); + mutex_lock(&local->sta_mtx); sta = sta_info_get(sdata, ra); if (!sta) { - rcu_read_unlock(); + mutex_unlock(&local->sta_mtx); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Could not find station: %pM\n", ra); #endif return; } - spin_lock_bh(&sta->lock); + mutex_lock(&sta->ampdu_mlme.mtx); tid_tx = sta->ampdu_mlme.tid_tx[tid]; if (WARN_ON(!tid_tx)) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "addBA was not requested!\n"); #endif - spin_unlock_bh(&sta->lock); - rcu_read_unlock(); - return; + goto unlock; } if (WARN_ON(test_and_set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state))) - goto out; + goto unlock; if (test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) ieee80211_agg_tx_operational(local, sta, tid); - out: - spin_unlock_bh(&sta->lock); - rcu_read_unlock(); + unlock: + mutex_unlock(&sta->ampdu_mlme.mtx); + mutex_unlock(&local->sta_mtx); } void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, @@ -548,21 +566,14 @@ EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe); int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, enum ieee80211_back_parties initiator) { - struct tid_ampdu_tx *tid_tx; int ret; - spin_lock_bh(&sta->lock); - tid_tx = sta->ampdu_mlme.tid_tx[tid]; - - if (!tid_tx) { - ret = -ENOENT; - goto unlock; - } + mutex_lock(&sta->ampdu_mlme.mtx); ret = ___ieee80211_stop_tx_ba_session(sta, tid, initiator); - unlock: - spin_unlock_bh(&sta->lock); + mutex_unlock(&sta->ampdu_mlme.mtx); + return ret; } @@ -627,16 +638,17 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) ra, tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - rcu_read_lock(); + mutex_lock(&local->sta_mtx); + sta = sta_info_get(sdata, ra); if (!sta) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Could not find station: %pM\n", ra); #endif - rcu_read_unlock(); - return; + goto unlock; } + mutex_lock(&sta->ampdu_mlme.mtx); spin_lock_bh(&sta->lock); tid_tx = sta->ampdu_mlme.tid_tx[tid]; @@ -644,9 +656,7 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "unexpected callback to A-MPDU stop\n"); #endif - spin_unlock_bh(&sta->lock); - rcu_read_unlock(); - return; + goto unlock_sta; } if (tid_tx->stop_initiator == WLAN_BACK_INITIATOR) @@ -672,8 +682,11 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) call_rcu(&tid_tx->rcu_head, kfree_tid_tx); + unlock_sta: spin_unlock_bh(&sta->lock); - rcu_read_unlock(); + mutex_unlock(&sta->ampdu_mlme.mtx); + unlock: + mutex_unlock(&local->sta_mtx); } void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, @@ -714,10 +727,9 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; - spin_lock_bh(&sta->lock); + mutex_lock(&sta->ampdu_mlme.mtx); tid_tx = sta->ampdu_mlme.tid_tx[tid]; - if (!tid_tx) goto out; @@ -751,5 +763,5 @@ void ieee80211_process_addba_resp(struct 
ieee80211_local *local, } out: - spin_unlock_bh(&sta->lock); + mutex_unlock(&sta->ampdu_mlme.mtx); } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index e5e7ef175ca2..7e86c6f89be9 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -349,6 +349,9 @@ static inline int drv_ampdu_action(struct ieee80211_local *local, u16 *ssn) { int ret = -EOPNOTSUPP; + + might_sleep(); + local_bh_disable(); if (local->ops->ampdu_action) ret = local->ops->ampdu_action(&local->hw, &sdata->vif, action, diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index e29be64083c3..be928ef7ef51 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -6,7 +6,7 @@ * Copyright 2005-2006, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc * Copyright 2007, Michael Wu - * Copyright 2007-2008, Intel Corporation + * Copyright 2007-2010, Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -136,11 +136,7 @@ void ieee80211_ba_session_work(struct work_struct *work) ___ieee80211_stop_rx_ba_session( sta, tid, WLAN_BACK_RECIPIENT, WLAN_REASON_QSTA_TIMEOUT); - } - mutex_unlock(&sta->ampdu_mlme.mtx); - spin_lock_bh(&sta->lock); - for (tid = 0; tid < STA_TID_NUM; tid++) { tid_tx = sta->ampdu_mlme.tid_tx[tid]; if (!tid_tx) continue; @@ -152,7 +148,7 @@ void ieee80211_ba_session_work(struct work_struct *work) ___ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR); } - spin_unlock_bh(&sta->lock); + mutex_unlock(&sta->ampdu_mlme.mtx); } void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, -- cgit v1.2.2 From 85ad181ea78861f69b007599cec9e6ba33fcdf8a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 10 Jun 2010 10:21:49 +0200 Subject: mac80211: allow drivers to sleep in ampdu_action Allow drivers to sleep, and indicate this in the documentation. ath9k has some locking I don't understand, so keep it safe and disable BHs in it, all other drivers look fine with the context change. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/driver-ops.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 7e86c6f89be9..a4fcbcc4f458 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -352,11 +352,10 @@ static inline int drv_ampdu_action(struct ieee80211_local *local, might_sleep(); - local_bh_disable(); if (local->ops->ampdu_action) ret = local->ops->ampdu_action(&local->hw, &sdata->vif, action, sta, tid, ssn); - local_bh_enable(); + trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn, ret); return ret; } -- cgit v1.2.2 From 73a72a81d27b71f9ace31668d2dd7f3ac1c8228e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 10 Jun 2010 10:21:50 +0200 Subject: mac80211: update aggregation documentation Even before the recent changes, the documentation for TX aggregation was somewhat out of date. Update it and also add documentation for the RX side. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-rx.c | 23 +++++++++++++++++++++++ net/mac80211/agg-tx.c | 43 +++++++++++++++++++++++++++---------------- 2 files changed, 50 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index a843df26f384..965b272499fd 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -13,6 +13,29 @@ * published by the Free Software Foundation. 
*/ +/** + * DOC: RX A-MPDU aggregation + * + * Aggregation on the RX side requires only implementing the + * @ampdu_action callback that is invoked to start/stop any + * block-ack sessions for RX aggregation. + * + * When RX aggregation is started by the peer, the driver is + * notified via @ampdu_action function, with the + * %IEEE80211_AMPDU_RX_START action, and may reject the request + * in which case a negative response is sent to the peer, if it + * accepts it a positive response is sent. + * + * While the session is active, the device/driver are required + * to de-aggregate frames and pass them up one by one to mac80211, + * which will handle the reorder buffer. + * + * When the aggregation session is stopped again by the peer or + * ourselves, the driver's @ampdu_action function will be called + * with the action %IEEE80211_AMPDU_RX_STOP. In this case, the + * call must not fail. + */ + #include #include #include diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 5dff73eebefb..c893f236acea 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -21,28 +21,39 @@ #include "wme.h" /** - * DOC: TX aggregation + * DOC: TX A-MPDU aggregation * * Aggregation on the TX side requires setting the hardware flag - * %IEEE80211_HW_AMPDU_AGGREGATION as well as, if present, the @ampdu_queues - * hardware parameter to the number of hardware AMPDU queues. If there are no - * hardware queues then the driver will (currently) have to do all frame - * buffering. + * %IEEE80211_HW_AMPDU_AGGREGATION. The driver will then be handed + * packets with a flag indicating A-MPDU aggregation. The driver + * or device is responsible for actually aggregating the frames, + * as well as deciding how many and which to aggregate. * - * When TX aggregation is started by some subsystem (usually the rate control - * algorithm would be appropriate) by calling the - * ieee80211_start_tx_ba_session() function, the driver will be notified via - * its @ampdu_action function, with the %IEEE80211_AMPDU_TX_START action. + * When TX aggregation is started by some subsystem (usually the rate + * control algorithm would be appropriate) by calling the + * ieee80211_start_tx_ba_session() function, the driver will be + * notified via its @ampdu_action function, with the + * %IEEE80211_AMPDU_TX_START action. * * In response to that, the driver is later required to call the - * ieee80211_start_tx_ba_cb() (or ieee80211_start_tx_ba_cb_irqsafe()) - * function, which will start the aggregation session. + * ieee80211_start_tx_ba_cb_irqsafe() function, which will really + * start the aggregation session after the peer has also responded. + * If the peer responds negatively, the session will be stopped + * again right away. Note that it is possible for the aggregation + * session to be stopped before the driver has indicated that it + * is done setting it up, in which case it must not indicate the + * setup completion. * - * Similarly, when the aggregation session is stopped by - * ieee80211_stop_tx_ba_session(), the driver's @ampdu_action function will - * be called with the action %IEEE80211_AMPDU_TX_STOP. In this case, the - * call must not fail, and the driver must later call ieee80211_stop_tx_ba_cb() - * (or ieee80211_stop_tx_ba_cb_irqsafe()). + * Also note that, since we also need to wait for a response from + * the peer, the driver is notified of the completion of the + * handshake by the %IEEE80211_AMPDU_TX_OPERATIONAL action to the + * @ampdu_action callback. 
+ * + * Similarly, when the aggregation session is stopped by the peer + * or something calling ieee80211_stop_tx_ba_session(), the driver's + * @ampdu_action function will be called with the action + * %IEEE80211_AMPDU_TX_STOP. In this case, the call must not fail, + * and the driver must later call ieee80211_stop_tx_ba_cb_irqsafe(). */ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, -- cgit v1.2.2 From 8b58ff832df9222e1fb3c58ae7f745f95569b55e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 10 Jun 2010 10:21:51 +0200 Subject: mac80211: fix mgmt frame accounting The recent change to processing action frames from the management frame queue had already broken action frame accounting, and my rework didn't help either. So add back accounting and simplify the code with a label rather than duplicating it, and also add accounting for management frames. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/rx.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index ee01daccacbb..1594ebe80a4f 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1966,10 +1966,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) goto invalid; } - rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; - skb_queue_tail(&sdata->skb_queue, rx->skb); - ieee80211_queue_work(&local->hw, &sdata->work); - return RX_QUEUED; + goto queue; case WLAN_CATEGORY_SPECTRUM_MGMT: if (local->hw.conf.channel->band != IEEE80211_BAND_5GHZ) break; @@ -1999,10 +1996,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) if (memcmp(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN)) break; - rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; - skb_queue_tail(&sdata->skb_queue, rx->skb); - ieee80211_queue_work(&local->hw, &sdata->work); - return RX_QUEUED; + goto queue; } break; case WLAN_CATEGORY_SA_QUERY: @@ -2022,10 +2016,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) case WLAN_CATEGORY_MESH_PATH_SEL: if (!ieee80211_vif_is_mesh(&sdata->vif)) break; - rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; - skb_queue_tail(&sdata->skb_queue, rx->skb); - ieee80211_queue_work(&local->hw, &sdata->work); - return RX_QUEUED; + goto queue; } invalid: @@ -2076,6 +2067,14 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) rx->sta->rx_packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; + + queue: + rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; + skb_queue_tail(&sdata->skb_queue, rx->skb); + ieee80211_queue_work(&local->hw, &sdata->work); + if (rx->sta) + rx->sta->rx_packets++; + return RX_QUEUED; } static ieee80211_rx_result debug_noinline @@ -2131,6 +2130,8 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; skb_queue_tail(&sdata->skb_queue, rx->skb); ieee80211_queue_work(&rx->local->hw, &sdata->work); + if (rx->sta) + rx->sta->rx_packets++; return RX_QUEUED; } -- cgit v1.2.2 From 4efc76bdbf9bfd2b8624c4dd52b9d8ffed176b31 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 10 Jun 2010 10:56:20 +0200 Subject: mac80211: bracket driver tracing Currently, driver tracing is sometimes invoked after and sometimes before the actual driver callback. This is fine as long as the driver has no tracing itself, but as soon as it does it gets confusing. 
To make traces containing such information easier to read, introduce a return tracer in mac80211 that essentially brackets any driver tracing, and invoke the real trace before the driver's callback, only showing the return value, if any, afterwards. Since tracing records the process, there's no problem with overlapping calls if that should happen. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/driver-ops.h | 89 ++++++++++++++------- net/mac80211/driver-trace.h | 189 +++++++++++++++++++++++--------------------- 2 files changed, 156 insertions(+), 122 deletions(-) (limited to 'net') diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index a4fcbcc4f458..965d64f68567 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -16,10 +16,11 @@ static inline int drv_start(struct ieee80211_local *local) might_sleep(); + trace_drv_start(local); local->started = true; smp_mb(); ret = local->ops->start(&local->hw); - trace_drv_start(local, ret); + trace_drv_return_int(local, ret); return ret; } @@ -27,8 +28,9 @@ static inline void drv_stop(struct ieee80211_local *local) { might_sleep(); - local->ops->stop(&local->hw); trace_drv_stop(local); + local->ops->stop(&local->hw); + trace_drv_return_void(local); /* sync away all work on the tasklet before clearing started */ tasklet_disable(&local->tasklet); @@ -46,8 +48,9 @@ static inline int drv_add_interface(struct ieee80211_local *local, might_sleep(); + trace_drv_add_interface(local, vif_to_sdata(vif)); ret = local->ops->add_interface(&local->hw, vif); - trace_drv_add_interface(local, vif_to_sdata(vif), ret); + trace_drv_return_int(local, ret); return ret; } @@ -56,8 +59,9 @@ static inline void drv_remove_interface(struct ieee80211_local *local, { might_sleep(); - local->ops->remove_interface(&local->hw, vif); trace_drv_remove_interface(local, vif_to_sdata(vif)); + local->ops->remove_interface(&local->hw, vif); + trace_drv_return_void(local); } static inline int drv_config(struct ieee80211_local *local, u32 changed) @@ -66,8 +70,9 @@ static inline int drv_config(struct ieee80211_local *local, u32 changed) might_sleep(); + trace_drv_config(local, changed); ret = local->ops->config(&local->hw, changed); - trace_drv_config(local, changed, ret); + trace_drv_return_int(local, ret); return ret; } @@ -78,9 +83,10 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local, { might_sleep(); + trace_drv_bss_info_changed(local, sdata, info, changed); if (local->ops->bss_info_changed) local->ops->bss_info_changed(&local->hw, &sdata->vif, info, changed); - trace_drv_bss_info_changed(local, sdata, info, changed); + trace_drv_return_void(local); } struct in_ifaddr; @@ -92,11 +98,11 @@ static inline int drv_configure_arp_filter(struct ieee80211_local *local, might_sleep(); + trace_drv_configure_arp_filter(local, vif_to_sdata(vif)); if (local->ops->configure_arp_filter) ret = local->ops->configure_arp_filter(&local->hw, vif, ifa_list); - - trace_drv_configure_arp_filter(local, vif_to_sdata(vif), ifa_list, ret); + trace_drv_return_int(local, ret); return ret; } @@ -105,10 +111,12 @@ static inline u64 drv_prepare_multicast(struct ieee80211_local *local, { u64 ret = 0; + trace_drv_prepare_multicast(local, mc_list->count); + if (local->ops->prepare_multicast) ret = local->ops->prepare_multicast(&local->hw, mc_list); - trace_drv_prepare_multicast(local, mc_list->count, ret); + trace_drv_return_u64(local, ret); return ret; } @@ -120,19 +128,21 @@ static inline void drv_configure_filter(struct 
ieee80211_local *local, { might_sleep(); - local->ops->configure_filter(&local->hw, changed_flags, total_flags, - multicast); trace_drv_configure_filter(local, changed_flags, total_flags, multicast); + local->ops->configure_filter(&local->hw, changed_flags, total_flags, + multicast); + trace_drv_return_void(local); } static inline int drv_set_tim(struct ieee80211_local *local, struct ieee80211_sta *sta, bool set) { int ret = 0; + trace_drv_set_tim(local, sta, set); if (local->ops->set_tim) ret = local->ops->set_tim(&local->hw, sta, set); - trace_drv_set_tim(local, sta, set, ret); + trace_drv_return_int(local, ret); return ret; } @@ -146,8 +156,9 @@ static inline int drv_set_key(struct ieee80211_local *local, might_sleep(); + trace_drv_set_key(local, cmd, sdata, sta, key); ret = local->ops->set_key(&local->hw, cmd, &sdata->vif, sta, key); - trace_drv_set_key(local, cmd, sdata, sta, key, ret); + trace_drv_return_int(local, ret); return ret; } @@ -162,10 +173,11 @@ static inline void drv_update_tkip_key(struct ieee80211_local *local, if (sta) ista = &sta->sta; + trace_drv_update_tkip_key(local, sdata, conf, ista, iv32); if (local->ops->update_tkip_key) local->ops->update_tkip_key(&local->hw, &sdata->vif, conf, ista, iv32, phase1key); - trace_drv_update_tkip_key(local, sdata, conf, ista, iv32); + trace_drv_return_void(local); } static inline int drv_hw_scan(struct ieee80211_local *local, @@ -176,8 +188,9 @@ static inline int drv_hw_scan(struct ieee80211_local *local, might_sleep(); + trace_drv_hw_scan(local, sdata, req); ret = local->ops->hw_scan(&local->hw, &sdata->vif, req); - trace_drv_hw_scan(local, sdata, req, ret); + trace_drv_return_int(local, ret); return ret; } @@ -185,18 +198,20 @@ static inline void drv_sw_scan_start(struct ieee80211_local *local) { might_sleep(); + trace_drv_sw_scan_start(local); if (local->ops->sw_scan_start) local->ops->sw_scan_start(&local->hw); - trace_drv_sw_scan_start(local); + trace_drv_return_void(local); } static inline void drv_sw_scan_complete(struct ieee80211_local *local) { might_sleep(); + trace_drv_sw_scan_complete(local); if (local->ops->sw_scan_complete) local->ops->sw_scan_complete(&local->hw); - trace_drv_sw_scan_complete(local); + trace_drv_return_void(local); } static inline int drv_get_stats(struct ieee80211_local *local, @@ -228,9 +243,10 @@ static inline int drv_set_rts_threshold(struct ieee80211_local *local, might_sleep(); + trace_drv_set_rts_threshold(local, value); if (local->ops->set_rts_threshold) ret = local->ops->set_rts_threshold(&local->hw, value); - trace_drv_set_rts_threshold(local, value, ret); + trace_drv_return_int(local, ret); return ret; } @@ -240,12 +256,13 @@ static inline int drv_set_coverage_class(struct ieee80211_local *local, int ret = 0; might_sleep(); + trace_drv_set_coverage_class(local, value); if (local->ops->set_coverage_class) local->ops->set_coverage_class(&local->hw, value); else ret = -EOPNOTSUPP; - trace_drv_set_coverage_class(local, value, ret); + trace_drv_return_int(local, ret); return ret; } @@ -254,9 +271,10 @@ static inline void drv_sta_notify(struct ieee80211_local *local, enum sta_notify_cmd cmd, struct ieee80211_sta *sta) { + trace_drv_sta_notify(local, sdata, cmd, sta); if (local->ops->sta_notify) local->ops->sta_notify(&local->hw, &sdata->vif, cmd, sta); - trace_drv_sta_notify(local, sdata, cmd, sta); + trace_drv_return_void(local); } static inline int drv_sta_add(struct ieee80211_local *local, @@ -267,10 +285,11 @@ static inline int drv_sta_add(struct ieee80211_local *local, might_sleep(); + 
trace_drv_sta_add(local, sdata, sta); if (local->ops->sta_add) ret = local->ops->sta_add(&local->hw, &sdata->vif, sta); - trace_drv_sta_add(local, sdata, sta, ret); + trace_drv_return_int(local, ret); return ret; } @@ -281,10 +300,11 @@ static inline void drv_sta_remove(struct ieee80211_local *local, { might_sleep(); + trace_drv_sta_remove(local, sdata, sta); if (local->ops->sta_remove) local->ops->sta_remove(&local->hw, &sdata->vif, sta); - trace_drv_sta_remove(local, sdata, sta); + trace_drv_return_void(local); } static inline int drv_conf_tx(struct ieee80211_local *local, u16 queue, @@ -294,9 +314,10 @@ static inline int drv_conf_tx(struct ieee80211_local *local, u16 queue, might_sleep(); + trace_drv_conf_tx(local, queue, params); if (local->ops->conf_tx) ret = local->ops->conf_tx(&local->hw, queue, params); - trace_drv_conf_tx(local, queue, params, ret); + trace_drv_return_int(local, ret); return ret; } @@ -306,9 +327,10 @@ static inline u64 drv_get_tsf(struct ieee80211_local *local) might_sleep(); + trace_drv_get_tsf(local); if (local->ops->get_tsf) ret = local->ops->get_tsf(&local->hw); - trace_drv_get_tsf(local, ret); + trace_drv_return_u64(local, ret); return ret; } @@ -316,18 +338,20 @@ static inline void drv_set_tsf(struct ieee80211_local *local, u64 tsf) { might_sleep(); + trace_drv_set_tsf(local, tsf); if (local->ops->set_tsf) local->ops->set_tsf(&local->hw, tsf); - trace_drv_set_tsf(local, tsf); + trace_drv_return_void(local); } static inline void drv_reset_tsf(struct ieee80211_local *local) { might_sleep(); + trace_drv_reset_tsf(local); if (local->ops->reset_tsf) local->ops->reset_tsf(&local->hw); - trace_drv_reset_tsf(local); + trace_drv_return_void(local); } static inline int drv_tx_last_beacon(struct ieee80211_local *local) @@ -336,9 +360,10 @@ static inline int drv_tx_last_beacon(struct ieee80211_local *local) might_sleep(); + trace_drv_tx_last_beacon(local); if (local->ops->tx_last_beacon) ret = local->ops->tx_last_beacon(&local->hw); - trace_drv_tx_last_beacon(local, ret); + trace_drv_return_int(local, ret); return ret; } @@ -352,11 +377,14 @@ static inline int drv_ampdu_action(struct ieee80211_local *local, might_sleep(); + trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn); + if (local->ops->ampdu_action) ret = local->ops->ampdu_action(&local->hw, &sdata->vif, action, sta, tid, ssn); - trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn, ret); + trace_drv_return_int(local, ret); + return ret; } @@ -385,6 +413,7 @@ static inline void drv_flush(struct ieee80211_local *local, bool drop) trace_drv_flush(local, drop); if (local->ops->flush) local->ops->flush(&local->hw, drop); + trace_drv_return_void(local); } static inline void drv_channel_switch(struct ieee80211_local *local, @@ -392,9 +421,9 @@ static inline void drv_channel_switch(struct ieee80211_local *local, { might_sleep(); - local->ops->channel_switch(&local->hw, ch_switch); - trace_drv_channel_switch(local, ch_switch); + local->ops->channel_switch(&local->hw, ch_switch); + trace_drv_return_void(local); } #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 6b90630151ab..06444ea67bc4 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -36,20 +36,58 @@ static inline void trace_ ## name(proto) {} * Tracing for driver callbacks. 
*/ -TRACE_EVENT(drv_start, - TP_PROTO(struct ieee80211_local *local, int ret), +TRACE_EVENT(drv_return_void, + TP_PROTO(struct ieee80211_local *local), + TP_ARGS(local), + TP_STRUCT__entry( + LOCAL_ENTRY + ), + TP_fast_assign( + LOCAL_ASSIGN; + ), + TP_printk(LOCAL_PR_FMT, LOCAL_PR_ARG) +); +TRACE_EVENT(drv_return_int, + TP_PROTO(struct ieee80211_local *local, int ret), TP_ARGS(local, ret), - TP_STRUCT__entry( LOCAL_ENTRY __field(int, ret) ), + TP_fast_assign( + LOCAL_ASSIGN; + __entry->ret = ret; + ), + TP_printk(LOCAL_PR_FMT " - %d", LOCAL_PR_ARG, __entry->ret) +); +TRACE_EVENT(drv_return_u64, + TP_PROTO(struct ieee80211_local *local, u64 ret), + TP_ARGS(local, ret), + TP_STRUCT__entry( + LOCAL_ENTRY + __field(u64, ret) + ), TP_fast_assign( LOCAL_ASSIGN; __entry->ret = ret; ), + TP_printk(LOCAL_PR_FMT " - %llu", LOCAL_PR_ARG, __entry->ret) +); + +TRACE_EVENT(drv_start, + TP_PROTO(struct ieee80211_local *local), + + TP_ARGS(local), + + TP_STRUCT__entry( + LOCAL_ENTRY + ), + + TP_fast_assign( + LOCAL_ASSIGN; + ), TP_printk( LOCAL_PR_FMT, LOCAL_PR_ARG @@ -76,28 +114,25 @@ TRACE_EVENT(drv_stop, TRACE_EVENT(drv_add_interface, TP_PROTO(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - int ret), + struct ieee80211_sub_if_data *sdata), - TP_ARGS(local, sdata, ret), + TP_ARGS(local, sdata), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __array(char, addr, 6) - __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; memcpy(__entry->addr, sdata->vif.addr, 6); - __entry->ret = ret; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT " addr:%pM ret:%d", - LOCAL_PR_ARG, VIF_PR_ARG, __entry->addr, __entry->ret + LOCAL_PR_FMT VIF_PR_FMT " addr:%pM", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->addr ) ); @@ -126,15 +161,13 @@ TRACE_EVENT(drv_remove_interface, TRACE_EVENT(drv_config, TP_PROTO(struct ieee80211_local *local, - u32 changed, - int ret), + u32 changed), - TP_ARGS(local, changed, ret), + TP_ARGS(local, changed), TP_STRUCT__entry( LOCAL_ENTRY __field(u32, changed) - __field(int, ret) __field(u32, flags) __field(int, power_level) __field(int, dynamic_ps_timeout) @@ -150,7 +183,6 @@ TRACE_EVENT(drv_config, TP_fast_assign( LOCAL_ASSIGN; __entry->changed = changed; - __entry->ret = ret; __entry->flags = local->hw.conf.flags; __entry->power_level = local->hw.conf.power_level; __entry->dynamic_ps_timeout = local->hw.conf.dynamic_ps_timeout; @@ -164,8 +196,8 @@ TRACE_EVENT(drv_config, ), TP_printk( - LOCAL_PR_FMT " ch:%#x freq:%d ret:%d", - LOCAL_PR_ARG, __entry->changed, __entry->center_freq, __entry->ret + LOCAL_PR_FMT " ch:%#x freq:%d", + LOCAL_PR_ARG, __entry->changed, __entry->center_freq ) ); @@ -221,50 +253,44 @@ TRACE_EVENT(drv_bss_info_changed, TRACE_EVENT(drv_configure_arp_filter, TP_PROTO(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct in_ifaddr *ifa_list, int ret), + struct ieee80211_sub_if_data *sdata), - TP_ARGS(local, sdata, ifa_list, ret), + TP_ARGS(local, sdata), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY - __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; - __entry->ret = ret; ), TP_printk( - VIF_PR_FMT LOCAL_PR_FMT " ret:%d", - VIF_PR_ARG, LOCAL_PR_ARG, __entry->ret + VIF_PR_FMT LOCAL_PR_FMT, + VIF_PR_ARG, LOCAL_PR_ARG ) ); TRACE_EVENT(drv_prepare_multicast, - TP_PROTO(struct ieee80211_local *local, int mc_count, u64 ret), + TP_PROTO(struct ieee80211_local *local, int mc_count), - TP_ARGS(local, mc_count, ret), + TP_ARGS(local, mc_count), TP_STRUCT__entry( LOCAL_ENTRY __field(int, mc_count) - __field(u64, ret) ), TP_fast_assign( 
LOCAL_ASSIGN; __entry->mc_count = mc_count; - __entry->ret = ret; ), TP_printk( - LOCAL_PR_FMT " prepare mc (%d): %llx", - LOCAL_PR_ARG, __entry->mc_count, - (unsigned long long) __entry->ret + LOCAL_PR_FMT " prepare mc (%d)", + LOCAL_PR_ARG, __entry->mc_count ) ); @@ -298,27 +324,25 @@ TRACE_EVENT(drv_configure_filter, TRACE_EVENT(drv_set_tim, TP_PROTO(struct ieee80211_local *local, - struct ieee80211_sta *sta, bool set, int ret), + struct ieee80211_sta *sta, bool set), - TP_ARGS(local, sta, set, ret), + TP_ARGS(local, sta, set), TP_STRUCT__entry( LOCAL_ENTRY STA_ENTRY __field(bool, set) - __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; STA_ASSIGN; __entry->set = set; - __entry->ret = ret; ), TP_printk( - LOCAL_PR_FMT STA_PR_FMT " set:%d ret:%d", - LOCAL_PR_ARG, STA_PR_FMT, __entry->set, __entry->ret + LOCAL_PR_FMT STA_PR_FMT " set:%d", + LOCAL_PR_ARG, STA_PR_FMT, __entry->set ) ); @@ -326,9 +350,9 @@ TRACE_EVENT(drv_set_key, TP_PROTO(struct ieee80211_local *local, enum set_key_cmd cmd, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta, - struct ieee80211_key_conf *key, int ret), + struct ieee80211_key_conf *key), - TP_ARGS(local, cmd, sdata, sta, key, ret), + TP_ARGS(local, cmd, sdata, sta, key), TP_STRUCT__entry( LOCAL_ENTRY @@ -338,7 +362,6 @@ TRACE_EVENT(drv_set_key, __field(u8, hw_key_idx) __field(u8, flags) __field(s8, keyidx) - __field(int, ret) ), TP_fast_assign( @@ -349,12 +372,11 @@ TRACE_EVENT(drv_set_key, __entry->flags = key->flags; __entry->keyidx = key->keyidx; __entry->hw_key_idx = key->hw_key_idx; - __entry->ret = ret; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " ret:%d", - LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->ret + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT, + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG ) ); @@ -389,25 +411,23 @@ TRACE_EVENT(drv_update_tkip_key, TRACE_EVENT(drv_hw_scan, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, - struct cfg80211_scan_request *req, int ret), + struct cfg80211_scan_request *req), - TP_ARGS(local, sdata, req, ret), + TP_ARGS(local, sdata, req), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY - __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; - __entry->ret = ret; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT " ret:%d", - LOCAL_PR_ARG,VIF_PR_ARG, __entry->ret + LOCAL_PR_FMT VIF_PR_FMT, + LOCAL_PR_ARG,VIF_PR_ARG ) ); @@ -504,48 +524,44 @@ TRACE_EVENT(drv_get_tkip_seq, ); TRACE_EVENT(drv_set_rts_threshold, - TP_PROTO(struct ieee80211_local *local, u32 value, int ret), + TP_PROTO(struct ieee80211_local *local, u32 value), - TP_ARGS(local, value, ret), + TP_ARGS(local, value), TP_STRUCT__entry( LOCAL_ENTRY __field(u32, value) - __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; - __entry->ret = ret; __entry->value = value; ), TP_printk( - LOCAL_PR_FMT " value:%d ret:%d", - LOCAL_PR_ARG, __entry->value, __entry->ret + LOCAL_PR_FMT " value:%d", + LOCAL_PR_ARG, __entry->value ) ); TRACE_EVENT(drv_set_coverage_class, - TP_PROTO(struct ieee80211_local *local, u8 value, int ret), + TP_PROTO(struct ieee80211_local *local, u8 value), - TP_ARGS(local, value, ret), + TP_ARGS(local, value), TP_STRUCT__entry( LOCAL_ENTRY __field(u8, value) - __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; - __entry->ret = ret; __entry->value = value; ), TP_printk( - LOCAL_PR_FMT " value:%d ret:%d", - LOCAL_PR_ARG, __entry->value, __entry->ret + LOCAL_PR_FMT " value:%d", + LOCAL_PR_ARG, __entry->value ) ); @@ -580,27 +596,25 @@ TRACE_EVENT(drv_sta_notify, TRACE_EVENT(drv_sta_add, TP_PROTO(struct ieee80211_local *local, 
struct ieee80211_sub_if_data *sdata, - struct ieee80211_sta *sta, int ret), + struct ieee80211_sta *sta), - TP_ARGS(local, sdata, sta, ret), + TP_ARGS(local, sdata, sta), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY STA_ENTRY - __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; STA_ASSIGN; - __entry->ret = ret; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " ret:%d", - LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->ret + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT, + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG ) ); @@ -631,10 +645,9 @@ TRACE_EVENT(drv_sta_remove, TRACE_EVENT(drv_conf_tx, TP_PROTO(struct ieee80211_local *local, u16 queue, - const struct ieee80211_tx_queue_params *params, - int ret), + const struct ieee80211_tx_queue_params *params), - TP_ARGS(local, queue, params, ret), + TP_ARGS(local, queue, params), TP_STRUCT__entry( LOCAL_ENTRY @@ -643,13 +656,11 @@ TRACE_EVENT(drv_conf_tx, __field(u16, cw_min) __field(u16, cw_max) __field(u8, aifs) - __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; __entry->queue = queue; - __entry->ret = ret; __entry->txop = params->txop; __entry->cw_max = params->cw_max; __entry->cw_min = params->cw_min; @@ -657,29 +668,27 @@ TRACE_EVENT(drv_conf_tx, ), TP_printk( - LOCAL_PR_FMT " queue:%d ret:%d", - LOCAL_PR_ARG, __entry->queue, __entry->ret + LOCAL_PR_FMT " queue:%d", + LOCAL_PR_ARG, __entry->queue ) ); TRACE_EVENT(drv_get_tsf, - TP_PROTO(struct ieee80211_local *local, u64 ret), + TP_PROTO(struct ieee80211_local *local), - TP_ARGS(local, ret), + TP_ARGS(local), TP_STRUCT__entry( LOCAL_ENTRY - __field(u64, ret) ), TP_fast_assign( LOCAL_ASSIGN; - __entry->ret = ret; ), TP_printk( - LOCAL_PR_FMT " ret:%llu", - LOCAL_PR_ARG, (unsigned long long)__entry->ret + LOCAL_PR_FMT, + LOCAL_PR_ARG ) ); @@ -723,23 +732,21 @@ TRACE_EVENT(drv_reset_tsf, ); TRACE_EVENT(drv_tx_last_beacon, - TP_PROTO(struct ieee80211_local *local, int ret), + TP_PROTO(struct ieee80211_local *local), - TP_ARGS(local, ret), + TP_ARGS(local), TP_STRUCT__entry( LOCAL_ENTRY - __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; - __entry->ret = ret; ), TP_printk( - LOCAL_PR_FMT " ret:%d", - LOCAL_PR_ARG, __entry->ret + LOCAL_PR_FMT, + LOCAL_PR_ARG ) ); @@ -748,9 +755,9 @@ TRACE_EVENT(drv_ampdu_action, struct ieee80211_sub_if_data *sdata, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, - u16 *ssn, int ret), + u16 *ssn), - TP_ARGS(local, sdata, action, sta, tid, ssn, ret), + TP_ARGS(local, sdata, action, sta, tid, ssn), TP_STRUCT__entry( LOCAL_ENTRY @@ -758,7 +765,6 @@ TRACE_EVENT(drv_ampdu_action, __field(u32, action) __field(u16, tid) __field(u16, ssn) - __field(int, ret) VIF_ENTRY ), @@ -766,15 +772,14 @@ TRACE_EVENT(drv_ampdu_action, LOCAL_ASSIGN; VIF_ASSIGN; STA_ASSIGN; - __entry->ret = ret; __entry->action = action; __entry->tid = tid; __entry->ssn = ssn ? *ssn : 0; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d ret:%d", - LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action, __entry->tid, __entry->ret + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d", + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action, __entry->tid ) ); -- cgit v1.2.2 From fbd2c8dcbc69616d2e15b8a269a86b3a05d45aea Mon Sep 17 00:00:00 2001 From: Teemu Paasikivi Date: Mon, 14 Jun 2010 12:55:31 +0300 Subject: mac80211: Set basic rates while joining ibss network This patch adds support to nl80211 and mac80211 to set basic rates when joining/creating ibss network. Original patch was posted by Johannes Berg on the linux-wireless posting list. 
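[Editorial note, not part of the patch] A minimal, self-contained userspace sketch of the rate-matching idea the nl80211 hunk below implements: each byte of NL80211_ATTR_BSS_BASIC_RATES carries a rate in units of 500 kbit/s, which is compared against the band's bitrate table (kept in 100 kbit/s units) to build the basic_rates bitmap. The table contents and input bytes here are invented for illustration only.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* hypothetical band bitrate table, in units of 100 kbit/s */
	static const int bitrates[] = { 10, 20, 55, 110, 60, 90, 120 };
	/* example attribute payload: bit 0x80 marks a rate as "basic" */
	static const uint8_t rates[] = { 0x82, 0x84, 0x0b };
	uint32_t basic_rates = 0;
	size_t i, j;

	for (i = 0; i < sizeof(rates); i++) {
		int rate = (rates[i] & 0x7f) * 5; /* 500 kbit/s -> 100 kbit/s units */

		for (j = 0; j < sizeof(bitrates) / sizeof(bitrates[0]); j++) {
			if (bitrates[j] == rate) {
				basic_rates |= 1u << j;
				break;
			}
		}
	}

	printf("basic_rates bitmap: 0x%x\n", basic_rates); /* prints 0x7 here */
	return 0;
}

If a requested rate has no match in the band's table, the real code rejects the whole request with -EINVAL, which is what the "found" check in the hunk below enforces.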
Signed-off-by: Johannes Berg Signed-off-by: Teemu Paasikivi Signed-off-by: John W. Linville --- net/mac80211/ibss.c | 4 +++- net/mac80211/ieee80211_i.h | 2 ++ net/wireless/nl80211.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index bfd7286488c7..9f4e64ed8b83 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -172,6 +172,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, rcu_assign_pointer(ifibss->presp, skb); sdata->vif.bss_conf.beacon_int = beacon_int; + sdata->vif.bss_conf.basic_rates = basic_rates; bss_change = BSS_CHANGED_BEACON_INT; bss_change |= ieee80211_reset_erp_info(sdata); bss_change |= BSS_CHANGED_BSSID; @@ -529,7 +530,7 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) sdata->drop_unencrypted = 0; __ieee80211_sta_join_ibss(sdata, bssid, sdata->vif.bss_conf.beacon_int, - ifibss->channel, 3, /* first two are basic */ + ifibss->channel, ifibss->basic_rates, capability, 0); } @@ -859,6 +860,7 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, sdata->u.ibss.fixed_bssid = false; sdata->u.ibss.privacy = params->privacy; + sdata->u.ibss.basic_rates = params->basic_rates; sdata->vif.bss_conf.beacon_int = params->beacon_interval; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 9d753a02a2e4..c3c2be3f8a2c 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -387,6 +387,8 @@ struct ieee80211_if_ibss { unsigned long request; unsigned long last_scan_completed; + u32 basic_rates; + bool timer_running; bool fixed_bssid; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c65e67e9231c..41529aca794c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3955,6 +3955,55 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) } } + if (info->attrs[NL80211_ATTR_BSS_BASIC_RATES]) { + u8 *rates = + nla_data(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]); + int n_rates = + nla_len(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]); + struct ieee80211_supported_band *sband = + wiphy->bands[ibss.channel->band]; + int i, j; + + if (n_rates == 0) { + err = -EINVAL; + goto out; + } + + for (i = 0; i < n_rates; i++) { + int rate = (rates[i] & 0x7f) * 5; + bool found = false; + + for (j = 0; j < sband->n_bitrates; j++) { + if (sband->bitrates[j].bitrate == rate) { + found = true; + ibss.basic_rates |= BIT(j); + break; + } + } + if (!found) { + err = -EINVAL; + goto out; + } + } + } else { + /* + * If no rates were explicitly configured, + * use the mandatory rate set for 11b or + * 11a for maximum compatibility. + */ + struct ieee80211_supported_band *sband = + wiphy->bands[ibss.channel->band]; + int j; + u32 flag = ibss.channel->band == IEEE80211_BAND_5GHZ ? + IEEE80211_RATE_MANDATORY_A : + IEEE80211_RATE_MANDATORY_B; + + for (j = 0; j < sband->n_bitrates; j++) { + if (sband->bitrates[j].flags & flag) + ibss.basic_rates |= BIT(j); + } + } + err = cfg80211_join_ibss(rdev, dev, &ibss, connkeys); out: -- cgit v1.2.2 From 392cfdb10dab6c7dfa5fed18d8a44d7453d42196 Mon Sep 17 00:00:00 2001 From: Teemu Paasikivi Date: Mon, 14 Jun 2010 12:55:32 +0300 Subject: mac80211: Set changed basic rates flag Add changed basic rates flag to bss_changed while joinig ibss network. This patch is split from the patch containing support for setting basic rates when creating ibss network. 
Original patch was posted by Johannes Berg on the linux-wireless posting list. Signed-off-by: Johannes Berg Signed-off-by: Teemu Paasikivi Signed-off-by: John W. Linville --- net/mac80211/ibss.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 9f4e64ed8b83..93481eb51d24 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -178,6 +178,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, bss_change |= BSS_CHANGED_BSSID; bss_change |= BSS_CHANGED_BEACON; bss_change |= BSS_CHANGED_BEACON_ENABLED; + bss_change |= BSS_CHANGED_BASIC_RATES; bss_change |= BSS_CHANGED_IBSS; sdata->vif.bss_conf.ibss_joined = true; ieee80211_bss_info_change_notify(sdata, bss_change); -- cgit v1.2.2 From 5ea096c0c85e80335889539899af9a4717976e0b Mon Sep 17 00:00:00 2001 From: Teemu Paasikivi Date: Mon, 14 Jun 2010 12:55:33 +0300 Subject: mac80211: remove BSS from cfg80211 list when leaving IBSS Remove BSS from cfg80211 BSS list if we are only member in IBSS when leaving it. Signed-off-by: Teemu Paasikivi Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ibss.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 93481eb51d24..d4e84b22a66d 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -917,6 +917,31 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) { struct sk_buff *skb; + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; + struct ieee80211_local *local = sdata->local; + struct cfg80211_bss *cbss; + u16 capability; + int active_ibss = 0; + + active_ibss = ieee80211_sta_active_ibss(sdata); + + if (!active_ibss && !is_zero_ether_addr(ifibss->bssid)) { + capability = WLAN_CAPABILITY_IBSS; + + if (ifibss->privacy) + capability |= WLAN_CAPABILITY_PRIVACY; + + cbss = cfg80211_get_bss(local->hw.wiphy, ifibss->channel, + ifibss->bssid, ifibss->ssid, + ifibss->ssid_len, WLAN_CAPABILITY_IBSS | + WLAN_CAPABILITY_PRIVACY, + capability); + + if (cbss) { + cfg80211_unlink_bss(local->hw.wiphy, cbss); + cfg80211_put_bss(cbss); + } + } del_timer_sync(&sdata->u.ibss.timer); clear_bit(IEEE80211_IBSS_REQ_RUN, &sdata->u.ibss.request); -- cgit v1.2.2 From 685429623f88d84f98bd5daffc3c427c408740d4 Mon Sep 17 00:00:00 2001 From: Juuso Oikarinen Date: Wed, 9 Jun 2010 13:43:26 +0300 Subject: mac80211: Fix circular locking dependency in ARP filter handling There is a circular locking dependency when configuring the hardware ARP filters on association, occurring when flushing the mac80211 workqueue. This is what happens: [ 92.026800] ======================================================= [ 92.030507] [ INFO: possible circular locking dependency detected ] [ 92.030507] 2.6.34-04781-g2b2c009 #85 [ 92.030507] ------------------------------------------------------- [ 92.030507] modprobe/5225 is trying to acquire lock: [ 92.030507] ((wiphy_name(local->hw.wiphy))){+.+.+.}, at: [] flush_workq ueue+0x0/0xb0 [ 92.030507] [ 92.030507] but task is already holding lock: [ 92.030507] (rtnl_mutex){+.+.+.}, at: [] rtnl_lock+0x12/0x20 [ 92.030507] [ 92.030507] which lock already depends on the new lock. 
[ 92.030507] [ 92.030507] [ 92.030507] the existing dependency chain (in reverse order) is: [ 92.030507] [ 92.030507] -> #2 (rtnl_mutex){+.+.+.}: [ 92.030507] [] lock_acquire+0xdb/0x110 [ 92.030507] [] mutex_lock_nested+0x44/0x300 [ 92.030507] [] rtnl_lock+0x12/0x20 [ 92.030507] [] ieee80211_assoc_done+0x6c/0xe0 [mac80211] [ 92.030507] [] ieee80211_work_work+0x31d/0x1280 [mac80211] [ 92.030507] -> #1 ((&local->work_work)){+.+.+.}: [ 92.030507] [] lock_acquire+0xdb/0x110 [ 92.030507] [] worker_thread+0x22a/0x370 [ 92.030507] [] kthread+0x96/0xb0 [ 92.030507] [] kernel_thread_helper+0x4/0x10 [ 92.030507] [ 92.030507] -> #0 ((wiphy_name(local->hw.wiphy))){+.+.+.}: [ 92.030507] [] __lock_acquire+0x1c0c/0x1d50 [ 92.030507] [] lock_acquire+0xdb/0x110 [ 92.030507] [] flush_workqueue+0x4e/0xb0 [ 92.030507] [] ieee80211_stop_device+0x2b/0xb0 [mac80211] [ 92.030507] [] ieee80211_stop+0x3e5/0x680 [mac80211] The locking in this case is quite complex. Fix the problem by rewriting the way the hardware ARP filter list is handled - i.e. make a copy of the address list to the bss_conf struct, and provide that list to the hardware driver when needed. The current patch will enable filtering also in promiscuous mode. This may need to be changed in the future. Reported-by: Reinette Chatre Signed-off-by: Juuso Oikarinen Signed-off-by: John W. Linville --- net/mac80211/driver-ops.h | 17 -------------- net/mac80211/driver-trace.h | 22 ------------------ net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/iface.c | 3 +++ net/mac80211/main.c | 54 ++++++++++++++++++++++++++++----------------- net/mac80211/mlme.c | 34 +++++++++++++++------------- 6 files changed, 58 insertions(+), 74 deletions(-) (limited to 'net') diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 965d64f68567..c33317320eee 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -89,23 +89,6 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local, trace_drv_return_void(local); } -struct in_ifaddr; -static inline int drv_configure_arp_filter(struct ieee80211_local *local, - struct ieee80211_vif *vif, - struct in_ifaddr *ifa_list) -{ - int ret = 0; - - might_sleep(); - - trace_drv_configure_arp_filter(local, vif_to_sdata(vif)); - if (local->ops->configure_arp_filter) - ret = local->ops->configure_arp_filter(&local->hw, vif, - ifa_list); - trace_drv_return_int(local, ret); - return ret; -} - static inline u64 drv_prepare_multicast(struct ieee80211_local *local, struct netdev_hw_addr_list *mc_list) { diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 06444ea67bc4..8da31caff931 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -251,28 +251,6 @@ TRACE_EVENT(drv_bss_info_changed, ) ); -TRACE_EVENT(drv_configure_arp_filter, - TP_PROTO(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata), - - TP_ARGS(local, sdata), - - TP_STRUCT__entry( - LOCAL_ENTRY - VIF_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - VIF_ASSIGN; - ), - - TP_printk( - VIF_PR_FMT LOCAL_PR_FMT, - VIF_PR_ARG, LOCAL_PR_ARG - ) -); - TRACE_EVENT(drv_prepare_multicast, TP_PROTO(struct ieee80211_local *local, int mc_count), diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index c3c2be3f8a2c..9b3c3f971d28 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -514,6 +514,8 @@ struct ieee80211_sub_if_data { struct work_struct work; struct sk_buff_head skb_queue; + bool arp_filter_state; + /* * AP this belongs to: self in AP mode and * 
corresponding AP in VLAN mode, NULL for diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 490be2f3af27..910729fc18cd 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1076,6 +1076,9 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, sdata->wdev.wiphy = local->hw.wiphy; sdata->local = local; sdata->dev = ndev; +#ifdef CONFIG_INET + sdata->arp_filter_state = true; +#endif for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) skb_queue_head_init(&sdata->fragments[i].skb_list); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index c2e46e88f3c9..a1bf46c64b93 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -317,23 +318,6 @@ static void ieee80211_recalc_smps_work(struct work_struct *work) } #ifdef CONFIG_INET -int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata) -{ - struct in_device *idev; - int ret = 0; - - BUG_ON(!sdata); - ASSERT_RTNL(); - - idev = sdata->dev->ip_ptr; - if (!idev) - return 0; - - ret = drv_configure_arp_filter(sdata->local, &sdata->vif, - idev->ifa_list); - return ret; -} - static int ieee80211_ifa_changed(struct notifier_block *nb, unsigned long data, void *arg) { @@ -343,8 +327,11 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, ifa_notifier); struct net_device *ndev = ifa->ifa_dev->dev; struct wireless_dev *wdev = ndev->ieee80211_ptr; + struct in_device *idev; struct ieee80211_sub_if_data *sdata; + struct ieee80211_bss_conf *bss_conf; struct ieee80211_if_managed *ifmgd; + int c = 0; if (!netif_running(ndev)) return NOTIFY_DONE; @@ -356,17 +343,44 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, if (wdev->wiphy != local->hw.wiphy) return NOTIFY_DONE; - /* We are concerned about IP addresses only when associated */ sdata = IEEE80211_DEV_TO_SUB_IF(ndev); + bss_conf = &sdata->vif.bss_conf; /* ARP filtering is only supported in managed mode */ if (sdata->vif.type != NL80211_IFTYPE_STATION) return NOTIFY_DONE; + idev = sdata->dev->ip_ptr; + if (!idev) + return NOTIFY_DONE; + ifmgd = &sdata->u.mgd; mutex_lock(&ifmgd->mtx); - if (ifmgd->associated) - ieee80211_set_arp_filter(sdata); + + /* Copy the addresses to the bss_conf list */ + ifa = idev->ifa_list; + while (c < IEEE80211_BSS_ARP_ADDR_LIST_LEN && ifa) { + bss_conf->arp_addr_list[c] = ifa->ifa_address; + ifa = ifa->ifa_next; + c++; + } + + /* If not all addresses fit the list, disable filtering */ + if (ifa) { + sdata->arp_filter_state = false; + c = 0; + } else { + sdata->arp_filter_state = true; + } + bss_conf->arp_addr_cnt = c; + + /* Configure driver only if associated */ + if (ifmgd->associated) { + bss_conf->arp_filter_enabled = sdata->arp_filter_state; + ieee80211_bss_info_change_notify(sdata, + BSS_CHANGED_ARP_FILTER); + } + mutex_unlock(&ifmgd->mtx); return NOTIFY_DONE; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 583b34686a26..74479c2d12d4 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -806,11 +806,12 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, { struct ieee80211_bss *bss = (void *)cbss->priv; struct ieee80211_local *local = sdata->local; + struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; bss_info_changed |= BSS_CHANGED_ASSOC; /* set timing information */ - sdata->vif.bss_conf.beacon_int = cbss->beacon_interval; - sdata->vif.bss_conf.timestamp = cbss->tsf; + bss_conf->beacon_int = cbss->beacon_interval; + bss_conf->timestamp = cbss->tsf; bss_info_changed |= 
BSS_CHANGED_BEACON_INT; bss_info_changed |= ieee80211_handle_bss_capability(sdata, @@ -835,7 +836,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, ieee80211_led_assoc(local, 1); - sdata->vif.bss_conf.assoc = 1; + bss_conf->assoc = 1; /* * For now just always ask the driver to update the basic rateset * when we have associated, we aren't checking whether it actually @@ -848,9 +849,15 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, /* Tell the driver to monitor connection quality (if supported) */ if ((local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI) && - sdata->vif.bss_conf.cqm_rssi_thold) + bss_conf->cqm_rssi_thold) bss_info_changed |= BSS_CHANGED_CQM; + /* Enable ARP filtering */ + if (bss_conf->arp_filter_enabled != sdata->arp_filter_state) { + bss_conf->arp_filter_enabled = sdata->arp_filter_state; + bss_info_changed |= BSS_CHANGED_ARP_FILTER; + } + ieee80211_bss_info_change_notify(sdata, bss_info_changed); mutex_lock(&local->iflist_mtx); @@ -932,6 +939,12 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_hw_config(local, config_changed); + /* Disable ARP filtering */ + if (sdata->vif.bss_conf.arp_filter_enabled) { + sdata->vif.bss_conf.arp_filter_enabled = false; + changed |= BSS_CHANGED_ARP_FILTER; + } + /* The BSSID (not really interesting) and HT changed */ changed |= BSS_CHANGED_BSSID | BSS_CHANGED_HT; ieee80211_bss_info_change_notify(sdata, changed); @@ -2018,18 +2031,9 @@ static enum work_done_result ieee80211_assoc_done(struct ieee80211_work *wk, cfg80211_send_assoc_timeout(wk->sdata->dev, wk->filter_ta); return WORK_DONE_DESTROY; - } else { - mutex_unlock(&wk->sdata->u.mgd.mtx); -#ifdef CONFIG_INET - /* - * configure ARP filter IP addresses to the driver, - * intentionally outside the mgd mutex. - */ - rtnl_lock(); - ieee80211_set_arp_filter(wk->sdata); - rtnl_unlock(); -#endif } + + mutex_unlock(&wk->sdata->u.mgd.mtx); } cfg80211_send_rx_assoc(wk->sdata->dev, skb->data, skb->len); -- cgit v1.2.2 From f6bc7d9e4760324258ad5f5d147e79db8442842e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Jun 2010 04:39:27 +0000 Subject: ipv6: avoid two atomics in ipv6_rthdr_rcv() Use __in6_dev_get() instead of in6_dev_get()/in6_dev_put() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/exthdrs.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 853a633a94d4..262f105d23b9 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -312,6 +312,7 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) Routing header. 
********************************/ +/* called with rcu_read_lock() */ static int ipv6_rthdr_rcv(struct sk_buff *skb) { struct inet6_skb_parm *opt = IP6CB(skb); @@ -324,12 +325,9 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) struct net *net = dev_net(skb->dev); int accept_source_route = net->ipv6.devconf_all->accept_source_route; - idev = in6_dev_get(skb->dev); - if (idev) { - if (accept_source_route > idev->cnf.accept_source_route) - accept_source_route = idev->cnf.accept_source_route; - in6_dev_put(idev); - } + idev = __in6_dev_get(skb->dev); + if (idev && accept_source_route > idev->cnf.accept_source_route) + accept_source_route = idev->cnf.accept_source_route; if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + -- cgit v1.2.2 From c68f24cc354050415c5ea543cd19ea5424463a2f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Jun 2010 04:46:20 +0000 Subject: ipv6: RCU changes in ipv6_get_mtu() and ip6_dst_hoplimit() Use RCU to avoid atomic ops on idev refcnt in ipv6_get_mtu() and ip6_dst_hoplimit() Signed-off-by: Eric Dumazet Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/route.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f7702850d45c..8f2d0400cf8a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1084,11 +1084,11 @@ static int ipv6_get_mtu(struct net_device *dev) int mtu = IPV6_MIN_MTU; struct inet6_dev *idev; - idev = in6_dev_get(dev); - if (idev) { + rcu_read_lock(); + idev = __in6_dev_get(dev); + if (idev) mtu = idev->cnf.mtu6; - in6_dev_put(idev); - } + rcu_read_unlock(); return mtu; } @@ -1097,12 +1097,15 @@ int ip6_dst_hoplimit(struct dst_entry *dst) int hoplimit = dst_metric(dst, RTAX_HOPLIMIT); if (hoplimit < 0) { struct net_device *dev = dst->dev; - struct inet6_dev *idev = in6_dev_get(dev); - if (idev) { + struct inet6_dev *idev; + + rcu_read_lock(); + idev = __in6_dev_get(dev); + if (idev) hoplimit = idev->cnf.hop_limit; - in6_dev_put(idev); - } else + else hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit; + rcu_read_unlock(); } return hoplimit; } -- cgit v1.2.2 From d6cc1d642de9284cb26488ea390d915b50ee2504 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Jun 2010 19:35:21 +0000 Subject: inetpeer: various changes Try to reduce cache line contentions in peer management, to reduce IP defragmentation overhead. - peer_fake_node is marked 'const' to make sure its not modified. (tested with CONFIG_DEBUG_RODATA=y) - Group variables in two structures to reduce number of dirtied cache lines. One named "peers" for avl tree root, its number of entries, and associated lock. (candidate for RCU conversion) - A second one named "unused_peers" for unused list and its lock - Add a !list_empty() test in unlink_from_unused() to avoid taking lock when entry is not unused. - Use atomic_dec_and_lock() in inet_putpeer() to avoid taking lock in some cases. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/inetpeer.c | 94 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 56 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 6bcfe52a9c87..035673fd42d4 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -70,17 +70,25 @@ static struct kmem_cache *peer_cachep __read_mostly; #define node_height(x) x->avl_height -static struct inet_peer peer_fake_node = { - .avl_left = &peer_fake_node, - .avl_right = &peer_fake_node, + +#define peer_avl_empty ((struct inet_peer *)&peer_fake_node) +static const struct inet_peer peer_fake_node = { + .avl_left = peer_avl_empty, + .avl_right = peer_avl_empty, .avl_height = 0 }; -#define peer_avl_empty (&peer_fake_node) -static struct inet_peer *peer_root = peer_avl_empty; -static DEFINE_RWLOCK(peer_pool_lock); + +static struct { + struct inet_peer *root; + rwlock_t lock; + int total; +} peers = { + .root = peer_avl_empty, + .lock = __RW_LOCK_UNLOCKED(peers.lock), + .total = 0, +}; #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ -static int peer_total; /* Exported for sysctl_net_ipv4. */ int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries more * aggressively at this stage */ @@ -89,8 +97,13 @@ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min int inet_peer_gc_mintime __read_mostly = 10 * HZ; int inet_peer_gc_maxtime __read_mostly = 120 * HZ; -static LIST_HEAD(unused_peers); -static DEFINE_SPINLOCK(inet_peer_unused_lock); +static struct { + struct list_head list; + spinlock_t lock; +} unused_peers = { + .list = LIST_HEAD_INIT(unused_peers.list), + .lock = __SPIN_LOCK_UNLOCKED(unused_peers.lock), +}; static void peer_check_expire(unsigned long dummy); static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0); @@ -131,9 +144,11 @@ void __init inet_initpeers(void) /* Called with or without local BH being disabled. */ static void unlink_from_unused(struct inet_peer *p) { - spin_lock_bh(&inet_peer_unused_lock); - list_del_init(&p->unused); - spin_unlock_bh(&inet_peer_unused_lock); + if (!list_empty(&p->unused)) { + spin_lock_bh(&unused_peers.lock); + list_del_init(&p->unused); + spin_unlock_bh(&unused_peers.lock); + } } /* @@ -146,9 +161,9 @@ static void unlink_from_unused(struct inet_peer *p) struct inet_peer *u, **v; \ if (_stack != NULL) { \ stackptr = _stack; \ - *stackptr++ = &peer_root; \ + *stackptr++ = &peers.root; \ } \ - for (u = peer_root; u != peer_avl_empty; ) { \ + for (u = peers.root; u != peer_avl_empty; ) { \ if (_daddr == u->v4daddr) \ break; \ if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \ @@ -262,7 +277,7 @@ do { \ n->avl_right = peer_avl_empty; \ **--stackptr = n; \ peer_avl_rebalance(stack, stackptr); \ -} while(0) +} while (0) /* May be called with local BH enabled. */ static void unlink_from_pool(struct inet_peer *p) @@ -271,7 +286,7 @@ static void unlink_from_pool(struct inet_peer *p) do_free = 0; - write_lock_bh(&peer_pool_lock); + write_lock_bh(&peers.lock); /* Check the reference counter. It was artificially incremented by 1 * in cleanup() function to prevent sudden disappearing. 
If the * reference count is still 1 then the node is referenced only as `p' @@ -303,10 +318,10 @@ static void unlink_from_pool(struct inet_peer *p) delp[1] = &t->avl_left; /* was &p->avl_left */ } peer_avl_rebalance(stack, stackptr); - peer_total--; + peers.total--; do_free = 1; } - write_unlock_bh(&peer_pool_lock); + write_unlock_bh(&peers.lock); if (do_free) kmem_cache_free(peer_cachep, p); @@ -326,16 +341,16 @@ static int cleanup_once(unsigned long ttl) struct inet_peer *p = NULL; /* Remove the first entry from the list of unused nodes. */ - spin_lock_bh(&inet_peer_unused_lock); - if (!list_empty(&unused_peers)) { + spin_lock_bh(&unused_peers.lock); + if (!list_empty(&unused_peers.list)) { __u32 delta; - p = list_first_entry(&unused_peers, struct inet_peer, unused); + p = list_first_entry(&unused_peers.list, struct inet_peer, unused); delta = (__u32)jiffies - p->dtime; if (delta < ttl) { /* Do not prune fresh entries. */ - spin_unlock_bh(&inet_peer_unused_lock); + spin_unlock_bh(&unused_peers.lock); return -1; } @@ -345,7 +360,7 @@ static int cleanup_once(unsigned long ttl) * before unlink_from_pool() call. */ atomic_inc(&p->refcnt); } - spin_unlock_bh(&inet_peer_unused_lock); + spin_unlock_bh(&unused_peers.lock); if (p == NULL) /* It means that the total number of USED entries has @@ -364,11 +379,11 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create) struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr; /* Look up for the address quickly. */ - read_lock_bh(&peer_pool_lock); + read_lock_bh(&peers.lock); p = lookup(daddr, NULL); if (p != peer_avl_empty) atomic_inc(&p->refcnt); - read_unlock_bh(&peer_pool_lock); + read_unlock_bh(&peers.lock); if (p != peer_avl_empty) { /* The existing node has been found. */ @@ -390,7 +405,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create) atomic_set(&n->ip_id_count, secure_ip_id(daddr)); n->tcp_ts_stamp = 0; - write_lock_bh(&peer_pool_lock); + write_lock_bh(&peers.lock); /* Check if an entry has suddenly appeared. */ p = lookup(daddr, stack); if (p != peer_avl_empty) @@ -399,10 +414,10 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create) /* Link the node. */ link_to_pool(n); INIT_LIST_HEAD(&n->unused); - peer_total++; - write_unlock_bh(&peer_pool_lock); + peers.total++; + write_unlock_bh(&peers.lock); - if (peer_total >= inet_peer_threshold) + if (peers.total >= inet_peer_threshold) /* Remove one less-recently-used entry. */ cleanup_once(0); @@ -411,7 +426,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create) out_free: /* The appropriate node is already in the pool. */ atomic_inc(&p->refcnt); - write_unlock_bh(&peer_pool_lock); + write_unlock_bh(&peers.lock); /* Remove the entry from unused list if it was there. */ unlink_from_unused(p); /* Free preallocated the preallocated node. */ @@ -425,12 +440,12 @@ static void peer_check_expire(unsigned long dummy) unsigned long now = jiffies; int ttl; - if (peer_total >= inet_peer_threshold) + if (peers.total >= inet_peer_threshold) ttl = inet_peer_minttl; else ttl = inet_peer_maxttl - (inet_peer_maxttl - inet_peer_minttl) / HZ * - peer_total / inet_peer_threshold * HZ; + peers.total / inet_peer_threshold * HZ; while (!cleanup_once(ttl)) { if (jiffies != now) break; @@ -439,22 +454,25 @@ static void peer_check_expire(unsigned long dummy) /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime * interval depending on the total number of entries (more entries, * less interval). 
*/ - if (peer_total >= inet_peer_threshold) + if (peers.total >= inet_peer_threshold) peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime; else peer_periodic_timer.expires = jiffies + inet_peer_gc_maxtime - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ * - peer_total / inet_peer_threshold * HZ; + peers.total / inet_peer_threshold * HZ; add_timer(&peer_periodic_timer); } void inet_putpeer(struct inet_peer *p) { - spin_lock_bh(&inet_peer_unused_lock); - if (atomic_dec_and_test(&p->refcnt)) { - list_add_tail(&p->unused, &unused_peers); + local_bh_disable(); + + if (atomic_dec_and_lock(&p->refcnt, &unused_peers.lock)) { + list_add_tail(&p->unused, &unused_peers.list); p->dtime = (__u32)jiffies; + spin_unlock(&unused_peers.lock); } - spin_unlock_bh(&inet_peer_unused_lock); + + local_bh_enable(); } -- cgit v1.2.2 From d73f33b168831e53972fbf7c85db87950a41436c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Jun 2010 13:08:51 +0200 Subject: netfilter: CLUSTERIP: RCU conversion - clusterip_lock becomes a spinlock - lockless lookups - kfree() deferred after RCU grace period - rcu_barrier_bh() inserted in clusterip_tg_exit() v2) - As Patrick pointed out, we use atomic_inc_not_zero() in clusterip_config_find_get(). - list_add_rcu() and list_del_rcu() variants are used. - atomic_dec_and_lock() used in clusterip_config_entry_put() Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 48 +++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index f91c94b9a790..64d0875f5192 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -53,12 +53,13 @@ struct clusterip_config { #endif enum clusterip_hashmode hash_mode; /* which hashing mode */ u_int32_t hash_initval; /* hash initialization */ + struct rcu_head rcu; }; static LIST_HEAD(clusterip_configs); /* clusterip_lock protects the clusterip_configs list */ -static DEFINE_RWLOCK(clusterip_lock); +static DEFINE_SPINLOCK(clusterip_lock); #ifdef CONFIG_PROC_FS static const struct file_operations clusterip_proc_fops; @@ -71,11 +72,17 @@ clusterip_config_get(struct clusterip_config *c) atomic_inc(&c->refcount); } + +static void clusterip_config_rcu_free(struct rcu_head *head) +{ + kfree(container_of(head, struct clusterip_config, rcu)); +} + static inline void clusterip_config_put(struct clusterip_config *c) { if (atomic_dec_and_test(&c->refcount)) - kfree(c); + call_rcu_bh(&c->rcu, clusterip_config_rcu_free); } /* decrease the count of entries using/referencing this config. 
If last @@ -84,10 +91,11 @@ clusterip_config_put(struct clusterip_config *c) static inline void clusterip_config_entry_put(struct clusterip_config *c) { - write_lock_bh(&clusterip_lock); - if (atomic_dec_and_test(&c->entries)) { - list_del(&c->list); - write_unlock_bh(&clusterip_lock); + local_bh_disable(); + if (atomic_dec_and_lock(&c->entries, &clusterip_lock)) { + list_del_rcu(&c->list); + spin_unlock(&clusterip_lock); + local_bh_enable(); dev_mc_del(c->dev, c->clustermac); dev_put(c->dev); @@ -100,7 +108,7 @@ clusterip_config_entry_put(struct clusterip_config *c) #endif return; } - write_unlock_bh(&clusterip_lock); + local_bh_enable(); } static struct clusterip_config * @@ -108,7 +116,7 @@ __clusterip_config_find(__be32 clusterip) { struct clusterip_config *c; - list_for_each_entry(c, &clusterip_configs, list) { + list_for_each_entry_rcu(c, &clusterip_configs, list) { if (c->clusterip == clusterip) return c; } @@ -121,16 +129,15 @@ clusterip_config_find_get(__be32 clusterip, int entry) { struct clusterip_config *c; - read_lock_bh(&clusterip_lock); + rcu_read_lock_bh(); c = __clusterip_config_find(clusterip); - if (!c) { - read_unlock_bh(&clusterip_lock); - return NULL; + if (c) { + if (unlikely(!atomic_inc_not_zero(&c->refcount))) + c = NULL; + else if (entry) + atomic_inc(&c->entries); } - atomic_inc(&c->refcount); - if (entry) - atomic_inc(&c->entries); - read_unlock_bh(&clusterip_lock); + rcu_read_unlock_bh(); return c; } @@ -181,9 +188,9 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, } #endif - write_lock_bh(&clusterip_lock); - list_add(&c->list, &clusterip_configs); - write_unlock_bh(&clusterip_lock); + spin_lock_bh(&clusterip_lock); + list_add_rcu(&c->list, &clusterip_configs); + spin_unlock_bh(&clusterip_lock); return c; } @@ -733,6 +740,9 @@ static void __exit clusterip_tg_exit(void) #endif nf_unregister_hook(&cip_arp_ops); xt_unregister_target(&clusterip_tg_reg); + + /* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */ + rcu_barrier_bh(); } module_init(clusterip_tg_init); -- cgit v1.2.2 From 0902b469bd25065aa0688c3cee6f11744c817e7c Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Tue, 15 Jun 2010 15:04:00 +0200 Subject: netfilter: xtables: idletimer target implementation This patch implements an idletimer Xtables target that can be used to identify when interfaces have been idle for a certain period of time. Timers are identified by labels and are created when a rule is set with a new label. The rules also take a timeout value (in seconds) as an option. If more than one rule uses the same timer label, the timer will be restarted whenever any of the rules get a hit. One entry for each timer is created in sysfs. This attribute contains the timer remaining for the timer to expire. The attributes are located under the xt_idletimer class: /sys/class/xt_idletimer/timers/
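
The idle-timer mechanism described in this changelog is simple to sketch. The snippet below is a minimal, hypothetical illustration (assumed structure and helper names, not the code added by this patch): every packet that hits a rule carrying a given label restarts that label's timer, so the timer only fires once the interface has been idle for the configured number of seconds, and the sysfs attribute simply reports how far away expiry still is.

#include <linux/timer.h>
#include <linux/jiffies.h>

/* one instance per timer label (hypothetical layout) */
struct idle_timer {
	struct timer_list timer;	/* expiry callback (not shown) fires after 'timeout' idle seconds */
	unsigned int timeout;		/* seconds, taken from the rule options */
};

/* called for every packet matching a rule that references this label */
static void idle_timer_hit(struct idle_timer *t)
{
	/* each hit pushes expiry another 'timeout' seconds into the future */
	mod_timer(&t->timer, jiffies + t->timeout * HZ);
}

/* value a sysfs show handler could report as the time remaining */
static unsigned long idle_timer_remaining_secs(const struct idle_timer *t)
{
	if (!timer_pending(&t->timer))
		return 0;
	return jiffies_to_msecs(t->timer.expires - jiffies) / 1000U;
}

Beyond this core idea, the real target also has to create the timer when a rule with a new label is first inserted and share it between all rules that use the same label, as described in the changelog above.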