From e12822e1d3fface0d9e1095c5177e10141bd6bd6 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 4 Jan 2010 11:37:39 -0500 Subject: cfg80211: fix syntax error on user regulatory hints This fixes a syntax error when setting up the user regulatory hint. This change yields the same exact binary object though so it ends up just being a syntax typo fix, fortunately. Cc: stable@kernel.org Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index baa898add287..7a0754c92df4 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1690,7 +1690,7 @@ int regulatory_hint_user(const char *alpha2) request->wiphy_idx = WIPHY_IDX_STALE; request->alpha2[0] = alpha2[0]; request->alpha2[1] = alpha2[1]; - request->initiator = NL80211_REGDOM_SET_BY_USER, + request->initiator = NL80211_REGDOM_SET_BY_USER; queue_regulatory_request(request); -- cgit v1.2.2 From cf0277e714a0db302a8f80e1b85fd61c32cf00b3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 5 Jan 2010 18:00:58 +0100 Subject: mac80211: fix skb buffering issue Since I removed the master netdev, we've been keeping internal queues only, and even before that we never told the networking stack above the virtual interfaces about congestion. This means that packets are queued in mac80211 and the upper layers never know, possibly leading to memory exhaustion and other problems. This patch makes all interfaces multiqueue and uses ndo_select_queue to put the packets into queues per AC. Additionally, when the driver stops a queue, we now stop all corresponding queues for the virtual interfaces as well. The injection case will use VO by default for non-data frames, and BE for data frames, but downgrade any data frames according to ACM. It needs to be fleshed out in the future to allow chosing the queue/AC in radiotap. Reported-by: Lennert Buytenhek Signed-off-by: Johannes Berg Cc: stable@kernel.org [2.6.32] Signed-off-by: John W. Linville --- net/mac80211/iface.c | 39 +++++++++++++++++++-- net/mac80211/rx.c | 4 ++- net/mac80211/tx.c | 5 ++- net/mac80211/util.c | 12 +++++++ net/mac80211/wme.c | 96 ++++++++++++++++++++++++++++++++++++++-------------- net/mac80211/wme.h | 8 +++-- 6 files changed, 132 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 80c16f6e2af6..c261cdb359eb 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -15,12 +15,14 @@ #include #include #include +#include #include "ieee80211_i.h" #include "sta_info.h" #include "debugfs_netdev.h" #include "mesh.h" #include "led.h" #include "driver-ops.h" +#include "wme.h" /** * DOC: Interface list locking @@ -644,6 +646,12 @@ static void ieee80211_teardown_sdata(struct net_device *dev) WARN_ON(flushed); } +static u16 ieee80211_netdev_select_queue(struct net_device *dev, + struct sk_buff *skb) +{ + return ieee80211_select_queue(IEEE80211_DEV_TO_SUB_IF(dev), skb); +} + static const struct net_device_ops ieee80211_dataif_ops = { .ndo_open = ieee80211_open, .ndo_stop = ieee80211_stop, @@ -652,8 +660,34 @@ static const struct net_device_ops ieee80211_dataif_ops = { .ndo_set_multicast_list = ieee80211_set_multicast_list, .ndo_change_mtu = ieee80211_change_mtu, .ndo_set_mac_address = eth_mac_addr, + .ndo_select_queue = ieee80211_netdev_select_queue, }; +static u16 ieee80211_monitor_select_queue(struct net_device *dev, + struct sk_buff *skb) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + struct ieee80211_hdr *hdr; + struct ieee80211_radiotap_header *rtap = (void *)skb->data; + + if (local->hw.queues < 4) + return 0; + + if (skb->len < 4 || + skb->len < rtap->it_len + 2 /* frame control */) + return 0; /* doesn't matter, frame will be dropped */ + + hdr = (void *)((u8 *)skb->data + rtap->it_len); + + if (!ieee80211_is_data(hdr->frame_control)) { + skb->priority = 7; + return ieee802_1d_to_ac[skb->priority]; + } + + return ieee80211_downgrade_queue(local, skb); +} + static const struct net_device_ops ieee80211_monitorif_ops = { .ndo_open = ieee80211_open, .ndo_stop = ieee80211_stop, @@ -662,6 +696,7 @@ static const struct net_device_ops ieee80211_monitorif_ops = { .ndo_set_multicast_list = ieee80211_set_multicast_list, .ndo_change_mtu = ieee80211_change_mtu, .ndo_set_mac_address = eth_mac_addr, + .ndo_select_queue = ieee80211_monitor_select_queue, }; static void ieee80211_if_setup(struct net_device *dev) @@ -768,8 +803,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ASSERT_RTNL(); - ndev = alloc_netdev(sizeof(*sdata) + local->hw.vif_data_size, - name, ieee80211_if_setup); + ndev = alloc_netdev_mq(sizeof(*sdata) + local->hw.vif_data_size, + name, ieee80211_if_setup, local->hw.queues); if (!ndev) return -ENOMEM; dev_net_set(ndev, wiphy_net(local->hw.wiphy)); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 9f2807aeaf52..54296999834b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1746,7 +1746,9 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) memset(info, 0, sizeof(*info)); info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; info->control.vif = &rx->sdata->vif; - ieee80211_select_queue(local, fwd_skb); + skb_set_queue_mapping(skb, + ieee80211_select_queue(rx->sdata, fwd_skb)); + ieee80211_set_qos_hdr(local, skb); if (is_multicast_ether_addr(fwd_hdr->addr1)) IEEE80211_IFSTA_MESH_CTR_INC(&sdata->u.mesh, fwded_mcast); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 27ceaefd7bc8..ac210b586702 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1512,7 +1512,7 @@ static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, return; } - ieee80211_select_queue(local, skb); + ieee80211_set_qos_hdr(local, skb); ieee80211_tx(sdata, skb, false); rcu_read_unlock(); } @@ -2291,6 +2291,9 @@ void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) skb_set_network_header(skb, 0); skb_set_transport_header(skb, 0); + /* send all internal mgmt frames on VO */ + skb_set_queue_mapping(skb, 0); + /* * The other path calling ieee80211_xmit is from the tasklet, * and while we can handle concurrent transmissions locking diff --git a/net/mac80211/util.c b/net/mac80211/util.c index dc76267e436e..3848140313f5 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -269,6 +269,7 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue, enum queue_stop_reason reason) { struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata; if (WARN_ON(queue >= hw->queues)) return; @@ -281,6 +282,11 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue, if (!skb_queue_empty(&local->pending[queue])) tasklet_schedule(&local->tx_pending_tasklet); + + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) + netif_tx_wake_queue(netdev_get_tx_queue(sdata->dev, queue)); + rcu_read_unlock(); } void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue, @@ -305,11 +311,17 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue, enum queue_stop_reason reason) { struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata; if (WARN_ON(queue >= hw->queues)) return; __set_bit(reason, &local->queue_stop_reasons[queue]); + + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) + netif_tx_stop_queue(netdev_get_tx_queue(sdata->dev, queue)); + rcu_read_unlock(); } void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue, diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index b19b7696f3a2..34e6d02da779 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -44,22 +44,69 @@ static int wme_downgrade_ac(struct sk_buff *skb) } -/* Indicate which queue to use. */ -static u16 classify80211(struct ieee80211_local *local, struct sk_buff *skb) +/* Indicate which queue to use. */ +u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_local *local = sdata->local; + struct sta_info *sta = NULL; + u32 sta_flags = 0; + const u8 *ra = NULL; + bool qos = false; - if (!ieee80211_is_data(hdr->frame_control)) { - /* management frames go on AC_VO queue, but are sent - * without QoS control fields */ - return 0; + if (local->hw.queues < 4 || skb->len < 6) { + skb->priority = 0; /* required for correct WPA/11i MIC */ + return min_t(u16, local->hw.queues - 1, + ieee802_1d_to_ac[skb->priority]); + } + + rcu_read_lock(); + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP_VLAN: + rcu_read_lock(); + sta = rcu_dereference(sdata->u.vlan.sta); + if (sta) + sta_flags = get_sta_flags(sta); + rcu_read_unlock(); + if (sta) + break; + case NL80211_IFTYPE_AP: + ra = skb->data; + break; + case NL80211_IFTYPE_WDS: + ra = sdata->u.wds.remote_addr; + break; +#ifdef CONFIG_MAC80211_MESH + case NL80211_IFTYPE_MESH_POINT: + /* + * XXX: This is clearly broken ... but already was before, + * because ieee80211_fill_mesh_addresses() would clear A1 + * except for multicast addresses. + */ + break; +#endif + case NL80211_IFTYPE_STATION: + ra = sdata->u.mgd.bssid; + break; + case NL80211_IFTYPE_ADHOC: + ra = skb->data; + break; + default: + break; } - if (0 /* injected */) { - /* use AC from radiotap */ + if (!sta && ra && !is_multicast_ether_addr(ra)) { + sta = sta_info_get(sdata, ra); + if (sta) + sta_flags = get_sta_flags(sta); } - if (!ieee80211_is_data_qos(hdr->frame_control)) { + if (sta_flags & WLAN_STA_WME) + qos = true; + + rcu_read_unlock(); + + if (!qos) { skb->priority = 0; /* required for correct WPA/11i MIC */ return ieee802_1d_to_ac[skb->priority]; } @@ -68,6 +115,12 @@ static u16 classify80211(struct ieee80211_local *local, struct sk_buff *skb) * data frame has */ skb->priority = cfg80211_classify8021d(skb); + return ieee80211_downgrade_queue(local, skb); +} + +u16 ieee80211_downgrade_queue(struct ieee80211_local *local, + struct sk_buff *skb) +{ /* in case we are a client verify acm is not set for this ac */ while (unlikely(local->wmm_acm & BIT(skb->priority))) { if (wme_downgrade_ac(skb)) { @@ -85,24 +138,17 @@ static u16 classify80211(struct ieee80211_local *local, struct sk_buff *skb) return ieee802_1d_to_ac[skb->priority]; } -void ieee80211_select_queue(struct ieee80211_local *local, struct sk_buff *skb) +void ieee80211_set_qos_hdr(struct ieee80211_local *local, struct sk_buff *skb) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u16 queue; - u8 tid; - - queue = classify80211(local, skb); - if (unlikely(queue >= local->hw.queues)) - queue = local->hw.queues - 1; - - /* - * Now we know the 1d priority, fill in the QoS header if - * there is one (and we haven't done this before). - */ + struct ieee80211_hdr *hdr = (void *)skb->data; + + /* Fill in the QoS header if there is one. */ if (ieee80211_is_data_qos(hdr->frame_control)) { u8 *p = ieee80211_get_qos_ctl(hdr); - u8 ack_policy = 0; + u8 ack_policy = 0, tid; + tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; + if (unlikely(local->wifi_wme_noack_test)) ack_policy |= QOS_CONTROL_ACK_POLICY_NOACK << QOS_CONTROL_ACK_POLICY_SHIFT; @@ -110,6 +156,4 @@ void ieee80211_select_queue(struct ieee80211_local *local, struct sk_buff *skb) *p++ = ack_policy | tid; *p = 0; } - - skb_set_queue_mapping(skb, queue); } diff --git a/net/mac80211/wme.h b/net/mac80211/wme.h index d4fd87ca5118..6053b1c9feee 100644 --- a/net/mac80211/wme.h +++ b/net/mac80211/wme.h @@ -20,7 +20,11 @@ extern const int ieee802_1d_to_ac[8]; -void ieee80211_select_queue(struct ieee80211_local *local, - struct sk_buff *skb); +u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); +void ieee80211_set_qos_hdr(struct ieee80211_local *local, struct sk_buff *skb); +u16 ieee80211_downgrade_queue(struct ieee80211_local *local, + struct sk_buff *skb); + #endif /* _WME_H */ -- cgit v1.2.2 From debde9ea24d5512400456b1b64df361e422f078d Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Wed, 6 Jan 2010 15:35:49 -0500 Subject: mac80211: fix-up build breakage in 2.6.33 "mac80211: fix skb buffering issue" is based on what will become 2.6.34, so it includes an incompatible usage of sta_info_get. This patch will need to be effectively reverted when merging for 2.6.34. Signed-off-by: John W. Linville --- net/mac80211/wme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 34e6d02da779..79d887dae738 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -96,7 +96,7 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, } if (!sta && ra && !is_multicast_ether_addr(ra)) { - sta = sta_info_get(sdata, ra); + sta = sta_info_get(local, ra); if (sta) sta_flags = get_sta_flags(sta); } -- cgit v1.2.2 From 8a5b33f55452c226aa0e47d737e541985ff10e16 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Wed, 6 Jan 2010 15:39:39 -0500 Subject: Revert "mac80211: replace netif_tx_{start,stop,wake}_all_queues" This reverts commit 53623f1a09c7a7d23b74f0f7d93dba0ebde1006b. This was inadvertantly missed in "mac80211: fix skb buffering issue", and is required with that patch to restore proper queue operation. Signed-off-by: John W. Linville --- net/mac80211/iface.c | 4 ++-- net/mac80211/mlme.c | 4 ++-- net/mac80211/scan.c | 10 +++++----- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index c261cdb359eb..ff762ed34f1e 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -316,7 +316,7 @@ static int ieee80211_open(struct net_device *dev) if (sdata->vif.type == NL80211_IFTYPE_STATION) ieee80211_queue_work(&local->hw, &sdata->u.mgd.work); - netif_start_queue(dev); + netif_tx_start_all_queues(dev); return 0; err_del_interface: @@ -345,7 +345,7 @@ static int ieee80211_stop(struct net_device *dev) /* * Stop TX on this interface first. */ - netif_stop_queue(dev); + netif_tx_stop_all_queues(dev); /* * Now delete all active aggregation sessions. diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c79e59f82fd9..3e1eab963b8e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -942,7 +942,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, ieee80211_recalc_ps(local, -1); mutex_unlock(&local->iflist_mtx); - netif_start_queue(sdata->dev); + netif_tx_start_all_queues(sdata->dev); netif_carrier_on(sdata->dev); } @@ -1074,7 +1074,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, * time -- we don't want the scan code to enable queues. */ - netif_stop_queue(sdata->dev); + netif_tx_stop_all_queues(sdata->dev); netif_carrier_off(sdata->dev); rcu_read_lock(); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index f1a4c7160300..f934c9620b73 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -353,10 +353,10 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) if (sdata->vif.type == NL80211_IFTYPE_STATION) { if (sdata->u.mgd.associated) { ieee80211_scan_ps_disable(sdata); - netif_wake_queue(sdata->dev); + netif_tx_wake_all_queues(sdata->dev); } } else - netif_wake_queue(sdata->dev); + netif_tx_wake_all_queues(sdata->dev); /* re-enable beaconing */ if (sdata->vif.type == NL80211_IFTYPE_AP || @@ -411,7 +411,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) * are handled in the scan state machine */ if (sdata->vif.type != NL80211_IFTYPE_STATION) - netif_stop_queue(sdata->dev); + netif_tx_stop_all_queues(sdata->dev); } mutex_unlock(&local->iflist_mtx); @@ -575,7 +575,7 @@ static void ieee80211_scan_state_leave_oper_channel(struct ieee80211_local *loca continue; if (sdata->vif.type == NL80211_IFTYPE_STATION) { - netif_stop_queue(sdata->dev); + netif_tx_stop_all_queues(sdata->dev); if (sdata->u.mgd.associated) ieee80211_scan_ps_enable(sdata); } @@ -610,7 +610,7 @@ static void ieee80211_scan_state_enter_oper_channel(struct ieee80211_local *loca if (sdata->vif.type == NL80211_IFTYPE_STATION) { if (sdata->u.mgd.associated) ieee80211_scan_ps_disable(sdata); - netif_wake_queue(sdata->dev); + netif_tx_wake_all_queues(sdata->dev); } } mutex_unlock(&local->iflist_mtx); -- cgit v1.2.2 From 045cfb71a3901005bf6dcedae98cecb3360a0bfc Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Thu, 7 Jan 2010 15:01:42 +0100 Subject: mac80211: fix queue selection for packets injected via monitor interface Commit 'mac80211: fix skb buffering issue' added an ->ndo_select_queue() for monitor interfaces which can end up dereferencing ieee802_1d_to_ac[] beyond the end of the array for injected data packets (as skb->priority isn't guaranteed to be zero or within [0:7]), which then triggers the WARN_ON in net/core/dev.c:dev_cap_txqueue(). Fix this by always setting the priority to zero on injected data frames. Signed-off-by: Lennert Buytenhek Cc: stable@kernel.org Signed-off-by: John W. Linville --- net/mac80211/iface.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index ff762ed34f1e..44188ef80a63 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -685,6 +685,7 @@ static u16 ieee80211_monitor_select_queue(struct net_device *dev, return ieee802_1d_to_ac[skb->priority]; } + skb->priority = 0; return ieee80211_downgrade_queue(local, skb); } -- cgit v1.2.2 From d79074488083ec0d7ecd15352192dc1631f25643 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 7 Jan 2010 20:23:53 +0100 Subject: mac80211: add missing sanity checks for action frames Various missing sanity checks caused rejected action frames to be interpreted as channel switch announcements, which can cause a client mode interface to switch away from its operating channel, thereby losing connectivity. This patch ensures that only spectrum management action frames are processed by the CSA handling function and prevents rejected action frames from getting processed by the MLME code. Signed-off-by: Felix Fietkau Cc: stable@kernel.org Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 4 +++- net/mac80211/rx.c | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 3e1eab963b8e..05a18f43e1bf 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1963,7 +1963,9 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, rma = ieee80211_rx_mgmt_disassoc(sdata, mgmt, skb->len); break; case IEEE80211_STYPE_ACTION: - /* XXX: differentiate, can only happen for CSA now! */ + if (mgmt->u.action.category != WLAN_CATEGORY_SPECTRUM_MGMT) + break; + ieee80211_sta_process_chanswitch(sdata, &mgmt->u.action.u.chan_switch.sw_elem, ifmgd->associated); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 54296999834b..82a30c1bf3ab 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2015,6 +2015,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) } break; default: + /* do not process rejected action frames */ + if (mgmt->u.action.category & 0x80) + return RX_DROP_MONITOR; + return RX_CONTINUE; } -- cgit v1.2.2 From b49bb574e44226b332c28439999d196ddec2f643 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 8 Jan 2010 19:00:00 +0100 Subject: mac80211: fix endian error I forgot to convert the radiotap length to CPU endian, which sparse found thankfully. Signed-off-by: Johannes Berg Cc: stable@kernel.org Signed-off-by: John W. Linville --- net/mac80211/iface.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 44188ef80a63..d62ec0803bec 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -675,10 +675,10 @@ static u16 ieee80211_monitor_select_queue(struct net_device *dev, return 0; if (skb->len < 4 || - skb->len < rtap->it_len + 2 /* frame control */) + skb->len < le16_to_cpu(rtap->it_len) + 2 /* frame control */) return 0; /* doesn't matter, frame will be dropped */ - hdr = (void *)((u8 *)skb->data + rtap->it_len); + hdr = (void *)((u8 *)skb->data + le16_to_cpu(rtap->it_len)); if (!ieee80211_is_data(hdr->frame_control)) { skb->priority = 7; -- cgit v1.2.2 From 193e70ef65a6c33f2935ce1f4adeb08ecb9202cf Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 11 Jan 2010 06:47:00 +0100 Subject: mac80211: fix queue selection for data frames on monitor interfaces When ieee80211_monitor_select_queue encounters data frames, it selects the WMM AC based on skb->priority and assumes that skb->priority contains a valid 802.1d tag. However this assumption is incorrect, since ieee80211_select_queue has not been called at this point. If skb->priority > 7, an array overrun occurs, which could lead to invalid values, resulting in crashes in the tx path. Fix this by setting skb->priority based on the 802.11 header for QoS frames and using the default AC for all non-QoS frames. Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- net/mac80211/iface.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index d62ec0803bec..32abae3ce32a 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -670,6 +670,7 @@ static u16 ieee80211_monitor_select_queue(struct net_device *dev, struct ieee80211_local *local = sdata->local; struct ieee80211_hdr *hdr; struct ieee80211_radiotap_header *rtap = (void *)skb->data; + u8 *p; if (local->hw.queues < 4) return 0; @@ -680,12 +681,14 @@ static u16 ieee80211_monitor_select_queue(struct net_device *dev, hdr = (void *)((u8 *)skb->data + le16_to_cpu(rtap->it_len)); - if (!ieee80211_is_data(hdr->frame_control)) { + if (!ieee80211_is_data_qos(hdr->frame_control)) { skb->priority = 7; return ieee802_1d_to_ac[skb->priority]; } - skb->priority = 0; + p = ieee80211_get_qos_ctl(hdr); + skb->priority = *p & IEEE80211_QOS_CTL_TAG1D_MASK; + return ieee80211_downgrade_queue(local, skb); } -- cgit v1.2.2 From 8c5d9808e95739c9001b852464fd58fd0f583280 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 11 Jan 2010 16:14:57 +0100 Subject: cfg80211: fix refcount imbalance when wext is disabled When CONFIG_CFG80211_WEXT is not set, there is a refcount imbalance with rdev->opencount, fix that by moving it out of the ifdef. Reported-by: Alan Stern Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index c2a2c563d21a..92b812442488 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -745,9 +745,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, mutex_unlock(&rdev->devlist_mtx); dev_put(dev); } -#ifdef CONFIG_CFG80211_WEXT cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); +#ifdef CONFIG_CFG80211_WEXT wdev_lock(wdev); switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: @@ -760,10 +760,10 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, break; } wdev_unlock(wdev); +#endif rdev->opencount++; mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); -#endif break; case NETDEV_UNREGISTER: /* -- cgit v1.2.2 From 2570a4f5428bcdb1077622342181755741e7fa60 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 13 Jan 2010 17:27:37 -0800 Subject: ipv6: skb_dst() can be NULL in ipv6_hop_jumbo(). This fixes CERT-FI FICORA #341748 Discovered by Olli Jarva and Tuomo Untinen from the CROSS project at Codenomicon Ltd. Just like in CVE-2007-4567, we can't rely upon skb_dst() being non-NULL at this point. We fixed that in commit e76b2b2567b83448c2ee85a896433b96150c92e6 ("[IPV6]: Do no rely on skb->dst before it is assigned.") However commit 483a47d2fe794328d29950fe00ce26dd405d9437 ("ipv6: added net argument to IP6_INC_STATS_BH") put a new version of the same bug into this function. Complicating analysis further, this bug can only trigger when network namespaces are enabled in the build. When namespaces are turned off, the dev_net() does not evaluate it's argument, so the dereference would not occur. So, for a long time, namespaces couldn't be turned on unless SYSFS was disabled. Therefore, this code has largely been disabled except by people turning it on explicitly for namespace development. With help from Eugene Teo Signed-off-by: David S. Miller --- net/ipv6/exthdrs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index df159fffe4bc..4bac362b1335 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -559,6 +559,11 @@ static inline struct inet6_dev *ipv6_skb_idev(struct sk_buff *skb) return skb_dst(skb) ? ip6_dst_idev(skb_dst(skb)) : __in6_dev_get(skb->dev); } +static inline struct net *ipv6_skb_net(struct sk_buff *skb) +{ + return skb_dst(skb) ? dev_net(skb_dst(skb)->dev) : dev_net(skb->dev); +} + /* Router Alert as of RFC 2711 */ static int ipv6_hop_ra(struct sk_buff *skb, int optoff) @@ -580,8 +585,8 @@ static int ipv6_hop_ra(struct sk_buff *skb, int optoff) static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff) { const unsigned char *nh = skb_network_header(skb); + struct net *net = ipv6_skb_net(skb); u32 pkt_len; - struct net *net = dev_net(skb_dst(skb)->dev); if (nh[optoff + 1] != 4 || (optoff & 3) != 2) { LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", -- cgit v1.2.2 From 5f6120335c701ba07d5151206071f4d6ccaa684f Mon Sep 17 00:00:00 2001 From: Abhijeet Kolekar Date: Wed, 13 Jan 2010 13:23:14 -0800 Subject: cfg80211: fix channel setting for wext Patch fixes the bug at http://bugzilla.intellinuxwireless.org/show_bug.cgi?id=2139 Currently we cannot set the channel using wext extension if we have already associated and disconnected. As cfg80211_mgd_wext_siwfreq will not switch the channel if ssid is set. This fixes it by clearing the ssid. Following is the sequence which it tries to fix. modprobe iwlagn iwconfig wlan0 essid "" ifconfig wlan0 down iwconfig wlan0 chan X wext is marked as deprecate.If we use nl80211 we can easily play with setting the channel. Signed-off-by: Abhijeet Kolekar Acked-by: Samuel Ortiz cc: stable@kernel.org Signed-off-by: John W. Linville --- net/wireless/sme.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 2333d78187e4..dc0fc4989d54 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -655,6 +655,7 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, memset(&wrqu, 0, sizeof(wrqu)); wrqu.ap_addr.sa_family = ARPHRD_ETHER; wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); + wdev->wext.connect.ssid_len = 0; #endif } -- cgit v1.2.2 From 38ff3e6bb987ec583268da8eb22628293095d43b Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 15 Jan 2010 01:40:55 -0800 Subject: dccp_probe: Fix module load dependencies between dccp and dccp_probe This was just recently reported to me. When built as modules, the dccp_probe module has a silent dependency on the dccp module. This stems from the fact that the module_init routine of dccp_probe registers a jprobe on the dccp_sendmsg symbol. Since the symbol is only referenced as a text string (the .symbol_name field in the jprobe struct) rather than the address of the symbol itself, depmod never picks this dependency up, and so if you load the dccp_probe module without the dccp module loaded, the register_jprobe call fails with an -EINVAL, and the whole module load fails. The fix is pretty easy, we can just wrap the register_jprobe call in a try_then_request_module call, which forces the dependency to get satisfied prior to the probe registration. Signed-off-by: Neil Horman Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/probe.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/dccp/probe.c b/net/dccp/probe.c index dc328425fa20..6fcfe8745043 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -163,7 +163,8 @@ static __init int dccpprobe_init(void) if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops)) goto err0; - ret = register_jprobe(&dccp_send_probe); + ret = try_then_request_module((register_jprobe(&dccp_send_probe) == 0), + "dccp"); if (ret) goto err1; -- cgit v1.2.2 From 2a04cd4c7d41c4549764734dcf5a883d304e3229 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Fri, 15 Jan 2010 01:49:28 -0800 Subject: appletalk:: da.s_net not copied but assigned to itself in aarp_rcv() da.s_net was not copied but assigned to itself. Signed-off-by: Roel Kluin Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/appletalk/aarp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 9d4adfd22757..f2b3b56aa779 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -819,7 +819,7 @@ static int aarp_rcv(struct sk_buff *skb, struct net_device *dev, ma = &ifa->address; else { /* We need to make a copy of the entry. */ da.s_node = sa.s_node; - da.s_net = da.s_net; + da.s_net = sa.s_net; ma = &da; } -- cgit v1.2.2 From e5de30c9bf4a39db9f54c4a373470ce65881ade0 Mon Sep 17 00:00:00 2001 From: Benoit Papillault Date: Fri, 15 Jan 2010 12:21:37 +0100 Subject: mac80211: check that ieee80211_set_power_mgmt only handles STA interfaces. ieee80211_set_power_mgmt is meant for STA interfaces only. Moreover, since sdata->u.mgd.mtx is only initialized for STA interfaces, using this code for any other type of interface (like creating a monitor interface) will result in a oops. Cc: stable@kernel.org Signed-off-by: Benoit Papillault Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 6dc3579c0ac5..9ae1a4760b58 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1331,6 +1331,9 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_conf *conf = &local->hw.conf; + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return -EOPNOTSUPP; + if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS)) return -EOPNOTSUPP; -- cgit v1.2.2 From d00c362f1b0ff54161e0a42b4554ac621a9ef92d Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Sat, 16 Jan 2010 01:04:04 -0800 Subject: ax25: netrom: rose: Fix timer oopses Wrong ax25_cb refcounting in ax25_send_frame() and by its callers can cause timer oopses (first reported with 2.6.29.6 kernel). Fixes: http://bugzilla.kernel.org/show_bug.cgi?id=14905 Reported-by: Bernard Pidoux Tested-by: Bernard Pidoux Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/ax25/ax25_out.c | 6 ++++++ net/netrom/nr_route.c | 11 ++++++----- net/rose/rose_link.c | 8 ++++++++ net/rose/rose_route.c | 5 +++++ 4 files changed, 25 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c index bf706f83a5c9..14912600ec57 100644 --- a/net/ax25/ax25_out.c +++ b/net/ax25/ax25_out.c @@ -92,6 +92,12 @@ ax25_cb *ax25_send_frame(struct sk_buff *skb, int paclen, ax25_address *src, ax2 #endif } + /* + * There is one ref for the state machine; a caller needs + * one more to put it back, just like with the existing one. + */ + ax25_cb_hold(ax25); + ax25_cb_add(ax25); ax25->state = AX25_STATE_1; diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index aacba76070fc..e2e2d33cafdf 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -843,12 +843,13 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) dptr = skb_push(skb, 1); *dptr = AX25_P_NETROM; - ax25s = ax25_send_frame(skb, 256, (ax25_address *)dev->dev_addr, &nr_neigh->callsign, nr_neigh->digipeat, nr_neigh->dev); - if (nr_neigh->ax25 && ax25s) { - /* We were already holding this ax25_cb */ + ax25s = nr_neigh->ax25; + nr_neigh->ax25 = ax25_send_frame(skb, 256, + (ax25_address *)dev->dev_addr, + &nr_neigh->callsign, + nr_neigh->digipeat, nr_neigh->dev); + if (ax25s) ax25_cb_put(ax25s); - } - nr_neigh->ax25 = ax25s; dev_put(dev); ret = (nr_neigh->ax25 != NULL); diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c index bd86a63960ce..5ef5f6988a2e 100644 --- a/net/rose/rose_link.c +++ b/net/rose/rose_link.c @@ -101,13 +101,17 @@ static void rose_t0timer_expiry(unsigned long param) static int rose_send_frame(struct sk_buff *skb, struct rose_neigh *neigh) { ax25_address *rose_call; + ax25_cb *ax25s; if (ax25cmp(&rose_callsign, &null_ax25_address) == 0) rose_call = (ax25_address *)neigh->dev->dev_addr; else rose_call = &rose_callsign; + ax25s = neigh->ax25; neigh->ax25 = ax25_send_frame(skb, 260, rose_call, &neigh->callsign, neigh->digipeat, neigh->dev); + if (ax25s) + ax25_cb_put(ax25s); return (neigh->ax25 != NULL); } @@ -120,13 +124,17 @@ static int rose_send_frame(struct sk_buff *skb, struct rose_neigh *neigh) static int rose_link_up(struct rose_neigh *neigh) { ax25_address *rose_call; + ax25_cb *ax25s; if (ax25cmp(&rose_callsign, &null_ax25_address) == 0) rose_call = (ax25_address *)neigh->dev->dev_addr; else rose_call = &rose_callsign; + ax25s = neigh->ax25; neigh->ax25 = ax25_find_cb(rose_call, &neigh->callsign, neigh->digipeat, neigh->dev); + if (ax25s) + ax25_cb_put(ax25s); return (neigh->ax25 != NULL); } diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 795c4b025e31..70a0b3b4b4d2 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -235,6 +235,8 @@ static void rose_remove_neigh(struct rose_neigh *rose_neigh) if ((s = rose_neigh_list) == rose_neigh) { rose_neigh_list = rose_neigh->next; + if (rose_neigh->ax25) + ax25_cb_put(rose_neigh->ax25); kfree(rose_neigh->digipeat); kfree(rose_neigh); return; @@ -243,6 +245,8 @@ static void rose_remove_neigh(struct rose_neigh *rose_neigh) while (s != NULL && s->next != NULL) { if (s->next == rose_neigh) { s->next = rose_neigh->next; + if (rose_neigh->ax25) + ax25_cb_put(rose_neigh->ax25); kfree(rose_neigh->digipeat); kfree(rose_neigh); return; @@ -812,6 +816,7 @@ void rose_link_failed(ax25_cb *ax25, int reason) if (rose_neigh != NULL) { rose_neigh->ax25 = NULL; + ax25_cb_put(ax25); rose_del_route_by_neigh(rose_neigh); rose_kill_by_neigh(rose_neigh); -- cgit v1.2.2 From 0a931acfd19faf13129a22a46c06f330ecc2a4a3 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 17 Jan 2010 03:32:50 +0000 Subject: ipv4: don't remove /proc/net/rt_acct /proc/net/rt_acct is not created if NET_CLS_ROUTE=n. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e446496f564f..d62b05d33384 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -586,7 +586,9 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net) { remove_proc_entry("rt_cache", net->proc_net_stat); remove_proc_entry("rt_cache", net->proc_net); +#ifdef CONFIG_NET_CLS_ROUTE remove_proc_entry("rt_acct", net->proc_net); +#endif } static struct pernet_operations ip_rt_proc_ops __net_initdata = { -- cgit v1.2.2 From e850f68b8f27a76c4377c34f2e216b8e7e10db3d Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Sat, 16 Jan 2010 14:36:52 -0500 Subject: mac80211: fix sign error in pid controller While testing the pid rate controller in mac80211_hwsim, I noticed that once the controller reached 54 Mbit rates, it would fail to lower the rate when necessary. The debug log shows: 1945 186786 pf_sample 50 3534 3577 50 My interpretation is that the fixed point scaling of the target error value (pf) is incorrect: the error value of 50 compared to a target of 14 case should result in a scaling value of (14-50) = -36 * 256 or -9216, but instead it is (14 * 256)-50, or 3534. Correct this by doing fixed point scaling after subtraction. Signed-off-by: Bob Copeland Acked-by: Stefano Brivio Acked-by: Mattias Nissler Signed-off-by: John W. Linville --- net/mac80211/rc80211_pid_algo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c index 699d3ed869c4..29bc4c516238 100644 --- a/net/mac80211/rc80211_pid_algo.c +++ b/net/mac80211/rc80211_pid_algo.c @@ -190,7 +190,7 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo, rate_control_pid_normalize(pinfo, sband->n_bitrates); /* Compute the proportional, integral and derivative errors. */ - err_prop = (pinfo->target << RC_PID_ARITH_SHIFT) - pf; + err_prop = (pinfo->target - pf) << RC_PID_ARITH_SHIFT; err_avg = spinfo->err_avg_sc >> pinfo->smoothing_shift; spinfo->err_avg_sc = spinfo->err_avg_sc - err_avg + err_prop; -- cgit v1.2.2 From de4ef86cfce60d2250111f34f8a084e769f23b16 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Sun, 17 Jan 2010 17:16:12 +0000 Subject: dccp: fix dccp rmmod when kernel configured to use slub Hey all- I was tinkering with dccp recently and noticed that I BUG halted the kernel when I rmmod-ed the dccp module. The bug halt occured because the page that I passed to kfree failed the PageCompound and PageSlab test in the slub implementation of kfree. I tracked the problem down to the following set of events: 1) dccp, unlike all other uses of kmem_cache_create, allocates a string dynamically when registering a slab cache. This allocated string is freed when the cache is destroyed. 2) Normally, (1) is not an issue, but when Slub is in use, it is possible that caches are 'merged'. This process causes multiple caches of simmilar configuration to use the same cache data structure. When this happens, the new name of the cache is effectively dropped. 3) (2) results in kmem_cache_name returning an ambigous value (i.e. ccid_kmem_cache_destroy, which uses this fuction to retrieve the name pointer for freeing), is no longer guaranteed that the string it assigned is what is returned. 4) If such merge event occurs, ccid_kmem_cache_destroy frees the wrong pointer, which trips over the BUG in the slub implementation of kfree (since its likely not a slab allocation, but rather a pointer into the static string table section. So, what to do about this. At first blush this is pretty clearly a leak in the information that slub owns, and as such a slub bug. Unfortunately, theres no really good way to fix it, without exposing slub specific implementation details to the generic slab interface. Also, even if we could fix this in slub cleanly, I think the RCU free option would force us to do lots of string duplication, not only in slub, but in every slab allocator. As such, I'd like to propose this solution. Basically, I just move the storage for the kmem cache name to the ccid_operations structure. In so doing, we don't have to do the kstrdup or kfree when we allocate/free the various caches for dccp, and so we avoid the problem, by storing names with static memory, rather than heap, the way all other calls to kmem_cache_create do. I've tested this out myself here, and it solves the problem quite well. Signed-off-by: Neil Horman Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccid.c | 18 +++++------------- net/dccp/ccid.h | 2 ++ 2 files changed, 7 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index f3e9ba1cfd01..57dfb9c8c4f2 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -77,34 +77,24 @@ int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, return err; } -static struct kmem_cache *ccid_kmem_cache_create(int obj_size, const char *fmt,...) +static struct kmem_cache *ccid_kmem_cache_create(int obj_size, char *slab_name_fmt, const char *fmt,...) { struct kmem_cache *slab; - char slab_name_fmt[32], *slab_name; va_list args; va_start(args, fmt); vsnprintf(slab_name_fmt, sizeof(slab_name_fmt), fmt, args); va_end(args); - slab_name = kstrdup(slab_name_fmt, GFP_KERNEL); - if (slab_name == NULL) - return NULL; - slab = kmem_cache_create(slab_name, sizeof(struct ccid) + obj_size, 0, + slab = kmem_cache_create(slab_name_fmt, sizeof(struct ccid) + obj_size, 0, SLAB_HWCACHE_ALIGN, NULL); - if (slab == NULL) - kfree(slab_name); return slab; } static void ccid_kmem_cache_destroy(struct kmem_cache *slab) { - if (slab != NULL) { - const char *name = kmem_cache_name(slab); - + if (slab != NULL) kmem_cache_destroy(slab); - kfree(name); - } } static int ccid_activate(struct ccid_operations *ccid_ops) @@ -113,6 +103,7 @@ static int ccid_activate(struct ccid_operations *ccid_ops) ccid_ops->ccid_hc_rx_slab = ccid_kmem_cache_create(ccid_ops->ccid_hc_rx_obj_size, + ccid_ops->ccid_hc_rx_slab_name, "ccid%u_hc_rx_sock", ccid_ops->ccid_id); if (ccid_ops->ccid_hc_rx_slab == NULL) @@ -120,6 +111,7 @@ static int ccid_activate(struct ccid_operations *ccid_ops) ccid_ops->ccid_hc_tx_slab = ccid_kmem_cache_create(ccid_ops->ccid_hc_tx_obj_size, + ccid_ops->ccid_hc_tx_slab_name, "ccid%u_hc_tx_sock", ccid_ops->ccid_id); if (ccid_ops->ccid_hc_tx_slab == NULL) diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index facedd20b531..269958bf7fe9 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -49,6 +49,8 @@ struct ccid_operations { const char *ccid_name; struct kmem_cache *ccid_hc_rx_slab, *ccid_hc_tx_slab; + char ccid_hc_rx_slab_name[32]; + char ccid_hc_tx_slab_name[32]; __u32 ccid_hc_rx_obj_size, ccid_hc_tx_obj_size; /* Interface Routines */ -- cgit v1.2.2 From 135d01899b1fba17045961febff7e5141db6048f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 19 Jan 2010 19:06:59 +0100 Subject: netfilter: nf_conntrack_sip: fix off-by-one in compact header parsing In a string like "v:SIP/2.0..." it was checking for !isalpha('S') when it meant to be inspecting the ':'. Patch by Greg Alexander Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_sip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 4b572163784b..023966b569bf 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -376,7 +376,7 @@ int ct_sip_get_header(const struct nf_conn *ct, const char *dptr, dptr += hdr->len; else if (hdr->cname && limit - dptr >= hdr->clen + 1 && strnicmp(dptr, hdr->cname, hdr->clen) == 0 && - !isalpha(*(dptr + hdr->clen + 1))) + !isalpha(*(dptr + hdr->clen))) dptr += hdr->clen; else continue; -- cgit v1.2.2 From b4ced2b768ab6c580148d1163c82a655fe147edc Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Tue, 19 Jan 2010 14:12:20 -0800 Subject: netlink: With opcode INET_DIAG_BC_S_LE dport was compared in inet_diag_bc_run() The s-port should be compared. Signed-off-by: Roel Kluin Signed-off-by: David S. Miller --- net/ipv4/inet_diag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index bdb78dd180ce..1aaa8110d84b 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -368,7 +368,7 @@ static int inet_diag_bc_run(const void *bc, int len, yes = entry->sport >= op[1].no; break; case INET_DIAG_BC_S_LE: - yes = entry->dport <= op[1].no; + yes = entry->sport <= op[1].no; break; case INET_DIAG_BC_D_GE: yes = entry->dport >= op[1].no; -- cgit v1.2.2 From e071041be037eca208b62b84469a06bdfc692bea Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 23 Jan 2010 13:37:10 +0000 Subject: netns xfrm: fix "ip xfrm state|policy count" misreport "ip xfrm state|policy count" report SA/SP count from init_net, not from netns of caller process. Signed-off-by: Alexey Dobriyan Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 16 ++++++++-------- net/xfrm/xfrm_state.c | 6 +++--- net/xfrm/xfrm_user.c | 14 ++++++++------ 3 files changed, 19 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 4725a549ad4d..d2c8cb57ee4c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -469,16 +469,16 @@ static inline int xfrm_byidx_should_resize(struct net *net, int total) return 0; } -void xfrm_spd_getinfo(struct xfrmk_spdinfo *si) +void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si) { read_lock_bh(&xfrm_policy_lock); - si->incnt = init_net.xfrm.policy_count[XFRM_POLICY_IN]; - si->outcnt = init_net.xfrm.policy_count[XFRM_POLICY_OUT]; - si->fwdcnt = init_net.xfrm.policy_count[XFRM_POLICY_FWD]; - si->inscnt = init_net.xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX]; - si->outscnt = init_net.xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX]; - si->fwdscnt = init_net.xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; - si->spdhcnt = init_net.xfrm.policy_idx_hmask; + si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN]; + si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT]; + si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD]; + si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX]; + si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX]; + si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; + si->spdhcnt = net->xfrm.policy_idx_hmask; si->spdhmcnt = xfrm_policy_hashmax; read_unlock_bh(&xfrm_policy_lock); } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index d847f1a52b44..b36cc344474b 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -641,11 +641,11 @@ out: } EXPORT_SYMBOL(xfrm_state_flush); -void xfrm_sad_getinfo(struct xfrmk_sadinfo *si) +void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si) { spin_lock_bh(&xfrm_state_lock); - si->sadcnt = init_net.xfrm.state_num; - si->sadhcnt = init_net.xfrm.state_hmask; + si->sadcnt = net->xfrm.state_num; + si->sadhcnt = net->xfrm.state_hmask; si->sadhmcnt = xfrm_state_hashmax; spin_unlock_bh(&xfrm_state_lock); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 1ada6186933c..d5a712976004 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -781,7 +781,8 @@ static inline size_t xfrm_spdinfo_msgsize(void) + nla_total_size(sizeof(struct xfrmu_spdhinfo)); } -static int build_spdinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) +static int build_spdinfo(struct sk_buff *skb, struct net *net, + u32 pid, u32 seq, u32 flags) { struct xfrmk_spdinfo si; struct xfrmu_spdinfo spc; @@ -795,7 +796,7 @@ static int build_spdinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) f = nlmsg_data(nlh); *f = flags; - xfrm_spd_getinfo(&si); + xfrm_spd_getinfo(net, &si); spc.incnt = si.incnt; spc.outcnt = si.outcnt; spc.fwdcnt = si.fwdcnt; @@ -828,7 +829,7 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, if (r_skb == NULL) return -ENOMEM; - if (build_spdinfo(r_skb, spid, seq, *flags) < 0) + if (build_spdinfo(r_skb, net, spid, seq, *flags) < 0) BUG(); return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid); @@ -841,7 +842,8 @@ static inline size_t xfrm_sadinfo_msgsize(void) + nla_total_size(4); /* XFRMA_SAD_CNT */ } -static int build_sadinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) +static int build_sadinfo(struct sk_buff *skb, struct net *net, + u32 pid, u32 seq, u32 flags) { struct xfrmk_sadinfo si; struct xfrmu_sadhinfo sh; @@ -854,7 +856,7 @@ static int build_sadinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) f = nlmsg_data(nlh); *f = flags; - xfrm_sad_getinfo(&si); + xfrm_sad_getinfo(net, &si); sh.sadhmcnt = si.sadhmcnt; sh.sadhcnt = si.sadhcnt; @@ -882,7 +884,7 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, if (r_skb == NULL) return -ENOMEM; - if (build_sadinfo(r_skb, spid, seq, *flags) < 0) + if (build_sadinfo(r_skb, net, spid, seq, *flags) < 0) BUG(); return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid); -- cgit v1.2.2 From 2dc85e91d05be91343033df78e4e6d99dc7ea50e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 24 Jan 2010 19:52:24 -0800 Subject: vlan: fix vlan_skb_recv() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bruno Prémont found commit 9793241fe92f7d930 (vlan: Precise RX stats accounting) added a regression for non hw accelerated vlans. [ 26.390576] BUG: unable to handle kernel NULL pointer dereference at (null) [ 26.396369] IP: [] vlan_skb_recv+0x89/0x280 [8021q] vlan_dev_info() was used with original device, instead of skb->dev. Also spotted by Américo Wang. Reported-By: Bruno Prémont Tested-By: Bruno Prémont Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index b7889782047e..c1b92cab46c7 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -163,7 +163,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, goto err_unlock; } - rx_stats = per_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats, + rx_stats = per_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats, smp_processor_id()); rx_stats->rx_packets++; rx_stats->rx_bytes += skb->len; -- cgit v1.2.2 From d7c7544c3d5f59033d1bf3236bc7b289f5f26b75 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 24 Jan 2010 22:47:53 -0800 Subject: netns xfrm: deal with dst entries in netns GC is non-existent in netns, so after you hit GC threshold, no new dst entries will be created until someone triggers cleanup in init_net. Make xfrm4_dst_ops and xfrm6_dst_ops per-netns. This is not done in a generic way, because it woule waste (AF_MAX - 2) * sizeof(struct dst_ops) bytes per-netns. Reorder GC threshold initialization so it'd be done before registering XFRM policies. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv4/xfrm4_policy.c | 14 +++++++----- net/ipv6/xfrm6_policy.c | 25 ++++++++++++--------- net/xfrm/xfrm_policy.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 78 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 8c08a28d8f83..67107d63c1cd 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -15,7 +15,6 @@ #include #include -static struct dst_ops xfrm4_dst_ops; static struct xfrm_policy_afinfo xfrm4_policy_afinfo; static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, @@ -190,8 +189,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) static inline int xfrm4_garbage_collect(struct dst_ops *ops) { - xfrm4_policy_afinfo.garbage_collect(&init_net); - return (atomic_read(&xfrm4_dst_ops.entries) > xfrm4_dst_ops.gc_thresh*2); + struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops); + + xfrm4_policy_afinfo.garbage_collect(net); + return (atomic_read(&ops->entries) > ops->gc_thresh * 2); } static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu) @@ -268,7 +269,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { static struct ctl_table xfrm4_policy_table[] = { { .procname = "xfrm4_gc_thresh", - .data = &xfrm4_dst_ops.gc_thresh, + .data = &init_net.xfrm.xfrm4_dst_ops.gc_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, @@ -295,8 +296,6 @@ static void __exit xfrm4_policy_fini(void) void __init xfrm4_init(int rt_max_size) { - xfrm4_state_init(); - xfrm4_policy_init(); /* * Select a default value for the gc_thresh based on the main route * table hash size. It seems to me the worst case scenario is when @@ -308,6 +307,9 @@ void __init xfrm4_init(int rt_max_size) * and start cleaning when were 1/2 full */ xfrm4_dst_ops.gc_thresh = rt_max_size/2; + + xfrm4_state_init(); + xfrm4_policy_init(); #ifdef CONFIG_SYSCTL sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path, xfrm4_policy_table); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 7254e3f899a7..dbdc696f5fc5 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -24,7 +24,6 @@ #include #endif -static struct dst_ops xfrm6_dst_ops; static struct xfrm_policy_afinfo xfrm6_policy_afinfo; static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, @@ -224,8 +223,10 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) static inline int xfrm6_garbage_collect(struct dst_ops *ops) { - xfrm6_policy_afinfo.garbage_collect(&init_net); - return (atomic_read(&xfrm6_dst_ops.entries) > xfrm6_dst_ops.gc_thresh*2); + struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops); + + xfrm6_policy_afinfo.garbage_collect(net); + return (atomic_read(&ops->entries) > ops->gc_thresh * 2); } static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu) @@ -310,7 +311,7 @@ static void xfrm6_policy_fini(void) static struct ctl_table xfrm6_policy_table[] = { { .procname = "xfrm6_gc_thresh", - .data = &xfrm6_dst_ops.gc_thresh, + .data = &init_net.xfrm.xfrm6_dst_ops.gc_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, @@ -326,13 +327,6 @@ int __init xfrm6_init(void) int ret; unsigned int gc_thresh; - ret = xfrm6_policy_init(); - if (ret) - goto out; - - ret = xfrm6_state_init(); - if (ret) - goto out_policy; /* * We need a good default value for the xfrm6 gc threshold. * In ipv4 we set it to the route hash table size * 8, which @@ -346,6 +340,15 @@ int __init xfrm6_init(void) */ gc_thresh = FIB6_TABLE_HASHSZ * 8; xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 1024 : gc_thresh; + + ret = xfrm6_policy_init(); + if (ret) + goto out; + + ret = xfrm6_state_init(); + if (ret) + goto out_policy; + #ifdef CONFIG_SYSCTL sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv6_ctl_path, xfrm6_policy_table); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d2c8cb57ee4c..0ecb16a9a883 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1309,15 +1309,28 @@ static inline int xfrm_get_tos(struct flowi *fl, int family) return tos; } -static inline struct xfrm_dst *xfrm_alloc_dst(int family) +static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + struct dst_ops *dst_ops; struct xfrm_dst *xdst; if (!afinfo) return ERR_PTR(-EINVAL); - xdst = dst_alloc(afinfo->dst_ops) ?: ERR_PTR(-ENOBUFS); + switch (family) { + case AF_INET: + dst_ops = &net->xfrm.xfrm4_dst_ops; + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + dst_ops = &net->xfrm.xfrm6_dst_ops; + break; +#endif + default: + BUG(); + } + xdst = dst_alloc(dst_ops) ?: ERR_PTR(-ENOBUFS); xfrm_policy_put_afinfo(afinfo); @@ -1366,6 +1379,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, struct flowi *fl, struct dst_entry *dst) { + struct net *net = xp_net(policy); unsigned long now = jiffies; struct net_device *dev; struct dst_entry *dst_prev = NULL; @@ -1389,7 +1403,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, dst_hold(dst); for (; i < nx; i++) { - struct xfrm_dst *xdst = xfrm_alloc_dst(family); + struct xfrm_dst *xdst = xfrm_alloc_dst(net, family); struct dst_entry *dst1 = &xdst->u.dst; err = PTR_ERR(xdst); @@ -2279,6 +2293,7 @@ EXPORT_SYMBOL(xfrm_bundle_ok); int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) { + struct net *net; int err = 0; if (unlikely(afinfo == NULL)) return -EINVAL; @@ -2302,6 +2317,27 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) xfrm_policy_afinfo[afinfo->family] = afinfo; } write_unlock_bh(&xfrm_policy_afinfo_lock); + + rtnl_lock(); + for_each_net(net) { + struct dst_ops *xfrm_dst_ops; + + switch (afinfo->family) { + case AF_INET: + xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops; + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops; + break; +#endif + default: + BUG(); + } + *xfrm_dst_ops = *afinfo->dst_ops; + } + rtnl_unlock(); + return err; } EXPORT_SYMBOL(xfrm_policy_register_afinfo); @@ -2332,6 +2368,22 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_policy_unregister_afinfo); +static void __net_init xfrm_dst_ops_init(struct net *net) +{ + struct xfrm_policy_afinfo *afinfo; + + read_lock_bh(&xfrm_policy_afinfo_lock); + afinfo = xfrm_policy_afinfo[AF_INET]; + if (afinfo) + net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + afinfo = xfrm_policy_afinfo[AF_INET6]; + if (afinfo) + net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops; +#endif + read_unlock_bh(&xfrm_policy_afinfo_lock); +} + static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) { struct xfrm_policy_afinfo *afinfo; @@ -2494,6 +2546,7 @@ static int __net_init xfrm_net_init(struct net *net) rv = xfrm_policy_init(net); if (rv < 0) goto out_policy; + xfrm_dst_ops_init(net); rv = xfrm_sysctl_init(net); if (rv < 0) goto out_sysctl; -- cgit v1.2.2 From f81074f86176605bfbfafb9944e27628a4e26ce6 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 25 Jan 2010 15:47:50 -0800 Subject: tcp_probe: avoid modulus operation and wrap fix By rounding up the buffer size to power of 2, several expensive modulus operations can be avoided. This patch also solves a bug where the gap need when ring gets full was not being accounted for. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/tcp_probe.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index bb110c5ce1d2..9bc805df95d2 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -39,9 +39,9 @@ static int port __read_mostly = 0; MODULE_PARM_DESC(port, "Port to match (0=all)"); module_param(port, int, 0); -static int bufsize __read_mostly = 4096; +static unsigned int bufsize __read_mostly = 4096; MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)"); -module_param(bufsize, int, 0); +module_param(bufsize, uint, 0); static int full __read_mostly; MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)"); @@ -75,12 +75,12 @@ static struct { static inline int tcp_probe_used(void) { - return (tcp_probe.head - tcp_probe.tail) % bufsize; + return (tcp_probe.head - tcp_probe.tail) & (bufsize - 1); } static inline int tcp_probe_avail(void) { - return bufsize - tcp_probe_used(); + return bufsize - tcp_probe_used() - 1; } /* @@ -116,7 +116,7 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, p->ssthresh = tcp_current_ssthresh(sk); p->srtt = tp->srtt >> 3; - tcp_probe.head = (tcp_probe.head + 1) % bufsize; + tcp_probe.head = (tcp_probe.head + 1) & (bufsize - 1); } tcp_probe.lastcwnd = tp->snd_cwnd; spin_unlock(&tcp_probe.lock); @@ -149,7 +149,7 @@ static int tcpprobe_open(struct inode * inode, struct file * file) static int tcpprobe_sprint(char *tbuf, int n) { const struct tcp_log *p - = tcp_probe.log + tcp_probe.tail % bufsize; + = tcp_probe.log + tcp_probe.tail; struct timespec tv = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start)); @@ -192,7 +192,7 @@ static ssize_t tcpprobe_read(struct file *file, char __user *buf, width = tcpprobe_sprint(tbuf, sizeof(tbuf)); if (cnt + width < len) - tcp_probe.tail = (tcp_probe.tail + 1) % bufsize; + tcp_probe.tail = (tcp_probe.tail + 1) & (bufsize - 1); spin_unlock_bh(&tcp_probe.lock); @@ -222,9 +222,10 @@ static __init int tcpprobe_init(void) init_waitqueue_head(&tcp_probe.wait); spin_lock_init(&tcp_probe.lock); - if (bufsize < 0) + if (bufsize == 0) return -EINVAL; + bufsize = roundup_pow_of_two(bufsize); tcp_probe.log = kcalloc(bufsize, sizeof(struct tcp_log), GFP_KERNEL); if (!tcp_probe.log) goto err0; @@ -236,7 +237,7 @@ static __init int tcpprobe_init(void) if (ret) goto err1; - pr_info("TCP probe registered (port=%d)\n", port); + pr_info("TCP probe registered (port=%d) bufsize=%u\n", port, bufsize); return 0; err1: proc_net_remove(&init_net, procname); -- cgit v1.2.2 From c92b544bd5d8e7ed7d81c77bbecab6df2a95aa53 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Tue, 26 Jan 2010 02:40:38 +0000 Subject: ipv6: conntrack: Add member of user to nf_ct_frag6_queue structure The commit 0b5ccb2(title:ipv6: reassembly: use seperate reassembly queues for conntrack and local delivery) has broken the saddr&&daddr member of nf_ct_frag6_queue when creating new queue. And then hash value generated by nf_hashfn() was not equal with that generated by fq_find(). So, a new received fragment can't be inserted to right queue. The patch fixes the bug with adding member of user to nf_ct_frag6_queue structure. Signed-off-by: Shan Wei Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter/nf_conntrack_reasm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 312c20adc83f..624a54832a7c 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -63,6 +63,7 @@ struct nf_ct_frag6_queue struct inet_frag_queue q; __be32 id; /* fragment id */ + u32 user; struct in6_addr saddr; struct in6_addr daddr; -- cgit v1.2.2 From e578756c35859a459d78d8416195bc5f5ff897d0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 26 Jan 2010 17:04:02 +0100 Subject: netfilter: ctnetlink: fix expectation mask dump The protocol number is not initialized, so userspace can't interpret the layer 4 data properly. Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_netlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 59d8064eb522..42f21c01a93e 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1437,8 +1437,9 @@ ctnetlink_exp_dump_mask(struct sk_buff *skb, struct nlattr *nest_parms; memset(&m, 0xFF, sizeof(m)); - m.src.u.all = mask->src.u.all; memcpy(&m.src.u3, &mask->src.u3, sizeof(m.src.u3)); + m.src.u.all = mask->src.u.all; + m.dst.protonum = tuple->dst.protonum; nest_parms = nla_nest_start(skb, CTA_EXPECT_MASK | NLA_F_NESTED); if (!nest_parms) -- cgit v1.2.2 From 3092ad054406f069991ca561adc74f2d9fbb6867 Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Tue, 26 Jan 2010 15:58:57 +0800 Subject: mac80211: fix NULL pointer dereference when ftrace is enabled I got below kernel oops when I try to bring down the network interface if ftrace is enabled. The root cause is drv_ampdu_action() is passed with a NULL ssn pointer in the BA session tear down case. We need to check and avoid dereferencing it in trace entry assignment. BUG: unable to handle kernel NULL pointer dereference Modules linked in: at (null) IP: [] ftrace_raw_event_drv_ampdu_action+0x10a/0x160 [mac80211] *pde = 00000000 Oops: 0000 [#1] SMP DEBUG_PAGEALLOC [...] Call Trace: [] ? ftrace_raw_event_drv_ampdu_action+0x0/0x160 [mac80211] [] ? __ieee80211_stop_rx_ba_session+0xfc/0x220 [mac80211] [] ? ieee80211_sta_tear_down_BA_sessions+0x3b/0x50 [mac80211] [] ? ieee80211_set_disassoc+0xe6/0x230 [mac80211] [] ? ieee80211_set_disassoc+0x9c/0x230 [mac80211] [] ? ieee80211_mgd_deauth+0x158/0x170 [mac80211] [] ? ieee80211_deauth+0x1b/0x20 [mac80211] [] ? __cfg80211_mlme_deauth+0xe9/0x120 [cfg80211] [] ? __cfg80211_disconnect+0x170/0x1d0 [cfg80211] Cc: Johannes Berg Cc: stable@kernel.org Signed-off-by: Zhu Yi Signed-off-by: John W. Linville --- net/mac80211/driver-trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index ee94ea0c67e9..da8497ef7063 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -680,7 +680,7 @@ TRACE_EVENT(drv_ampdu_action, __entry->ret = ret; __entry->action = action; __entry->tid = tid; - __entry->ssn = *ssn; + __entry->ssn = ssn ? *ssn : 0; ), TP_printk( -- cgit v1.2.2 From fcafde2e6d81aa7901f9b10e6a097592f0637b79 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 22 Dec 2009 15:58:08 +0200 Subject: Bluetooth: Remove double free of SKB pointer in L2CAP Trivial fix for double free of SKB pointer with kfree_skb to make code simplier and cleaner. Remove unused variable err. Signed-off-by: Andrei Emeltchenko Acked-by: Gustavo F. Padovan Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 1120cf14a548..8acc19ed9c4d 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3518,7 +3518,6 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk struct l2cap_pinfo *pi; u16 control, len; u8 tx_seq; - int err; sk = l2cap_get_chan_by_scid(&conn->chan_list, cid); if (!sk) { @@ -3570,13 +3569,11 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk goto drop; if (__is_iframe(control)) - err = l2cap_data_channel_iframe(sk, control, skb); + l2cap_data_channel_iframe(sk, control, skb); else - err = l2cap_data_channel_sframe(sk, control, skb); + l2cap_data_channel_sframe(sk, control, skb); - if (!err) - goto done; - break; + goto done; case L2CAP_MODE_STREAMING: control = get_unaligned_le16(skb->data); @@ -3602,7 +3599,7 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk else pi->expected_tx_seq = tx_seq + 1; - err = l2cap_sar_reassembly_sdu(sk, skb, control); + l2cap_sar_reassembly_sdu(sk, skb, control); goto done; -- cgit v1.2.2 From e420aba331f44de0eed6871441293a6124d566d1 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Wed, 23 Dec 2009 13:07:14 +0200 Subject: Bluetooth: Fix memory leak in L2CAP Move skb_clone after error confition check so it is not going potentially out of the scope. Signed-off-by: Andrei Emeltchenko Acked-by: Gustavo F. Padovan Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 8acc19ed9c4d..400efa26ddba 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1368,7 +1368,6 @@ static int l2cap_ertm_send(struct sock *sk) while ((skb = sk->sk_send_head) && (!l2cap_tx_window_full(sk)) && !(pi->conn_state & L2CAP_CONN_REMOTE_BUSY)) { - tx_skb = skb_clone(skb, GFP_ATOMIC); if (pi->remote_max_tx && bt_cb(skb)->retries == pi->remote_max_tx) { @@ -1376,6 +1375,8 @@ static int l2cap_ertm_send(struct sock *sk) break; } + tx_skb = skb_clone(skb, GFP_ATOMIC); + bt_cb(skb)->retries++; control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE); -- cgit v1.2.2 From 6bf8268f9a91f1065c99503161ebd061492bebe3 Mon Sep 17 00:00:00 2001 From: Bastien Nocera Date: Wed, 20 Jan 2010 12:00:42 +0000 Subject: Bluetooth: Use the control channel for raw HID reports In commit 2da31939a42f7a676a0bc5155d6a0a39ed8451f2, support for Bluetooth hid_output_raw_report was added, but it pushes the data to the interrupt channel instead of the contol one. This patch makes hid_output_raw_report use the control channel instead. Using the interrupt channel was a mistake. Signed-off-by: Bastien Nocera Signed-off-by: Marcel Holtmann --- net/bluetooth/hidp/core.c | 70 ++++++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 18e7f5a43dc4..6cf526d06e21 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -243,6 +243,39 @@ static void hidp_input_report(struct hidp_session *session, struct sk_buff *skb) input_sync(dev); } +static int __hidp_send_ctrl_message(struct hidp_session *session, + unsigned char hdr, unsigned char *data, int size) +{ + struct sk_buff *skb; + + BT_DBG("session %p data %p size %d", session, data, size); + + if (!(skb = alloc_skb(size + 1, GFP_ATOMIC))) { + BT_ERR("Can't allocate memory for new frame"); + return -ENOMEM; + } + + *skb_put(skb, 1) = hdr; + if (data && size > 0) + memcpy(skb_put(skb, size), data, size); + + skb_queue_tail(&session->ctrl_transmit, skb); + + return 0; +} + +static inline int hidp_send_ctrl_message(struct hidp_session *session, + unsigned char hdr, unsigned char *data, int size) +{ + int err; + + err = __hidp_send_ctrl_message(session, hdr, data, size); + + hidp_schedule(session); + + return err; +} + static int hidp_queue_report(struct hidp_session *session, unsigned char *data, int size) { @@ -282,7 +315,9 @@ static int hidp_send_report(struct hidp_session *session, struct hid_report *rep static int hidp_output_raw_report(struct hid_device *hid, unsigned char *data, size_t count) { - if (hidp_queue_report(hid->driver_data, data, count)) + if (hidp_send_ctrl_message(hid->driver_data, + HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_FEATURE, + data, count)) return -ENOMEM; return count; } @@ -307,39 +342,6 @@ static inline void hidp_del_timer(struct hidp_session *session) del_timer(&session->timer); } -static int __hidp_send_ctrl_message(struct hidp_session *session, - unsigned char hdr, unsigned char *data, int size) -{ - struct sk_buff *skb; - - BT_DBG("session %p data %p size %d", session, data, size); - - if (!(skb = alloc_skb(size + 1, GFP_ATOMIC))) { - BT_ERR("Can't allocate memory for new frame"); - return -ENOMEM; - } - - *skb_put(skb, 1) = hdr; - if (data && size > 0) - memcpy(skb_put(skb, size), data, size); - - skb_queue_tail(&session->ctrl_transmit, skb); - - return 0; -} - -static inline int hidp_send_ctrl_message(struct hidp_session *session, - unsigned char hdr, unsigned char *data, int size) -{ - int err; - - err = __hidp_send_ctrl_message(session, hdr, data, size); - - hidp_schedule(session); - - return err; -} - static void hidp_process_handshake(struct hidp_session *session, unsigned char param) { -- cgit v1.2.2 From 1038a00b458997661bcd0e780a24dc280a8841fc Mon Sep 17 00:00:00 2001 From: Nick Pelly Date: Wed, 3 Feb 2010 11:42:26 -0800 Subject: Bluetooth: Fallback eSCO to SCO on error 0x1a (Unsupported Remote Feature) General Motors carkits that use LGE BT chipsets return this error code when an eSCO is attempted, despite advertising eSCO support. 2009-08-13 14:41:39.755518 < HCI Command: Setup Synchronous Connection (0x01|0x0028) plen 17 handle 1 voice setting 0x0060 2009-08-13 14:41:39.757563 > HCI Event: Command Status (0x0f) plen 4 Setup Synchronous Connection (0x01|0x0028) status 0x00 ncmd 1 2009-08-13 14:41:39.789484 > HCI Event: Synchronous Connect Complete (0x2c) plen 17 status 0x1a handle 257 bdaddr 00:1E:B2:23:5E:B3 type eSCO Error: Unsupported Remote Feature / Unsupported LMP Feature Signed-off-by: Jaikumar Ganesh Signed-off-by: Nick Pelly Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_event.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 28517bad796c..592da5c909c1 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1699,6 +1699,7 @@ static inline void hci_sync_conn_complete_evt(struct hci_dev *hdev, struct sk_bu break; case 0x1c: /* SCO interval rejected */ + case 0x1a: /* Unsupported Remote Feature */ case 0x1f: /* Unspecified error */ if (conn->out && conn->attempt < 2) { conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) | -- cgit v1.2.2 From 485f1eff73a7b932fd3abb0dfcf804e1a1f59025 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 3 Feb 2010 15:52:18 -0800 Subject: Bluetooth: Fix sleeping function in RFCOMM within invalid context With the commit 9e726b17422bade75fba94e625cd35fd1353e682 the rfcomm_session_put() gets accidentially called from a timeout callback and results in this: BUG: sleeping function called from invalid context at net/core/sock.c:1897 in_atomic(): 1, irqs_disabled(): 0, pid: 0, name: swapper Pid: 0, comm: swapper Tainted: P 2.6.32 #31 Call Trace: [] __might_sleep+0xf8/0xfa [] lock_sock_nested+0x29/0xc4 [] lock_sock+0xb/0xd [l2cap] [] l2cap_sock_shutdown+0x1c/0x76 [l2cap] [] ? clockevents_program_event+0x75/0x7e [] ? tick_dev_program_event+0x37/0xa5 [] l2cap_sock_release+0x27/0x67 [l2cap] [] sock_release+0x1a/0x67 [] rfcomm_session_del+0x34/0x53 [rfcomm] [] rfcomm_session_put+0x14/0x16 [rfcomm] [] rfcomm_session_timeout+0xe/0x1a [rfcomm] [] run_timer_softirq+0x1e2/0x29a [] ? rfcomm_session_timeout+0x0/0x1a [rfcomm] [] __do_softirq+0xfe/0x1c5 [] ? timer_interrupt+0x1a/0x21 [] call_softirq+0x1c/0x28 [] do_softirq+0x33/0x6b [] irq_exit+0x36/0x85 [] do_IRQ+0xa6/0xbd [] ret_from_intr+0x0/0xa [] ? acpi_idle_enter_bm+0x269/0x294 [] ? acpi_idle_enter_bm+0x25f/0x294 [] ? cpuidle_idle_call+0x97/0x107 [] ? cpu_idle+0x53/0xaa [] ? rest_init+0x7a/0x7c [] ? start_kernel+0x389/0x394 [] ? x86_64_start_reservations+0xac/0xb0 [] ? x86_64_start_kernel+0xe4/0xeb To fix this, the rfcomm_session_put() needs to be moved out of rfcomm_session_timeout() into rfcomm_process_sessions(). In that context it is perfectly fine to sleep and disconnect the socket. Signed-off-by: Marcel Holtmann Tested-by: David John --- net/bluetooth/rfcomm/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index fc5ee3296e22..2b506373957a 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -252,7 +252,6 @@ static void rfcomm_session_timeout(unsigned long arg) BT_DBG("session %p state %ld", s, s->state); set_bit(RFCOMM_TIMED_OUT, &s->flags); - rfcomm_session_put(s); rfcomm_schedule(RFCOMM_SCHED_TIMEO); } @@ -1920,6 +1919,7 @@ static inline void rfcomm_process_sessions(void) if (test_and_clear_bit(RFCOMM_TIMED_OUT, &s->flags)) { s->state = BT_DISCONN; rfcomm_send_disc(s, 0); + rfcomm_session_put(s); continue; } -- cgit v1.2.2 From 6c2718da59613d76013b501bf0f8bcf9d7794b2d Mon Sep 17 00:00:00 2001 From: Nick Pelly Date: Wed, 3 Feb 2010 16:18:36 -0800 Subject: Bluetooth: Do not call rfcomm_session_put() for RFCOMM UA on closed socket When processing a RFCOMM UA frame when the socket is closed and we were not the RFCOMM initiator would cause rfcomm_session_put() to be called twice during rfcomm_process_rx(). This would cause a kernel panic in rfcomm_session_close() then. This could be easily reproduced during disconnect with devices such as Motorola H270 that send RFCOMM UA followed quickly by L2CAP disconnect request. This trace for this looks like: 2009-09-21 17:22:37.788895 < ACL data: handle 1 flags 0x02 dlen 8 L2CAP(d): cid 0x0041 len 4 [psm 3] RFCOMM(s): DISC: cr 0 dlci 20 pf 1 ilen 0 fcs 0x7d 2009-09-21 17:22:37.906204 > HCI Event: Number of Completed Packets (0x13) plen 5 handle 1 packets 1 2009-09-21 17:22:37.933090 > ACL data: handle 1 flags 0x02 dlen 8 L2CAP(d): cid 0x0040 len 4 [psm 3] RFCOMM(s): UA: cr 0 dlci 20 pf 1 ilen 0 fcs 0x57 2009-09-21 17:22:38.636764 < ACL data: handle 1 flags 0x02 dlen 8 L2CAP(d): cid 0x0041 len 4 [psm 3] RFCOMM(s): DISC: cr 0 dlci 0 pf 1 ilen 0 fcs 0x9c 2009-09-21 17:22:38.744125 > HCI Event: Number of Completed Packets (0x13) plen 5 handle 1 packets 1 2009-09-21 17:22:38.763687 > ACL data: handle 1 flags 0x02 dlen 8 L2CAP(d): cid 0x0040 len 4 [psm 3] RFCOMM(s): UA: cr 0 dlci 0 pf 1 ilen 0 fcs 0xb6 2009-09-21 17:22:38.783554 > ACL data: handle 1 flags 0x02 dlen 12 L2CAP(s): Disconn req: dcid 0x0040 scid 0x0041 Avoid calling rfcomm_session_put() twice by skipping this call in rfcomm_recv_ua() if the socket is closed. Signed-off-by: Nick Pelly Signed-off-by: Marcel Holtmann --- net/bluetooth/rfcomm/core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 2b506373957a..89f4a59eb82b 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -1150,7 +1150,11 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci) break; case BT_DISCONN: - rfcomm_session_put(s); + /* When socket is closed and we are not RFCOMM + * initiator rfcomm_process_rx already calls + * rfcomm_session_put() */ + if (s->sock->sk->sk_state != BT_CLOSED) + rfcomm_session_put(s); break; } } -- cgit v1.2.2 From 180211b841b5bf13ab10d19202adab3eb7749f6c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 30 Jan 2010 02:53:27 +0000 Subject: af_key: fix netns ops ordering on module load/unload 1. After sock_register() returns, it's possible to create sockets, even if module still not initialized fully (blame generic module code for that!) 2. Consequently, pfkey_create() can be called with pfkey_net_id still not initialized which will BUG_ON in net_generic(): kernel BUG at include/net/netns/generic.h:43! 3. During netns shutdown, netns ops should be unregistered after key manager unregistered because key manager calls can be triggered from xfrm_user module: general protection fault: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC pfkey_broadcast+0x111/0x210 [af_key] pfkey_send_notify+0x16a/0x300 [af_key] km_state_notify+0x41/0x70 xfrm_flush_sa+0x75/0x90 [xfrm_user] 4. Unregister netns ops after socket ops just in case and for symmetry. Reported by Luca Tettamanti. Signed-off-by: Alexey Dobriyan Tested-by: Luca Tettamanti Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/key/af_key.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 76fa6fef6473..539f43bc97db 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3794,9 +3794,9 @@ static struct pernet_operations pfkey_net_ops = { static void __exit ipsec_pfkey_exit(void) { - unregister_pernet_subsys(&pfkey_net_ops); xfrm_unregister_km(&pfkeyv2_mgr); sock_unregister(PF_KEY); + unregister_pernet_subsys(&pfkey_net_ops); proto_unregister(&key_proto); } @@ -3807,21 +3807,22 @@ static int __init ipsec_pfkey_init(void) if (err != 0) goto out; - err = sock_register(&pfkey_family_ops); + err = register_pernet_subsys(&pfkey_net_ops); if (err != 0) goto out_unregister_key_proto; + err = sock_register(&pfkey_family_ops); + if (err != 0) + goto out_unregister_pernet; err = xfrm_register_km(&pfkeyv2_mgr); if (err != 0) goto out_sock_unregister; - err = register_pernet_subsys(&pfkey_net_ops); - if (err != 0) - goto out_xfrm_unregister_km; out: return err; -out_xfrm_unregister_km: - xfrm_unregister_km(&pfkeyv2_mgr); + out_sock_unregister: sock_unregister(PF_KEY); +out_unregister_pernet: + unregister_pernet_subsys(&pfkey_net_ops); out_unregister_key_proto: proto_unregister(&key_proto); goto out; -- cgit v1.2.2 From 974c37e9d88c3e5a3e56eb98cb9c84232eb2bdcb Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 30 Jan 2010 10:05:05 +0000 Subject: netlink: fix for too early rmmod Netlink code does module autoload if protocol userspace is asking for is not ready. However, module can dissapear right after it was autoloaded. Example: modprobe/rmmod stress-testing and xfrm_user.ko providing NETLINK_XFRM. netlink_create() in such situation _will_ create userspace socket and _will_not_ pin module. Now if module was removed and we're going to call ->netlink_rcv into nothing: BUG: unable to handle kernel paging request at ffffffffa02f842a ^^^^^^^^^^^^^^^^ modules are loaded near these addresses here IP: [] 0xffffffffa02f842a PGD 161f067 PUD 1623063 PMD baa12067 PTE 0 Oops: 0010 [#1] PREEMPT SMP DEBUG_PAGEALLOC last sysfs file: /sys/devices/pci0000:00/0000:00:1f.2/host0/target0:0:0/0:0:0:0/block/sda/uevent CPU 1 Pid: 11515, comm: ip Not tainted 2.6.33-rc5-netns-00594-gaaa5728-dirty #6 P5E/P5E RIP: 0010:[] [] 0xffffffffa02f842a RSP: 0018:ffff8800baa3db48 EFLAGS: 00010292 RAX: ffff8800baa3dfd8 RBX: ffff8800be353640 RCX: 0000000000000000 RDX: ffffffff81959380 RSI: ffff8800bab7f130 RDI: 0000000000000001 RBP: ffff8800baa3db58 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000000011 R13: ffff8800be353640 R14: ffff8800bcdec240 R15: ffff8800bd488010 FS: 00007f93749656f0(0000) GS:ffff880002300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: ffffffffa02f842a CR3: 00000000ba82b000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process ip (pid: 11515, threadinfo ffff8800baa3c000, task ffff8800bab7eb30) Stack: ffffffff813637c0 ffff8800bd488000 ffff8800baa3dba8 ffffffff8136397d <0> 0000000000000000 ffffffff81344adc 7fffffffffffffff 0000000000000000 <0> ffff8800baa3ded8 ffff8800be353640 ffff8800bcdec240 0000000000000000 Call Trace: [] ? netlink_unicast+0x100/0x2d0 [] netlink_unicast+0x2bd/0x2d0 netlink_unicast_kernel: nlk->netlink_rcv(skb); [] ? memcpy_fromiovec+0x6c/0x90 [] netlink_sendmsg+0x1d3/0x2d0 [] sock_sendmsg+0xbb/0xf0 [] ? __lock_acquire+0x27b/0xa60 [] ? might_fault+0x73/0xd0 [] ? might_fault+0x73/0xd0 [] ? __lock_release+0x82/0x170 [] ? might_fault+0xbe/0xd0 [] ? might_fault+0x73/0xd0 [] ? verify_iovec+0x47/0xd0 [] sys_sendmsg+0x1a9/0x360 [] ? _raw_spin_unlock_irqrestore+0x65/0x70 [] ? trace_hardirqs_on+0xd/0x10 [] ? _raw_spin_unlock_irqrestore+0x42/0x70 [] ? __up_read+0x84/0xb0 [] ? trace_hardirqs_on_caller+0x145/0x190 [] ? trace_hardirqs_on_thunk+0x3a/0x3f [] system_call_fastpath+0x16/0x1b Code: Bad RIP value. RIP [] 0xffffffffa02f842a RSP CR2: ffffffffa02f842a If module was quickly removed after autoloading, return -E. Return -EPROTONOSUPPORT if module was quickly removed after autoloading. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index a4957bf2ca60..4c5972ba8c78 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -455,9 +455,14 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, if (nl_table[protocol].registered && try_module_get(nl_table[protocol].module)) module = nl_table[protocol].module; + else + err = -EPROTONOSUPPORT; cb_mutex = nl_table[protocol].cb_mutex; netlink_unlock_table(); + if (err < 0) + goto out; + err = __netlink_create(net, sock, cb_mutex, protocol); if (err < 0) goto out_module; -- cgit v1.2.2 From 8ed030dd0aa400d18c63861c2c6deb7c38f4edde Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 1 Feb 2010 02:12:19 +0000 Subject: dccp: fix bug in cache allocation This fixes a bug introduced in commit de4ef86cfce60d2250111f34f8a084e769f23b16 ("dccp: fix dccp rmmod when kernel configured to use slub", 17 Jan): the vsnprintf used sizeof(slab_name_fmt), which became truncated to 4 bytes, since slab_name_fmt is now a 4-byte pointer and no longer a 32-character array. This lead to error messages such as FATAL: Error inserting dccp: No buffer space available >> kernel: [ 1456.341501] kmem_cache_create: duplicate cache cci generated due to the truncation after the 3rd character. Fixed for the moment by introducing a symbolic constant. Tested to fix the bug. Signed-off-by: Gerrit Renker Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/dccp/ccid.c | 2 +- net/dccp/ccid.h | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index 57dfb9c8c4f2..ff16e9df1969 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -83,7 +83,7 @@ static struct kmem_cache *ccid_kmem_cache_create(int obj_size, char *slab_name_f va_list args; va_start(args, fmt); - vsnprintf(slab_name_fmt, sizeof(slab_name_fmt), fmt, args); + vsnprintf(slab_name_fmt, CCID_SLAB_NAME_LENGTH, fmt, args); va_end(args); slab = kmem_cache_create(slab_name_fmt, sizeof(struct ccid) + obj_size, 0, diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 269958bf7fe9..6df6f8ac9636 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -19,7 +19,9 @@ #include #include -#define CCID_MAX 255 +/* maximum value for a CCID (RFC 4340, 19.5) */ +#define CCID_MAX 255 +#define CCID_SLAB_NAME_LENGTH 32 struct tcp_info; @@ -49,8 +51,8 @@ struct ccid_operations { const char *ccid_name; struct kmem_cache *ccid_hc_rx_slab, *ccid_hc_tx_slab; - char ccid_hc_rx_slab_name[32]; - char ccid_hc_tx_slab_name[32]; + char ccid_hc_rx_slab_name[CCID_SLAB_NAME_LENGTH]; + char ccid_hc_tx_slab_name[CCID_SLAB_NAME_LENGTH]; __u32 ccid_hc_rx_obj_size, ccid_hc_tx_obj_size; /* Interface Routines */ -- cgit v1.2.2 From 1386be55e32a3c5d8ef4a2b243c530a7b664c02c Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 2 Feb 2010 20:16:56 +0000 Subject: dccp: fix auto-loading of dccp(_probe) This fixes commit (38ff3e6bb987ec583268da8eb22628293095d43b) ("dccp_probe: Fix module load dependencies between dccp and dccp_probe", from 15 Jan). It fixes the construction of the first argument of try_then_request_module(), where only valid return codes from the first argument should be returned. What we do now is assign the result of register_jprobe() to ret, without the side effect of the comparison. Acked-by: Gerrit Renker Signed-off-by: Neil Horman Signed-off-by: David S. Miller --- net/dccp/probe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/dccp/probe.c b/net/dccp/probe.c index bace1d8cbcfd..f5b3464f1242 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -161,8 +161,8 @@ static __init int dccpprobe_init(void) if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops)) goto err0; - ret = try_then_request_module((register_jprobe(&dccp_send_probe) == 0), - "dccp"); + try_then_request_module((ret = register_jprobe(&dccp_send_probe)) == 0, + "dccp"); if (ret) goto err1; -- cgit v1.2.2 From c390216b3e868b16d8154939f4b6f8c16dbd9a9f Mon Sep 17 00:00:00 2001 From: Nick Pelly Date: Fri, 13 Nov 2009 14:16:32 -0800 Subject: Bluetooth: Enter active mode before establishing a SCO link. When in sniff mode with a long interval time (1.28s) it can take 4+ seconds to establish a SCO link. Fix by requesting active mode before requesting SCO connection. This improves SCO setup time to ~500ms. Bluetooth headsets that use a long interval time, and exhibit the long SCO connection time include Motorola H790, HX1 and H17. They have a CSR 2.1 chipset. Verified this behavior and fix with host Bluetooth chipsets: BCM4329 and TI1271. 2009-10-13 14:17:46.183722 > HCI Event: Mode Change (0x14) plen 6 status 0x00 handle 1 mode 0x02 interval 2048 Mode: Sniff 2009-10-13 14:17:53.436285 < HCI Command: Setup Synchronous Connection (0x01|0x0028) plen 17 handle 1 voice setting 0x0060 2009-10-13 14:17:53.445593 > HCI Event: Command Status (0x0f) plen 4 Setup Synchronous Connection (0x01|0x0028) status 0x00 ncmd 1 2009-10-13 14:17:57.788855 > HCI Event: Synchronous Connect Complete 0x2c) plen 17 status 0x00 handle 257 bdaddr 00:1A:0E:F1:A4:7F type eSCO Air mode: CVSD Signed-off-by: Nick Pelly Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index b7c4224f4e7d..b10e3cdb08f8 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -377,6 +377,9 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 if (acl->state == BT_CONNECTED && (sco->state == BT_OPEN || sco->state == BT_CLOSED)) { + acl->power_save = 1; + hci_conn_enter_active_mode(acl); + if (lmp_esco_capable(hdev)) hci_setup_sync(sco, acl->handle); else -- cgit v1.2.2 From 454debe45c86102528c90c12eb6a99245b773bfe Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Mon, 1 Feb 2010 08:21:34 +0000 Subject: irda: unbalanced lock_kernel in irnet_ppp Add the missing unlock_kernel in one ioctl operation. Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: David S. Miller --- net/irda/irnet/irnet_ppp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 156020d138b5..7c22c126f0ea 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -706,7 +706,8 @@ dev_irnet_ioctl( lock_kernel(); if(ap->ppp_open && !put_user(ppp_unit_number(&ap->chan), (int __user *)argp)) - err = 0; + err = 0; + unlock_kernel(); break; /* All these ioctls can be passed both directly and from ppp_generic, -- cgit v1.2.2 From 3fdde0a1602d20c02a7d66e07ab6718ab8d79b12 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Mon, 1 Feb 2010 08:21:35 +0000 Subject: irda: add missing BKL in irnet_ppp ioctl One ioctl has been forgotten when the BKL was push down into irnet_ppp ioctl function. Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: David S. Miller --- net/irda/irnet/irnet_ppp.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 7c22c126f0ea..6b3602de359a 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -698,9 +698,11 @@ dev_irnet_ioctl( /* Query PPP channel and unit number */ case PPPIOCGCHAN: + lock_kernel(); if(ap->ppp_open && !put_user(ppp_channel_index(&ap->chan), (int __user *)argp)) err = 0; + unlock_kernel(); break; case PPPIOCGUNIT: lock_kernel(); -- cgit v1.2.2 From 1b3f720bf033fde1fbb6231f9b156b918c5f68d8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 4 Feb 2010 14:00:41 -0800 Subject: pktgen: Fix freezing problem Add missing try_to_freeze() to one of the pktgen_thread_worker() code paths so that it doesn't block suspend/hibernation. Fixes http://bugzilla.kernel.org/show_bug.cgi?id=15006 Signed-off-by: Rafael J. Wysocki Reported-and-tested-by: Ciprian Dorin Craciun Signed-off-by: David S. Miller --- net/core/pktgen.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index de0c2c726420..2e692afdc55d 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3524,6 +3524,7 @@ static int pktgen_thread_worker(void *arg) wait_event_interruptible_timeout(t->queue, t->control != 0, HZ/10); + try_to_freeze(); continue; } -- cgit v1.2.2 From 15c697ce1c5b408c5e20dcdc6aea2968d1125b75 Mon Sep 17 00:00:00 2001 From: Michael Poole Date: Fri, 5 Feb 2010 12:23:43 -0500 Subject: Bluetooth: Keep a copy of each HID device's report descriptor The report descriptor is read by user space (via the Service Discovery Protocol), so it is only available during the ioctl to connect. However, the HID probe function that needs the descriptor might not be called until a specific module is loaded. Keep a copy of the descriptor so it is available for later use. Signed-off-by: Michael Poole Signed-off-by: Marcel Holtmann --- net/bluetooth/hidp/core.c | 49 +++++++++++++++++++++++------------------------ net/bluetooth/hidp/hidp.h | 4 +++- 2 files changed, 27 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 6cf526d06e21..fc6ec1e72652 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -703,29 +703,9 @@ static void hidp_close(struct hid_device *hid) static int hidp_parse(struct hid_device *hid) { struct hidp_session *session = hid->driver_data; - struct hidp_connadd_req *req = session->req; - unsigned char *buf; - int ret; - - buf = kmalloc(req->rd_size, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - if (copy_from_user(buf, req->rd_data, req->rd_size)) { - kfree(buf); - return -EFAULT; - } - - ret = hid_parse_report(session->hid, buf, req->rd_size); - - kfree(buf); - - if (ret) - return ret; - - session->req = NULL; - return 0; + return hid_parse_report(session->hid, session->rd_data, + session->rd_size); } static int hidp_start(struct hid_device *hid) @@ -770,12 +750,24 @@ static int hidp_setup_hid(struct hidp_session *session, bdaddr_t src, dst; int err; + session->rd_data = kzalloc(req->rd_size, GFP_KERNEL); + if (!session->rd_data) + return -ENOMEM; + + if (copy_from_user(session->rd_data, req->rd_data, req->rd_size)) { + err = -EFAULT; + goto fault; + } + session->rd_size = req->rd_size; + hid = hid_allocate_device(); - if (IS_ERR(hid)) - return PTR_ERR(hid); + if (IS_ERR(hid)) { + err = PTR_ERR(hid); + goto fault; + } session->hid = hid; - session->req = req; + hid->driver_data = session; baswap(&src, &bt_sk(session->ctrl_sock->sk)->src); @@ -806,6 +798,10 @@ failed: hid_destroy_device(hid); session->hid = NULL; +fault: + kfree(session->rd_data); + session->rd_data = NULL; + return err; } @@ -900,6 +896,9 @@ unlink: session->hid = NULL; } + kfree(session->rd_data); + session->rd_data = NULL; + purge: skb_queue_purge(&session->ctrl_transmit); skb_queue_purge(&session->intr_transmit); diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h index faf3d74c3586..a4e215d50c10 100644 --- a/net/bluetooth/hidp/hidp.h +++ b/net/bluetooth/hidp/hidp.h @@ -154,7 +154,9 @@ struct hidp_session { struct sk_buff_head ctrl_transmit; struct sk_buff_head intr_transmit; - struct hidp_connadd_req *req; + /* Report descriptor */ + __u8 *rd_data; + uint rd_size; }; static inline void hidp_schedule(struct hidp_session *session) -- cgit v1.2.2 From 9edd7ca0a3e3999c260642c92fa008892d82ca6e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 8 Feb 2010 11:16:26 -0800 Subject: netfilter: nf_conntrack: fix memory corruption with multiple namespaces As discovered by Jon Masters , the "untracked" conntrack, which is located in the data section, might be accidentally freed when a new namespace is instantiated while the untracked conntrack is attached to a skb because the reference count it re-initialized. The best fix would be to use a seperate untracked conntrack per namespace since it includes a namespace pointer. Unfortunately this is not possible without larger changes since the namespace is not easily available everywhere we need it. For now move the untracked conntrack initialization to the init_net setup function to make sure the reference count is not re-initialized and handle cleanup in the init_net cleanup function to make sure namespaces can exit properly while the untracked conntrack is in use in other namespaces. Cc: stable@kernel.org Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_core.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0e98c3282d42..37e2b88313f2 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1113,6 +1113,10 @@ static void nf_ct_release_dying_list(struct net *net) static void nf_conntrack_cleanup_init_net(void) { + /* wait until all references to nf_conntrack_untracked are dropped */ + while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1) + schedule(); + nf_conntrack_helper_fini(); nf_conntrack_proto_fini(); kmem_cache_destroy(nf_conntrack_cachep); @@ -1127,9 +1131,6 @@ static void nf_conntrack_cleanup_net(struct net *net) schedule(); goto i_see_dead_people; } - /* wait until all references to nf_conntrack_untracked are dropped */ - while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1) - schedule(); nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, nf_conntrack_htable_size); @@ -1288,6 +1289,14 @@ static int nf_conntrack_init_init_net(void) if (ret < 0) goto err_helper; + /* Set up fake conntrack: to never be deleted, not in any hashes */ +#ifdef CONFIG_NET_NS + nf_conntrack_untracked.ct_net = &init_net; +#endif + atomic_set(&nf_conntrack_untracked.ct_general.use, 1); + /* - and look it like as a confirmed connection */ + set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status); + return 0; err_helper: @@ -1333,15 +1342,6 @@ static int nf_conntrack_init_net(struct net *net) if (ret < 0) goto err_ecache; - /* Set up fake conntrack: - - to never be deleted, not in any hashes */ -#ifdef CONFIG_NET_NS - nf_conntrack_untracked.ct_net = &init_net; -#endif - atomic_set(&nf_conntrack_untracked.ct_general.use, 1); - /* - and look it like as a confirmed connection */ - set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status); - return 0; err_ecache: -- cgit v1.2.2 From 5b3501faa8741d50617ce4191c20061c6ef36cb3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 8 Feb 2010 11:16:56 -0800 Subject: netfilter: nf_conntrack: per netns nf_conntrack_cachep nf_conntrack_cachep is currently shared by all netns instances, but because of SLAB_DESTROY_BY_RCU special semantics, this is wrong. If we use a shared slab cache, one object can instantly flight between one hash table (netns ONE) to another one (netns TWO), and concurrent reader (doing a lookup in netns ONE, 'finding' an object of netns TWO) can be fooled without notice, because no RCU grace period has to be observed between object freeing and its reuse. We dont have this problem with UDP/TCP slab caches because TCP/UDP hashtables are global to the machine (and each object has a pointer to its netns). If we use per netns conntrack hash tables, we also *must* use per netns conntrack slab caches, to guarantee an object can not escape from one namespace to another one. Signed-off-by: Eric Dumazet [Patrick: added unique slab name allocation] Cc: stable@kernel.org Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_core.c | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 37e2b88313f2..9de4bd4c0dd7 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -63,8 +63,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max); struct nf_conn nf_conntrack_untracked __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_untracked); -static struct kmem_cache *nf_conntrack_cachep __read_mostly; - static int nf_conntrack_hash_rnd_initted; static unsigned int nf_conntrack_hash_rnd; @@ -572,7 +570,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, * Do not use kmem_cache_zalloc(), as this cache uses * SLAB_DESTROY_BY_RCU. */ - ct = kmem_cache_alloc(nf_conntrack_cachep, gfp); + ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp); if (ct == NULL) { pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n"); atomic_dec(&net->ct.count); @@ -611,7 +609,7 @@ void nf_conntrack_free(struct nf_conn *ct) nf_ct_ext_destroy(ct); atomic_dec(&net->ct.count); nf_ct_ext_free(ct); - kmem_cache_free(nf_conntrack_cachep, ct); + kmem_cache_free(net->ct.nf_conntrack_cachep, ct); } EXPORT_SYMBOL_GPL(nf_conntrack_free); @@ -1119,7 +1117,6 @@ static void nf_conntrack_cleanup_init_net(void) nf_conntrack_helper_fini(); nf_conntrack_proto_fini(); - kmem_cache_destroy(nf_conntrack_cachep); } static void nf_conntrack_cleanup_net(struct net *net) @@ -1137,6 +1134,8 @@ static void nf_conntrack_cleanup_net(struct net *net) nf_conntrack_ecache_fini(net); nf_conntrack_acct_fini(net); nf_conntrack_expect_fini(net); + kmem_cache_destroy(net->ct.nf_conntrack_cachep); + kfree(net->ct.slabname); free_percpu(net->ct.stat); } @@ -1272,15 +1271,6 @@ static int nf_conntrack_init_init_net(void) NF_CONNTRACK_VERSION, nf_conntrack_htable_size, nf_conntrack_max); - nf_conntrack_cachep = kmem_cache_create("nf_conntrack", - sizeof(struct nf_conn), - 0, SLAB_DESTROY_BY_RCU, NULL); - if (!nf_conntrack_cachep) { - printk(KERN_ERR "Unable to create nf_conn slab cache\n"); - ret = -ENOMEM; - goto err_cache; - } - ret = nf_conntrack_proto_init(); if (ret < 0) goto err_proto; @@ -1302,8 +1292,6 @@ static int nf_conntrack_init_init_net(void) err_helper: nf_conntrack_proto_fini(); err_proto: - kmem_cache_destroy(nf_conntrack_cachep); -err_cache: return ret; } @@ -1325,6 +1313,21 @@ static int nf_conntrack_init_net(struct net *net) ret = -ENOMEM; goto err_stat; } + + net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net); + if (!net->ct.slabname) { + ret = -ENOMEM; + goto err_slabname; + } + + net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname, + sizeof(struct nf_conn), 0, + SLAB_DESTROY_BY_RCU, NULL); + if (!net->ct.nf_conntrack_cachep) { + printk(KERN_ERR "Unable to create nf_conn slab cache\n"); + ret = -ENOMEM; + goto err_cache; + } net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, &net->ct.hash_vmalloc, 1); if (!net->ct.hash) { @@ -1352,6 +1355,10 @@ err_expect: nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, nf_conntrack_htable_size); err_hash: + kmem_cache_destroy(net->ct.nf_conntrack_cachep); +err_cache: + kfree(net->ct.slabname); +err_slabname: free_percpu(net->ct.stat); err_stat: return ret; -- cgit v1.2.2 From 13ccdfc2af03e09e60791f7d4bc4ccf53398af7c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 8 Feb 2010 11:17:22 -0800 Subject: netfilter: nf_conntrack: restrict runtime expect hashsize modifications Expectation hashtable size was simply glued to a variable with no code to rehash expectations, so it was a bug to allow writing to it. Make "expect_hashsize" readonly. Signed-off-by: Alexey Dobriyan Cc: stable@kernel.org Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_expect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index fdf5d2a1d9b4..4ad7d1d809af 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -569,7 +569,7 @@ static void exp_proc_remove(struct net *net) #endif /* CONFIG_PROC_FS */ } -module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600); +module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400); int nf_conntrack_expect_init(struct net *net) { -- cgit v1.2.2 From 14c7dbe043d01a83a30633ab6b109ba2ac61d9f7 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 8 Feb 2010 11:17:43 -0800 Subject: netfilter: xtables: compat out of scope fix As per C99 6.2.4(2) when temporary table data goes out of scope, the behaviour is undefined: if (compat) { struct foo tmp; ... private = &tmp; } [dereference private] Signed-off-by: Alexey Dobriyan Cc: stable@kernel.org Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/arp_tables.c | 4 ++-- net/ipv4/netfilter/ip_tables.c | 4 ++-- net/ipv6/netfilter/ip6_tables.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 06632762ba5f..90203e1b9187 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -925,10 +925,10 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) if (t && !IS_ERR(t)) { struct arpt_getinfo info; const struct xt_table_info *private = t->private; - #ifdef CONFIG_COMPAT + struct xt_table_info tmp; + if (compat) { - struct xt_table_info tmp; ret = compat_table_info(private, &tmp); xt_compat_flush_offsets(NFPROTO_ARP); private = &tmp; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 572330a552ef..3ce53cf13d5a 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1132,10 +1132,10 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) if (t && !IS_ERR(t)) { struct ipt_getinfo info; const struct xt_table_info *private = t->private; - #ifdef CONFIG_COMPAT + struct xt_table_info tmp; + if (compat) { - struct xt_table_info tmp; ret = compat_table_info(private, &tmp); xt_compat_flush_offsets(AF_INET); private = &tmp; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 480d7f8c9802..8a7e0f52e177 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1164,10 +1164,10 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) if (t && !IS_ERR(t)) { struct ip6t_getinfo info; const struct xt_table_info *private = t->private; - #ifdef CONFIG_COMPAT + struct xt_table_info tmp; + if (compat) { - struct xt_table_info tmp; ret = compat_table_info(private, &tmp); xt_compat_flush_offsets(AF_INET6); private = &tmp; -- cgit v1.2.2 From d696c7bdaa55e2208e56c6f98e6bc1599f34286d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 8 Feb 2010 11:18:07 -0800 Subject: netfilter: nf_conntrack: fix hash resizing with namespaces As noticed by Jon Masters , the conntrack hash size is global and not per namespace, but modifiable at runtime through /sys/module/nf_conntrack/hashsize. Changing the hash size will only resize the hash in the current namespace however, so other namespaces will use an invalid hash size. This can cause crashes when enlarging the hashsize, or false negative lookups when shrinking it. Move the hash size into the per-namespace data and only use the global hash size to initialize the per-namespace value when instanciating a new namespace. Additionally restrict hash resizing to init_net for now as other namespaces are not handled currently. Cc: stable@kernel.org Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- .../netfilter/nf_conntrack_l3proto_ipv4_compat.c | 4 +- net/ipv4/netfilter/nf_nat_core.c | 22 ++++----- net/netfilter/nf_conntrack_core.c | 53 ++++++++++++---------- net/netfilter/nf_conntrack_expect.c | 2 +- net/netfilter/nf_conntrack_helper.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 2 +- net/netfilter/nf_conntrack_standalone.c | 7 +-- 8 files changed, 47 insertions(+), 47 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index d171b123a656..d1ea38a7c490 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -210,7 +210,7 @@ static ctl_table ip_ct_sysctl_table[] = { }, { .procname = "ip_conntrack_buckets", - .data = &nf_conntrack_htable_size, + .data = &init_net.ct.htable_size, .maxlen = sizeof(unsigned int), .mode = 0444, .proc_handler = proc_dointvec, diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 8668a3defda6..2fb7b76da94f 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -32,7 +32,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) struct hlist_nulls_node *n; for (st->bucket = 0; - st->bucket < nf_conntrack_htable_size; + st->bucket < net->ct.htable_size; st->bucket++) { n = rcu_dereference(net->ct.hash[st->bucket].first); if (!is_a_nulls(n)) @@ -50,7 +50,7 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, head = rcu_dereference(head->next); while (is_a_nulls(head)) { if (likely(get_nulls_value(head) == st->bucket)) { - if (++st->bucket >= nf_conntrack_htable_size) + if (++st->bucket >= net->ct.htable_size) return NULL; } head = rcu_dereference(net->ct.hash[st->bucket].first); diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index fe1a64479dd0..26066a2327ad 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -35,9 +35,6 @@ static DEFINE_SPINLOCK(nf_nat_lock); static struct nf_conntrack_l3proto *l3proto __read_mostly; -/* Calculated at init based on memory size */ -static unsigned int nf_nat_htable_size __read_mostly; - #define MAX_IP_NAT_PROTO 256 static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] __read_mostly; @@ -72,7 +69,7 @@ EXPORT_SYMBOL_GPL(nf_nat_proto_put); /* We keep an extra hash for each conntrack, for fast searching. */ static inline unsigned int -hash_by_src(const struct nf_conntrack_tuple *tuple) +hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple) { unsigned int hash; @@ -80,7 +77,7 @@ hash_by_src(const struct nf_conntrack_tuple *tuple) hash = jhash_3words((__force u32)tuple->src.u3.ip, (__force u32)tuple->src.u.all, tuple->dst.protonum, 0); - return ((u64)hash * nf_nat_htable_size) >> 32; + return ((u64)hash * net->ipv4.nat_htable_size) >> 32; } /* Is this tuple already taken? (not by us) */ @@ -147,7 +144,7 @@ find_appropriate_src(struct net *net, struct nf_conntrack_tuple *result, const struct nf_nat_range *range) { - unsigned int h = hash_by_src(tuple); + unsigned int h = hash_by_src(net, tuple); const struct nf_conn_nat *nat; const struct nf_conn *ct; const struct hlist_node *n; @@ -330,7 +327,7 @@ nf_nat_setup_info(struct nf_conn *ct, if (have_to_hash) { unsigned int srchash; - srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + srchash = hash_by_src(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); spin_lock_bh(&nf_nat_lock); /* nf_conntrack_alter_reply might re-allocate exntension aera */ nat = nfct_nat(ct); @@ -679,8 +676,10 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, static int __net_init nf_nat_net_init(struct net *net) { - net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, - &net->ipv4.nat_vmalloced, 0); + /* Leave them the same for the moment. */ + net->ipv4.nat_htable_size = net->ct.htable_size; + net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, + &net->ipv4.nat_vmalloced, 0); if (!net->ipv4.nat_bysource) return -ENOMEM; return 0; @@ -703,7 +702,7 @@ static void __net_exit nf_nat_net_exit(struct net *net) nf_ct_iterate_cleanup(net, &clean_nat, NULL); synchronize_rcu(); nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced, - nf_nat_htable_size); + net->ipv4.nat_htable_size); } static struct pernet_operations nf_nat_net_ops = { @@ -724,9 +723,6 @@ static int __init nf_nat_init(void) return ret; } - /* Leave them the same for the moment. */ - nf_nat_htable_size = nf_conntrack_htable_size; - ret = register_pernet_subsys(&nf_nat_net_ops); if (ret < 0) goto cleanup_extend; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 9de4bd4c0dd7..4d79e3c1616c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -84,9 +85,10 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, return ((u64)h * size) >> 32; } -static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple) +static inline u_int32_t hash_conntrack(const struct net *net, + const struct nf_conntrack_tuple *tuple) { - return __hash_conntrack(tuple, nf_conntrack_htable_size, + return __hash_conntrack(tuple, net->ct.htable_size, nf_conntrack_hash_rnd); } @@ -294,7 +296,7 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - unsigned int hash = hash_conntrack(tuple); + unsigned int hash = hash_conntrack(net, tuple); /* Disable BHs the entire time since we normally need to disable them * at least once for the stats anyway. @@ -364,10 +366,11 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, void nf_conntrack_hash_insert(struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); unsigned int hash, repl_hash; - hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); __nf_conntrack_hash_insert(ct, hash, repl_hash); } @@ -395,8 +398,8 @@ __nf_conntrack_confirm(struct sk_buff *skb) if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) return NF_ACCEPT; - hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); /* We're not in hash table, and we refuse to set up related connections for unconfirmed conns. But packet copies and @@ -466,7 +469,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, struct net *net = nf_ct_net(ignored_conntrack); struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - unsigned int hash = hash_conntrack(tuple); + unsigned int hash = hash_conntrack(net, tuple); /* Disable BHs the entire time since we need to disable them at * least once for the stats anyway. @@ -501,7 +504,7 @@ static noinline int early_drop(struct net *net, unsigned int hash) int dropped = 0; rcu_read_lock(); - for (i = 0; i < nf_conntrack_htable_size; i++) { + for (i = 0; i < net->ct.htable_size; i++) { hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) { tmp = nf_ct_tuplehash_to_ctrack(h); @@ -521,7 +524,7 @@ static noinline int early_drop(struct net *net, unsigned int hash) if (cnt >= NF_CT_EVICTION_RANGE) break; - hash = (hash + 1) % nf_conntrack_htable_size; + hash = (hash + 1) % net->ct.htable_size; } rcu_read_unlock(); @@ -555,7 +558,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, if (nf_conntrack_max && unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { - unsigned int hash = hash_conntrack(orig); + unsigned int hash = hash_conntrack(net, orig); if (!early_drop(net, hash)) { atomic_dec(&net->ct.count); if (net_ratelimit()) @@ -1012,7 +1015,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), struct hlist_nulls_node *n; spin_lock_bh(&nf_conntrack_lock); - for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { + for (; *bucket < net->ct.htable_size; (*bucket)++) { hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (iter(ct, data)) @@ -1130,7 +1133,7 @@ static void nf_conntrack_cleanup_net(struct net *net) } nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, - nf_conntrack_htable_size); + net->ct.htable_size); nf_conntrack_ecache_fini(net); nf_conntrack_acct_fini(net); nf_conntrack_expect_fini(net); @@ -1190,10 +1193,12 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) { int i, bucket, vmalloced, old_vmalloced; unsigned int hashsize, old_size; - int rnd; struct hlist_nulls_head *hash, *old_hash; struct nf_conntrack_tuple_hash *h; + if (current->nsproxy->net_ns != &init_net) + return -EOPNOTSUPP; + /* On boot, we can set this without any fancy locking. */ if (!nf_conntrack_htable_size) return param_set_uint(val, kp); @@ -1206,33 +1211,29 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) if (!hash) return -ENOMEM; - /* We have to rehahs for the new table anyway, so we also can - * use a newrandom seed */ - get_random_bytes(&rnd, sizeof(rnd)); - /* Lookups in the old hash might happen in parallel, which means we * might get false negatives during connection lookup. New connections * created because of a false negative won't make it into the hash * though since that required taking the lock. */ spin_lock_bh(&nf_conntrack_lock); - for (i = 0; i < nf_conntrack_htable_size; i++) { + for (i = 0; i < init_net.ct.htable_size; i++) { while (!hlist_nulls_empty(&init_net.ct.hash[i])) { h = hlist_nulls_entry(init_net.ct.hash[i].first, struct nf_conntrack_tuple_hash, hnnode); hlist_nulls_del_rcu(&h->hnnode); - bucket = __hash_conntrack(&h->tuple, hashsize, rnd); + bucket = __hash_conntrack(&h->tuple, hashsize, + nf_conntrack_hash_rnd); hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]); } } - old_size = nf_conntrack_htable_size; + old_size = init_net.ct.htable_size; old_vmalloced = init_net.ct.hash_vmalloc; old_hash = init_net.ct.hash; - nf_conntrack_htable_size = hashsize; + init_net.ct.htable_size = nf_conntrack_htable_size = hashsize; init_net.ct.hash_vmalloc = vmalloced; init_net.ct.hash = hash; - nf_conntrack_hash_rnd = rnd; spin_unlock_bh(&nf_conntrack_lock); nf_ct_free_hashtable(old_hash, old_vmalloced, old_size); @@ -1328,7 +1329,9 @@ static int nf_conntrack_init_net(struct net *net) ret = -ENOMEM; goto err_cache; } - net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, + + net->ct.htable_size = nf_conntrack_htable_size; + net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, &net->ct.hash_vmalloc, 1); if (!net->ct.hash) { ret = -ENOMEM; @@ -1353,7 +1356,7 @@ err_acct: nf_conntrack_expect_fini(net); err_expect: nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, - nf_conntrack_htable_size); + net->ct.htable_size); err_hash: kmem_cache_destroy(net->ct.nf_conntrack_cachep); err_cache: diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 4ad7d1d809af..2f25ff610982 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -577,7 +577,7 @@ int nf_conntrack_expect_init(struct net *net) if (net_eq(net, &init_net)) { if (!nf_ct_expect_hsize) { - nf_ct_expect_hsize = nf_conntrack_htable_size / 256; + nf_ct_expect_hsize = net->ct.htable_size / 256; if (!nf_ct_expect_hsize) nf_ct_expect_hsize = 1; } diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 65c2a7bc3afc..4b1a56bd074c 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -192,7 +192,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, /* Get rid of expecteds, set helpers to NULL. */ hlist_nulls_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode) unhelp(h, me); - for (i = 0; i < nf_conntrack_htable_size; i++) { + for (i = 0; i < net->ct.htable_size; i++) { hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) unhelp(h, me); } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 42f21c01a93e..0ffe689dfe97 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -594,7 +594,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); last = (struct nf_conn *)cb->args[1]; - for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { + for (; cb->args[0] < init_net.ct.htable_size; cb->args[0]++) { restart: hlist_nulls_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]], hnnode) { diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 028aba667ef7..e310f1561bb2 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -51,7 +51,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) struct hlist_nulls_node *n; for (st->bucket = 0; - st->bucket < nf_conntrack_htable_size; + st->bucket < net->ct.htable_size; st->bucket++) { n = rcu_dereference(net->ct.hash[st->bucket].first); if (!is_a_nulls(n)) @@ -69,7 +69,7 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, head = rcu_dereference(head->next); while (is_a_nulls(head)) { if (likely(get_nulls_value(head) == st->bucket)) { - if (++st->bucket >= nf_conntrack_htable_size) + if (++st->bucket >= net->ct.htable_size) return NULL; } head = rcu_dereference(net->ct.hash[st->bucket].first); @@ -355,7 +355,7 @@ static ctl_table nf_ct_sysctl_table[] = { }, { .procname = "nf_conntrack_buckets", - .data = &nf_conntrack_htable_size, + .data = &init_net.ct.htable_size, .maxlen = sizeof(unsigned int), .mode = 0444, .proc_handler = proc_dointvec, @@ -421,6 +421,7 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) goto out_kmemdup; table[1].data = &net->ct.count; + table[2].data = &net->ct.htable_size; table[3].data = &net->ct.sysctl_checksum; table[4].data = &net->ct.sysctl_log_invalid; -- cgit v1.2.2 From 562ada612058133a5483c68a73605f3c5f42fffe Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Fri, 15 Jan 2010 18:54:03 -0600 Subject: net/9p: fix virtio transport to correctly update status on connect The 9p virtio transport was not updating its connection status correctly preventing it from being able to mount the server. Signed-off-by: Eric Van Hensbergen --- net/9p/trans_virtio.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index ea1e3daabefe..67c4bc704c5a 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -311,6 +311,7 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args) } client->trans = (void *)chan; + client->status = Connected; chan->client = client; return 0; -- cgit v1.2.2 From 349d3bb878d71978650a0634b5445af3c1cc1cd8 Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Fri, 15 Jan 2010 19:01:10 -0600 Subject: net/9p: fail when user specifies a transport which we can't find If the user specifies a transport and we can't find it, we failed back to the default trainsport silently. This patch will make the code complain more loudly and return an error code. Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 8af95b2dddd6..90a2eb926d19 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -108,6 +108,13 @@ static int parse_opts(char *opts, struct p9_client *clnt) break; case Opt_trans: clnt->trans_mod = v9fs_get_trans_by_name(&args[0]); + if(clnt->trans_mod == NULL) { + P9_DPRINTK(P9_DEBUG_ERROR, + "Could not find request transport: %s\n", + (char *) &args[0]); + ret = -EINVAL; + goto free_and_return; + } break; case Opt_legacy: clnt->dotu = 0; @@ -117,6 +124,7 @@ static int parse_opts(char *opts, struct p9_client *clnt) } } +free_and_return: kfree(options); return ret; } -- cgit v1.2.2 From 9d6939dac77102b09396ee0b89392ec7639612a7 Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Fri, 15 Jan 2010 19:01:56 -0600 Subject: net/9p: fix statsize inside twstat stat structures contain a size prefix. In our twstat messages we were including the size of the size prefix in the prefix, which is not what the protocol wants, and Inferno servers would complain. Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 90a2eb926d19..a2e2d61b903b 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1222,10 +1222,11 @@ static int p9_client_statsize(struct p9_wstat *wst, int optional) { int ret; + /* NOTE: size shouldn't include its own length */ /* size[2] type[2] dev[4] qid[13] */ /* mode[4] atime[4] mtime[4] length[8]*/ /* name[s] uid[s] gid[s] muid[s] */ - ret = 2+2+4+13+4+4+4+8+2+2+2+2; + ret = 2+4+13+4+4+4+8+2+2+2+2; if (wst->name) ret += strlen(wst->name); @@ -1266,7 +1267,7 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) wst->name, wst->uid, wst->gid, wst->muid, wst->extension, wst->n_uid, wst->n_gid, wst->n_muid); - req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, wst->size, wst); + req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, wst->size+2, wst); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; -- cgit v1.2.2 From 0da780c269957783d341fc3559e6b4c9912af7b4 Mon Sep 17 00:00:00 2001 From: Benoit Papillault Date: Fri, 5 Feb 2010 01:21:03 +0100 Subject: mac80211: Fix probe request filtering in IBSS mode We only reply to probe request if either the requested SSID is the broadcast SSID or if the requested SSID matches our own SSID. This latter case was not properly handled since we were replying to different SSID with the same length as our own SSID. Signed-off-by: Benoit Papillault Cc: stable@kernel.org Signed-off-by: John W. Linville --- net/mac80211/ibss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 1f2db647bb5c..22f0c2aa7a89 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -647,7 +647,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, } if (pos[1] != 0 && (pos[1] != ifibss->ssid_len || - !memcmp(pos + 2, ifibss->ssid, ifibss->ssid_len))) { + memcmp(pos + 2, ifibss->ssid, ifibss->ssid_len))) { /* Ignore ProbeReq for foreign SSID */ return; } -- cgit v1.2.2 From c0ce77b8323c1a0d4eeef97caf16c0ea971222a9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 3 Feb 2010 10:22:31 +0100 Subject: mac80211: fix deferred hardware scan requests Reinette found the reason for the warnings that happened occasionally when a hw-offloaded scan finished; her description of the problem: mac80211 will defer the handling of scan requests if it is busy with management work at the time. The scan requests are deferred and run after the work has completed. When this occurs there are currently two problems. * The scan request for hardware scan is not fully populated with the band and channels to scan not initialized. * When the scan is queued the state is not correctly updated to reflect that a scan is in progress. The problem here is that when the driver completes the scan and calls ieee80211_scan_completed() a warning will be triggered since mac80211 was not aware that a scan was in progress. The reason is that the queued scan work will start the hw scan right away when the hw_scan_req struct has already been allocated. However, in the first pass it will not have been filled, which happens at the same time as setting the bits. To fix this, simply move the allocation after the pending work test as well, so that the first iteration of the scan work will call __ieee80211_start_scan() even in the hardware scan case. Bug-identified-by: Reinette Chatre Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/scan.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index f934c9620b73..bc17cf7d68db 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -439,6 +439,16 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, if (local->scan_req) return -EBUSY; + if (req != local->int_scan_req && + sdata->vif.type == NL80211_IFTYPE_STATION && + !list_empty(&ifmgd->work_list)) { + /* actually wait for the work it's doing to finish/time out */ + set_bit(IEEE80211_STA_REQ_SCAN, &ifmgd->request); + local->scan_req = req; + local->scan_sdata = sdata; + return 0; + } + if (local->ops->hw_scan) { u8 *ies; @@ -463,14 +473,6 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, local->scan_req = req; local->scan_sdata = sdata; - if (req != local->int_scan_req && - sdata->vif.type == NL80211_IFTYPE_STATION && - !list_empty(&ifmgd->work_list)) { - /* actually wait for the work it's doing to finish/time out */ - set_bit(IEEE80211_STA_REQ_SCAN, &ifmgd->request); - return 0; - } - if (local->ops->hw_scan) __set_bit(SCAN_HW_SCANNING, &local->scanning); else -- cgit v1.2.2 From d8c8a9e36560e9ff4c99279d64ce5dd0e1a33fa6 Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Mon, 8 Feb 2010 16:23:23 -0600 Subject: 9p: fix option parsing Options pointer is being moved before calling kfree() which seems to cause problems. This uses a separate pointer to track and free original allocation. Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen w --- net/9p/client.c | 9 +++++---- net/9p/trans_fd.c | 10 ++++++---- net/9p/trans_rdma.c | 9 +++++---- 3 files changed, 16 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index a2e2d61b903b..cbe066966b3c 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -69,7 +69,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); static int parse_opts(char *opts, struct p9_client *clnt) { - char *options; + char *options, *tmp_options; char *p; substring_t args[MAX_OPT_ARGS]; int option; @@ -81,12 +81,13 @@ static int parse_opts(char *opts, struct p9_client *clnt) if (!opts) return 0; - options = kstrdup(opts, GFP_KERNEL); - if (!options) { + tmp_options = kstrdup(opts, GFP_KERNEL); + if (!tmp_options) { P9_DPRINTK(P9_DEBUG_ERROR, "failed to allocate copy of option string\n"); return -ENOMEM; } + options = tmp_options; while ((p = strsep(&options, ",")) != NULL) { int token; @@ -125,7 +126,7 @@ static int parse_opts(char *opts, struct p9_client *clnt) } free_and_return: - kfree(options); + kfree(tmp_options); return ret; } diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index be1cb909d8c0..31d0b05582a9 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -714,7 +714,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) char *p; substring_t args[MAX_OPT_ARGS]; int option; - char *options; + char *options, *tmp_options; int ret; opts->port = P9_PORT; @@ -724,12 +724,13 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) if (!params) return 0; - options = kstrdup(params, GFP_KERNEL); - if (!options) { + tmp_options = kstrdup(params, GFP_KERNEL); + if (!tmp_options) { P9_DPRINTK(P9_DEBUG_ERROR, "failed to allocate copy of option string\n"); return -ENOMEM; } + options = tmp_options; while ((p = strsep(&options, ",")) != NULL) { int token; @@ -760,7 +761,8 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) continue; } } - kfree(options); + + kfree(tmp_options); return 0; } diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 65cb29db03f8..2c95a89c0f46 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -166,7 +166,7 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts) char *p; substring_t args[MAX_OPT_ARGS]; int option; - char *options; + char *options, *tmp_options; int ret; opts->port = P9_PORT; @@ -177,12 +177,13 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts) if (!params) return 0; - options = kstrdup(params, GFP_KERNEL); - if (!options) { + tmp_options = kstrdup(params, GFP_KERNEL); + if (!tmp_options) { P9_DPRINTK(P9_DEBUG_ERROR, "failed to allocate copy of option string\n"); return -ENOMEM; } + options = tmp_options; while ((p = strsep(&options, ",")) != NULL) { int token; @@ -216,7 +217,7 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts) } /* RQ must be at least as large as the SQ */ opts->rq_depth = max(opts->rq_depth, opts->sq_depth); - kfree(options); + kfree(tmp_options); return 0; } -- cgit v1.2.2 From 2fc1b5dd99f66d93ffc23fd8df82d384c1a354c8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 8 Feb 2010 15:00:39 -0800 Subject: dst: call cond_resched() in dst_gc_task() Kernel bugzilla #15239 On some workloads, it is quite possible to get a huge dst list to process in dst_gc_task(), and trigger soft lockup detection. Fix is to call cond_resched(), as we run in process context. Reported-by: Pawel Staszewski Tested-by: Pawel Staszewski Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dst.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/dst.c b/net/core/dst.c index 57bc4d5b8d08..cb1b3488b739 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -79,6 +80,7 @@ loop: while ((dst = next) != NULL) { next = dst->next; prefetch(&next->next); + cond_resched(); if (likely(atomic_read(&dst->__refcnt))) { last->next = dst; last = dst; -- cgit v1.2.2 From fb786100f7c75e154e63d0f5a2982e6d46dfb602 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 8 Feb 2010 11:50:32 +0000 Subject: 9p: Fix the kernel crash on a failed mount The patch fix the crash repoted below [ 15.149907] BUG: unable to handle kernel NULL pointer dereference at 00000001 [ 15.150806] IP: [] p9_virtio_close+0x18/0x24 ..... .... [ 15.150806] Call Trace: [ 15.150806] [] ? p9_client_destroy+0x3f/0x163 [ 15.150806] [] ? p9_client_create+0x25f/0x270 [ 15.150806] [] ? trace_hardirqs_on+0xb/0xd [ 15.150806] [] ? match_token+0x64/0x164 [ 15.150806] [] ? v9fs_session_init+0x2f1/0x3c8 [ 15.150806] [] ? kmem_cache_alloc+0x98/0xb8 [ 15.150806] [] ? trace_hardirqs_on+0xb/0xd [ 15.150806] [] ? v9fs_get_sb+0x47/0x1e8 [ 15.150806] [] ? v9fs_get_sb+0x60/0x1e8 [ 15.150806] [] ? vfs_kern_mount+0x81/0x11a [ 15.150806] [] ? do_kern_mount+0x33/0xbe [ 15.150806] [] ? do_mount+0x654/0x6b3 [ 15.150806] [] ? do_page_fault+0x0/0x284 [ 15.150806] [] ? copy_mount_options+0x73/0xd2 [ 15.150806] [] ? sys_mount+0x61/0x94 [ 15.150806] [] ? syscall_call+0x7/0xb .... [ 15.203562] ---[ end trace 1dd159357709eb4b ]--- [ Signed-off-by: Aneesh Kumar K.V Signed-off-by: Eric Van Hensbergen --- net/9p/trans_virtio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 67c4bc704c5a..cb50f4ae5eef 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -102,7 +102,8 @@ static void p9_virtio_close(struct p9_client *client) struct virtio_chan *chan = client->trans; mutex_lock(&virtio_9p_lock); - chan->inuse = false; + if (chan) + chan->inuse = false; mutex_unlock(&virtio_9p_lock); } -- cgit v1.2.2 From 8781ff9495578dbb74065fae55305110d9f81cb9 Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Mon, 8 Feb 2010 18:18:34 -0600 Subject: 9p: fix p9_client_destroy unconditional calling v9fs_put_trans restructure client create code to handle error cases better and only cleanup initialized portions of the stack. Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index cbe066966b3c..09d4f1e2e4a8 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -676,18 +676,12 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) clnt->trans = NULL; spin_lock_init(&clnt->lock); INIT_LIST_HEAD(&clnt->fidlist); - clnt->fidpool = p9_idpool_create(); - if (IS_ERR(clnt->fidpool)) { - err = PTR_ERR(clnt->fidpool); - clnt->fidpool = NULL; - goto error; - } p9_tag_init(clnt); err = parse_opts(options, clnt); if (err < 0) - goto error; + goto free_client; if (!clnt->trans_mod) clnt->trans_mod = v9fs_get_default_trans(); @@ -696,7 +690,14 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) err = -EPROTONOSUPPORT; P9_DPRINTK(P9_DEBUG_ERROR, "No transport defined or default transport\n"); - goto error; + goto free_client; + } + + clnt->fidpool = p9_idpool_create(); + if (IS_ERR(clnt->fidpool)) { + err = PTR_ERR(clnt->fidpool); + clnt->fidpool = NULL; + goto put_trans; } P9_DPRINTK(P9_DEBUG_MUX, "clnt %p trans %p msize %d dotu %d\n", @@ -704,19 +705,25 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) err = clnt->trans_mod->create(clnt, dev_name, options); if (err) - goto error; + goto destroy_fidpool; if ((clnt->msize+P9_IOHDRSZ) > clnt->trans_mod->maxsize) clnt->msize = clnt->trans_mod->maxsize-P9_IOHDRSZ; err = p9_client_version(clnt); if (err) - goto error; + goto close_trans; return clnt; -error: - p9_client_destroy(clnt); +close_trans: + clnt->trans_mod->close(clnt); +destroy_fidpool: + p9_idpool_destroy(clnt->fidpool); +put_trans: + v9fs_put_trans(clnt->trans_mod); +free_client: + kfree(clnt); return ERR_PTR(err); } EXPORT_SYMBOL(p9_client_create); -- cgit v1.2.2 From d4ae20b3799e0b6fa0d832a645a422da9f239868 Mon Sep 17 00:00:00 2001 From: Jan Luebbe Date: Mon, 8 Feb 2010 22:41:44 -0800 Subject: net/sched: Fix module name in Kconfig The action modules have been prefixed with 'act_', but the Kconfig description was not changed. Signed-off-by: Jan Luebbe Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/Kconfig | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 929218a47620..21f9c7678aa3 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -433,7 +433,7 @@ config NET_ACT_POLICE module. To compile this code as a module, choose M here: the - module will be called police. + module will be called act_police. config NET_ACT_GACT tristate "Generic actions" @@ -443,7 +443,7 @@ config NET_ACT_GACT accepting packets. To compile this code as a module, choose M here: the - module will be called gact. + module will be called act_gact. config GACT_PROB bool "Probability support" @@ -459,7 +459,7 @@ config NET_ACT_MIRRED other devices. To compile this code as a module, choose M here: the - module will be called mirred. + module will be called act_mirred. config NET_ACT_IPT tristate "IPtables targets" @@ -469,7 +469,7 @@ config NET_ACT_IPT classification. To compile this code as a module, choose M here: the - module will be called ipt. + module will be called act_ipt. config NET_ACT_NAT tristate "Stateless NAT" @@ -479,7 +479,7 @@ config NET_ACT_NAT netfilter for NAT unless you know what you are doing. To compile this code as a module, choose M here: the - module will be called nat. + module will be called act_nat. config NET_ACT_PEDIT tristate "Packet Editing" @@ -488,7 +488,7 @@ config NET_ACT_PEDIT Say Y here if you want to mangle the content of packets. To compile this code as a module, choose M here: the - module will be called pedit. + module will be called act_pedit. config NET_ACT_SIMP tristate "Simple Example (Debug)" @@ -502,7 +502,7 @@ config NET_ACT_SIMP If unsure, say N. To compile this code as a module, choose M here: the - module will be called simple. + module will be called act_simple. config NET_ACT_SKBEDIT tristate "SKB Editing" @@ -513,7 +513,7 @@ config NET_ACT_SKBEDIT If unsure, say N. To compile this code as a module, choose M here: the - module will be called skbedit. + module will be called act_skbedit. config NET_CLS_IND bool "Incoming device classification" -- cgit v1.2.2 From 598856407d4e20ebb4de01a91a93d89325924d43 Mon Sep 17 00:00:00 2001 From: Damian Lukowski Date: Wed, 10 Feb 2010 18:04:08 -0800 Subject: tcp: fix ICMP-RTO war Make sure, that TCP has a nonzero RTT estimation after three-way handshake. Currently, a listening TCP has a value of 0 for srtt, rttvar and rto right after the three-way handshake is completed with TCP timestamps disabled. This will lead to corrupt RTO recalculation and retransmission flood when RTO is recalculated on backoff reversion as introduced in "Revert RTO on ICMP destination unreachable" (f1ecd5d9e7366609d640ff4040304ea197fbc618). This behaviour can be provoked by connecting to a server which "responds first" (like SMTP) and rejecting every packet after the handshake with dest-unreachable, which will lead to softirq load on the server (up to 30% per socket in some tests). Thanks to Ilpo Jarvinen for providing debug patches and to Denys Fedoryshchenko for reporting and testing. Changes since v3: Removed bad characters in patchfile. Reported-by: Denys Fedoryshchenko Signed-off-by: Damian Lukowski Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 28e029632493..3fddc69ccccc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5783,11 +5783,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, /* tcp_ack considers this ACK as duplicate * and does not calculate rtt. - * Fix it at least with timestamps. + * Force it here. */ - if (tp->rx_opt.saw_tstamp && - tp->rx_opt.rcv_tsecr && !tp->srtt) - tcp_ack_saw_tstamp(sk, 0); + tcp_ack_update_rtt(sk, 0, 0); if (tp->rx_opt.tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; -- cgit v1.2.2 From c6b471e6454c0e1c6d756672841cbaeae7c949f8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 7 Feb 2010 17:26:30 +0000 Subject: inet: Remove bogus IGMPv3 report handling Currently we treat IGMPv3 reports as if it were an IGMPv2/v1 report. This is broken as IGMPv3 reports are formatted differently. So we end up suppressing a bogus multicast group (which should be harmless as long as the leading reserved field is zero). In fact, IGMPv3 does not allow membership report suppression so we should simply ignore IGMPv3 membership reports as a host. This patch does exactly that. I kept the case statement for it so people won't accidentally add it back thinking that we overlooked this case. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 76c08402c933..a42f658e756a 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -946,7 +946,6 @@ int igmp_rcv(struct sk_buff *skb) break; case IGMP_HOST_MEMBERSHIP_REPORT: case IGMPV2_HOST_MEMBERSHIP_REPORT: - case IGMPV3_HOST_MEMBERSHIP_REPORT: /* Is it our report looped back? */ if (skb_rtable(skb)->fl.iif == 0) break; @@ -960,6 +959,7 @@ int igmp_rcv(struct sk_buff *skb) in_dev_put(in_dev); return pim_rcv_v1(skb); #endif + case IGMPV3_HOST_MEMBERSHIP_REPORT: case IGMP_DVMRP: case IGMP_TRACE: case IGMP_HOST_LEAVE_MESSAGE: -- cgit v1.2.2 From 5affcd6ba2036b59a4dee3f0576ae3584e92e4f1 Mon Sep 17 00:00:00 2001 From: Juuso Oikarinen Date: Fri, 12 Feb 2010 10:05:45 +0200 Subject: mac80211: fix handling of null-rate control in rate_control_get_rate For hardware with IEEE80211_HW_HAS_RATE_CONTROL the rate controller is not initialized. However, calling functions such as ieee80211_beacon_get result in the rate_control_get_rate function getting called, which is accessing (in this case uninitialized) rate control structures unconditionally. Fix by exiting the function before setting the rates for HW with IEEE80211_HW_HAS_RATE_CONTROL set. The initialization of the ieee80211_tx_info struct is intentionally still executed. Signed-off-by: Juuso Oikarinen Reviewed-by: Kalle Valo Cc: stable@kernel.org Signed-off-by: John W. Linville --- net/mac80211/rate.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index b9007f80cb92..12a2bff7dcdb 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -245,6 +245,9 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, info->control.rates[i].count = 1; } + if (sdata->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) + return; + if (sta && sdata->force_unicast_rateidx > -1) { info->control.rates[0].idx = sdata->force_unicast_rateidx; } else { -- cgit v1.2.2 From 1cab819b5e244e1b853c7b440981e6a960da3bfb Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 11 Feb 2010 13:48:29 +0000 Subject: ethtool: allow non-admin user to read GRO settings. Looks like an oversight in GRO design. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/ethtool.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index d8aee584e8d1..236a9988ea91 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -927,6 +927,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GPERMADDR: case ETHTOOL_GUFO: case ETHTOOL_GGSO: + case ETHTOOL_GGRO: case ETHTOOL_GFLAGS: case ETHTOOL_GPFLAGS: case ETHTOOL_GRXFH: -- cgit v1.2.2 From 10e7454ed7a2da39f1f6255f63d7df27ab4bb67f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Feb 2010 19:24:30 +0000 Subject: ipcomp: Avoid duplicate calls to ipcomp_destroy When ipcomp_tunnel_attach fails we will call ipcomp_destroy twice. This may lead to double-frees on certain structures. As there is no reason to explicitly call ipcomp_destroy, this patch removes it from ipcomp*.c and lets the standard xfrm_state destruction take place. This is based on the discovery and patch by Alexey Dobriyan. Tested-by: Alexey Dobriyan Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/ipcomp.c | 6 +----- net/ipv6/ipcomp6.c | 6 +----- 2 files changed, 2 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 38fbf04150ae..544ce0876f12 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -124,16 +124,12 @@ static int ipcomp4_init_state(struct xfrm_state *x) if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp_tunnel_attach(x); if (err) - goto error_tunnel; + goto out; } err = 0; out: return err; - -error_tunnel: - ipcomp_destroy(x); - goto out; } static const struct xfrm_type ipcomp_type = { diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 2f2a5ca2c878..002e6eef9120 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -154,16 +154,12 @@ static int ipcomp6_init_state(struct xfrm_state *x) if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp6_tunnel_attach(x); if (err) - goto error_tunnel; + goto out; } err = 0; out: return err; -error_tunnel: - ipcomp_destroy(x); - - goto out; } static const struct xfrm_type ipcomp6_type = -- cgit v1.2.2 From 553f9118abc4fc53674fff87f6fe5fa3f56a41ed Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Feb 2010 20:00:51 +0000 Subject: xfrm: Fix xfrm_state_clone leak xfrm_state_clone calls kfree instead of xfrm_state_put to free a failed state. Depending on the state of the failed state, it can cause leaks to things like module references. All states should be freed by xfrm_state_put past the point of xfrm_init_state. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index b36cc344474b..f445ea1c5f52 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1102,7 +1102,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) int err = -ENOMEM; struct xfrm_state *x = xfrm_state_alloc(net); if (!x) - goto error; + goto out; memcpy(&x->id, &orig->id, sizeof(x->id)); memcpy(&x->sel, &orig->sel, sizeof(x->sel)); @@ -1160,16 +1160,10 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) return x; error: + xfrm_state_put(x); +out: if (errp) *errp = err; - if (x) { - kfree(x->aalg); - kfree(x->ealg); - kfree(x->calg); - kfree(x->encap); - kfree(x->coaddr); - } - kfree(x); return NULL; } -- cgit v1.2.2 From 88af182e389097997c5e2a0b42285b3522796759 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 19 Feb 2010 13:22:59 +0000 Subject: net: Fix sysctl restarts... Yuck. It turns out that when we restart sysctls we were restarting with the values already changed. Which unfortunately meant that the second time through we thought there was no change and skipped all kinds of work, despite the fact that there was indeed a change. I have fixed this the simplest way possible by restoring the changed values when we restart the sysctl write. One of my coworkers spotted this bug when after disabling forwarding on an interface pings were still forwarded. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 7 ++++++- net/ipv6/addrconf.c | 16 ++++++++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 040c4f05b653..26dec2be9615 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1317,14 +1317,19 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, { int *valp = ctl->data; int val = *valp; + loff_t pos = *ppos; int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write && *valp != val) { struct net *net = ctl->extra2; if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) { - if (!rtnl_trylock()) + if (!rtnl_trylock()) { + /* Restore the original values before restarting */ + *valp = val; + *ppos = pos; return restart_syscall(); + } if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { inet_forward_change(net); } else if (*valp) { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index de7a194a64ab..143791da062c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -502,8 +502,11 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) if (p == &net->ipv6.devconf_dflt->forwarding) return 0; - if (!rtnl_trylock()) + if (!rtnl_trylock()) { + /* Restore the original values before restarting */ + *p = old; return restart_syscall(); + } if (p == &net->ipv6.devconf_all->forwarding) { __s32 newf = net->ipv6.devconf_all->forwarding; @@ -4028,12 +4031,15 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, { int *valp = ctl->data; int val = *valp; + loff_t pos = *ppos; int ret; ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write) ret = addrconf_fixup_forwarding(ctl, valp, val); + if (ret) + *ppos = pos; return ret; } @@ -4075,8 +4081,11 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int old) if (p == &net->ipv6.devconf_dflt->disable_ipv6) return 0; - if (!rtnl_trylock()) + if (!rtnl_trylock()) { + /* Restore the original values before restarting */ + *p = old; return restart_syscall(); + } if (p == &net->ipv6.devconf_all->disable_ipv6) { __s32 newf = net->ipv6.devconf_all->disable_ipv6; @@ -4095,12 +4104,15 @@ int addrconf_sysctl_disable(ctl_table *ctl, int write, { int *valp = ctl->data; int val = *valp; + loff_t pos = *ppos; int ret; ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write) ret = addrconf_disable_ipv6(ctl, valp, val); + if (ret) + *ppos = pos; return ret; } -- cgit v1.2.2 From b8afe6416101549e877f8470f2a160df69676166 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 19 Feb 2010 13:23:47 +0000 Subject: net-sysfs: Use rtnl_trylock in wireless sysfs methods. The wireless sysfs methods like the rest of the networking sysfs methods are removed with the rtnl_lock held and block until the existing methods stop executing. So use rtnl_trylock and restart_syscall so that the code continues to work. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index fbc1c7472c5e..099c753c4213 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -410,7 +410,8 @@ static ssize_t wireless_show(struct device *d, char *buf, const struct iw_statistics *iw; ssize_t ret = -EINVAL; - rtnl_lock(); + if (!rtnl_trylock()) + return restart_syscall(); if (dev_isalive(dev)) { iw = get_wireless_stats(dev); if (iw) -- cgit v1.2.2 From c4d49794ff2838038fd9756eae39c39a5a685833 Mon Sep 17 00:00:00 2001 From: Ajit Khaparde Date: Tue, 16 Feb 2010 20:25:43 +0000 Subject: net: bug fix for vlan + gro issue Traffic (tcp) doesnot start on a vlan interface when gro is enabled. Even the tcp handshake was not taking place. This is because, the eth_type_trans call before the netif_receive_skb in napi_gro_finish() resets the skb->dev to napi->dev from the previously set vlan netdev interface. This causes the ip_route_input to drop the incoming packet considering it as a packet coming from a martian source. I could repro this on 2.6.32.7 (stable) and 2.6.33-rc7. With this fix, the traffic starts and the test runs fine on both vlan and non-vlan interfaces. CC: Herbert Xu CC: Patrick McHardy Signed-off-by: Ajit Khaparde Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index be9924f60ec3..ec874218b206 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2761,7 +2761,7 @@ gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, switch (ret) { case GRO_NORMAL: case GRO_HELD: - skb->protocol = eth_type_trans(skb, napi->dev); + skb->protocol = eth_type_trans(skb, skb->dev); if (ret == GRO_HELD) skb_gro_pull(skb, -ETH_HLEN); -- cgit v1.2.2